Source code for util.number_interval_generator

import logging
import math
from dataclasses import dataclass
from typing import Any

import numpy as np
from pydantic import BaseModel, Field

# Finite integer stand-ins for +infinity / -infinity. NumberInterval's factory
# methods use them as the bounds of "unbounded" intervals.
POSITIVE_INFINITY = 10 ** 100
NEGATIVE_INFINITY = -POSITIVE_INFINITY


[docs] @dataclass class NumberInterval: """ Represents a numeric interval with a lower and upper bound. Attributes: lower_bound (int): The lower limit of the interval. upper_bound (int): The upper limit of the interval. """ lower_bound: int upper_bound: int
[docs] @staticmethod def create_unbounded_interval(): """ Creates a NumberInterval with no bounds, spanning from negative to positive infinity. """ return NumberInterval(NEGATIVE_INFINITY, POSITIVE_INFINITY)
[docs] @staticmethod def create_positive_unbounded_interval(): """ Creates a NumberInterval, spanning from 0 to positive infinity. """ return NumberInterval(0, POSITIVE_INFINITY)
def __post_init__(self): if self.lower_bound > self.upper_bound: raise ValueError("Lower bound must be less than or equal to upper bound") @property def range(self) -> int: """ Returns the range of the interval. """ return self.upper_bound - self.lower_bound def __contains__(self, item: float | int): """ Checks if a given number is within the interval. """ return self.lower_bound <= item <= self.upper_bound def __eq__(self, other): return ( self.lower_bound == other.lower_bound and self.upper_bound == other.upper_bound ) def __hash__(self): return hash((self.lower_bound, self.upper_bound))
class NormalizedNumberGenerator(BaseModel):
    """
    Generates random integers based on a normal distribution, constrained by a
    numeric interval.

    Numbers are drawn by rejection sampling: a value is sampled, rounded to an
    integer, and re-drawn until it falls inside ``number_bounds``.

    Attributes:
        mean (float): The mean of the normal distribution.
        number_bounds (NumberInterval): The bounds within which generated
            numbers must fall. Defaults to an unbounded interval.
        standard_deviation (float): The standard deviation of the normal
            distribution. Must be greater than 0.
    """

    mean: float
    number_bounds: NumberInterval = NumberInterval.create_unbounded_interval()
    standard_deviation: float = Field(gt=0)
    _logger = logging.getLogger(__name__)

    def model_post_init(self, context: Any):
        """
        Validates that the mean lies within the specified bounds.

        Raises:
            ValueError: If the mean is outside the number bounds.
        """
        if self.mean not in self.number_bounds:
            raise ValueError("Mean must be within the number bounds")

    def _generate_random_normal_distribution_number(self) -> int:
        """
        Draws one sample from the normal distribution and rounds it to an
        integer. The result is NOT checked against ``number_bounds``.
        """
        return round(np.random.normal(self.mean, self.standard_deviation))

    def generate_bounded_number(self) -> int:
        """
        Generates a random integer that falls within the specified bounds.

        Samples are re-drawn until one lies inside ``number_bounds``. There is
        no retry limit, so a very narrow interval relative to the standard
        deviation can take many attempts. A warning is logged up front when the
        interval is narrower than the standard deviation, and once more after
        sampling if more than 100 re-draws were needed.

        Returns:
            int: A sampled number contained in ``number_bounds``.
        """
        if self.number_bounds.range < self.standard_deviation:
            self._logger.warning(
                "This calculation might take a long time due to the small range of the number bounds."
            )

        number = self._generate_random_normal_distribution_number()
        repetitions = 0
        while number not in self.number_bounds:
            repetitions += 1
            number = self._generate_random_normal_distribution_number()

        # Report slow sampling once, with the final retry count, instead of
        # logging on every retry.
        if repetitions > 100:
            self._logger.warning(
                "Number generation took %d repetitions; this could lead to performance problems.",
                repetitions,
            )
        return number
class NumberIntervalGenerator(BaseModel):
    """
    Generates a random numeric interval based on a normal distribution.

    The lower bound is drawn from ``lower_number_generator``. The difference
    between lower and upper bound is calculated on a logarithmic scale of the
    lower bound, so that larger values get a larger difference. For example,
    with ``min_upper_bound_log_base = 2`` and
    ``upper_bound_difference_log_factor = 5``:

        lower_bound=8   -> log(8, 2) * 5   = 3 * 5 = 15
        lower_bound=256 -> log(256, 2) * 5 = 8 * 5 = 40

    Attributes:
        mean (float): The mean of the normal distribution.
        standard_deviation (float): The standard deviation of the normal
            distribution. Must be greater than 0.
        upper_bound_difference_log_factor (float): Factor the logarithm of the
            lower bound is multiplied with to obtain the distance to the upper
            bound. Must be at least 1.
        min_upper_bound_log_base (float): Base for the logarithmic calculation
            of the upper bound. Must be greater than 1.
        lower_number_bounds (NumberInterval): Bounds for generating the lower
            number. Defaults to the interval from 0 to positive infinity.
        lower_number_generator (NormalizedNumberGenerator | None): Generator
            for the lower bound value. When left as ``None`` it is built from
            ``mean``, ``standard_deviation`` and ``lower_number_bounds``.
    """

    mean: float
    standard_deviation: float = Field(gt=0)
    upper_bound_difference_log_factor: float = Field(default=5, ge=1)
    min_upper_bound_log_base: float = Field(default=2, gt=1)
    lower_number_bounds: NumberInterval = (
        NumberInterval.create_positive_unbounded_interval()
    )
    # Optional at construction time; model_post_init fills it in when missing.
    lower_number_generator: NormalizedNumberGenerator | None = None

    def model_post_init(self, __context: Any) -> None:
        """
        Initializes the lower number generator if it is not provided.
        """
        if self.lower_number_generator is None:
            self.lower_number_generator = NormalizedNumberGenerator(
                number_bounds=self.lower_number_bounds,
                mean=self.mean,
                standard_deviation=self.standard_deviation,
            )

    def _generate_upper_bound(self, lower_bound: int) -> int:
        """
        Calculates the upper bound based on the lower bound using logarithmic
        scaling.

        Args:
            lower_bound (int): The already generated lower bound.

        Returns:
            int: ``lower_bound`` plus the scaled logarithm, rounded to an
            integer.
        """
        # Clamp the logarithm's argument to the base so that it is defined for
        # lower bounds <= 0 and the logarithm never drops below 1.
        log_argument = max(float(lower_bound), self.min_upper_bound_log_base)
        lower_bound_log = math.log(log_argument, self.min_upper_bound_log_base)
        upper_bound_total_factor = (
            lower_bound_log * self.upper_bound_difference_log_factor
        )
        return round(lower_bound + upper_bound_total_factor)

    def generate_interval(self) -> NumberInterval:
        """
        Generates a random numeric interval consisting of a lower and upper
        bound.

        Returns:
            NumberInterval: A randomly generated numeric interval.
        """
        lower_bound = self.lower_number_generator.generate_bounded_number()
        upper_bound = self._generate_upper_bound(lower_bound)
        return NumberInterval(lower_bound=lower_bound, upper_bound=upper_bound)