import logging
import math
from dataclasses import dataclass
from typing import Any

import numpy as np
from pydantic import BaseModel, Field
# Finite integer stand-ins for "infinity": large enough to act as open-ended
# limits while remaining plain ints that support ordinary integer arithmetic.
POSITIVE_INFINITY = 10**100
NEGATIVE_INFINITY = -POSITIVE_INFINITY
[docs]
@dataclass
class NumberInterval:
"""
Represents a numeric interval with a lower and upper bound.
Attributes:
lower_bound (int): The lower limit of the interval.
upper_bound (int): The upper limit of the interval.
"""
lower_bound: int
upper_bound: int
[docs]
@staticmethod
def create_unbounded_interval():
"""
Creates a NumberInterval with no bounds, spanning from negative to positive infinity.
"""
return NumberInterval(NEGATIVE_INFINITY, POSITIVE_INFINITY)
[docs]
@staticmethod
def create_positive_unbounded_interval():
"""
Creates a NumberInterval, spanning from 0 to positive infinity.
"""
return NumberInterval(0, POSITIVE_INFINITY)
def __post_init__(self):
if self.lower_bound > self.upper_bound:
raise ValueError("Lower bound must be less than or equal to upper bound")
@property
def range(self) -> int:
"""
Returns the range of the interval.
"""
return self.upper_bound - self.lower_bound
def __contains__(self, item: float | int):
"""
Checks if a given number is within the interval.
"""
return self.lower_bound <= item <= self.upper_bound
def __eq__(self, other):
return (
self.lower_bound == other.lower_bound
and self.upper_bound == other.upper_bound
)
def __hash__(self):
return hash((self.lower_bound, self.upper_bound))
class NormalizedNumberGenerator(BaseModel):
    """
    Generates random numbers based on a normal distribution, constrained by a numeric interval.

    Numbers are drawn from the normal distribution, rounded to integers, and
    re-drawn until one falls inside ``number_bounds`` (rejection sampling).

    Attributes:
        mean (float): The mean of the normal distribution.
        number_bounds (NumberInterval): The bounds within which generated numbers must fall.
            Defaults to an unbounded interval.
        standard_deviation (float): The standard deviation of the normal distribution.
            Must be greater than 0.
    """

    mean: float
    number_bounds: NumberInterval = NumberInterval.create_unbounded_interval()
    standard_deviation: float = Field(gt=0)
    # Logger used for the performance warnings emitted during generation.
    _logger = logging.getLogger(__name__)

    def model_post_init(self, context: Any) -> None:
        """
        Validates that the mean lies within the specified bounds.

        Args:
            context: Context object passed to this hook; unused here.

        Raises:
            ValueError: If the mean is outside the number bounds.
        """
        if self.mean not in self.number_bounds:
            raise ValueError("Mean must be within the number bounds")

    def _generate_random_normal_distribution_number(self) -> int:
        """
        Generates a random number based on a normal distribution and rounds it to an integer.
        """
        return round(np.random.normal(self.mean, self.standard_deviation))

    def generate_bounded_number(self) -> int:
        """
        Generates a random number that falls within the specified bounds.

        Samples are drawn repeatedly until one lies inside ``number_bounds``.
        A warning is logged up front when the interval is narrower than the
        standard deviation, and again while sampling once the number of
        retries becomes large.

        Returns:
            int: A rounded sample that lies within ``number_bounds``.
        """
        if self.number_bounds.range < self.standard_deviation:
            self._logger.warning(
                "This calculation might take a long time due to the small range of the number bounds."
            )

        number = self._generate_random_normal_distribution_number()
        repetitions = 0
        # Warn when this many retries have been exceeded; the threshold is
        # doubled after each warning so a long-running loop still reports
        # progress without emitting one log line per iteration.
        next_warning_after = 100
        while number not in self.number_bounds:
            repetitions += 1
            number = self._generate_random_normal_distribution_number()
            if repetitions > next_warning_after:
                self._logger.warning(
                    "Number generation took %d repetitions; this could lead to performance problems.",
                    repetitions,
                )
                next_warning_after *= 2
        return number
class NumberIntervalGenerator(BaseModel):
    """
    Generates a random numeric interval based on a normal distribution.

    The lower bound is drawn from ``lower_number_generator``. The difference
    between lower and upper bound is calculated on a logarithmic scale so that
    larger lower bounds get a larger difference. For example, with
    ``min_upper_bound_log_base = 2`` and ``upper_bound_difference_log_factor = 5``:

        lower_bound=8   -> log(8, 2) * 5   = 3 * 5 = 15  -> upper_bound = 23
        lower_bound=256 -> log(256, 2) * 5 = 8 * 5 = 40  -> upper_bound = 296

    Attributes:
        mean (float): The mean of the normal distribution.
        standard_deviation (float): The standard deviation of the normal distribution.
            Must be greater than 0.
        upper_bound_difference_log_factor (float): Factor the logarithm of the lower bound
            is multiplied with to obtain the distance to the upper bound. Must be >= 1.
        min_upper_bound_log_base (float): Base for the logarithmic calculation of the
            upper bound. Must be greater than 1.
        lower_number_bounds (NumberInterval): Bounds for generating the lower number.
        lower_number_generator (NormalizedNumberGenerator | None): Generator for the lower
            bound value. When omitted (or ``None``), one is built from ``mean``,
            ``standard_deviation`` and ``lower_number_bounds``.
    """

    mean: float
    standard_deviation: float = Field(gt=0)
    upper_bound_difference_log_factor: float = Field(default=5, ge=1)
    min_upper_bound_log_base: float = Field(default=2, gt=1)
    lower_number_bounds: NumberInterval = (
        NumberInterval.create_positive_unbounded_interval()
    )
    # Optional so that the ``None`` default matches the declared type and
    # callers may pass ``None`` explicitly.
    lower_number_generator: NormalizedNumberGenerator | None = None

    def model_post_init(self, __context: Any) -> None:
        """
        Initializes the lower number generator if it is not provided.

        Args:
            __context: Context object passed to this hook; unused here.
        """
        if self.lower_number_generator is None:
            self.lower_number_generator = NormalizedNumberGenerator(
                number_bounds=self.lower_number_bounds,
                mean=self.mean,
                standard_deviation=self.standard_deviation,
            )

    def _generate_upper_bound(self, lower_bound: int) -> int:
        """
        Calculates the upper bound based on the lower bound using logarithmic scaling.

        Args:
            lower_bound: The already generated lower bound of the interval.

        Returns:
            int: ``lower_bound`` plus the scaled logarithm, rounded to an integer.
        """
        # Clamp the logarithm's argument to the base so the logarithm is
        # always defined and at least 1, even for lower bounds <= the base.
        lower_bound_log = math.log(
            max(float(lower_bound), self.min_upper_bound_log_base),
            self.min_upper_bound_log_base,
        )
        upper_bound_total_factor = (
            lower_bound_log * self.upper_bound_difference_log_factor
        )
        return round(lower_bound + upper_bound_total_factor)

    def generate_interval(self) -> NumberInterval:
        """
        Generates a random numeric interval consisting of a lower and upper bound.

        Returns:
            NumberInterval: A randomly generated numeric interval.
        """
        lower_bound = self.lower_number_generator.generate_bounded_number()
        upper_bound = self._generate_upper_bound(lower_bound)
        return NumberInterval(lower_bound=lower_bound, upper_bound=upper_bound)