File size: 5,688 Bytes
f8f42e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
"""
Numeric Data Generator
Generates various types of numeric data including integers, floats, percentages, etc.
"""
import logging
import random
from typing import Any, Dict, List, Optional, Union

import numpy as np

from .base_generator import BaseGenerator
class NumericGenerator(BaseGenerator):
    """Generator for numeric data types.

    Every supported type name in ``numeric_types`` maps to a private
    ``_generate_*`` method that returns a single value per call.
    ``generate`` calls the selected method ``count`` times and then
    post-processes the resulting list.

    NOTE(review): values are drawn from the global ``random`` and
    ``numpy.random`` states, so reproducibility presumably relies on
    ``BaseGenerator.__init__`` seeding both -- confirm against the base class.
    """

    def __init__(self, seed: Optional[int] = None):
        """Initialise the base generator and register the dispatch table.

        Args:
            seed: Optional seed, forwarded unchanged to ``BaseGenerator``.
        """
        super().__init__(seed)
        # Dispatch table: public type name -> bound single-value generator.
        self.numeric_types = {
            'integer': self._generate_integer,
            'float': self._generate_float,
            'percentage': self._generate_percentage,
            'currency': self._generate_currency,
            'id': self._generate_id,
            'transaction_amount': self._generate_transaction_amount,
            'salary': self._generate_salary,
            'age': self._generate_age,
            'temperature': self._generate_temperature,
            'humidity': self._generate_humidity,
            'latitude': self._generate_latitude,
            'longitude': self._generate_longitude,
            'rating': self._generate_rating,
            'score': self._generate_score,
        }

    def generate(
        self,
        count: int,
        numeric_type: str = 'integer',
        **kwargs,
    ) -> List[Union[int, float]]:
        """Generate ``count`` values of the requested numeric type.

        Args:
            count: Number of values to produce.
            numeric_type: Key of ``self.numeric_types`` selecting the generator.
            **kwargs: Forwarded to the selected generator (for example
                ``min_val`` / ``max_val``) and to ``apply_constraints``. When
                ``outlier_percentage`` is present, its value is also passed
                to ``introduce_outliers``.

        Returns:
            The generated values after ``apply_constraints`` and, if
            requested, ``introduce_outliers`` have been applied.

        Raises:
            ValueError: If ``numeric_type`` is not a registered type.
        """
        if numeric_type not in self.numeric_types:
            raise ValueError(f"Unknown numeric type: {numeric_type}")

        generator_func = self.numeric_types[numeric_type]
        data = []
        failures = 0
        first_error = None
        for _ in range(count):
            try:
                data.append(generator_func(**kwargs))
            except Exception as exc:  # deliberate best-effort: never abort a batch
                if first_error is None:
                    first_error = exc
                failures += 1
                # Fallback to basic integer generation
                data.append(random.randint(1, 100))

        if failures:
            # Report once per call instead of once per value so a bad argument
            # (e.g. min_val > max_val for an integer type) is visible without
            # flooding the log.
            logging.getLogger(__name__).warning(
                "%d of %d '%s' values fell back to random.randint(1, 100); "
                "first error: %r",
                failures, count, numeric_type, first_error,
            )

        # Apply constraints
        data = self.apply_constraints(data, kwargs)

        # Apply outliers if specified
        if 'outlier_percentage' in kwargs:
            data = self.introduce_outliers(data, kwargs['outlier_percentage'])

        return data

    @staticmethod
    def _rounded_uniform(min_val: float, max_val: float, decimal_places: int) -> float:
        """Return a uniform draw between the bounds, rounded to ``decimal_places``."""
        return round(random.uniform(min_val, max_val), decimal_places)

    @staticmethod
    def _clipped_normal(min_val: float, max_val: float, decimal_places: int) -> float:
        """Return a normal draw centred on the range midpoint, clamped to the range."""
        mean = (min_val + max_val) / 2
        # The range spans six standard deviations, so roughly 99.7% of draws
        # already fall inside it before clamping.
        std = (max_val - min_val) / 6
        # float() keeps the result a plain Python float whether or not the
        # draw ends up being clamped.
        value = float(np.random.normal(mean, std))
        return round(max(min(value, max_val), min_val), decimal_places)

    def _generate_integer(self, min_val: int = 0, max_val: int = 100, **kwargs) -> int:
        """Generate a random integer in ``[min_val, max_val]`` (both inclusive)."""
        return random.randint(min_val, max_val)

    def _generate_float(self, min_val: float = 0.0, max_val: float = 100.0,
                        decimal_places: int = 2, **kwargs) -> float:
        """Generate a uniform random float rounded to ``decimal_places``."""
        return self._rounded_uniform(min_val, max_val, decimal_places)

    def _generate_percentage(self, min_val: float = 0.0, max_val: float = 100.0, **kwargs) -> float:
        """Generate a uniform percentage value rounded to 2 decimal places."""
        return self._rounded_uniform(min_val, max_val, 2)

    def _generate_currency(self, min_val: float = 0.0, max_val: float = 10000.0, **kwargs) -> float:
        """Generate a uniform currency amount rounded to 2 decimal places."""
        return self._rounded_uniform(min_val, max_val, 2)

    def _generate_id(self, prefix: str = '', min_val: int = 1, max_val: int = 999999, **kwargs) -> int:
        """Generate a numeric ID in ``[min_val, max_val]`` (both inclusive).

        ``prefix`` is accepted but not used: the returned ID is a bare integer.
        """
        return random.randint(min_val, max_val)

    def _generate_transaction_amount(self, min_val: float = 0.01, max_val: float = 10000.0, **kwargs) -> float:
        """Generate a transaction amount clamped to the range, 2 decimal places."""
        # Use log-normal distribution for more realistic transaction amounts;
        # with mu = log(100) the median of the unclamped draw is 100.
        mu = np.log(100)  # Mean of log
        sigma = 1.0  # Standard deviation of log
        value = float(np.random.lognormal(mu, sigma))
        return round(min(max(value, min_val), max_val), 2)

    def _generate_salary(self, min_val: float = 30000.0, max_val: float = 200000.0, **kwargs) -> float:
        """Generate a normally distributed salary clamped to the range, 2 decimal places."""
        return self._clipped_normal(min_val, max_val, 2)

    def _generate_age(self, min_val: int = 18, max_val: int = 80, **kwargs) -> int:
        """Generate an integer age clamped to ``[min_val, max_val]``.

        The draw uses a fixed normal distribution (mean 35, standard
        deviation 15) regardless of the bounds; the bounds only clamp it.
        """
        mean = 35
        std = 15
        # int() truncates the draw toward zero before clamping.
        value = int(np.random.normal(mean, std))
        return max(min(value, max_val), min_val)

    def _generate_temperature(self, min_val: float = -10.0, max_val: float = 40.0, **kwargs) -> float:
        """Generate a uniform temperature value rounded to 1 decimal place."""
        return self._rounded_uniform(min_val, max_val, 1)

    def _generate_humidity(self, min_val: float = 0.0, max_val: float = 100.0, **kwargs) -> float:
        """Generate a uniform humidity percentage rounded to 1 decimal place."""
        return self._rounded_uniform(min_val, max_val, 1)

    def _generate_latitude(self, min_val: float = -90.0, max_val: float = 90.0, **kwargs) -> float:
        """Generate a uniform latitude value rounded to 6 decimal places."""
        return self._rounded_uniform(min_val, max_val, 6)

    def _generate_longitude(self, min_val: float = -180.0, max_val: float = 180.0, **kwargs) -> float:
        """Generate a uniform longitude value rounded to 6 decimal places."""
        return self._rounded_uniform(min_val, max_val, 6)

    def _generate_rating(self, min_val: float = 1.0, max_val: float = 5.0, **kwargs) -> float:
        """Generate a uniform rating value rounded to 1 decimal place."""
        return self._rounded_uniform(min_val, max_val, 1)

    def _generate_score(self, min_val: float = 0.0, max_val: float = 100.0, **kwargs) -> float:
        """Generate a normally distributed score clamped to the range, 1 decimal place."""
        return self._clipped_normal(min_val, max_val, 1)
|