# synthetic-data-generator/generators/numeric_generator.py
# SherinJosephRoy's picture
# Upload generators/numeric_generator.py with huggingface_hub
# f8f42e3 verified
"""
Numeric Data Generator
Generates various types of numeric data including integers, floats, percentages, etc.
"""
import logging
import random
from typing import Any, Dict, List, Optional, Union

import numpy as np

from .base_generator import BaseGenerator
class NumericGenerator(BaseGenerator):
    """Generator for numeric data types.

    Each supported type name (``'integer'``, ``'float'``, ``'salary'``, ...)
    maps to a private ``_generate_*`` method that produces one value per call.
    Values are drawn from the module-level ``random`` and ``np.random`` state.
    """

    def __init__(self, seed: Optional[int] = None):
        """Create the generator and register the supported numeric types.

        Args:
            seed: Forwarded unchanged to ``BaseGenerator.__init__``.
        """
        super().__init__(seed)
        # Dispatch table: public type name -> single-value generator method.
        self.numeric_types = {
            'integer': self._generate_integer,
            'float': self._generate_float,
            'percentage': self._generate_percentage,
            'currency': self._generate_currency,
            'id': self._generate_id,
            'transaction_amount': self._generate_transaction_amount,
            'salary': self._generate_salary,
            'age': self._generate_age,
            'temperature': self._generate_temperature,
            'humidity': self._generate_humidity,
            'latitude': self._generate_latitude,
            'longitude': self._generate_longitude,
            'rating': self._generate_rating,
            'score': self._generate_score,
        }

    def generate(self, count: int, numeric_type: str = 'integer', **kwargs) -> List[Union[int, float]]:
        """Generate ``count`` values of the requested numeric type.

        Args:
            count: Number of values to produce.
            numeric_type: One of the keys of ``self.numeric_types``.
            **kwargs: Forwarded to the per-value generator (e.g. ``min_val``,
                ``max_val``) and, as a dict, to ``self.apply_constraints``.
                If ``'outlier_percentage'`` is present, its value is passed to
                ``self.introduce_outliers``.

        Returns:
            The generated values after constraints (and optional outliers)
            have been applied.

        Raises:
            ValueError: If ``numeric_type`` is not a registered type.
        """
        if numeric_type not in self.numeric_types:
            raise ValueError(f"Unknown numeric type: {numeric_type}")

        generator_func = self.numeric_types[numeric_type]
        data = []
        failures = 0
        first_error: Optional[Exception] = None

        for _ in range(count):
            try:
                value = generator_func(**kwargs)
            except Exception as exc:
                # Best-effort: keep the output length intact with a basic
                # integer, but remember the failure so it can be reported.
                failures += 1
                if first_error is None:
                    first_error = exc
                value = random.randint(1, 100)
            data.append(value)

        if failures:
            # One summary line per call instead of one per value, so a bad
            # configuration is visible without flooding the log.
            logging.getLogger(__name__).warning(
                "%d of %d '%s' values could not be generated and were replaced "
                "by random integers in [1, 100]; first error: %r",
                failures, count, numeric_type, first_error,
            )

        # Apply constraints
        data = self.apply_constraints(data, kwargs)

        # Apply outliers if specified
        if 'outlier_percentage' in kwargs:
            data = self.introduce_outliers(data, kwargs['outlier_percentage'])

        return data

    @staticmethod
    def _rounded_uniform(min_val: float, max_val: float, decimal_places: int) -> float:
        """Draw uniformly between the bounds and round to ``decimal_places``."""
        return round(random.uniform(min_val, max_val), decimal_places)

    @staticmethod
    def _clamped_normal(min_val: float, max_val: float, decimal_places: int) -> float:
        """Draw from a normal centred on the range midpoint, clamped to the range.

        The standard deviation is one sixth of the range width, so the bounds
        sit three standard deviations either side of the mean.
        """
        mean = (min_val + max_val) / 2
        std = (max_val - min_val) / 6
        # float() turns the NumPy scalar into a builtin float so clamped and
        # unclamped results share one type.
        value = float(np.random.normal(mean, std))
        return round(max(min(value, max_val), min_val), decimal_places)

    def _generate_integer(self, min_val: int = 0, max_val: int = 100, **kwargs) -> int:
        """Generate a random integer in ``[min_val, max_val]`` (inclusive)."""
        return random.randint(min_val, max_val)

    def _generate_float(self, min_val: float = 0.0, max_val: float = 100.0,
                        decimal_places: int = 2, **kwargs) -> float:
        """Generate a uniform random float rounded to ``decimal_places``."""
        return self._rounded_uniform(min_val, max_val, decimal_places)

    def _generate_percentage(self, min_val: float = 0.0, max_val: float = 100.0, **kwargs) -> float:
        """Generate a percentage value rounded to 2 decimal places."""
        return self._rounded_uniform(min_val, max_val, 2)

    def _generate_currency(self, min_val: float = 0.0, max_val: float = 10000.0, **kwargs) -> float:
        """Generate a currency amount rounded to 2 decimal places."""
        return self._rounded_uniform(min_val, max_val, 2)

    def _generate_id(self, prefix: str = '', min_val: int = 1, max_val: int = 999999, **kwargs) -> int:
        """Generate a numeric ID in ``[min_val, max_val]`` (inclusive).

        ``prefix`` is accepted for signature compatibility but is not used:
        the result is always a plain integer.
        """
        return random.randint(min_val, max_val)

    def _generate_transaction_amount(self, min_val: float = 0.01, max_val: float = 10000.0,
                                     mu: Optional[float] = None, sigma: float = 1.0,
                                     **kwargs) -> float:
        """Generate a transaction amount from a log-normal distribution.

        Args:
            min_val: Lower clamp applied to the drawn value.
            max_val: Upper clamp applied to the drawn value.
            mu: Mean of the underlying normal (of the log); defaults to
                ``log(100)``.
            sigma: Standard deviation of the underlying normal (of the log).

        Returns:
            The clamped amount rounded to 2 decimal places.
        """
        if mu is None:
            mu = np.log(100)
        # Log-normal gives many small amounts and a long tail of large ones.
        value = float(np.random.lognormal(mu, sigma))
        return round(min(max(value, min_val), max_val), 2)

    def _generate_salary(self, min_val: float = 30000.0, max_val: float = 200000.0, **kwargs) -> float:
        """Generate a salary: normal around the range midpoint, 2 decimals."""
        return self._clamped_normal(min_val, max_val, 2)

    def _generate_age(self, min_val: int = 18, max_val: int = 80,
                      mean: float = 35, std: float = 15, **kwargs) -> int:
        """Generate an age from a normal distribution, clamped to the range.

        Args:
            min_val: Lowest age returned.
            max_val: Highest age returned.
            mean: Centre of the normal distribution (default 35).
            std: Standard deviation of the normal distribution (default 15).

        Returns:
            The drawn value truncated to an integer and clamped to
            ``[min_val, max_val]``.
        """
        value = int(np.random.normal(mean, std))
        return max(min(value, max_val), min_val)

    def _generate_temperature(self, min_val: float = -10.0, max_val: float = 40.0, **kwargs) -> float:
        """Generate a temperature value rounded to 1 decimal place."""
        return self._rounded_uniform(min_val, max_val, 1)

    def _generate_humidity(self, min_val: float = 0.0, max_val: float = 100.0, **kwargs) -> float:
        """Generate a humidity percentage rounded to 1 decimal place."""
        return self._rounded_uniform(min_val, max_val, 1)

    def _generate_latitude(self, min_val: float = -90.0, max_val: float = 90.0, **kwargs) -> float:
        """Generate a latitude value rounded to 6 decimal places."""
        return self._rounded_uniform(min_val, max_val, 6)

    def _generate_longitude(self, min_val: float = -180.0, max_val: float = 180.0, **kwargs) -> float:
        """Generate a longitude value rounded to 6 decimal places."""
        return self._rounded_uniform(min_val, max_val, 6)

    def _generate_rating(self, min_val: float = 1.0, max_val: float = 5.0, **kwargs) -> float:
        """Generate a rating value rounded to 1 decimal place."""
        return self._rounded_uniform(min_val, max_val, 1)

    def _generate_score(self, min_val: float = 0.0, max_val: float = 100.0, **kwargs) -> float:
        """Generate a score: normal around the range midpoint, 1 decimal."""
        return self._clamped_normal(min_val, max_val, 1)