Spaces:
Paused
Paused
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
def generate_synthetic_data(num_farms=10, days=365, *, end_date=None, seed=42):
    """Generate a synthetic daily dairy-farm dataset.

    One row is produced per farm per calendar day. Every numeric value is
    drawn independently for each row, including the herd size and the
    per-cow base production.

    Args:
        num_farms: Number of farms to simulate. Farm IDs are formatted as
            ``Farm_001``, ``Farm_002``, ... A value of 0 or less yields an
            empty frame.
        days: Number of consecutive days per farm, ending on ``end_date``
            (inclusive). A value of 0 or less yields an empty frame.
        end_date: Last day of the generated range. Anything accepted by
            ``pd.Timestamp`` (date, datetime, ISO string); only the calendar
            date is used. ``None`` (the default) means "today", matching the
            previous behaviour. Pass an explicit date for output that is
            reproducible across days.
        seed: Seed for the random stream. The default of 42 reproduces the
            values the function has always produced; ``None`` requests a
            non-deterministic stream.

    Returns:
        A ``pandas.DataFrame`` with the columns ``farm_id``, ``date``,
        ``protein_content`` (%), ``fiber_content`` (%), ``energy_content``
        (Mcal/kg), ``body_condition_score``, ``somatic_cell_count``,
        ``temperature`` (°C), ``humidity`` (%), ``num_cows`` and
        ``milk_production`` (liters, whole herd). The columns are present
        even when no rows are generated.
    """
    columns = [
        'farm_id',
        'date',
        'protein_content',
        'fiber_content',
        'energy_content',
        'body_condition_score',
        'somatic_cell_count',
        'temperature',
        'humidity',
        'num_cows',
        'milk_production',
    ]

    # Use a private generator instead of np.random.seed(): it yields the same
    # Mersenne-Twister stream for a given seed, but does not reset the
    # process-wide RNG that unrelated code may be relying on.
    rng = np.random.RandomState(seed)

    # Generate farm IDs
    farm_ids = [f"Farm_{i:03d}" for i in range(1, num_farms + 1)]

    # Generate dates
    if end_date is None:
        end_date = datetime.now().date()
    else:
        # Normalise strings / datetimes / Timestamps to a plain calendar date.
        end_date = pd.Timestamp(end_date).date()
    start_date = end_date - timedelta(days=days - 1)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    data = []
    for farm_id in farm_ids:
        for date in date_range:
            # NOTE: the order of the draws below is significant -- changing it
            # changes every value produced for a given seed.

            # Generate feed composition data
            protein_content = rng.uniform(14, 18)  # %
            fiber_content = rng.uniform(17, 23)  # %
            energy_content = rng.uniform(1.5, 1.8)  # Mcal/kg

            # Generate cattle health indicators
            body_condition_score = rng.uniform(2.5, 4.5)
            somatic_cell_count = rng.lognormal(mean=5, sigma=0.5)

            # Generate environmental conditions
            temperature = rng.normal(15, 5)  # °C
            humidity = rng.uniform(40, 80)  # %

            # Generate milk production
            base_production = rng.uniform(20, 35)  # Liters per cow
            num_cows = rng.randint(50, 500)  # upper bound is exclusive

            # +/-10% sinusoidal seasonal variation over a 365-day period
            # (day 366 of a leap year runs slightly past one full cycle).
            seasonal_factor = 1 + 0.1 * np.sin(2 * np.pi * date.dayofyear / 365)

            # Total herd production with a further +/-10% random noise factor.
            milk_production = (
                base_production * num_cows * seasonal_factor * rng.uniform(0.9, 1.1)
            )

            data.append({
                'farm_id': farm_id,
                'date': date,
                'protein_content': protein_content,
                'fiber_content': fiber_content,
                'energy_content': energy_content,
                'body_condition_score': body_condition_score,
                'somatic_cell_count': somatic_cell_count,
                'temperature': temperature,
                'humidity': humidity,
                'num_cows': num_cows,
                'milk_production': milk_production,
            })

    # Passing the column list keeps the schema intact when `data` is empty.
    return pd.DataFrame(data, columns=columns)
# Build the default dataset: 10 farms, 365 consecutive days each.
synthetic_data = generate_synthetic_data(days=365, num_farms=10)

# Preview the result: the leading rows, followed by summary statistics.
print(synthetic_data.head(), synthetic_data.describe(), sep="\n")

# Write the dataset to CSV (row index omitted) and confirm on stdout.
synthetic_data.to_csv('dairy_farm_synthetic_data.csv', index=False)
print("Data saved to 'dairy_farm_synthetic_data.csv'")