from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# Column order of the generated frame; also used to give an empty result a schema.
_COLUMNS = [
    'farm_id',
    'date',
    'protein_content',
    'fiber_content',
    'energy_content',
    'body_condition_score',
    'somatic_cell_count',
    'temperature',
    'humidity',
    'num_cows',
    'milk_production',
]


def _simulate_daily_record(rng, farm_id, date):
    """Draw one synthetic farm-day record from *rng*.

    The draws are made in a fixed order (feed, health, environment,
    production) so that a given RNG state always yields the same record.
    """
    # Feed composition
    protein_content = rng.uniform(14, 18)  # %
    fiber_content = rng.uniform(17, 23)  # %
    energy_content = rng.uniform(1.5, 1.8)  # Mcal/kg

    # Cattle health indicators
    body_condition_score = rng.uniform(2.5, 4.5)
    somatic_cell_count = rng.lognormal(mean=5, sigma=0.5)

    # Environmental conditions
    temperature = rng.normal(15, 5)  # °C
    humidity = rng.uniform(40, 80)  # %

    # Milk production inputs
    base_production = rng.uniform(20, 35)  # Liters per cow
    num_cows = rng.randint(50, 500)  # upper bound is exclusive

    # Sinusoidal seasonal variation of +/-10% over the year
    seasonal_factor = 1 + 0.1 * np.sin(2 * np.pi * date.dayofyear / 365)

    # Total production with an extra +/-10% multiplicative noise term
    milk_production = (
        base_production * num_cows * seasonal_factor * rng.uniform(0.9, 1.1)
    )

    return {
        'farm_id': farm_id,
        'date': date,
        'protein_content': protein_content,
        'fiber_content': fiber_content,
        'energy_content': energy_content,
        'body_condition_score': body_condition_score,
        'somatic_cell_count': somatic_cell_count,
        'temperature': temperature,
        'humidity': humidity,
        'num_cows': num_cows,
        'milk_production': milk_production,
    }


def generate_synthetic_data(num_farms=10, days=365, end_date=None, seed=42):
    """Generate a synthetic daily dairy-farm dataset.

    One row is produced per farm per day, farms first and dates ascending
    within each farm.

    Args:
        num_farms: Number of farms; IDs are ``Farm_001`` .. ``Farm_NNN``.
        days: Number of consecutive daily rows per farm, ending on
            ``end_date`` (inclusive).
        end_date: Last date of the range. Anything ``pd.Timestamp`` accepts
            (``date``, ``datetime``, ISO string). Defaults to today's local
            date, which makes the output depend on the day it is run; pass a
            fixed value for fully reproducible output.
        seed: Seed for the random stream. The default ``42`` reproduces the
            values previously obtained via ``np.random.seed(42)``. ``None``
            seeds from OS entropy.

    Returns:
        A ``pandas.DataFrame`` with the columns listed in ``_COLUMNS``. When
        ``num_farms`` or ``days`` is not positive the frame is empty but
        still carries those columns.
    """
    # A private RandomState yields the same legacy stream as
    # np.random.seed(seed) without reseeding NumPy's global generator.
    rng = np.random.RandomState(seed)

    farm_ids = [f"Farm_{i:03d}" for i in range(1, num_farms + 1)]

    if end_date is None:
        end_date = datetime.now().date()
    else:
        end_date = pd.Timestamp(end_date).date()
    start_date = end_date - timedelta(days=days - 1)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    data = [
        _simulate_daily_record(rng, farm_id, date)
        for farm_id in farm_ids
        for date in date_range
    ]

    # Explicit columns keep the schema stable even when no rows were generated.
    return pd.DataFrame(data, columns=_COLUMNS)


# NOTE: the driver below intentionally stays at module level so that the
# import-time behaviour and the `synthetic_data` global are preserved.

# Generate the synthetic data
synthetic_data = generate_synthetic_data(num_farms=10, days=365)

# Display the first few rows and basic statistics
print(synthetic_data.head())
print(synthetic_data.describe())

# Save the data to a CSV file
synthetic_data.to_csv('dairy_farm_synthetic_data.csv', index=False)
print("Data saved to 'dairy_farm_synthetic_data.csv'")