# farms / create.py
# poemsforaphrodite: "Upload folder using huggingface_hub" (commit 8dbb260, verified)
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
def generate_synthetic_data(num_farms=10, days=365, *, seed=42, end_date=None):
    """Build a synthetic daily dairy-farm dataset as a pandas DataFrame.

    One row is produced per farm per calendar day. Every value is drawn at
    random; only ``milk_production`` is derived from other draws (per-cow
    production * herd size * seasonal factor * +/-10% noise).

    Args:
        num_farms: Number of farms; IDs are ``Farm_001`` .. ``Farm_NNN``.
        days: Number of consecutive days per farm, ending on ``end_date``.
        seed: Seed for the private random generator. The default (42)
            reproduces the value stream of ``np.random.seed(42)`` followed
            by the legacy ``np.random.*`` calls. Pass ``None`` for
            non-reproducible data.
        end_date: Last day of the series (anything ``pd.Timestamp``
            accepts). Defaults to today's local date.

    Returns:
        DataFrame with columns ``farm_id``, ``date``, ``protein_content``
        (%), ``fiber_content`` (%), ``energy_content`` (Mcal/kg),
        ``body_condition_score``, ``somatic_cell_count``, ``temperature``
        (deg C), ``humidity`` (%), ``num_cows`` and ``milk_production``.
        When ``num_farms`` or ``days`` is not positive the frame is empty
        but still carries these columns.
    """
    columns = [
        'farm_id',
        'date',
        'protein_content',
        'fiber_content',
        'energy_content',
        'body_condition_score',
        'somatic_cell_count',
        'temperature',
        'humidity',
        'num_cows',
        'milk_production',
    ]

    # A private generator keeps the call reproducible without reseeding the
    # process-wide NumPy RNG that other code may be relying on.
    rng = np.random.RandomState(seed)

    # Generate farm IDs
    farm_ids = [f"Farm_{i:03d}" for i in range(1, num_farms + 1)]

    # Generate dates: `days` consecutive days ending on end_date (inclusive)
    if end_date is None:
        end_date = datetime.now().date()
    last_day = pd.Timestamp(end_date).normalize()
    first_day = last_day - timedelta(days=days - 1)
    date_range = pd.date_range(start=first_day, end=last_day, freq='D')

    # The draws are kept scalar and in a fixed order on purpose: the order
    # of calls determines which random number lands in which column.
    data = []
    for farm_id in farm_ids:
        for date in date_range:
            # Feed composition
            protein_content = rng.uniform(14, 18)  # %
            fiber_content = rng.uniform(17, 23)  # %
            energy_content = rng.uniform(1.5, 1.8)  # Mcal/kg

            # Cattle health indicators
            body_condition_score = rng.uniform(2.5, 4.5)
            somatic_cell_count = rng.lognormal(mean=5, sigma=0.5)

            # Environmental conditions
            temperature = rng.normal(15, 5)  # deg C
            humidity = rng.uniform(40, 80)  # %

            # Milk production inputs
            base_production = rng.uniform(20, 35)  # Liters per cow
            num_cows = rng.randint(50, 500)  # upper bound is exclusive

            # Sinusoidal seasonal variation of +/-10% over the year
            seasonal_factor = 1 + 0.1 * np.sin(2 * np.pi * date.dayofyear / 365)

            # Total milk production with an extra +/-10% of random noise
            milk_production = (
                base_production * num_cows * seasonal_factor * rng.uniform(0.9, 1.1)
            )

            data.append({
                'farm_id': farm_id,
                'date': date,
                'protein_content': protein_content,
                'fiber_content': fiber_content,
                'energy_content': energy_content,
                'body_condition_score': body_condition_score,
                'somatic_cell_count': somatic_cell_count,
                'temperature': temperature,
                'humidity': humidity,
                'num_cows': num_cows,
                'milk_production': milk_production,
            })

    # Passing the column list keeps the schema intact even when no rows
    # were generated.
    return pd.DataFrame(data, columns=columns)
# Build the default dataset: ten farms, one record per farm per day for a year.
synthetic_data = generate_synthetic_data(days=365, num_farms=10)

# Preview the leading rows, then the summary statistics, one after the other.
print(synthetic_data.head(), synthetic_data.describe(), sep="\n")

# Persist the dataset as CSV (without the DataFrame index) and confirm.
synthetic_data.to_csv('dairy_farm_synthetic_data.csv', index=False)
print("Data saved to 'dairy_farm_synthetic_data.csv'")