Spaces:
No application file
No application file
| import pandas as pd | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
| import random | |
def generate_data(num_rows, filename, seed=None):
    """Write a synthetic sales dataset with ``num_rows`` rows to a CSV file.

    Row ``i`` is dated ``2024-01-01`` plus ``i % 90`` days, so the dates cycle
    through a 90-day window. Region, product, category and customer segment
    are drawn independently at random for every row.

    Rows for ``Product_A`` in the ``North`` region that fall on the 10th or
    11th day of a month carry an injected anomaly: orders are cut to 60% of
    the baseline and refunds are raised to 20% of the baseline.

    Args:
        num_rows: Number of data rows to generate.
        filename: Destination of the CSV. When it is a path (``str`` or
            ``os.PathLike``), missing parent directories are created first.
            Any other value is handed to ``DataFrame.to_csv`` unchanged.
        seed: Optional seed for a private random generator, making the output
            reproducible. When ``None`` (the default) the module-level
            ``random`` functions are used.

    Returns:
        None. The CSV is written and a confirmation line is printed.
    """
    # Imported locally so this function does not rely on a file-level
    # ``import os`` being present.
    import os

    # A private generator keeps seeded runs from touching global RNG state;
    # without a seed, fall back to the shared module-level generator.
    rng = random if seed is None else random.Random(seed)

    regions = ["North", "South", "East", "West"]
    products = ["Product_A", "Product_B", "Product_C"]
    categories = ["Electronics", "Clothing", "Home"]
    segments = ["Premium", "Standard"]
    anomaly_days = (10, 11)
    start_date = datetime(2024, 1, 1)

    rows = []
    for i in range(num_rows):
        date = start_date + timedelta(days=i % 90)
        # The order of the random draws below is kept fixed so that a given
        # RNG state always yields the same rows.
        region = rng.choice(regions)
        product = rng.choice(products)
        category = rng.choice(categories)
        segment = rng.choice(segments)
        base_orders = rng.randint(50, 150)

        is_anomaly = (
            date.day in anomaly_days
            and product == "Product_A"
            and region == "North"
        )
        if is_anomaly:
            orders = int(base_orders * 0.6)  # order drop
            refunds = int(base_orders * 0.2)  # refund spike
        else:
            orders = base_orders
            refunds = rng.randint(2, 8)

        price = rng.randint(80, 120)
        # Revenue is net of refunds: every refunded unit gives back one price.
        revenue = (orders - refunds) * price

        rows.append([
            date.strftime("%Y-%m-%d"),
            region,
            product,
            category,
            orders,
            revenue,
            refunds,
            price,
            segment,
        ])

    df = pd.DataFrame(rows, columns=[
        "date", "region", "product", "category",
        "orders", "revenue", "refunds", "price", "customer_segment",
    ])

    # to_csv does not create directories; make sure the target folder exists
    # so that writing into a fresh checkout does not fail.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)

    df.to_csv(filename, index=False)
    print(f"Generated {filename}")
if __name__ == "__main__":
    # Emit the small, medium and large datasets in turn; each entry pairs a
    # row count with the CSV path it is written to.
    for row_count, csv_path in (
        (300, "data/ai-operations-copilot-data/data_small.csv"),
        (10000, "data/ai-operations-copilot-data/data_medium.csv"),
        (1000000, "data/ai-operations-copilot-data/data_large.csv"),
    ):
        generate_data(row_count, csv_path)