Spaces:
No application file
No application file
File size: 1,815 Bytes
e77665f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
def generate_data(num_rows, filename, seed=None):
    """Generate a synthetic sales dataset and write it to ``filename`` as CSV.

    Each row gets a date cycling through a 90-day window starting on
    2024-01-01, plus a randomly chosen region, product, category and
    customer segment, a random order count, refund count and unit price.
    Revenue is ``(orders - refunds) * price``.

    An anomaly is injected for "Product_A" in the "North" region on the
    10th and 11th day of any month: orders drop to 60% of the base value
    and refunds rise to 20% of the base value.

    Args:
        num_rows: Number of rows to generate; values <= 0 yield a CSV that
            contains only the header line.
        filename: Destination passed to ``DataFrame.to_csv``. When it is a
            filesystem path, missing parent directories are created first.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) values are drawn from the module-level ``random``
            state, exactly as before this parameter existed.

    Returns:
        None. The data is written to ``filename`` and a confirmation line
        is printed.
    """
    import os

    regions = ["North", "South", "East", "West"]
    products = ["Product_A", "Product_B", "Product_C"]
    categories = ["Electronics", "Clothing", "Home"]
    segments = ["Premium", "Standard"]
    columns = [
        "date", "region", "product", "category",
        "orders", "revenue", "refunds", "price", "customer_segment",
    ]
    window_days = 90

    # A private generator keeps seeded runs from disturbing (or depending on)
    # the global random state; unseeded runs keep using the global state.
    rng = random if seed is None else random.Random(seed)

    # Dates repeat every `window_days` rows, so format each date and evaluate
    # its anomaly-day flag once instead of once per generated row.
    start_date = datetime(2024, 1, 1)
    calendar = []
    for offset in range(window_days):
        day = start_date + timedelta(days=offset)
        calendar.append((day.strftime("%Y-%m-%d"), day.day in (10, 11)))

    data = []
    for i in range(num_rows):
        date_str, is_anomaly_day = calendar[i % window_days]
        region = rng.choice(regions)
        product = rng.choice(products)
        category = rng.choice(categories)
        segment = rng.choice(segments)
        base_orders = rng.randint(50, 150)

        # Inject anomaly: fewer orders and more refunds for one
        # product/region combination on the flagged days.
        if is_anomaly_day and product == "Product_A" and region == "North":
            orders = int(base_orders * 0.6)   # drop
            refunds = int(base_orders * 0.2)  # spike
        else:
            orders = base_orders
            refunds = rng.randint(2, 8)

        price = rng.randint(80, 120)
        revenue = orders * price - refunds * price

        data.append([
            date_str,
            region,
            product,
            category,
            orders,
            revenue,
            refunds,
            price,
            segment,
        ])

    df = pd.DataFrame(data, columns=columns)

    # to_csv does not create directories; make sure the parent exists so a
    # fresh checkout without the output folder does not fail. Non-path
    # targets (e.g. open file objects) are passed through untouched.
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)

    df.to_csv(filename, index=False)
    print(f"Generated {filename}")
if __name__ == "__main__":
    # Produce the small, medium and large sample datasets, in that order.
    for row_count, out_path in (
        (300, "data/ai-operations-copilot-data/data_small.csv"),
        (10000, "data/ai-operations-copilot-data/data_medium.csv"),
        (1000000, "data/ai-operations-copilot-data/data_large.csv"),
    ):
        generate_data(row_count, out_path)
|