Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import random | |
| from faker import Faker | |
| fake = Faker() | |
| # Fixed customers | |
| customers = [ | |
| {"customer_id": 1, "customer_name": "John", "customer_email": "john@example.com"}, | |
| {"customer_id": 2, "customer_name": "Alice", "customer_email": "alice@example.com"}, | |
| {"customer_id": 3, "customer_name": "Bob", "customer_email": "bob@example.com"}, | |
| {"customer_id": 4, "customer_name": "Mark", "customer_email": "mark@example.com"}, | |
| {"customer_id": 5, "customer_name": "Maxwell", "customer_email": "maxwell@example.com"}, | |
| ] | |
| # Product catalog | |
| products = ["Laptop", "Smartphone", "Headphones", "Tablet", "Camera", "Smartwatch", "Monitor"] | |
| # Order statuses | |
| statuses = ["In Progress", "Delivered", "Cancelled", "Returned", "Refunded"] | |
| def generate_orders(num_orders, start_order_id=1000): | |
| data = [] | |
| for i in range(num_orders): | |
| order_id = start_order_id + i | |
| order_date = fake.date_between_dates(date_start=pd.to_datetime("2025-01-01"), | |
| date_end=pd.to_datetime("2025-08-31")) | |
| customer = random.choice(customers) | |
| ordered_product = random.choice(products) | |
| quantity = random.randint(1, 5) | |
| price_per_unit = round(random.uniform(50, 2000), 2) # product price | |
| price = round(price_per_unit * quantity, 2) | |
| status = random.choice(statuses) | |
| data.append({ | |
| "order_id": order_id, | |
| "order_date": order_date, | |
| "customer_id": customer["customer_id"], | |
| "customer_name": customer["customer_name"], | |
| "customer_email": customer["customer_email"], | |
| "ordered_products": ordered_product, | |
| "order_quantity": 1, # since we keep one product per row | |
| "quantity": quantity, | |
| "price": price, | |
| "status": status, | |
| }) | |
| return pd.DataFrame(data) | |
| # Generate datasets | |
| df_small = generate_orders(100) | |
| df_large = generate_orders(10000) | |
| # Save to CSV | |
| df_small.to_csv("orders_100.csv", index=False) | |
| df_large.to_csv("orders_10000.csv", index=False) | |
| print("Generated orders_100.csv and orders_10000.csv ✅") | |