Spaces:
Sleeping
Sleeping
| # generate the 100K record CSV file | |
| # | |
| import polars as pl | |
| import numpy as np | |
| from datetime import datetime, timedelta | |
def generate(nrows: int, filename: str) -> None:
    """Write ``nrows`` synthetic sales-order records to ``filename`` as CSV.

    Each row has the columns ``order_id``, ``order_date``, ``customer_id``,
    ``customer_name``, ``product_id``, ``product_names``, ``categories``,
    ``quantity``, ``price`` and ``total``. Values are drawn from NumPy's
    global random state (``np.random``), which is not seeded here, so the
    output differs from run to run.

    Args:
        nrows: Number of records to generate.
        filename: Destination path of the comma-separated file; a header
            row is included.
    """
    # ``names`` and ``categories`` are parallel arrays: entry i of one
    # describes the same product as entry i of the other.
    names = np.asarray(
        [
            "Laptop",
            "Smartphone",
            "Desk",
            "Chair",
            "Monitor",
            "Printer",
            "Paper",
            "Pen",
            "Notebook",
            "Coffee Maker",
            "Cabinet",
            "Plastic Cups",
        ]
    )
    categories = np.asarray(
        [
            "Electronics",
            "Electronics",
            "Office",
            "Office",
            "Electronics",
            "Electronics",
            "Stationery",
            "Stationery",
            "Stationery",
            "Electronics",
            "Office",
            "Sundry",
        ]
    )

    product_id = np.random.randint(len(names), size=nrows)
    quantity = np.random.randint(1, 11, size=nrows)
    # Draw prices as whole cents (1.99 .. 99.99) so the line total can be
    # computed exactly in integers and divided once, instead of multiplying
    # an already-rounded float (which yields values like 59.970000000000006).
    price_cents = np.random.randint(199, 10000, size=nrows)
    price = price_cents / 100
    total = price_cents * quantity / 100

    # Random order dates between 2010-01-01 and 2023-12-31, both inclusive.
    # ``randint`` excludes its upper bound, hence the ``+ 1``.
    start_date = np.datetime64("2010-01-01")
    end_date = np.datetime64("2023-12-31")
    span_days = int((end_date - start_date) / np.timedelta64(1, "D"))
    day_offsets = np.random.randint(0, span_days + 1, size=nrows)
    order_dates = np.datetime_as_string(
        start_date + day_offsets.astype("timedelta64[D]"), unit="D"
    )

    # NOTE(review): customer_name is drawn independently of customer_id, so
    # the same id can appear with different names -- confirm this is intended.
    columns = {
        "order_id": np.arange(nrows),
        "order_date": order_dates,
        "customer_id": np.random.randint(100, 1000, size=nrows),
        "customer_name": [f"Customer_{i}" for i in np.random.randint(2**15, size=nrows)],
        "product_id": product_id + 200,
        "product_names": names[product_id],
        "categories": categories[product_id],
        "quantity": quantity,
        "price": price,
        "total": total,
    }

    # Build the Polars DataFrame and write it out with an explicit comma
    # delimiter and a header row.
    df = pl.DataFrame(columns)
    df.write_csv(filename, separator=",", include_header=True)
# Produce the 100,000-row sample data set (with randomised order dates)
# and store it as a CSV file.
generate(
    nrows=100_000,
    filename="D:/Python_Projects/dashboard/sales_data.csv",
)