Spaces:

EdwardSamuel13
/

business-analytics-dashboard

Running

File size: 5,439 Bytes

8f69dec

import pandas as pd
import numpy as np
import json
import random
from faker import Faker
from datetime import datetime, timedelta
import os

# Seed every random source the script uses so the random draws repeat across
# runs. Generated dates are still relative to the current time.
fake = Faker()
Faker.seed(42)
np.random.seed(42)
# The generator functions draw from the stdlib `random` module, which is
# independent of the Faker and NumPy seeds above and must be seeded separately.
random.seed(42)

# Output directory: a `data` folder in the parent of the directory that
# contains this file. Created at import time if it does not exist yet.
DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
os.makedirs(DATA_DIR, exist_ok=True)

def generate_sales_data(num_records=1000):
    """Generate synthetic sales transactions and save them as ``sales_data.csv``.

    Each row picks a random product from a fixed catalogue, a quantity of 1-5,
    a unit price inside that product's price range, a customer ID in
    1001-1200, a region and a payment method. Timestamps fall within the
    365 days before the call and never lie in the future. The CSV is written
    into ``DATA_DIR`` and always contains a header row, even when
    ``num_records`` is 0.

    Args:
        num_records: Number of transaction rows to generate.
    """
    print("Generating Sales Data...")
    products = {
        'Laptop': {'category': 'Electronics', 'price_range': (800, 2500)},
        'Smartphone': {'category': 'Electronics', 'price_range': (400, 1200)},
        'Headphones': {'category': 'Accessories', 'price_range': (50, 300)},
        'Monitor': {'category': 'Electronics', 'price_range': (150, 600)},
        'Mouse': {'category': 'Accessories', 'price_range': (20, 100)},
        'Keyboard': {'category': 'Accessories', 'price_range': (30, 150)},
        'Desk Chair': {'category': 'Furniture', 'price_range': (100, 500)},
        'Coffee Maker': {'category': 'Appliances', 'price_range': (40, 200)}
    }
    # Built once instead of on every loop iteration.
    product_names = list(products)
    regions = ['North', 'South', 'East', 'West']
    payment_methods = ['Credit Card', 'PayPal', 'Debit Card', 'Bank Transfer']
    # Explicit column order so an empty run still produces a CSV header.
    columns = [
        'TransactionID', 'Date', 'CustomerID', 'Product', 'Category',
        'Quantity', 'UnitPrice', 'TotalPrice', 'Region', 'PaymentMethod',
    ]

    data = []
    start_date = datetime.now() - timedelta(days=365)

    for _ in range(num_records):
        product_name = random.choice(product_names)
        info = products[product_name]

        # Day offset stops at 364: with 365, the extra 0-23 hours could push
        # the timestamp past the current time (a future-dated sale).
        date = start_date + timedelta(days=random.randint(0, 364), hours=random.randint(0, 23))
        quantity = random.randint(1, 5)
        price_unit = round(random.uniform(*info['price_range']), 2)
        total_price = quantity * price_unit

        data.append({
            'TransactionID': fake.uuid4(),
            'Date': date.strftime('%Y-%m-%d %H:%M:%S'),
            'CustomerID': fake.random_int(min=1001, max=1200),
            'Product': product_name,
            'Category': info['category'],
            'Quantity': quantity,
            'UnitPrice': price_unit,
            # Re-round: multiplying a 2-decimal float can add float noise.
            'TotalPrice': round(total_price, 2),
            'Region': random.choice(regions),
            'PaymentMethod': random.choice(payment_methods)
        })

    df = pd.DataFrame(data, columns=columns)
    df.to_csv(os.path.join(DATA_DIR, 'sales_data.csv'), index=False)
    print(f"Saved {num_records} sales records.")

def generate_web_logs(num_records=500):
    """Generate synthetic web access logs and save them as ``web_logs.json``.

    Every entry gets a timestamp inside the 7 days before the call, a fake
    IPv4 address and user agent, a random HTTP method and endpoint, a
    weighted-random status code, and a response time of 20-1500 ms. The
    entries are written as an indented JSON array into ``DATA_DIR``.

    Args:
        num_records: Number of log entries to generate.
    """
    print("Generating Web Logs...")
    window_minutes = 7 * 24 * 60
    window_start = datetime.now() - timedelta(days=7)

    paths = ['/home', '/products', '/cart', '/checkout', '/login', '/support', '/search']
    methods = ['GET', 'POST']
    codes = [200, 200, 200, 201, 302, 404, 500]
    # One weight per entry in `codes`; the repeated 200s add up to 75 of 100.
    code_weights = [60, 10, 5, 5, 10, 8, 2]

    def build_entry():
        # Build a single log record with one random draw per field.
        moment = window_start + timedelta(minutes=random.randint(0, window_minutes))
        return {
            'timestamp': moment.isoformat(),
            'ip_address': fake.ipv4(),
            'request_method': random.choice(methods),
            'endpoint': random.choice(paths),
            'status_code': random.choices(codes, weights=code_weights)[0],
            'user_agent': fake.user_agent(),
            'response_time_ms': random.randint(20, 1500)
        }

    entries = [build_entry() for _ in range(num_records)]

    target = os.path.join(DATA_DIR, 'web_logs.json')
    with open(target, 'w') as handle:
        json.dump(entries, handle, indent=4)
    print(f"Saved {num_records} web logs.")

def generate_reviews(num_records=300):
    """Generate synthetic customer reviews and save them as ``customer_reviews.csv``.

    Each review picks a random product and a sentiment. Positive reviews get
    a rating of 4-5, negative ones 1-2, and neutral ones exactly 3, with the
    text taken from a matching template. The CSV is written into ``DATA_DIR``
    and always contains a header row, even when ``num_records`` is 0.

    Args:
        num_records: Number of review rows to generate.
    """
    print("Generating Customer Reviews...")
    reviews = []

    # Simple templates for positive and negative reviews. Some have no
    # {product} placeholder; str.format leaves those unchanged.
    positive_templates = [
        "I love this {product}! It works great.",
        "Excellent quality, highly recommended.",
        "Good value for money. The {product} exceeded my expectations.",
        "Fast shipping and great service.",
        "The {product} is amazing, 5 stars!"
    ]

    negative_templates = [
        "Terrible experience with this {product}.",
        "Broken on arrival. Very disappointed.",
        "Not worth the money. Analyzing the {product} shows poor build quality.",
        "Customer service was unhelpful.",
        "The {product} stopped working after two days."
    ]

    products = ['Laptop', 'Smartphone', 'Headphones', 'Monitor', 'Mouse', 'Keyboard', 'Desk Chair', 'Coffee Maker']
    sentiments = ['positive', 'negative', 'neutral']
    # Explicit column order so an empty run still produces a CSV header.
    columns = ['ReviewID', 'CustomerID', 'Product', 'Rating', 'ReviewText', 'Date']

    for _ in range(num_records):
        product = random.choice(products)
        sentiment = random.choice(sentiments)

        if sentiment == 'positive':
            rating = random.randint(4, 5)
            text = random.choice(positive_templates).format(product=product)
        elif sentiment == 'negative':
            rating = random.randint(1, 2)
            text = random.choice(negative_templates).format(product=product)
        else:
            # Neutral reviews use a fixed rating and a single sentence.
            rating = 3
            text = f"The {product} is okay, but could be better."

        reviews.append({
            'ReviewID': fake.uuid4(),
            'CustomerID': fake.random_int(min=1001, max=1200),
            'Product': product,
            'Rating': rating,
            'ReviewText': text,
            'Date': fake.date_this_year().strftime('%Y-%m-%d')
        })

    df = pd.DataFrame(reviews, columns=columns)
    df.to_csv(os.path.join(DATA_DIR, 'customer_reviews.csv'), index=False)
    print(f"Saved {num_records} reviews.")

if __name__ == "__main__":
    # Script entry point: run sales, web logs, then reviews, each with its
    # own record count.
    for generate, record_count in (
        (generate_sales_data, 2000),
        (generate_web_logs, 1000),
        (generate_reviews, 500),
    ):
        generate(record_count)