# EdwardSamuel13's picture
# Upload 14 files
# 8f69dec verified
import pandas as pd
import json
import os
# The datasets live in a `data` directory that is a sibling of the directory
# containing this module, i.e. two `dirname` steps up from this file.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(_PROJECT_ROOT, 'data')
def load_sales_data() -> pd.DataFrame:
    """Read ``sales_data.csv`` from ``DATA_DIR`` into a DataFrame.

    Returns:
        The CSV contents, with the ``Date`` column converted by
        ``pd.to_datetime``.

    Raises:
        FileNotFoundError: If ``sales_data.csv`` does not exist in
            ``DATA_DIR``.
    """
    csv_path = os.path.join(DATA_DIR, 'sales_data.csv')
    if os.path.exists(csv_path):
        sales = pd.read_csv(csv_path)
        sales['Date'] = pd.to_datetime(sales['Date'])
        return sales
    raise FileNotFoundError(f"{csv_path} not found.")
def load_web_logs() -> pd.DataFrame:
    """Read ``web_logs.json`` from ``DATA_DIR`` into a DataFrame.

    The parsed JSON document is handed to ``pd.DataFrame`` unchanged, and the
    ``timestamp`` column is then converted with ``pd.to_datetime``.

    Returns:
        A DataFrame built from the JSON document, with a datetime
        ``timestamp`` column.

    Raises:
        FileNotFoundError: If ``web_logs.json`` does not exist in
            ``DATA_DIR``.
    """
    path = os.path.join(DATA_DIR, 'web_logs.json')
    if not os.path.exists(path):
        raise FileNotFoundError(f"{path} not found.")

    # JSON is UTF-8 by specification. Without an explicit encoding, open()
    # falls back to the platform locale, which can mis-decode (or fail on)
    # non-ASCII log content, e.g. on Windows.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame(data)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df
def load_reviews() -> pd.DataFrame:
    """Read ``customer_reviews.csv`` from ``DATA_DIR`` into a DataFrame.

    Returns:
        The CSV contents, with the ``Date`` column converted by
        ``pd.to_datetime``.

    Raises:
        FileNotFoundError: If ``customer_reviews.csv`` does not exist in
            ``DATA_DIR``.
    """
    csv_path = os.path.join(DATA_DIR, 'customer_reviews.csv')
    if os.path.exists(csv_path):
        reviews = pd.read_csv(csv_path)
        reviews['Date'] = pd.to_datetime(reviews['Date'])
        return reviews
    raise FileNotFoundError(f"{csv_path} not found.")
def get_integrated_data() -> dict:
    """Load every dataset and expose them through a single access point.

    No merging or joining is performed: each loader is called once, in the
    order sales, logs, reviews, and its result is returned unchanged.

    Returns:
        A dictionary with the keys ``'sales'``, ``'logs'`` and ``'reviews'``,
        holding the results of ``load_sales_data()``, ``load_web_logs()`` and
        ``load_reviews()`` respectively.

    Raises:
        FileNotFoundError: Propagated from whichever loader cannot find its
            input file.
    """
    loaders = (
        ('sales', load_sales_data),
        ('logs', load_web_logs),
        ('reviews', load_reviews),
    )
    return {key: loader() for key, loader in loaders}
def clean_sales_data(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of *df* whose quantity and unit price are both positive.

    Transactions with a zero or negative ``Quantity`` or ``UnitPrice`` are
    dropped. The input frame is not modified, and the original index labels
    of the surviving rows are kept.

    Args:
        df: Sales data containing ``Quantity`` and ``UnitPrice`` columns.

    Returns:
        A new, independent DataFrame holding only the valid rows.

    Raises:
        KeyError: If ``Quantity`` or ``UnitPrice`` is missing from *df*.
    """
    is_valid = (df['Quantity'] > 0) & (df['UnitPrice'] > 0)
    # Return an explicit copy so callers can add or modify columns on the
    # cleaned frame without pandas flagging it as a slice of the input
    # (SettingWithCopyWarning).
    return df[is_valid].copy()