|
|
import pandas as pd
|
|
|
import json
|
|
|
import os
|
|
|
|
|
|
# Absolute path of the ``data`` directory located one level above the
# directory that contains this module.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(os.path.dirname(_MODULE_DIR), 'data')
|
|
|
|
|
|
def load_sales_data():
    """Read ``sales_data.csv`` from ``DATA_DIR`` into a DataFrame.

    Returns:
        The CSV contents as a DataFrame whose ``Date`` column has been
        converted with ``pd.to_datetime``.

    Raises:
        FileNotFoundError: If ``sales_data.csv`` does not exist in
            ``DATA_DIR``.
    """
    path = os.path.join(DATA_DIR, 'sales_data.csv')

    if os.path.exists(path):
        sales = pd.read_csv(path)
        sales['Date'] = pd.to_datetime(sales['Date'])
        return sales

    raise FileNotFoundError(f"{path} not found.")
|
|
|
|
|
|
def load_web_logs():
    """Load the web-log records stored in ``web_logs.json`` under ``DATA_DIR``.

    The file is parsed with ``json.load`` and the result is passed to
    ``pd.DataFrame``; the ``timestamp`` column is then converted with
    ``pd.to_datetime``.

    Returns:
        A DataFrame built from the JSON payload, with ``timestamp``
        converted to datetimes.

    Raises:
        FileNotFoundError: If ``web_logs.json`` does not exist in
            ``DATA_DIR``.
    """
    path = os.path.join(DATA_DIR, 'web_logs.json')

    if not os.path.exists(path):
        raise FileNotFoundError(f"{path} not found.")

    # JSON interchange text is UTF-8. Pin the encoding so decoding does not
    # depend on the platform's locale default (e.g. cp1252 on Windows).
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame(data)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df
|
|
|
|
|
|
def load_reviews():
    """Read ``customer_reviews.csv`` from ``DATA_DIR`` into a DataFrame.

    Returns:
        The CSV contents as a DataFrame whose ``Date`` column has been
        converted with ``pd.to_datetime``.

    Raises:
        FileNotFoundError: If ``customer_reviews.csv`` does not exist in
            ``DATA_DIR``.
    """
    path = os.path.join(DATA_DIR, 'customer_reviews.csv')

    file_missing = not os.path.exists(path)
    if file_missing:
        raise FileNotFoundError(f"{path} not found.")

    reviews = pd.read_csv(path)
    parsed_dates = pd.to_datetime(reviews['Date'])
    reviews['Date'] = parsed_dates
    return reviews
|
|
|
|
|
|
def get_integrated_data():
    """Load every dataset and expose them through a single access point.

    No merging or joining is performed; each dataset is loaded by its own
    loader and returned unchanged.

    Returns:
        A dictionary with the keys ``'sales'``, ``'logs'`` and ``'reviews'``
        mapped to the results of ``load_sales_data()``, ``load_web_logs()``
        and ``load_reviews()`` respectively.
    """
    # Dict displays evaluate their values in order, so the loaders run in
    # the sequence sales -> logs -> reviews.
    return {
        'sales': load_sales_data(),
        'logs': load_web_logs(),
        'reviews': load_reviews(),
    }
|
|
|
|
|
|
def clean_sales_data(df):
    """Return only the sales rows with a positive quantity and unit price.

    Args:
        df: DataFrame containing ``Quantity`` and ``UnitPrice`` columns.

    Returns:
        A new DataFrame holding the rows of ``df`` where both
        ``Quantity > 0`` and ``UnitPrice > 0``. The original index labels
        are kept and ``df`` itself is not modified.
    """
    valid_rows = (df['Quantity'] > 0) & (df['UnitPrice'] > 0)

    # Return an explicit copy: a bare boolean-mask selection is tracked by
    # pandas as a possible copy of ``df``, so callers that add or modify
    # columns on the result would trigger SettingWithCopyWarning.
    return df[valid_rows].copy()
|
|
|
|