|
|
import pandas as pd |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
import plotly.express as px |
|
|
import plotly.graph_objects as go |
|
|
from sklearn.ensemble import IsolationForest |
|
|
from datetime import datetime |
|
|
import nltk |
|
|
from nltk.tokenize import word_tokenize |
|
|
|
|
|
|
|
|
# Make sure the NLTK resources the app relies on are available locally.
# nltk.download() unconditionally contacts the NLTK index (and prints) on
# every startup; probing the local data path first makes repeat launches
# offline-friendly while still fetching anything that is missing.
for _resource, _data_path in (
    ('punkt', 'tokenizers/punkt'),
    ('stopwords', 'corpora/stopwords'),
    ('averaged_perceptron_tagger', 'taggers/averaged_perceptron_tagger'),
):
    try:
        nltk.data.find(_data_path)  # raises LookupError when not cached
    except LookupError:
        nltk.download(_resource)
|
|
|
|
|
class AugmentedAnalytics:
    """Sales-analytics engine backed by a pandas DataFrame.

    Loads a CSV, auto-detects numeric and datetime columns, renders a
    Plotly overview chart, and answers simple keyword-driven natural
    language queries about sales / revenue / profit / costs.
    """

    def __init__(self):
        self.df = None              # loaded dataset (pandas.DataFrame) or None
        self.date_column = None     # name of a datetime-parseable column, if any
        self.numeric_columns = []   # names of numeric columns in the dataset

    def load_data(self, file):
        """Load and preprocess an uploaded CSV file.

        Args:
            file: object exposing a ``.name`` path attribute — the shape
                Gradio's File component provides. TODO confirm other callers
                pass the same shape.

        Returns:
            tuple ``(summary_text, figure_or_None, status_text)`` suitable
            for the dashboard's three outputs; on failure the first element
            is a user-facing error message.
        """
        try:
            self.df = pd.read_csv(file.name)

            # Reset detected metadata so a re-upload starts clean.
            self.numeric_columns = []
            self.date_column = None

            for col in self.df.columns:
                # Broader than the original float64/int64 check: also
                # covers int32/float32 etc.
                if pd.api.types.is_numeric_dtype(self.df[col]):
                    self.numeric_columns.append(col)
                elif self.df[col].dtype == 'object':
                    # Probe object columns for datetimes. Parse once and
                    # reuse the result (the original parsed twice). As in
                    # the original, a later parseable column wins.
                    try:
                        parsed = pd.to_datetime(self.df[col])
                    except (ValueError, TypeError):
                        continue
                    self.date_column = col
                    self.df[col] = parsed

            # Forward-fill gaps; fillna(method='ffill') is deprecated in
            # modern pandas in favor of DataFrame.ffill().
            self.df = self.df.ffill()

            sales_summary = self.get_sales_summary()
            sales_viz = self.create_sales_overview()
            status = (
                f"Data loaded successfully! Found {len(self.numeric_columns)} "
                f"numeric columns and "
                f"{self.date_column if self.date_column else 'no'} date column."
            )
            return sales_summary, sales_viz, status

        except Exception as e:
            # Boundary handler: the UI shows the message instead of crashing.
            return (
                "Error in data loading. Please check your CSV file.",
                None,
                f"Error: {str(e)}"
            )

    def get_sales_summary(self):
        """Generate a multi-line text summary of sales metrics.

        Revenue and profit-margin lines are included only when the
        ``revenue`` / ``costs`` columns exist; previously a missing column
        raised a KeyError and the whole summary was lost to the handler.
        """
        try:
            if 'sales' not in self.df.columns:
                return "No sales data found in the dataset"

            sales = self.df['sales']
            lines = [
                "Sales Summary:",
                f"- Total Sales: {sales.sum():,.2f}",
                f"- Average Daily Sales: {sales.mean():,.2f}",
                f"- Highest Sales Day: {sales.max():,.2f}",
                f"- Lowest Sales Day: {sales.min():,.2f}",
            ]
            if 'revenue' in self.df.columns:
                revenue = self.df['revenue']
                lines.append(f"- Total Revenue: ${revenue.sum():,.2f}")
                if 'costs' in self.df.columns:
                    margin = ((revenue - self.df['costs']) / revenue).mean() * 100
                    lines.append(f"- Average Profit Margin: {margin:.1f}%")
            return "\n".join(lines)

        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def create_sales_overview(self):
        """Build a Plotly figure of sales/revenue trends.

        Returns None when no data is loaded or plotting fails — the UI
        treats None as "no figure".
        """
        try:
            if self.df is None or len(self.df) == 0:
                return None

            # Shared x axis: the detected date column, else the row index.
            # (Previously recomputed per trace.)
            x_axis = self.df[self.date_column] if self.date_column else self.df.index

            fig = go.Figure()

            if 'sales' in self.df.columns:
                fig.add_trace(go.Scatter(
                    x=x_axis,
                    y=self.df['sales'],
                    name='Sales',
                    line=dict(color='blue')
                ))

            if 'revenue' in self.df.columns:
                fig.add_trace(go.Scatter(
                    x=x_axis,
                    y=self.df['revenue'],
                    name='Revenue',
                    line=dict(color='green')
                ))

            if 'sales' in self.df.columns:
                # Smooth short-term noise with a one-week rolling mean.
                fig.add_trace(go.Scatter(
                    x=x_axis,
                    y=self.df['sales'].rolling(7).mean(),
                    name='7-day Moving Average',
                    line=dict(color='red', dash='dash')
                ))

            fig.update_layout(
                title='Sales and Revenue Overview',
                xaxis_title='Date',
                yaxis_title='Amount',
                hovermode='x unified'
            )
            return fig

        except Exception:
            # Best-effort chart; swallow and let the caller render nothing.
            return None

    def answer_sales_query(self, query):
        """Answer a simple natural-language question about the data.

        Scans the query for a time-period keyword (today/week/month/year)
        and a metric keyword (revenue/profit/cost, defaulting to sales),
        then reports the matching aggregate as a formatted string.
        """
        try:
            if self.df is None:
                return "Please load data first."

            query = query.lower()

            # First matching period keyword wins.
            time_period = 'all'
            for keyword in ('today', 'week', 'month', 'year'):
                if keyword in query:
                    time_period = keyword
                    break

            metric = 'sales'
            if 'revenue' in query:
                metric = 'revenue'
            elif 'profit' in query:
                metric = 'profit'
            elif 'cost' in query:
                metric = 'costs'

            if metric not in self.df.columns:
                return f"No {metric} data found in the dataset"

            series = self.df[metric]
            # NOTE(review): assumes rows are chronological, so tail(n)
            # approximates the most recent period — confirm with callers.
            if time_period == 'today':
                value = series.iloc[-1]
            elif time_period == 'week':
                value = series.tail(7).mean()
            elif time_period == 'month':
                value = series.tail(30).mean()
            elif time_period == 'year':
                value = series.mean()
            else:
                value = series.sum()

            return f"{time_period.capitalize()} {metric}: {value:,.2f}"

        except Exception as e:
            return f"Error processing query: {str(e)}"
|
|
|
|
|
def create_gradio_interface():
    """Assemble and return the Gradio Blocks dashboard.

    Wires a file upload to data loading (summary + chart + status) and a
    free-text query box to the analytics engine.
    """
    analytics = AugmentedAnalytics()

    with gr.Blocks() as interface:
        gr.Markdown("# Augmented Analytics Dashboard")

        with gr.Row():
            file_input = gr.File(label="Upload CSV File")
            load_status = gr.Textbox(label="Status", interactive=False)

        with gr.Row():
            sales_summary = gr.Textbox(
                label="Sales Summary",
                lines=8,
                interactive=False
            )

        with gr.Row():
            query_input = gr.Textbox(
                label="Ask about sales (e.g., 'How much sales this week?' or 'Show monthly revenue')",
                placeholder="Type your question here...",
                interactive=True
            )
            query_output = gr.Textbox(label="Answer", interactive=False)

        with gr.Row():
            output_plot = gr.Plot(label="Sales Visualization")

        def process_query(query, file):
            # Lazily load the data if a file is present but not parsed yet.
            try:
                if analytics.df is None and file is not None:
                    analytics.load_data(file)
                return analytics.answer_sales_query(query)
            except Exception as e:
                return f"Error: {str(e)}"

        def load_data_callback(file):
            # Returns (summary, plot, status) to match the outputs list.
            # Fix: the no-file branch previously sent "" to the Plot
            # component and None to the status Textbox — swapped here so
            # the plot gets None and the status carries the message.
            if file is None:
                return "Please upload a file.", None, "Please upload a file."
            return analytics.load_data(file)

        file_input.change(
            load_data_callback,
            inputs=[file_input],
            outputs=[sales_summary, output_plot, load_status]
        )

        # Fires on every edit of the query text, not only on submit.
        query_input.change(
            process_query,
            inputs=[query_input, file_input],
            outputs=[query_output]
        )

    return interface
|
|
|
|
|
|
|
|
# Script entry point: build the dashboard and serve it with a public
# share link (share=True tunnels through Gradio's hosting).
if __name__ == "__main__":
    create_gradio_interface().launch(share=True)