import pandas as pd import numpy as np import gradio as gr import plotly.express as px import plotly.graph_objects as go from sklearn.ensemble import IsolationForest from datetime import datetime import nltk from nltk.tokenize import word_tokenize # Download required NLTK data nltk.download('punkt') nltk.download('stopwords') nltk.download('averaged_perceptron_tagger') class AugmentedAnalytics: def __init__(self): self.df = None self.date_column = None self.numeric_columns = [] def load_data(self, file): """Load and preprocess the CSV data""" try: # Read the CSV file self.df = pd.read_csv(file.name) # Reset columns self.numeric_columns = [] self.date_column = None # Identify date and numeric columns for col in self.df.columns: if self.df[col].dtype in ['float64', 'int64']: self.numeric_columns.append(col) elif self.df[col].dtype == 'object': try: pd.to_datetime(self.df[col]) self.date_column = col self.df[col] = pd.to_datetime(self.df[col]) except: continue # Handle missing values self.df = self.df.fillna(method='ffill') # Generate summary and visualization sales_summary = self.get_sales_summary() sales_viz = self.create_sales_overview() status = f"Data loaded successfully! Found {len(self.numeric_columns)} numeric columns and {self.date_column if self.date_column else 'no'} date column." return sales_summary, sales_viz, status except Exception as e: return ( "Error in data loading. Please check your CSV file.", None, f"Error: {str(e)}" ) def get_sales_summary(self): """Generate a summary of sales metrics""" try: if 'sales' not in self.df.columns: return "No sales data found in the dataset" summary = f"""Sales Summary: - Total Sales: {self.df['sales'].sum():,.2f} - Average Daily Sales: {self.df['sales'].mean():,.2f} - Highest Sales Day: {self.df['sales'].max():,.2f} - Lowest Sales Day: {self.df['sales'].min():,.2f} - Total Revenue: ${self.df['revenue'].sum():,.2f} - Average Profit Margin: {((self.df['revenue'] - self.df['costs'])/self.df['revenue']).mean()*100:.1f}%""" return summary except Exception as e: return f"Error generating summary: {str(e)}" def create_sales_overview(self): """Create an overview visualization of sales trends""" try: if self.df is None or len(self.df) == 0: return None fig = go.Figure() # Add sales line if exists if 'sales' in self.df.columns: fig.add_trace(go.Scatter( x=self.df[self.date_column] if self.date_column else self.df.index, y=self.df['sales'], name='Sales', line=dict(color='blue') )) # Add revenue line if exists if 'revenue' in self.df.columns: fig.add_trace(go.Scatter( x=self.df[self.date_column] if self.date_column else self.df.index, y=self.df['revenue'], name='Revenue', line=dict(color='green') )) # Add moving average if sales exists if 'sales' in self.df.columns: fig.add_trace(go.Scatter( x=self.df[self.date_column] if self.date_column else self.df.index, y=self.df['sales'].rolling(7).mean(), name='7-day Moving Average', line=dict(color='red', dash='dash') )) fig.update_layout( title='Sales and Revenue Overview', xaxis_title='Date', yaxis_title='Amount', hovermode='x unified' ) return fig except Exception as e: return None def answer_sales_query(self, query): """Process natural language queries about sales""" try: if self.df is None: return "Please load data first." query = query.lower() # Parse time period from query time_period = 'all' if 'today' in query: time_period = 'today' elif 'week' in query: time_period = 'week' elif 'month' in query: time_period = 'month' elif 'year' in query: time_period = 'year' # Parse metric from query metric = 'sales' if 'revenue' in query: metric = 'revenue' elif 'profit' in query: metric = 'profit' elif 'cost' in query: metric = 'costs' if metric not in self.df.columns: return f"No {metric} data found in the dataset" # Calculate the requested value if time_period == 'today': value = self.df[metric].iloc[-1] elif time_period == 'week': value = self.df[metric].tail(7).mean() elif time_period == 'month': value = self.df[metric].tail(30).mean() elif time_period == 'year': value = self.df[metric].mean() else: value = self.df[metric].sum() return f"{time_period.capitalize()} {metric}: {value:,.2f}" except Exception as e: return f"Error processing query: {str(e)}" def create_gradio_interface(): """Create the Gradio interface""" analytics = AugmentedAnalytics() with gr.Blocks() as interface: gr.Markdown("# Augmented Analytics Dashboard") with gr.Row(): file_input = gr.File(label="Upload CSV File") load_status = gr.Textbox(label="Status", interactive=False) with gr.Row(): sales_summary = gr.Textbox( label="Sales Summary", lines=8, interactive=False ) with gr.Row(): query_input = gr.Textbox( label="Ask about sales (e.g., 'How much sales this week?' or 'Show monthly revenue')", placeholder="Type your question here...", interactive=True ) query_output = gr.Textbox(label="Answer", interactive=False) with gr.Row(): output_plot = gr.Plot(label="Sales Visualization") def process_query(query, file): try: if analytics.df is None and file is not None: analytics.load_data(file) return analytics.answer_sales_query(query) except Exception as e: return f"Error: {str(e)}" def load_data_callback(file): if file is None: return "Please upload a file.", "", None return analytics.load_data(file) # Set up event handlers file_input.change( load_data_callback, inputs=[file_input], outputs=[sales_summary, output_plot, load_status] ) query_input.change( process_query, inputs=[query_input, file_input], outputs=[query_output] ) return interface # Launch the interface if __name__ == "__main__": interface = create_gradio_interface() interface.launch(share=True)