Anupam202224 committed on
Commit
25951ad
·
verified ·
1 Parent(s): 057047b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -0
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import logging
import os
from datetime import datetime

import gradio as gr
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from nltk.tokenize import word_tokenize
from sklearn.ensemble import IsolationForest

# Download required NLTK data.
# These calls run at import time, before any class or UI code is defined.
# NOTE(review): nothing in the visible part of this file calls word_tokenize
# or otherwise uses these resources -- confirm they are still needed.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

class AugmentedAnalytics:
    """Load a sales CSV and derive a text summary, a chart and query answers.

    State is populated by :meth:`load_data`:

    * ``df`` -- the loaded ``pandas.DataFrame`` (``None`` until a load succeeds).
    * ``date_column`` -- name of the detected date column, or ``None``.
    * ``numeric_columns`` -- names of the columns with a numeric dtype.

    The reporting methods look for columns literally named ``sales``,
    ``revenue`` and ``costs``; any of them may be absent.
    """

    # Checked in order; the first keyword found in the query wins.
    _PERIOD_KEYWORDS = ('today', 'week', 'month', 'year')
    # (keyword searched for in the query, column/metric it selects).
    _METRIC_KEYWORDS = (('revenue', 'revenue'), ('profit', 'profit'), ('cost', 'costs'))

    def __init__(self):
        self.df = None
        self.date_column = None
        self.numeric_columns = []

    def load_data(self, file):
        """Load and preprocess the CSV data.

        Args:
            file: A path (``str`` / ``os.PathLike``), an object exposing the
                path as ``.name`` (e.g. an upload wrapper), or anything else
                ``pandas.read_csv`` accepts.

        Returns:
            A ``(summary_text, figure_or_None, status_text)`` tuple. On any
            failure the tuple carries an error message instead of raising.
        """
        try:
            # A pathlib.Path also has ``.name`` but it is only the basename,
            # so real paths must be used as-is.
            if isinstance(file, (str, os.PathLike)):
                path = file
            else:
                path = getattr(file, 'name', file)

            self.df = pd.read_csv(path)
            self._detect_columns()

            # Forward-fill gaps. ``fillna(method='ffill')`` is deprecated in
            # recent pandas; ``ffill()`` is the supported spelling.
            self.df = self.df.ffill()

            sales_summary = self.get_sales_summary()
            sales_viz = self.create_sales_overview()
            status = f"Data loaded successfully! Found {len(self.numeric_columns)} numeric columns and {self.date_column if self.date_column else 'no'} date column."

            return sales_summary, sales_viz, status

        except Exception as exc:
            # UI boundary: report the problem instead of crashing the callback.
            return (
                "Error in data loading. Please check your CSV file.",
                None,
                f"Error: {str(exc)}"
            )

    def _detect_columns(self):
        """Reset and recompute ``numeric_columns`` and ``date_column``."""
        self.numeric_columns = []
        self.date_column = None

        for col in self.df.columns:
            series = self.df[col]

            # Booleans count as numeric for pandas but are not measurements.
            if pd.api.types.is_bool_dtype(series):
                continue
            if pd.api.types.is_numeric_dtype(series):
                self.numeric_columns.append(col)
                continue
            if not (pd.api.types.is_object_dtype(series)
                    or pd.api.types.is_string_dtype(series)):
                continue

            try:
                parsed = pd.to_datetime(series)
            except (ValueError, TypeError, OverflowError):
                # Not a date column; leave it untouched.
                continue

            # An all-missing column "parses" to all-NaT; that is not a date axis.
            if parsed.notna().any():
                self.df[col] = parsed
                # When several columns parse, the last one is kept.
                self.date_column = col

    def get_sales_summary(self):
        """Generate a summary of sales metrics.

        Returns:
            A multi-line string. Revenue and profit-margin lines are included
            only when the ``revenue`` (and ``costs``) columns exist. Returns a
            short message when no data is loaded or there is no ``sales``
            column, and an error message if the computation fails.
        """
        if self.df is None:
            return "Please load data first."

        try:
            columns = self.df.columns
            if 'sales' not in columns:
                return "No sales data found in the dataset"

            sales = self.df['sales']
            lines = [
                "Sales Summary:",
                f"- Total Sales: {sales.sum():,.2f}",
                f"- Average Daily Sales: {sales.mean():,.2f}",
                f"- Highest Sales Day: {sales.max():,.2f}",
                f"- Lowest Sales Day: {sales.min():,.2f}",
            ]

            if 'revenue' in columns:
                revenue = self.df['revenue']
                lines.append(f"- Total Revenue: ${revenue.sum():,.2f}")

                if 'costs' in columns:
                    # Skip zero-revenue rows so the mean is not dragged to inf.
                    nonzero = revenue != 0
                    margins = (revenue - self.df['costs'])[nonzero] / revenue[nonzero]
                    margin = margins.mean()
                    if pd.notna(margin):
                        lines.append(f"- Average Profit Margin: {margin * 100:.1f}%")

            return "\n".join(lines)

        except Exception as exc:
            return f"Error generating summary: {str(exc)}"

    def create_sales_overview(self):
        """Create an overview visualization of sales trends.

        Returns:
            A Plotly figure with a line per available series (``sales``,
            ``revenue``, and a 7-row rolling mean of ``sales``), or ``None``
            when no rows are loaded or the figure cannot be built.
        """
        if self.df is None or len(self.df) == 0:
            return None

        try:
            columns = self.df.columns
            # Plot against the detected date column, else the row index.
            x_values = self.df[self.date_column] if self.date_column else self.df.index

            fig = go.Figure()

            if 'sales' in columns:
                fig.add_trace(go.Scatter(
                    x=x_values,
                    y=self.df['sales'],
                    name='Sales',
                    line=dict(color='blue')
                ))

            if 'revenue' in columns:
                fig.add_trace(go.Scatter(
                    x=x_values,
                    y=self.df['revenue'],
                    name='Revenue',
                    line=dict(color='green')
                ))

            if 'sales' in columns:
                fig.add_trace(go.Scatter(
                    x=x_values,
                    y=self.df['sales'].rolling(7).mean(),
                    name='7-day Moving Average',
                    line=dict(color='red', dash='dash')
                ))

            fig.update_layout(
                title='Sales and Revenue Overview',
                xaxis_title='Date' if self.date_column else 'Index',
                yaxis_title='Amount',
                hovermode='x unified'
            )

            return fig

        except Exception:
            # Best effort: a broken chart must not block the text summary,
            # but the reason should not vanish either.
            logging.getLogger(__name__).exception("Failed to build sales overview figure")
            return None

    def _metric_series(self, metric):
        """Return the series for *metric*, deriving profit if needed, or None."""
        columns = self.df.columns
        if metric in columns:
            return self.df[metric]
        # Same definition the summary's margin uses: revenue minus costs.
        if metric == 'profit' and 'revenue' in columns and 'costs' in columns:
            return self.df['revenue'] - self.df['costs']
        return None

    def answer_sales_query(self, query):
        """Process natural language queries about sales.

        The query is matched by substring: a period keyword (``today``,
        ``week``, ``month``, ``year``; default: all data) and a metric keyword
        (``revenue``, ``profit``, ``cost``; default: ``sales``).

        The value reported is the last row for ``today``, the mean of the last
        7 / 30 rows for ``week`` / ``month``, the mean of all rows for
        ``year``, and the sum of all rows otherwise.

        Args:
            query: Free-text question; ``None`` is treated as an empty query.

        Returns:
            A one-line answer, or a message explaining why none is available.
        """
        try:
            if self.df is None:
                return "Please load data first."

            text = (query or "").lower()

            time_period = next(
                (word for word in self._PERIOD_KEYWORDS if word in text), 'all'
            )
            metric = next(
                (name for word, name in self._METRIC_KEYWORDS if word in text), 'sales'
            )

            series = self._metric_series(metric)
            if series is None:
                return f"No {metric} data found in the dataset"

            if time_period == 'today':
                value = series.iloc[-1]
            elif time_period == 'week':
                value = series.tail(7).mean()
            elif time_period == 'month':
                value = series.tail(30).mean()
            elif time_period == 'year':
                value = series.mean()
            else:
                value = series.sum()

            return f"{time_period.capitalize()} {metric}: {value:,.2f}"

        except Exception as exc:
            return f"Error processing query: {str(exc)}"

def create_gradio_interface():
    """Create the Gradio interface.

    Builds a Blocks layout (file upload + status, summary box, query box +
    answer, plot) and wires two ``change`` events to a single
    ``AugmentedAnalytics`` instance created here.

    NOTE(review): that one instance is captured by both callbacks, so loaded
    data appears to be shared by everyone using this interface -- confirm
    this is intended.

    Returns:
        The configured ``gr.Blocks`` object (not yet launched).
    """
    analytics = AugmentedAnalytics()

    def load_data_callback(file):
        """Load the uploaded file; outputs are (summary, plot, status)."""
        if file is None:
            # Order must match outputs=[sales_summary, output_plot, load_status];
            # the plot slot gets None, never a string.
            return "Please upload a file.", None, ""
        return analytics.load_data(file)

    def process_query(query, file):
        """Answer *query*, loading *file* first if nothing is loaded yet."""
        try:
            if analytics.df is None and file is not None:
                analytics.load_data(file)
            return analytics.answer_sales_query(query)
        except Exception as exc:
            return f"Error: {str(exc)}"

    with gr.Blocks() as interface:
        gr.Markdown("# Augmented Analytics Dashboard")

        with gr.Row():
            file_input = gr.File(label="Upload CSV File")
            load_status = gr.Textbox(label="Status", interactive=False)

        with gr.Row():
            sales_summary = gr.Textbox(
                label="Sales Summary",
                lines=8,
                interactive=False
            )

        with gr.Row():
            query_input = gr.Textbox(
                label="Ask about sales (e.g., 'How much sales this week?' or 'Show monthly revenue')",
                placeholder="Type your question here...",
                interactive=True
            )
            query_output = gr.Textbox(label="Answer", interactive=False)

        with gr.Row():
            output_plot = gr.Plot(label="Sales Visualization")

        # Set up event handlers
        file_input.change(
            load_data_callback,
            inputs=[file_input],
            outputs=[sales_summary, output_plot, load_status]
        )

        # Wired to ``change``, so the answer is recomputed whenever the
        # textbox value changes.
        query_input.change(
            process_query,
            inputs=[query_input, file_input],
            outputs=[query_output]
        )

    return interface

# Launch the interface only when run as a script, not when imported.
if __name__ == "__main__":
    interface = create_gradio_interface()
    # share=True is passed straight to Gradio's launch(); see its
    # documentation for what the share option exposes.
    interface.launch(share=True)