entropy25 committed on
Commit
49f0473
Β·
verified Β·
1 Parent(s): 3c88859

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -177
app.py CHANGED
@@ -1,213 +1,283 @@
1
- import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import plotly.graph_objects as go
5
  import plotly.express as px
 
6
  from datetime import datetime
7
- import io
 
8
 
9
def process_data(file):
    """Process an uploaded tab-separated CSV file and generate all analyses.

    Parameters
    ----------
    file : uploaded-file wrapper or None
        Only ``file.name`` (a filesystem path) is used to read the data.

    Returns
    -------
    tuple
        ``(summary_md, overview_fig, material_fig, correlation_fig,
        time_fig, anomaly_md)``; on any failure the first element is an
        error message and the remaining five are ``None`` so each UI
        output slot still receives a value.
    """
    if file is None:
        return "Please upload a CSV file", None, None, None, None, None

    try:
        # Read the uploaded file.
        # NOTE: despite the .csv extension, the data is expected to be tab-separated.
        df = pd.read_csv(file.name, sep='\t')

        # Data preprocessing: 'date' is parsed strictly as MM/DD/YYYY so
        # malformed rows surface as an error instead of silently shifting.
        df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
        if 'original_date' in df.columns:
            # errors='ignore' leaves the column unparsed on failure —
            # NOTE(review): presumably intentional best-effort parsing; confirm.
            df['original_date'] = pd.to_datetime(df['original_date'], format='%d/%m/%Y', errors='ignore')

        # Derived calendar features consumed by the downstream plots.
        df['day_of_week'] = df['date'].dt.day_name()
        df['week'] = df['date'].dt.isocalendar().week
        df['month'] = df['date'].dt.month
        df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])

        # Generate all analyses
        summary_text = generate_summary(df)
        overview_plot = create_overview_plot(df)
        material_plot = create_material_analysis(df)
        correlation_plot = create_correlation_analysis(df)
        time_analysis_plot = create_time_analysis(df)
        anomaly_report = detect_anomalies_report(df)

        return summary_text, overview_plot, material_plot, correlation_plot, time_analysis_plot, anomaly_report

    except Exception as e:
        # Broad catch is deliberate: any read/parse/plot failure becomes a
        # user-facing message rather than a stack trace in the UI.
        error_msg = f"Error processing file: {str(e)}\n\nPlease ensure your CSV file has the required columns: date, weight_kg, material_type"
        return error_msg, None, None, None, None, None
41
 
42
def generate_summary(df):
    """Render the markdown summary report for the production dataset.

    Expects 'date' (datetime64), 'weight_kg' and 'material_type' columns;
    a 'shift' column, when present, adds a shift-performance section.
    """
    total_production = df['weight_kg'].sum()
    total_items = len(df)
    daily_avg = df.groupby('date')['weight_kg'].sum().mean()

    first_day = df['date'].min().strftime('%Y-%m-%d')
    last_day = df['date'].max().strftime('%Y-%m-%d')

    # Assemble the report as parts and join once at the end.
    parts = [f"""# Production Data Analysis Report
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Dataset Overview
- **Total Records**: {total_items:,}
- **Date Range**: {first_day} to {last_day}
- **Production Days**: {df['date'].nunique()}
- **Total Production**: {total_production:,.0f} kg
- **Daily Average**: {daily_avg:,.0f} kg

## Material Type Breakdown"""]

    for material in df['material_type'].unique():
        subset = df[df['material_type'] == material]
        subtotal = subset['weight_kg'].sum()
        share = subtotal / total_production * 100
        parts.append(f"\n- **{material.title()}**: {subtotal:,.0f} kg ({share:.1f}%) - {len(subset):,} records")

    # Shift analysis (optional column)
    if 'shift' in df.columns:
        shift_stats = df.groupby('shift')['weight_kg'].agg(['sum', 'mean', 'count'])
        parts.append("\n\n## Shift Performance")
        for shift in shift_stats.index:
            parts.append(f"\n- **Shift {shift}**: {shift_stats.loc[shift, 'sum']:,.0f} kg total, {shift_stats.loc[shift, 'mean']:.1f} kg avg")

    return "".join(parts)
75
 
76
def create_overview_plot(df):
    """Line chart of total production weight per day."""
    per_day = df.groupby('date')['weight_kg'].sum().reset_index()

    fig = px.line(
        per_day,
        x='date',
        y='weight_kg',
        title='Daily Production Trend',
        labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'},
        template='plotly_white',
    )
    # Single trace — the legend adds nothing here.
    fig.update_layout(height=400, showlegend=False)
    return fig
87
 
88
def create_material_analysis(df):
    """Line chart comparing daily production totals across material types."""
    per_day_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()

    fig = px.line(
        per_day_material,
        x='date',
        y='weight_kg',
        color='material_type',
        title='Daily Production by Material Type',
        labels={'weight_kg': 'Weight (kg)', 'date': 'Date'},
        template='plotly_white',
    )
    fig.update_layout(height=400)
    return fig
99
-
100
def create_correlation_analysis(df):
    """Heatmap of correlations between the material types' daily totals."""
    pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)

    # Guard clause: with a single material there is nothing to correlate,
    # so show an annotated placeholder figure instead.
    if len(pivot.columns) <= 1:
        fig = go.Figure()
        fig.add_annotation(text="Only one material type - correlation analysis not applicable",
                           xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
        fig.update_layout(title="Material Type Correlation Matrix", height=400)
        return fig

    fig = px.imshow(pivot.corr(),
                    title='Material Type Correlation Matrix',
                    template='plotly_white',
                    color_continuous_scale='RdBu',
                    aspect='auto')
    fig.update_layout(height=400)
    return fig
120
-
121
def create_time_analysis(df):
    """Bar chart of average production per weekday, split by material type."""
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    pattern = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
    # Order weekdays chronologically rather than alphabetically.
    pattern['day_of_week'] = pd.Categorical(pattern['day_of_week'], categories=day_order, ordered=True)
    pattern = pattern.sort_values('day_of_week')

    fig = px.bar(pattern,
                 x='day_of_week',
                 y='weight_kg',
                 color='material_type',
                 title='Weekly Production Pattern (Average by Day)',
                 labels={'weight_kg': 'Average Weight (kg)', 'day_of_week': 'Day of Week'},
                 template='plotly_white')
    fig.update_layout(height=400)
    return fig
136
 
137
def detect_anomalies_report(df):
    """Build a markdown report of per-material weight outliers (1.5*IQR rule)."""
    report = "# Anomaly Detection Report\n\n"

    for material in df['material_type'].unique():
        subset = df[df['material_type'] == material]

        # Classic Tukey fence around the middle 50% of the weights.
        q1 = subset['weight_kg'].quantile(0.25)
        q3 = subset['weight_kg'].quantile(0.75)
        spread = q3 - q1
        lower = q1 - 1.5 * spread
        upper = q3 + 1.5 * spread
        anomalies = subset[(subset['weight_kg'] < lower) | (subset['weight_kg'] > upper)]

        report += f"## {material.title()} Material\n"
        report += f"- **Normal Range**: {lower:.1f} - {upper:.1f} kg\n"
        report += f"- **Anomalies Detected**: {len(anomalies)}\n"

        if len(anomalies) > 0:
            # Only the first ten dates are listed explicitly.
            dates_list = anomalies['date'].dt.strftime('%Y-%m-%d').head(10).tolist()
            report += f"- **Anomaly Dates**: {', '.join(dates_list)}\n"
            if len(anomalies) > 10:
                report += f" ... and {len(anomalies) - 10} more\n"
        report += "\n"

    return report
166
 
167
# Create Gradio interface
# Top-level UI definition: widgets are declared inside the Blocks context and
# the analyze button is wired to process_data below.
with gr.Blocks(title="Production Data Analysis", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏭 Production Data Analysis Dashboard")
    gr.Markdown("Upload your production data CSV file to generate comprehensive analysis reports and visualizations.")

    with gr.Row():
        # type="filepath" means process_data receives an object whose .name
        # is a path on disk.
        file_input = gr.File(
            label="Upload CSV File",
            file_types=[".csv"],
            type="filepath"
        )

    analyze_btn = gr.Button("Analyze Data", variant="primary", size="lg")

    # Text reports (markdown) on top...
    with gr.Row():
        with gr.Column(scale=1):
            summary_output = gr.Markdown(label="Summary Report")
            anomaly_output = gr.Markdown(label="Anomaly Report")

    # ...and the four plots in a 2x2 grid below.
    with gr.Row():
        with gr.Column():
            overview_plot = gr.Plot(label="Production Overview")
            correlation_plot = gr.Plot(label="Correlation Analysis")
        with gr.Column():
            material_plot = gr.Plot(label="Material Analysis")
            time_plot = gr.Plot(label="Time Pattern Analysis")

    # The outputs list order must match the 6-tuple returned by process_data.
    analyze_btn.click(
        fn=process_data,
        inputs=[file_input],
        outputs=[summary_output, overview_plot, material_plot, correlation_plot, time_plot, anomaly_output]
    )

    gr.Markdown("""
## Data Format Requirements
Your CSV file should contain the following columns:
- `date`: Date in MM/DD/YYYY format
- `weight_kg`: Production weight in kilograms
- `material_type`: Type of material (e.g., liquid, solid, waste_water)
- `shift`: Shift number (optional)
- `number`: Item number (optional)

The file should be tab-separated (TSV format with .csv extension).
""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
if __name__ == "__main__":
    # Launch the Gradio server only when run as a script, not when imported.
    demo.launch()
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
4
  import plotly.express as px
5
+ import plotly.graph_objects as go
6
  from datetime import datetime
7
+ import google.generativeai as genai
8
+ import json
9
 
10
# Page config
# NOTE: st.set_page_config must be the first Streamlit call in the script;
# layout="wide" uses the full browser width for the dashboard.
st.set_page_config(
    page_title="Production Data Analysis",
    page_icon="🏭",
    layout="wide"
)
16
+
17
# Initialize Gemini
@st.cache_resource
def init_gemini():
    """Configure the Gemini client once per server process.

    Returns
    -------
    genai.GenerativeModel or None
        A model handle when GOOGLE_API_KEY is present in Streamlit secrets;
        None otherwise, so the UI can degrade gracefully without AI.
    """
    try:
        api_key = st.secrets.get("GOOGLE_API_KEY", "")
    except FileNotFoundError:
        # st.secrets raises FileNotFoundError when no secrets.toml exists at
        # all; treat that the same as a missing key instead of crashing the
        # whole app at startup.
        return None
    if api_key:
        genai.configure(api_key=api_key)
        return genai.GenerativeModel('gemini-1.5-flash')
    return None
25
+
26
# Data processing functions
@st.cache_data
def process_data(df):
    """Parse dates and derive the calendar features used by the analyses.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw upload with 'date' strings in MM/DD/YYYY format.

    Returns
    -------
    pandas.DataFrame
        A new frame with parsed 'date' plus day_of_week/week/month/is_weekend.
    """
    # Work on a copy: mutating the argument of an @st.cache_data function
    # makes behavior differ between cache hits and misses, and silently
    # alters the caller's frame.
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
    df['day_of_week'] = df['date'].dt.day_name()
    df['week'] = df['date'].dt.isocalendar().week
    df['month'] = df['date'].dt.month
    df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])
    return df
 
 
 
 
 
 
36
 
37
def generate_summary(df):
    """Compute summary statistics for the processed production DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have 'date' (datetime64), 'weight_kg' and 'material_type' columns.

    Returns
    -------
    dict
        Keys: total_production, total_items, daily_avg, date_range,
        production_days, and 'materials' mapping each material type to its
        total / percentage / count.
    """
    total_production = df['weight_kg'].sum()
    total_items = len(df)
    daily_avg = df.groupby('date')['weight_kg'].sum().mean()

    summary = {
        'total_production': total_production,
        'total_items': total_items,
        'daily_avg': daily_avg,
        'date_range': f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}",
        'production_days': df['date'].nunique()
    }

    # Material breakdown — compute each subtotal once instead of twice.
    material_stats = {}
    for material in df['material_type'].unique():
        mat_data = df[df['material_type'] == material]
        mat_total = mat_data['weight_kg'].sum()
        material_stats[material] = {
            'total': mat_total,
            'percentage': mat_total / total_production * 100,
            'count': len(mat_data)
        }

    summary['materials'] = material_stats
    return summary
63
 
64
def detect_anomalies(df):
    """Flag per-material production outliers using the 1.5*IQR fence."""
    findings = {}
    for material in df['material_type'].unique():
        subset = df[df['material_type'] == material]

        q1 = subset['weight_kg'].quantile(0.25)
        q3 = subset['weight_kg'].quantile(0.75)
        fence = 1.5 * (q3 - q1)
        lower_bound = q1 - fence
        upper_bound = q3 + fence

        flagged = subset[(subset['weight_kg'] < lower_bound) |
                         (subset['weight_kg'] > upper_bound)]

        findings[material] = {
            'count': len(flagged),
            'normal_range': f"{lower_bound:.1f} - {upper_bound:.1f} kg",
            # The UI only surfaces the first five anomaly dates.
            'dates': flagged['date'].dt.strftime('%Y-%m-%d').tolist()[:5]
        }

    return findings
 
85
 
86
def create_plots(df):
    """Build the dashboard's plotly figures, keyed by section name."""
    plots = {}

    # Daily production trend
    per_day = df.groupby('date')['weight_kg'].sum().reset_index()
    plots['overview'] = px.line(
        per_day, x='date', y='weight_kg',
        title='Daily Production Trend',
        labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'}
    )

    # Material comparison
    per_day_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
    plots['materials'] = px.line(
        per_day_material, x='date', y='weight_kg', color='material_type',
        title='Production by Material Type'
    )

    # Weekly pattern — weekdays ordered chronologically, not alphabetically.
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    weekday_avg = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
    weekday_avg['day_of_week'] = pd.Categorical(weekday_avg['day_of_week'], categories=day_order, ordered=True)
    weekday_avg = weekday_avg.sort_values('day_of_week')
    plots['weekly'] = px.bar(
        weekday_avg, x='day_of_week', y='weight_kg', color='material_type',
        title='Weekly Production Pattern'
    )

    # Correlation matrix — only meaningful with 2+ material types, so the
    # 'correlation' key may be absent (callers must check for it).
    pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
    if len(pivot.columns) > 1:
        plots['correlation'] = px.imshow(
            pivot.corr(), title='Material Type Correlation Matrix',
            color_continuous_scale='RdBu'
        )

    return plots
 
126
 
127
def query_llm(model, data_summary, user_question):
    """Ask the Gemini model a question, grounded in the data summary.

    Returns the model's text answer, or an error string if the call fails.
    """
    # Build the prompt as parts and join once.
    parts = [f"""
You are a production data analyst. Here's the current production data summary:

Production Overview:
- Total Production: {data_summary['total_production']:,.0f} kg
- Production Period: {data_summary['date_range']}
- Daily Average: {data_summary['daily_avg']:,.0f} kg
- Production Days: {data_summary['production_days']}

Material Breakdown:
"""]

    for material, stats in data_summary['materials'].items():
        parts.append(f"- {material.title()}: {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)\n")

    parts.append(f"\nUser Question: {user_question}\n\nPlease provide a concise, data-driven answer based on this production data.")
    prompt = "".join(parts)

    try:
        return model.generate_content(prompt).text
    except Exception as e:
        # Surface provider errors as plain text instead of crashing the UI.
        return f"Error querying AI: {str(e)}"
 
 
 
151
 
152
# Main app
def main():
    """Render the Streamlit dashboard: upload, metrics, charts, anomalies, AI chat."""
    st.title("🏭 Production Data Analysis Dashboard")
    st.markdown("Upload your production data and get AI-powered insights")

    # Initialize Gemini (None when no API key is configured)
    model = init_gemini()

    # Sidebar: file upload plus AI availability indicator
    with st.sidebar:
        st.header("📊 Data Upload")
        uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])

        if model:
            st.success("🤖 AI Assistant Ready")
        else:
            st.warning("⚠️ AI Assistant unavailable (API key needed)")

    if uploaded_file is not None:
        # Load and process data
        try:
            # NOTE: despite the .csv extension the upload is read as tab-separated.
            df = pd.read_csv(uploaded_file, sep='\t')
            df = process_data(df)

            # Generate analysis
            summary = generate_summary(df)
            anomalies = detect_anomalies(df)
            plots = create_plots(df)

            # Display results: headline metrics in a 4-column row
            col1, col2, col3, col4 = st.columns(4)

            with col1:
                st.metric("Total Production", f"{summary['total_production']:,.0f} kg")
            with col2:
                st.metric("Daily Average", f"{summary['daily_avg']:,.0f} kg")
            with col3:
                st.metric("Production Days", summary['production_days'])
            with col4:
                st.metric("Material Types", len(summary['materials']))

            # Charts in a 2x2 grid
            st.subheader("📈 Production Trends")
            col1, col2 = st.columns(2)

            with col1:
                st.plotly_chart(plots['overview'], use_container_width=True)
            with col2:
                st.plotly_chart(plots['materials'], use_container_width=True)

            col3, col4 = st.columns(2)
            with col3:
                st.plotly_chart(plots['weekly'], use_container_width=True)
            with col4:
                # create_plots omits 'correlation' when only one material exists.
                if 'correlation' in plots:
                    st.plotly_chart(plots['correlation'], use_container_width=True)

            # Material breakdown: one expander per material type
            st.subheader("📋 Material Analysis")
            for material, stats in summary['materials'].items():
                with st.expander(f"{material.title()} - {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"):
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("Total Weight", f"{stats['total']:,.0f} kg")
                    with col2:
                        st.metric("Percentage", f"{stats['percentage']:.1f}%")
                    with col3:
                        st.metric("Records", stats['count'])

            # Anomaly detection results
            st.subheader("⚠️ Anomaly Detection")
            for material, anom in anomalies.items():
                if anom['count'] > 0:
                    st.warning(f"**{material.title()}**: {anom['count']} anomalies detected")
                    st.caption(f"Normal range: {anom['normal_range']}")
                    if anom['dates']:
                        st.caption(f"Recent anomaly dates: {', '.join(anom['dates'])}")
                else:
                    st.success(f"**{material.title()}**: No anomalies detected")

            # AI Chat Interface (only shown when the model is configured)
            if model:
                st.subheader("🤖 AI Production Assistant")

                # Predefined questions rendered as buttons in two columns
                st.markdown("**Quick Questions:**")
                quick_questions = [
                    "What are the key production trends?",
                    "Which material type shows the best consistency?",
                    "Are there any concerning patterns in the data?",
                    "What recommendations do you have for optimization?"
                ]

                cols = st.columns(2)
                for i, question in enumerate(quick_questions):
                    with cols[i % 2]:
                        # key=f"q_{i}" keeps button widget keys unique.
                        if st.button(question, key=f"q_{i}"):
                            with st.spinner("AI analyzing..."):
                                answer = query_llm(model, summary, question)
                                st.success(f"**Q:** {question}")
                                st.write(f"**A:** {answer}")

                # Custom question
                st.markdown("**Ask a Custom Question:**")
                user_question = st.text_input("Your question about the production data:")

                if user_question and st.button("Get AI Answer"):
                    with st.spinner("AI analyzing..."):
                        answer = query_llm(model, summary, user_question)
                        st.success(f"**Q:** {user_question}")
                        st.write(f"**A:** {answer}")

        except Exception as e:
            # Any failure in read/process/render collapses to a friendly message.
            st.error(f"Error processing file: {str(e)}")
            st.info("Please ensure your CSV file has columns: date, weight_kg, material_type")

    else:
        st.info("👆 Please upload a CSV file to begin analysis")

    st.markdown("""
    ### 📋 Data Format Requirements
    Your CSV file should contain:
    - `date`: Date in MM/DD/YYYY format
    - `weight_kg`: Production weight in kilograms
    - `material_type`: Type of material (liquid, solid, waste_water, etc.)
    - `shift`: Shift number (optional)

    The file should be tab-separated (TSV format with .csv extension).
    """)
281
 
282
if __name__ == "__main__":
    # Streamlit re-runs this script top-to-bottom; the guard keeps imports side-effect free.
    main()