entropy25 committed on
Commit
6a4a796
·
verified ·
1 Parent(s): 161ff28

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +537 -12
app.py CHANGED
@@ -1,18 +1,543 @@
 
 
 
 
 
 
 
 
1
 
2
- 🔧 API Configuration Help
 
 
 
 
 
3
 
4
- 🏭 Production Data Analysis Dashboard
5
- Upload your production data and get AI-powered insights
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- API 配额已用完 - 请检查使用限制
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- 👆 Please upload a CSV file to begin analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- 📋 Data Format Requirements
12
- Your CSV file should contain:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- date: Date in MM/DD/YYYY format
15
- weight_kg: Production weight in kilograms
16
- material_type: Type of material (liquid, solid, waste_water, etc.)
17
- shift: Shift number (optional)
18
- The file should be tab-separated (TSV format with .csv extension).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
6
+ from datetime import datetime
7
+ import json
8
+ import os
9
 
10
# Page-level settings: browser-tab title and icon, wide layout.
st.set_page_config(page_title="Production Data Analysis", page_icon="🏭", layout="wide")
16
 
17
# Initialize Gemini 1.5 Pro with better error handling
def _resolve_api_key():
    """Return the first non-blank Google API key found, or an empty string.

    Lookup order: Streamlit secret ``GOOGLE_API_KEY``, environment variable
    ``GOOGLE_API_KEY``, then Streamlit secret ``api_key``.
    """
    def _from_secrets(name):
        # Secrets access is best-effort: the lookup may raise (presumably when
        # no secrets are configured), which must not abort the other sources.
        try:
            value = st.secrets.get(name, "")
        except Exception:
            return ""
        return value.strip() if isinstance(value, str) else ""

    return (
        _from_secrets("GOOGLE_API_KEY")
        or os.environ.get("GOOGLE_API_KEY", "").strip()
        or _from_secrets("api_key")
    )


@st.cache_resource
def init_gemini():
    """Create and smoke-test the Gemini 1.5 Pro model.

    Returns:
        The configured ``GenerativeModel`` when the API key is found and a
        short test prompt returns text; otherwise ``None``. Every failure path
        reports its reason through a Streamlit message.
    """
    try:
        import google.generativeai as genai
    except ImportError:
        st.error("❌ Google Generative AI 库未安装")
        return None

    try:
        api_key = _resolve_api_key()
        if not api_key:
            st.warning("⚠️ GOOGLE_API_KEY not found")
            return None

        # Configure with API key
        genai.configure(api_key=api_key)

        # Use Gemini 1.5 Pro model
        model = genai.GenerativeModel('gemini-1.5-pro-latest')

        # Configure safety settings to avoid blocking
        safety_settings = [
            {"category": category, "threshold": "BLOCK_NONE"}
            for category in (
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            )
        ]

        # Test the model with a simple query to verify it works
        try:
            test_response = model.generate_content(
                "Hello, respond with 'Gemini 1.5 Pro API working'",
                safety_settings=safety_settings,
                generation_config={
                    'temperature': 0.1,
                    'top_p': 0.8,
                    'top_k': 40,
                    'max_output_tokens': 100,
                },
            )
            if test_response and test_response.text:
                st.success(f"✅ {test_response.text}")
                return model
            # Previously this case returned None without telling the user why.
            st.error("❌ Model test failed: empty response")
            return None
        except Exception as e:
            error_msg = str(e).lower()
            if "403" in error_msg:
                st.error("❌ API Key permission denied - Please check Google AI Studio API settings")
            elif "quota" in error_msg:
                st.error("❌ API quota exceeded - Please check usage limits")
            elif "billing" in error_msg:
                st.error("❌ Billing required - Gemini 1.5 Pro may need paid account")
            else:
                st.error(f"❌ Model test failed: {str(e)}")
            return None

    except Exception as e:
        st.error(f"❌ 初始化 Gemini 时出错: {str(e)}")
        return None
113
 
114
+ # Data processing functions
115
@st.cache_data
def process_data(df):
    """Parse the ``date`` column and derive calendar features.

    Args:
        df: Raw production data. It is not modified; a copy is returned.

    Returns:
        A copy of ``df`` with ``date`` converted to datetimes and the columns
        ``day_of_week``, ``week``, ``month`` and ``is_weekend`` added. When
        ``date`` is missing the input is returned unchanged so the caller can
        report the missing column. On any other failure an error is shown and
        the frame is returned in whatever state was reached.
    """
    try:
        if 'date' not in df.columns:
            return df

        # Work on a copy: this function is cached, so its argument must not be
        # mutated in place.
        df = df.copy()

        # Try the explicit formats first; the first one that parses every row wins.
        parsed = None
        for date_format in ('%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%m-%d-%Y'):
            try:
                parsed = pd.to_datetime(df['date'], format=date_format)
                break
            except (ValueError, TypeError):
                continue

        # If all formats failed, fall back to pandas' automatic parsing;
        # unparseable values become NaT instead of raising.
        if parsed is None:
            parsed = pd.to_datetime(df['date'], errors='coerce')
        df['date'] = parsed

        # Add time-based features
        df['day_of_week'] = df['date'].dt.day_name()
        df['week'] = df['date'].dt.isocalendar().week
        df['month'] = df['date'].dt.month
        df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])

        return df
    except Exception as e:
        st.error(f"Error processing data: {str(e)}")
        return df
143
 
144
def generate_summary(df):
    """Generate summary statistics for the production data.

    Args:
        df: Frame with a datetime ``date`` column, a numeric ``weight_kg``
            column and a ``material_type`` column.

    Returns:
        A dict with ``total_production``, ``total_items``, ``daily_avg``,
        ``date_range``, ``production_days`` and ``materials``. ``materials``
        maps each material (in order of first appearance) to its ``total``,
        ``percentage`` of overall production and record ``count``. Rows with a
        missing material type are left out of the breakdown. Returns ``{}``
        after showing an error if the statistics cannot be computed.
    """
    try:
        total_production = df['weight_kg'].sum()
        daily_totals = df.groupby('date')['weight_kg'].sum()
        first_day = df['date'].min()
        last_day = df['date'].max()

        summary = {
            'total_production': total_production,
            'total_items': len(df),
            'daily_avg': daily_totals.mean(),
            'date_range': f"{first_day.strftime('%Y-%m-%d')} to {last_day.strftime('%Y-%m-%d')}",
            'production_days': df['date'].nunique(),
        }

        # Material breakdown: a single grouped pass instead of re-filtering the
        # whole frame once per material. sort=False keeps first-appearance order.
        grouped = df.groupby('material_type', sort=False)['weight_kg']
        totals = grouped.sum()
        counts = grouped.size()

        material_stats = {}
        for material, total, count in zip(totals.index, totals, counts):
            material_stats[material] = {
                'total': total,
                # Guard the all-zero case, which would otherwise produce NaN.
                'percentage': total / total_production * 100 if total_production else 0.0,
                'count': int(count),
            }

        summary['materials'] = material_stats
        return summary
    except Exception as e:
        st.error(f"Error generating summary: {str(e)}")
        return {}
174
 
175
def detect_anomalies(df):
    """Detect production anomalies per material using the 1.5 * IQR rule.

    Args:
        df: Frame with ``material_type``, numeric ``weight_kg`` and a datetime
            ``date`` column.

    Returns:
        A dict mapping each material (in order of first appearance) to
        ``count`` (number of outlier rows), ``normal_range`` (formatted lower
        and upper bounds) and ``dates`` (up to five outlier dates as
        ``YYYY-MM-DD`` strings, in row order, skipping missing dates). If an
        error occurs it is shown and the results gathered so far are returned.
    """
    anomalies = {}
    try:
        # One grouped pass; sort=False keeps materials in first-appearance order.
        for material, mat_data in df.groupby('material_type', sort=False):
            weights = mat_data['weight_kg']
            q1 = weights.quantile(0.25)
            q3 = weights.quantile(0.75)
            iqr = q3 - q1
            lower_bound = q1 - 1.5 * iqr
            upper_bound = q3 + 1.5 * iqr

            outliers = mat_data[(weights < lower_bound) | (weights > upper_bound)]
            # Drop missing dates so the list holds only strings; the caller
            # joins these with ', '.
            outlier_dates = outliers['date'].dropna()

            anomalies[material] = {
                'count': len(outliers),
                'normal_range': f"{lower_bound:.1f} - {upper_bound:.1f} kg",
                'dates': outlier_dates.dt.strftime('%Y-%m-%d').tolist()[:5],
            }
    except Exception as e:
        st.error(f"Error detecting anomalies: {str(e)}")

    return anomalies
200
 
201
def create_plots(df):
    """Build the dashboard figures from the processed production data.

    Returns a dict that may contain ``overview`` (daily totals), ``materials``
    (daily totals per material), ``weekly`` (mean weight per weekday and
    material) and ``correlation`` (only when more than one material exists).
    If any step raises, an error is shown and the figures built up to that
    point are returned.
    """
    figures = {}
    weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    try:
        # Daily production trend
        per_day = df.groupby('date')['weight_kg'].sum().reset_index()
        figures['overview'] = px.line(
            per_day,
            x='date',
            y='weight_kg',
            title='Daily Production Trend',
            labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'},
        )

        # Material comparison
        per_day_and_material = (
            df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
        )
        figures['materials'] = px.line(
            per_day_and_material,
            x='date',
            y='weight_kg',
            color='material_type',
            title='Production by Material Type',
        )

        # Weekly pattern, with weekdays ordered Monday through Sunday
        by_weekday = (
            df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
        )
        by_weekday['day_of_week'] = pd.Categorical(
            by_weekday['day_of_week'], categories=weekday_order, ordered=True
        )
        by_weekday = by_weekday.sort_values('day_of_week')
        figures['weekly'] = px.bar(
            by_weekday,
            x='day_of_week',
            y='weight_kg',
            color='material_type',
            title='Weekly Production Pattern',
        )

        # Correlation matrix between the materials' daily totals
        pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
        if len(pivot.columns) > 1:
            figures['correlation'] = px.imshow(
                pivot.corr(),
                title='Material Type Correlation Matrix',
                color_continuous_scale='RdBu',
            )
    except Exception as e:
        st.error(f"Error creating plots: {str(e)}")

    return figures
244
+
245
def query_llm(model, data_summary, user_question):
    """Query Gemini 1.5 Pro with production data context.

    Args:
        model: Object exposing ``generate_content(prompt, safety_settings=...,
            generation_config=...)``; a falsy value means the assistant is
            unavailable.
        data_summary: Dict with ``total_production``, ``date_range``,
            ``daily_avg``, ``production_days`` and ``materials`` (material ->
            dict with ``total`` and ``percentage``).
        user_question: The question to append to the prompt.

    Returns:
        The model's answer text, or a human-readable message describing why no
        answer is available. This function does not raise; failures are mapped
        to messages.
    """
    if not model:
        return "AI Assistant is not available. Please check API configuration."

    try:
        # str() lets non-string material labels (e.g. numeric codes) be title-cased.
        material_lines = [
            f"- {str(material).title()}: {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"
            for material, stats in data_summary['materials'].items()
        ]

        context = "\n".join([
            "You are a production data analyst. Here's the current production data summary:",
            "",
            "Production Overview:",
            f"- Total Production: {data_summary['total_production']:,.0f} kg",
            f"- Production Period: {data_summary['date_range']}",
            f"- Daily Average: {data_summary['daily_avg']:,.0f} kg",
            f"- Production Days: {data_summary['production_days']}",
            "",
            "Material Breakdown:",
            *material_lines,
            "",
            f"User Question: {user_question}",
            "",
            "Please provide a concise, data-driven answer based on this production data.",
        ])

        # Configure safety settings for Gemini 1.5 Pro
        safety_settings = [
            {"category": category, "threshold": "BLOCK_NONE"}
            for category in (
                "HARM_CATEGORY_HARASSMENT",
                "HARM_CATEGORY_HATE_SPEECH",
                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "HARM_CATEGORY_DANGEROUS_CONTENT",
            )
        ]

        # Gemini 1.5 Pro generation config
        generation_config = {
            'temperature': 0.2,
            'top_p': 0.8,
            'top_k': 40,
            'max_output_tokens': 2048,
            'candidate_count': 1,
        }

        response = model.generate_content(
            context,
            safety_settings=safety_settings,
            generation_config=generation_config,
        )

        # NOTE(review): `.text` appears to raise ValueError when the response
        # carries no usable text (e.g. a blocked candidate) - confirm against
        # the SDK docs. Catching it here keeps the "blocked" message reachable.
        try:
            answer = response.text if response else None
        except ValueError:
            answer = None

        if answer:
            return answer
        if response and getattr(response, 'candidates', None):
            return "Response was blocked by safety filters. Please try rephrasing your question."
        return "No response received from Gemini 1.5 Pro."

    except Exception as e:
        error_msg = str(e).lower()
        if "403" in error_msg or "forbidden" in error_msg:
            return "❌ API access denied. Please check:\n1. API Key validity\n2. Gemini API is enabled\n3. Account has sufficient permissions"
        elif "quota" in error_msg or "resource_exhausted" in error_msg:
            return "❌ API quota exceeded. Please try again later or upgrade your account."
        elif "timeout" in error_msg:
            return "⏱️ Request timeout. Please try again."
        elif "billing" in error_msg:
            return "💳 Billing account required for Gemini 1.5 Pro."
        elif "safety" in error_msg:
            return "🛡️ Content blocked by safety filters. Please rephrase your question."
        else:
            return f"❌ AI service error: {str(e)}"
324
+
325
+ # Load data with better error handling
326
def load_data(uploaded_file):
    """Load an uploaded delimited text file into a DataFrame.

    Tries tab, comma and semicolon separators with utf-8, latin-1 and cp1252
    encodings, returning the first parse that yields at least three columns.
    If none qualifies, falls back to ``pd.read_csv`` with default settings.

    Args:
        uploaded_file: A file-like object (or anything ``pd.read_csv``
            accepts).

    Returns:
        The parsed DataFrame, or ``None`` after showing an error if the file
        cannot be read at all.
    """
    def _rewind():
        # Each read_csv call consumes the stream; without rewinding, every
        # attempt after the first would see an empty file.
        if hasattr(uploaded_file, 'seek'):
            uploaded_file.seek(0)

    try:
        # Try different separators and encodings
        for sep in ['\t', ',', ';']:
            for encoding in ['utf-8', 'latin-1', 'cp1252']:
                _rewind()
                try:
                    df = pd.read_csv(uploaded_file, sep=sep, encoding=encoding)
                except ValueError:
                    # Decode, parser and empty-data errors are all ValueError
                    # subclasses; move on to the next combination.
                    continue
                if len(df.columns) >= 3:  # Minimum expected columns
                    return df

        # If all attempts fail, try with default settings
        _rewind()
        return pd.read_csv(uploaded_file)

    except Exception as e:
        st.error(f"Error loading file: {str(e)}")
        return None
345
+
346
+ # Main app
347
def _render_api_help():
    """Render the sidebar help expander shown when Gemini is unavailable."""
    with st.expander("🔧 API Configuration Help"):
        st.markdown("""
        **Steps to enable Gemini AI:**

        1. **Get FREE API Key**:
           - Visit [Google AI Studio](https://aistudio.google.com/app/apikey)
           - Sign in with Google account
           - Create a new API Key (FREE)

        2. **Set API Key**:
           ```bash
           # Environment variable
           export GOOGLE_API_KEY="your_api_key_here"
           ```

        3. **Free Tier Limits**:
           - ✅ Gemini 1.5 Flash: 15 requests/minute (FREE)
           - ⚠️ Gemini 1.5 Pro: May require billing
           - 📊 Daily quota: Generous for testing

        4. **Troubleshooting 403 Errors**:
           - Check API Key is correctly copied
           - Ensure no extra spaces in key
           - Verify Google AI Studio access
           - Try creating a new API Key
           - Check if region is supported
        """)

        # Simplified API Key testing
        st.markdown("**🧪 Quick API Test**")
        test_key = st.text_input("Paste API Key to test:", type="password", key="api_test")
        if st.button("Test Key", key="test_btn") and test_key:
            with st.spinner("Testing..."):
                try:
                    import google.generativeai as genai
                    genai.configure(api_key=test_key.strip())
                    test_model = genai.GenerativeModel('gemini-1.5-flash-latest')
                    response = test_model.generate_content("Hello")
                    if response.text:
                        st.success("✅ API Key works!")
                    else:
                        st.error("❌ No response")
                except Exception as e:
                    if "403" in str(e):
                        st.error("❌ Access denied - Check key validity")
                    elif "quota" in str(e).lower():
                        st.error("❌ Quota exceeded - Try again later")
                    else:
                        st.error(f"❌ Error: {str(e)}")


def _render_upload_instructions():
    """Show the upload prompt and the expected data format."""
    st.info("👆 Please upload a CSV file to begin analysis")

    st.markdown("""
    ### 📋 Data Format Requirements
    Your CSV file should contain:
    - `date`: Date in MM/DD/YYYY format
    - `weight_kg`: Production weight in kilograms
    - `material_type`: Type of material (liquid, solid, waste_water, etc.)
    - `shift`: Shift number (optional)

    The file should be tab-separated (TSV format with .csv extension).
    """)


def _render_dashboard(summary, anomalies, plots):
    """Render headline metrics, charts, material breakdown and anomalies."""
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Production", f"{summary['total_production']:,.0f} kg")
    with col2:
        st.metric("Daily Average", f"{summary['daily_avg']:,.0f} kg")
    with col3:
        st.metric("Production Days", summary['production_days'])
    with col4:
        st.metric("Material Types", len(summary['materials']))

    # Charts: each one is drawn only if create_plots managed to build it.
    st.subheader("📈 Production Trends")
    col1, col2 = st.columns(2)
    with col1:
        if 'overview' in plots:
            st.plotly_chart(plots['overview'], use_container_width=True)
    with col2:
        if 'materials' in plots:
            st.plotly_chart(plots['materials'], use_container_width=True)

    col3, col4 = st.columns(2)
    with col3:
        if 'weekly' in plots:
            st.plotly_chart(plots['weekly'], use_container_width=True)
    with col4:
        if 'correlation' in plots:
            st.plotly_chart(plots['correlation'], use_container_width=True)

    # Material breakdown
    st.subheader("📋 Material Analysis")
    for material, stats in summary['materials'].items():
        # str() keeps non-string material labels (e.g. numeric codes) from
        # breaking the title-casing.
        label = str(material).title()
        with st.expander(f"{label} - {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"):
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Weight", f"{stats['total']:,.0f} kg")
            with col2:
                st.metric("Percentage", f"{stats['percentage']:.1f}%")
            with col3:
                st.metric("Records", stats['count'])

    # Anomaly detection
    st.subheader("⚠️ Anomaly Detection")
    for material, anom in anomalies.items():
        label = str(material).title()
        if anom['count'] > 0:
            st.warning(f"**{label}**: {anom['count']} anomalies detected")
            st.caption(f"Normal range: {anom['normal_range']}")
            if anom['dates']:
                st.caption(f"Recent anomaly dates: {', '.join(anom['dates'])}")
        else:
            st.success(f"**{label}**: No anomalies detected")


def _render_ai_assistant(model, summary):
    """Render the quick-question buttons and the free-form question box."""
    st.subheader("🤖 AI Production Assistant")

    # Predefined questions
    st.markdown("**Quick Questions:**")
    quick_questions = [
        "What are the key production trends?",
        "Which material type shows the best consistency?",
        "Are there any concerning patterns in the data?",
        "What recommendations do you have for optimization?",
    ]

    cols = st.columns(2)
    for i, question in enumerate(quick_questions):
        with cols[i % 2]:
            if st.button(question, key=f"q_{i}"):
                with st.spinner("AI analyzing..."):
                    answer = query_llm(model, summary, question)
                st.success(f"**Q:** {question}")
                st.write(f"**A:** {answer}")

    # Custom question
    st.markdown("**Ask a Custom Question:**")
    user_question = st.text_input("Your question about the production data:")

    if user_question and st.button("Get AI Answer"):
        with st.spinner("AI analyzing..."):
            answer = query_llm(model, summary, user_question)
        st.success(f"**Q:** {user_question}")
        st.write(f"**A:** {answer}")


# Main app
def main():
    """Run the dashboard: sidebar upload, analysis, charts and AI assistant."""
    st.title("🏭 Production Data Analysis Dashboard")
    st.markdown("Upload your production data and get AI-powered insights")

    # Initialize Gemini
    model = init_gemini()

    # Sidebar
    with st.sidebar:
        st.header("📊 Data Upload")
        uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])

        if model:
            st.success("🤖 Gemini AI Assistant Ready")
        else:
            st.warning("⚠️ Gemini AI Assistant unavailable")
            _render_api_help()

    if uploaded_file is None:
        _render_upload_instructions()
        return

    # Load and process data
    df = load_data(uploaded_file)
    if df is None:
        return

    try:
        # Validate required columns before processing, so a missing column
        # produces one clear message instead of a processing error first.
        required_cols = ['date', 'weight_kg', 'material_type']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            st.error(f"Missing required columns: {missing_cols}")
            st.info("Available columns: " + ", ".join(str(col) for col in df.columns))
            return

        df = process_data(df)

        # Generate analysis
        summary = generate_summary(df)
        if not summary:
            return

        anomalies = detect_anomalies(df)
        plots = create_plots(df)

        # Display results
        _render_dashboard(summary, anomalies, plots)

        # AI Chat Interface
        if model:
            _render_ai_assistant(model, summary)

    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        st.info("Please ensure your CSV file has the required format.")


if __name__ == "__main__":
    main()