entropy25 committed on
Commit
161ff28
·
verified ·
1 Parent(s): 4db8774

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -535
app.py CHANGED
@@ -1,541 +1,18 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- import plotly.express as px
5
- import plotly.graph_objects as go
6
- from datetime import datetime
7
- import json
8
- import os
9
 
10
- # Page config
11
- st.set_page_config(
12
- page_title="Production Data Analysis",
13
- page_icon="🏭",
14
- layout="wide"
15
- )
16
 
17
- # Initialize Gemini 1.5 Pro with better error handling
18
- @st.cache_resource
19
- def init_gemini():
20
- try:
21
- import google.generativeai as genai
22
-
23
- # Try multiple ways to get API key
24
- api_key = None
25
-
26
- # Method 1: Streamlit secrets
27
- try:
28
- api_key = st.secrets.get("GOOGLE_API_KEY", "")
29
- except:
30
- pass
31
-
32
- # Method 2: Environment variable
33
- if not api_key:
34
- api_key = os.environ.get("GOOGLE_API_KEY", "")
35
-
36
- # Method 3: Streamlit secrets alternative format
37
- if not api_key:
38
- try:
39
- api_key = st.secrets["GOOGLE_API_KEY"]
40
- except:
41
- pass
42
-
43
- # Method 4: Direct input fallback
44
- if not api_key:
45
- try:
46
- api_key = st.secrets.get("api_key", "")
47
- except:
48
- pass
49
-
50
- if api_key and api_key.strip():
51
- # Configure with API key
52
- genai.configure(api_key=api_key.strip())
53
-
54
- # Use Gemini 1.5 Pro model
55
- model = genai.GenerativeModel('gemini-1.5-pro-latest')
56
-
57
- # Configure safety settings to avoid blocking
58
- safety_settings = [
59
- {
60
- "category": "HARM_CATEGORY_HARASSMENT",
61
- "threshold": "BLOCK_NONE"
62
- },
63
- {
64
- "category": "HARM_CATEGORY_HATE_SPEECH",
65
- "threshold": "BLOCK_NONE"
66
- },
67
- {
68
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
69
- "threshold": "BLOCK_NONE"
70
- },
71
- {
72
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
73
- "threshold": "BLOCK_NONE"
74
- }
75
- ]
76
-
77
- # Test the model with a simple query to verify it works
78
- try:
79
- test_response = model.generate_content(
80
- "Hello, respond with 'Gemini 1.5 Pro API working'",
81
- safety_settings=safety_settings,
82
- generation_config={
83
- 'temperature': 0.1,
84
- 'top_p': 0.8,
85
- 'top_k': 40,
86
- 'max_output_tokens': 100,
87
- }
88
- )
89
- if test_response and test_response.text:
90
- st.success(f"✅ {test_response.text}")
91
- return model
92
- except Exception as e:
93
- error_msg = str(e).lower()
94
- if "403" in error_msg:
95
- st.error("❌ API Key 权限不足 - 请检查 Google AI Studio 中的 API 设置")
96
- elif "quota" in error_msg:
97
- st.error("❌ API 配额已用完 - 请检查使用限制")
98
- elif "billing" in error_msg:
99
- st.error("❌ 需要启用计费 - Gemini 1.5 Pro 可能需要付费账户")
100
- else:
101
- st.error(f"❌ 模型测试失败: {str(e)}")
102
- return None
103
- else:
104
- st.warning("⚠️ 未找到 GOOGLE_API_KEY")
105
- return None
106
-
107
- except ImportError:
108
- st.error("❌ Google Generative AI 库未安装")
109
- return None
110
- except Exception as e:
111
- st.error(f"❌ 初始化 Gemini 时出错: {str(e)}")
112
- return None
113
 
114
- # Data processing functions
115
- @st.cache_data
116
- def process_data(df):
117
- """Process and analyze production data"""
118
- try:
119
- # Handle different date formats more robustly
120
- if 'date' in df.columns:
121
- # Try multiple date formats
122
- for date_format in ['%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%m-%d-%Y']:
123
- try:
124
- df['date'] = pd.to_datetime(df['date'], format=date_format)
125
- break
126
- except:
127
- continue
128
-
129
- # If all formats failed, try pandas automatic parsing
130
- if df['date'].dtype == 'object':
131
- df['date'] = pd.to_datetime(df['date'], errors='coerce')
132
-
133
- # Add time-based features
134
- df['day_of_week'] = df['date'].dt.day_name()
135
- df['week'] = df['date'].dt.isocalendar().week
136
- df['month'] = df['date'].dt.month
137
- df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])
138
-
139
- return df
140
- except Exception as e:
141
- st.error(f"Error processing data: {str(e)}")
142
- return df
143
 
144
- def generate_summary(df):
145
- """Generate summary statistics"""
146
- try:
147
- total_production = df['weight_kg'].sum()
148
- total_items = len(df)
149
- daily_avg = df.groupby('date')['weight_kg'].sum().mean()
150
-
151
- summary = {
152
- 'total_production': total_production,
153
- 'total_items': total_items,
154
- 'daily_avg': daily_avg,
155
- 'date_range': f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}",
156
- 'production_days': df['date'].nunique()
157
- }
158
-
159
- # Material breakdown
160
- material_stats = {}
161
- for material in df['material_type'].unique():
162
- mat_data = df[df['material_type'] == material]
163
- material_stats[material] = {
164
- 'total': mat_data['weight_kg'].sum(),
165
- 'percentage': mat_data['weight_kg'].sum() / total_production * 100,
166
- 'count': len(mat_data)
167
- }
168
-
169
- summary['materials'] = material_stats
170
- return summary
171
- except Exception as e:
172
- st.error(f"Error generating summary: {str(e)}")
173
- return {}
174
 
175
- def detect_anomalies(df):
176
- """Detect production anomalies"""
177
- anomalies = {}
178
- try:
179
- for material in df['material_type'].unique():
180
- mat_data = df[df['material_type'] == material]
181
- if len(mat_data) > 0:
182
- Q1 = mat_data['weight_kg'].quantile(0.25)
183
- Q3 = mat_data['weight_kg'].quantile(0.75)
184
- IQR = Q3 - Q1
185
- lower_bound = Q1 - 1.5 * IQR
186
- upper_bound = Q3 + 1.5 * IQR
187
-
188
- outliers = mat_data[(mat_data['weight_kg'] < lower_bound) |
189
- (mat_data['weight_kg'] > upper_bound)]
190
-
191
- anomalies[material] = {
192
- 'count': len(outliers),
193
- 'normal_range': f"{lower_bound:.1f} - {upper_bound:.1f} kg",
194
- 'dates': outliers['date'].dt.strftime('%Y-%m-%d').tolist()[:5]
195
- }
196
- except Exception as e:
197
- st.error(f"Error detecting anomalies: {str(e)}")
198
-
199
- return anomalies
200
 
201
- def create_plots(df):
202
- """Create all visualization plots"""
203
- plots = {}
204
-
205
- try:
206
- # Daily production trend
207
- daily_total = df.groupby('date')['weight_kg'].sum().reset_index()
208
- plots['overview'] = px.line(
209
- daily_total, x='date', y='weight_kg',
210
- title='Daily Production Trend',
211
- labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'}
212
- )
213
-
214
- # Material comparison
215
- daily_by_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
216
- plots['materials'] = px.line(
217
- daily_by_material, x='date', y='weight_kg', color='material_type',
218
- title='Production by Material Type'
219
- )
220
-
221
- # Weekly pattern
222
- weekly_pattern = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
223
- day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
224
- weekly_pattern['day_of_week'] = pd.Categorical(weekly_pattern['day_of_week'], categories=day_order, ordered=True)
225
- weekly_pattern = weekly_pattern.sort_values('day_of_week')
226
-
227
- plots['weekly'] = px.bar(
228
- weekly_pattern, x='day_of_week', y='weight_kg', color='material_type',
229
- title='Weekly Production Pattern'
230
- )
231
-
232
- # Correlation matrix
233
- daily_pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
234
- if len(daily_pivot.columns) > 1:
235
- corr_matrix = daily_pivot.corr()
236
- plots['correlation'] = px.imshow(
237
- corr_matrix, title='Material Type Correlation Matrix',
238
- color_continuous_scale='RdBu'
239
- )
240
- except Exception as e:
241
- st.error(f"Error creating plots: {str(e)}")
242
-
243
- return plots
244
-
245
- def query_llm(model, data_summary, user_question):
246
- """Query Gemini 1.5 Pro with production data context"""
247
- if not model:
248
- return "AI Assistant is not available. Please check API configuration."
249
-
250
- try:
251
- context = f"""
252
- You are a production data analyst. Here's the current production data summary:
253
-
254
- Production Overview:
255
- - Total Production: {data_summary['total_production']:,.0f} kg
256
- - Production Period: {data_summary['date_range']}
257
- - Daily Average: {data_summary['daily_avg']:,.0f} kg
258
- - Production Days: {data_summary['production_days']}
259
-
260
- Material Breakdown:
261
- """
262
-
263
- for material, stats in data_summary['materials'].items():
264
- context += f"- {material.title()}: {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)\n"
265
-
266
- context += f"\nUser Question: {user_question}\n\nPlease provide a concise, data-driven answer based on this production data."
267
-
268
- # Configure safety settings for Gemini 1.5 Pro
269
- safety_settings = [
270
- {
271
- "category": "HARM_CATEGORY_HARASSMENT",
272
- "threshold": "BLOCK_NONE"
273
- },
274
- {
275
- "category": "HARM_CATEGORY_HATE_SPEECH",
276
- "threshold": "BLOCK_NONE"
277
- },
278
- {
279
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
280
- "threshold": "BLOCK_NONE"
281
- },
282
- {
283
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
284
- "threshold": "BLOCK_NONE"
285
- }
286
- ]
287
-
288
- # Gemini 1.5 Pro generation config
289
- generation_config = {
290
- 'temperature': 0.2,
291
- 'top_p': 0.8,
292
- 'top_k': 40,
293
- 'max_output_tokens': 2048,
294
- 'candidate_count': 1
295
- }
296
-
297
- response = model.generate_content(
298
- context,
299
- safety_settings=safety_settings,
300
- generation_config=generation_config
301
- )
302
-
303
- if response and response.text:
304
- return response.text
305
- elif response and hasattr(response, 'candidates') and response.candidates:
306
- return "Response was blocked by safety filters. Please try rephrasing your question."
307
- else:
308
- return "No response received from Gemini 1.5 Pro."
309
-
310
- except Exception as e:
311
- error_msg = str(e).lower()
312
- if "403" in error_msg or "forbidden" in error_msg:
313
- return "❌ API access denied. Please check:\n1. API Key validity\n2. Gemini API is enabled\n3. Account has sufficient permissions"
314
- elif "quota" in error_msg or "resource_exhausted" in error_msg:
315
- return "❌ API quota exceeded. Please try again later or upgrade your account."
316
- elif "timeout" in error_msg:
317
- return "⏱️ Request timeout. Please try again."
318
- elif "billing" in error_msg:
319
- return "💳 Billing account required for Gemini 1.5 Pro."
320
- elif "safety" in error_msg:
321
- return "🛡️ Content blocked by safety filters. Please rephrase your question."
322
- else:
323
- return f"❌ AI service error: {str(e)}"
324
-
325
- # Load data with better error handling
326
- def load_data(uploaded_file):
327
- """Load data with robust error handling"""
328
- try:
329
- # Try different separators and encodings
330
- for sep in ['\t', ',', ';']:
331
- for encoding in ['utf-8', 'latin-1', 'cp1252']:
332
- try:
333
- df = pd.read_csv(uploaded_file, sep=sep, encoding=encoding)
334
- if len(df.columns) >= 3: # Minimum expected columns
335
- return df
336
- except:
337
- continue
338
-
339
- # If all attempts fail, try with default settings
340
- return pd.read_csv(uploaded_file)
341
-
342
- except Exception as e:
343
- st.error(f"Error loading file: {str(e)}")
344
- return None
345
-
346
- # Main app
347
- def main():
348
- st.title("🏭 Production Data Analysis Dashboard")
349
- st.markdown("Upload your production data and get AI-powered insights")
350
-
351
- # Initialize Gemini
352
- model = init_gemini()
353
-
354
- # Sidebar
355
- with st.sidebar:
356
- st.header("📊 Data Upload")
357
- uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])
358
-
359
- if model:
360
- st.success("🤖 Gemini 1.5 Pro AI Assistant Ready")
361
- else:
362
- st.warning("⚠️ Gemini 1.5 Pro AI Assistant unavailable")
363
- with st.expander("🔧 API Configuration Help"):
364
- st.markdown("""
365
- **Steps to enable Gemini 1.5 Pro:**
366
-
367
- 1. **Get API Key**:
368
- - Visit [Google AI Studio](https://aistudio.google.com/app/apikey)
369
- - Create a new API Key
370
-
371
- 2. **Set API Key**:
372
- ```bash
373
- # Environment variable
374
- export GOOGLE_API_KEY="your_api_key_here"
375
- ```
376
- Or in Streamlit secrets.toml:
377
- ```toml
378
- GOOGLE_API_KEY = "your_api_key_here"
379
- ```
380
-
381
- 3. **Common 403 Error Causes**:
382
- - ❌ Invalid or expired API Key
383
- - ❌ Generative AI API not enabled
384
- - ❌ Insufficient account permissions
385
- - ❌ Billing account required (1.5 Pro may need payment)
386
- - ❌ Geographic restrictions
387
-
388
- 4. **Solutions**:
389
- - Verify API Key is correctly copied
390
- - Enable API in Google Cloud Console
391
- - Check account billing status
392
- - Try VPN if geographic restrictions apply
393
- """)
394
-
395
- # API Key testing tool
396
- st.markdown("**🧪 API Key Testing Tool**")
397
- test_api_key = st.text_input("Enter your API Key to test:", type="password")
398
- if st.button("Test API Key") and test_api_key:
399
- try:
400
- import google.generativeai as genai
401
- genai.configure(api_key=test_api_key)
402
- test_model = genai.GenerativeModel('gemini-1.5-pro-latest')
403
- test_response = test_model.generate_content("Test message")
404
- if test_response.text:
405
- st.success("✅ API Key works correctly!")
406
- else:
407
- st.error("❌ API Key test failed")
408
- except Exception as e:
409
- st.error(f"❌ Error: {str(e)}")
410
-
411
- if uploaded_file is not None:
412
- # Load and process data
413
- df = load_data(uploaded_file)
414
-
415
- if df is not None:
416
- try:
417
- df = process_data(df)
418
-
419
- # Validate required columns
420
- required_cols = ['date', 'weight_kg', 'material_type']
421
- missing_cols = [col for col in required_cols if col not in df.columns]
422
-
423
- if missing_cols:
424
- st.error(f"Missing required columns: {missing_cols}")
425
- st.info("Available columns: " + ", ".join(df.columns.tolist()))
426
- return
427
-
428
- # Generate analysis
429
- summary = generate_summary(df)
430
- if not summary:
431
- return
432
-
433
- anomalies = detect_anomalies(df)
434
- plots = create_plots(df)
435
-
436
- # Display results
437
- col1, col2, col3, col4 = st.columns(4)
438
-
439
- with col1:
440
- st.metric("Total Production", f"{summary['total_production']:,.0f} kg")
441
- with col2:
442
- st.metric("Daily Average", f"{summary['daily_avg']:,.0f} kg")
443
- with col3:
444
- st.metric("Production Days", summary['production_days'])
445
- with col4:
446
- st.metric("Material Types", len(summary['materials']))
447
-
448
- # Charts
449
- st.subheader("📈 Production Trends")
450
- col1, col2 = st.columns(2)
451
-
452
- with col1:
453
- if 'overview' in plots:
454
- st.plotly_chart(plots['overview'], use_container_width=True)
455
- with col2:
456
- if 'materials' in plots:
457
- st.plotly_chart(plots['materials'], use_container_width=True)
458
-
459
- col3, col4 = st.columns(2)
460
- with col3:
461
- if 'weekly' in plots:
462
- st.plotly_chart(plots['weekly'], use_container_width=True)
463
- with col4:
464
- if 'correlation' in plots:
465
- st.plotly_chart(plots['correlation'], use_container_width=True)
466
-
467
- # Material breakdown
468
- st.subheader("📋 Material Analysis")
469
- for material, stats in summary['materials'].items():
470
- with st.expander(f"{material.title()} - {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"):
471
- col1, col2, col3 = st.columns(3)
472
- with col1:
473
- st.metric("Total Weight", f"{stats['total']:,.0f} kg")
474
- with col2:
475
- st.metric("Percentage", f"{stats['percentage']:.1f}%")
476
- with col3:
477
- st.metric("Records", stats['count'])
478
-
479
- # Anomaly detection
480
- st.subheader("⚠️ Anomaly Detection")
481
- for material, anom in anomalies.items():
482
- if anom['count'] > 0:
483
- st.warning(f"**{material.title()}**: {anom['count']} anomalies detected")
484
- st.caption(f"Normal range: {anom['normal_range']}")
485
- if anom['dates']:
486
- st.caption(f"Recent anomaly dates: {', '.join(anom['dates'])}")
487
- else:
488
- st.success(f"**{material.title()}**: No anomalies detected")
489
-
490
- # AI Chat Interface
491
- if model:
492
- st.subheader("🤖 AI Production Assistant")
493
-
494
- # Predefined questions
495
- st.markdown("**Quick Questions:**")
496
- quick_questions = [
497
- "What are the key production trends?",
498
- "Which material type shows the best consistency?",
499
- "Are there any concerning patterns in the data?",
500
- "What recommendations do you have for optimization?"
501
- ]
502
-
503
- cols = st.columns(2)
504
- for i, question in enumerate(quick_questions):
505
- with cols[i % 2]:
506
- if st.button(question, key=f"q_{i}"):
507
- with st.spinner("AI analyzing..."):
508
- answer = query_llm(model, summary, question)
509
- st.success(f"**Q:** {question}")
510
- st.write(f"**A:** {answer}")
511
-
512
- # Custom question
513
- st.markdown("**Ask a Custom Question:**")
514
- user_question = st.text_input("Your question about the production data:")
515
-
516
- if user_question and st.button("Get AI Answer"):
517
- with st.spinner("AI analyzing..."):
518
- answer = query_llm(model, summary, user_question)
519
- st.success(f"**Q:** {user_question}")
520
- st.write(f"**A:** {answer}")
521
-
522
- except Exception as e:
523
- st.error(f"Error processing file: {str(e)}")
524
- st.info("Please ensure your CSV file has the required format.")
525
-
526
- else:
527
- st.info("👆 Please upload a CSV file to begin analysis")
528
-
529
- st.markdown("""
530
- ### 📋 Data Format Requirements
531
- Your CSV file should contain:
532
- - `date`: Date in MM/DD/YYYY format
533
- - `weight_kg`: Production weight in kilograms
534
- - `material_type`: Type of material (liquid, solid, waste_water, etc.)
535
- - `shift`: Shift number (optional)
536
-
537
- The file should be tab-separated (TSV format with .csv extension).
538
- """)
539
-
540
- if __name__ == "__main__":
541
- main()
 
 
 
 
 
 
 
 
 
1
 
2
+ 🔧 API Configuration Help
 
 
 
 
 
3
 
4
+ 🏭 Production Data Analysis Dashboard
5
+ Upload your production data and get AI-powered insights
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ API 配额已用完 - 请检查使用限制
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ 👆 Please upload a CSV file to begin analysis
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ 📋 Data Format Requirements
12
+ Your CSV file should contain:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ date: Date in MM/DD/YYYY format
15
+ weight_kg: Production weight in kilograms
16
+ material_type: Type of material (liquid, solid, waste_water, etc.)
17
+ shift: Shift number (optional)
18
+ The file should be tab-separated (TSV format with .csv extension).