Spaces:

entropy25
/

production-data-analysis

Sleeping

App Files Files Community

entropy25 committed on Aug 13, 2025

Commit

6a4a796

verified ·

1 Parent(s): 161ff28

Update app.py

Browse files

Files changed (1) hide show

app.py +537 -12

app.py CHANGED Viewed

@@ -1,18 +1,543 @@
-🔧 API Configuration Help
-🏭 Production Data Analysis Dashboard
-Upload your production data and get AI-powered insights
-❌ API 配额已用完 - 请检查使用限制
-👆 Please upload a CSV file to begin analysis
-📋 Data Format Requirements
-Your CSV file should contain:
-date: Date in MM/DD/YYYY format
-weight_kg: Production weight in kilograms
-material_type: Type of material (liquid, solid, waste_water, etc.)
-shift: Shift number (optional)
-The file should be tab-separated (TSV format with .csv extension).

+import streamlit as st
+import pandas as pd
+import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
+from datetime import datetime
+import json
+import os
+# Page config
+# Sets the browser tab title and icon and selects the wide page layout.
+# NOTE(review): Streamlit has historically required set_page_config to be the
+# first Streamlit command executed in a script — confirm before moving it.
+st.set_page_config(
+    page_title="Production Data Analysis",
+    page_icon="🏭",
+    layout="wide"
+)
+# Initialize Gemini 1.5 Pro with better error handling
+@st.cache_resource
+def init_gemini():
+    """Configure the Gemini client and verify it with a one-off test prompt.
+
+    The API key is the first non-empty value among
+    ``st.secrets["GOOGLE_API_KEY"]``, the ``GOOGLE_API_KEY`` environment
+    variable and ``st.secrets["api_key"]``.
+
+    Returns:
+        The ``genai.GenerativeModel`` instance when the test prompt returns
+        text; ``None`` when no key is found, the library is not installed,
+        or the test call fails (a Streamlit message is shown in each case).
+    """
+    # NOTE(review): the result (including None) is memoised by
+    # st.cache_resource, so a key added later presumably needs a cache clear
+    # or app restart to be picked up — confirm.
+    def read_secret(name):
+        # Accessing st.secrets can raise when no secrets are configured.
+        # Treat that as "not set", but do not swallow KeyboardInterrupt or
+        # SystemExit the way a bare ``except:`` would.
+        try:
+            return st.secrets.get(name, "")
+        except Exception:
+            return ""
+    try:
+        import google.generativeai as genai
+        api_key = (
+            read_secret("GOOGLE_API_KEY")
+            or os.environ.get("GOOGLE_API_KEY", "")
+            or read_secret("api_key")
+        )
+        if not (api_key and api_key.strip()):
+            st.warning("⚠️ GOOGLE_API_KEY not found")
+            return None
+        # Configure with API key
+        genai.configure(api_key=api_key.strip())
+        # Use Gemini 1.5 Pro model
+        model = genai.GenerativeModel('gemini-1.5-pro-latest')
+        # Configure safety settings to avoid blocking
+        safety_settings = [
+            {"category": category, "threshold": "BLOCK_NONE"}
+            for category in (
+                "HARM_CATEGORY_HARASSMENT",
+                "HARM_CATEGORY_HATE_SPEECH",
+                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                "HARM_CATEGORY_DANGEROUS_CONTENT",
+            )
+        ]
+        # Test the model with a simple query to verify it works
+        try:
+            test_response = model.generate_content(
+                "Hello, respond with 'Gemini 1.5 Pro API working'",
+                safety_settings=safety_settings,
+                generation_config={
+                    'temperature': 0.1,
+                    'top_p': 0.8,
+                    'top_k': 40,
+                    'max_output_tokens': 100,
+                }
+            )
+            if test_response and test_response.text:
+                st.success(f"✅ {test_response.text}")
+                return model
+            # Previously this case fell through silently; say why the
+            # assistant is unavailable.
+            st.error("❌ Model test failed: empty response")
+            return None
+        except Exception as e:
+            error_msg = str(e).lower()
+            if "403" in error_msg:
+                st.error("❌ API Key permission denied - Please check Google AI Studio API settings")
+            elif "quota" in error_msg:
+                st.error("❌ API quota exceeded - Please check usage limits")
+            elif "billing" in error_msg:
+                st.error("❌ Billing required - Gemini 1.5 Pro may need paid account")
+            else:
+                st.error(f"❌ Model test failed: {str(e)}")
+            return None
+    except ImportError:
+        st.error("❌ Google Generative AI 库未安装")
+        return None
+    except Exception as e:
+        st.error(f"❌ 初始化 Gemini 时出错: {str(e)}")
+        return None
+# Data processing functions
+@st.cache_data
+def process_data(df):
+    """Parse the ``date`` column and derive calendar features.
+
+    Adds ``day_of_week``, ``week``, ``month`` and ``is_weekend`` columns.
+
+    Args:
+        df: Raw production data; expected to contain a ``date`` column.
+
+    Returns:
+        A processed copy of ``df``. When ``date`` is missing the copy is
+        returned unchanged so the caller's column validation can report it.
+        When processing fails an error is shown and the (possibly partially
+        processed) copy is returned.
+    """
+    # Work on a copy: the argument is an input of a cached function and
+    # should not be mutated in place.
+    df = df.copy()
+    if 'date' not in df.columns:
+        return df
+    try:
+        parsed = None
+        # Try the explicit formats first; MM/DD/YYYY wins for ambiguous dates.
+        for date_format in ('%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%m-%d-%Y'):
+            try:
+                parsed = pd.to_datetime(df['date'], format=date_format)
+                break
+            except (ValueError, TypeError):
+                continue
+        # If all formats failed, try pandas automatic parsing; values that
+        # cannot be parsed become NaT.
+        if parsed is None:
+            parsed = pd.to_datetime(df['date'], errors='coerce')
+        df['date'] = parsed
+        # Add time-based features
+        df['day_of_week'] = df['date'].dt.day_name()
+        df['week'] = df['date'].dt.isocalendar().week
+        df['month'] = df['date'].dt.month
+        df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])
+        return df
+    except Exception as e:
+        st.error(f"Error processing data: {str(e)}")
+        return df
+def generate_summary(df):
+    """Compute headline production statistics and a per-material breakdown.
+
+    Args:
+        df: Processed data with ``date`` (datetime), ``weight_kg`` and
+            ``material_type`` columns.
+
+    Returns:
+        A dict with ``total_production``, ``total_items``, ``daily_avg``,
+        ``date_range``, ``production_days`` and ``materials``. ``materials``
+        maps each material, in order of first appearance, to its ``total``,
+        ``percentage`` and ``count``. Rows without a material type are left
+        out of the breakdown. Returns ``{}`` after showing an error when the
+        summary cannot be computed.
+    """
+    try:
+        total_production = df['weight_kg'].sum()
+        # One grouped pass instead of re-filtering the frame per material.
+        # groupby drops missing keys, so NaN never becomes a material name.
+        per_material = (
+            df.groupby('material_type', sort=False)['weight_kg']
+            .agg(total='sum', count='size')
+        )
+        material_stats = {}
+        for material, total, count in zip(
+            per_material.index, per_material['total'], per_material['count']
+        ):
+            material_stats[material] = {
+                'total': total,
+                # Guard against a zero grand total, which would give NaN/inf.
+                'percentage': (total / total_production * 100) if total_production else 0.0,
+                'count': int(count),
+            }
+        first_day = df['date'].min().strftime('%Y-%m-%d')
+        last_day = df['date'].max().strftime('%Y-%m-%d')
+        return {
+            'total_production': total_production,
+            'total_items': len(df),
+            'daily_avg': df.groupby('date')['weight_kg'].sum().mean(),
+            'date_range': f"{first_day} to {last_day}",
+            'production_days': df['date'].nunique(),
+            'materials': material_stats,
+        }
+    except Exception as e:
+        st.error(f"Error generating summary: {str(e)}")
+        return {}
+def detect_anomalies(df, iqr_multiplier=1.5, max_dates=5):
+    """Flag per-material weight outliers with the IQR rule.
+
+    A record is an outlier when its ``weight_kg`` lies more than
+    ``iqr_multiplier`` interquartile ranges below the first or above the
+    third quartile of its material.
+
+    Args:
+        df: Processed data with ``date`` (datetime), ``weight_kg`` and
+            ``material_type`` columns.
+        iqr_multiplier: Width of the accepted band in IQR units.
+        max_dates: Maximum number of outlier dates reported per material.
+
+    Returns:
+        A dict mapping each material to ``count``, ``normal_range`` and
+        ``dates`` (up to ``max_dates`` outlier dates in data order). Rows
+        without a material type are ignored. Whatever was collected before an
+        error is still returned.
+    """
+    anomalies = {}
+    try:
+        # groupby skips missing keys, so NaN never becomes a material name.
+        for material, mat_data in df.groupby('material_type', sort=False):
+            weights = mat_data['weight_kg']
+            q1 = weights.quantile(0.25)
+            q3 = weights.quantile(0.75)
+            margin = iqr_multiplier * (q3 - q1)
+            lower_bound = q1 - margin
+            upper_bound = q3 + margin
+            outliers = mat_data[(weights < lower_bound) | (weights > upper_bound)]
+            anomalies[material] = {
+                'count': len(outliers),
+                'normal_range': f"{lower_bound:.1f} - {upper_bound:.1f} kg",
+                # Drop unparsed dates (NaT) so every entry is a string.
+                'dates': outliers['date'].dropna().dt.strftime('%Y-%m-%d').tolist()[:max_dates]
+            }
+    except Exception as e:
+        st.error(f"Error detecting anomalies: {str(e)}")
+    return anomalies
+def create_plots(df):
+    """Build the dashboard's Plotly figures.
+
+    Returns a dict that may contain ``overview`` (daily totals),
+    ``materials`` (daily totals per material), ``weekly`` (mean weight per
+    weekday and material) and ``correlation`` (only when more than one
+    material is present). If a step fails, an error is shown and the figures
+    built so far are returned.
+    """
+    figures = {}
+    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
+    try:
+        # Daily production trend
+        per_day = df.groupby('date')['weight_kg'].sum().reset_index()
+        figures['overview'] = px.line(
+            per_day,
+            x='date',
+            y='weight_kg',
+            title='Daily Production Trend',
+            labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'},
+        )
+        # Material comparison
+        per_day_and_material = df.groupby(['date', 'material_type'])['weight_kg'].sum()
+        figures['materials'] = px.line(
+            per_day_and_material.reset_index(),
+            x='date',
+            y='weight_kg',
+            color='material_type',
+            title='Production by Material Type',
+        )
+        # Weekly pattern, with weekdays in calendar rather than alphabetical order
+        weekday_means = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
+        weekday_means['day_of_week'] = pd.Categorical(
+            weekday_means['day_of_week'], categories=weekdays, ordered=True
+        )
+        figures['weekly'] = px.bar(
+            weekday_means.sort_values('day_of_week'),
+            x='day_of_week',
+            y='weight_kg',
+            color='material_type',
+            title='Weekly Production Pattern',
+        )
+        # Correlation matrix between the materials' daily totals
+        material_by_day = (
+            df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
+        )
+        if len(material_by_day.columns) > 1:
+            figures['correlation'] = px.imshow(
+                material_by_day.corr(),
+                title='Material Type Correlation Matrix',
+                color_continuous_scale='RdBu',
+            )
+    except Exception as e:
+        st.error(f"Error creating plots: {str(e)}")
+    return figures
+def query_llm(model, data_summary, user_question):
+    """Ask Gemini a question with the production summary as context.
+
+    Args:
+        model: Object exposing ``generate_content``; a falsy value means the
+            assistant is unavailable.
+        data_summary: Dict with ``total_production``, ``date_range``,
+            ``daily_avg``, ``production_days`` and ``materials`` keys.
+        user_question: The question appended to the prompt.
+
+    Returns:
+        The model's answer, or a user-facing message when the assistant is
+        unavailable, the response carries no text, or the call fails. This
+        function does not raise.
+    """
+    if not model:
+        return "AI Assistant is not available. Please check API configuration."
+    try:
+        context = f"""
+        You are a production data analyst. Here's the current production data summary:
+        Production Overview:
+        - Total Production: {data_summary['total_production']:,.0f} kg
+        - Production Period: {data_summary['date_range']}
+        - Daily Average: {data_summary['daily_avg']:,.0f} kg
+        - Production Days: {data_summary['production_days']}
+        Material Breakdown:
+        """
+        # str() keeps non-string material keys from breaking .title().
+        context += "".join(
+            f"- {str(material).title()}: {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)\n"
+            for material, stats in data_summary['materials'].items()
+        )
+        context += f"\nUser Question: {user_question}\n\nPlease provide a concise, data-driven answer based on this production data."
+        # Configure safety settings for Gemini 1.5 Pro
+        safety_settings = [
+            {"category": category, "threshold": "BLOCK_NONE"}
+            for category in (
+                "HARM_CATEGORY_HARASSMENT",
+                "HARM_CATEGORY_HATE_SPEECH",
+                "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                "HARM_CATEGORY_DANGEROUS_CONTENT",
+            )
+        ]
+        # Gemini 1.5 Pro generation config
+        generation_config = {
+            'temperature': 0.2,
+            'top_p': 0.8,
+            'top_k': 40,
+            'max_output_tokens': 2048,
+            'candidate_count': 1
+        }
+        response = model.generate_content(
+            context,
+            safety_settings=safety_settings,
+            generation_config=generation_config
+        )
+        # Accessing .text raises ValueError when the response carries no text
+        # part (for example a blocked candidate). Catch it here so the
+        # dedicated messages below are reachable.
+        try:
+            answer = response.text if response else None
+        except ValueError:
+            answer = None
+        if answer:
+            return answer
+        if response and getattr(response, 'candidates', None):
+            return "Response was blocked by safety filters. Please try rephrasing your question."
+        return "No response received from Gemini 1.5 Pro."
+    except Exception as e:
+        error_msg = str(e).lower()
+        if "403" in error_msg or "forbidden" in error_msg:
+            return "❌ API access denied. Please check:\n1. API Key validity\n2. Gemini API is enabled\n3. Account has sufficient permissions"
+        elif "quota" in error_msg or "resource_exhausted" in error_msg:
+            return "❌ API quota exceeded. Please try again later or upgrade your account."
+        elif "timeout" in error_msg:
+            return "⏱️ Request timeout. Please try again."
+        elif "billing" in error_msg:
+            return "💳 Billing account required for Gemini 1.5 Pro."
+        elif "safety" in error_msg:
+            return "🛡️ Content blocked by safety filters. Please rephrase your question."
+        else:
+            return f"❌ AI service error: {str(e)}"
+# Load data with better error handling
+def load_data(uploaded_file):
+    """Read an uploaded delimited text file into a DataFrame.
+
+    Tries tab, comma and semicolon separators with utf-8, latin-1 and cp1252
+    encodings, and returns the first parse that yields at least three
+    columns. If none does, falls back to ``pd.read_csv`` defaults.
+
+    Args:
+        uploaded_file: A file-like object (or path) accepted by
+            ``pd.read_csv``.
+
+    Returns:
+        The parsed DataFrame, or ``None`` after showing an error when even
+        the fallback read fails.
+    """
+    def rewind():
+        # Each read_csv call consumes the buffer. Without rewinding, every
+        # attempt after the first sees an empty file and fails.
+        if hasattr(uploaded_file, 'seek'):
+            uploaded_file.seek(0)
+    try:
+        # Try different separators and encodings
+        for sep in ('\t', ',', ';'):
+            for encoding in ('utf-8', 'latin-1', 'cp1252'):
+                rewind()
+                try:
+                    df = pd.read_csv(uploaded_file, sep=sep, encoding=encoding)
+                except Exception:
+                    # Best effort: a failed combination just moves on to the next.
+                    continue
+                if len(df.columns) >= 3:  # Minimum expected columns
+                    return df
+        # If all attempts fail, try with default settings
+        rewind()
+        return pd.read_csv(uploaded_file)
+    except Exception as e:
+        st.error(f"Error loading file: {str(e)}")
+        return None
+# Main app
+def main():
+    """Render the dashboard: upload, metrics, charts, anomalies and AI chat.
+
+    The sidebar holds the file uploader and, when Gemini is unavailable,
+    configuration help with a quick API-key test. Once a file is uploaded its
+    required columns are checked before any processing, then the summary,
+    charts, per-material breakdown, anomaly report and (if a model is
+    available) the question interface are shown.
+    """
+    st.title("🏭 Production Data Analysis Dashboard")
+    st.markdown("Upload your production data and get AI-powered insights")
+    # Initialize Gemini
+    model = init_gemini()
+    # Sidebar
+    with st.sidebar:
+        st.header("📊 Data Upload")
+        uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])
+        if model:
+            st.success("🤖 Gemini AI Assistant Ready")
+        else:
+            st.warning("⚠️ Gemini AI Assistant unavailable")
+            with st.expander("🔧 API Configuration Help"):
+                st.markdown("""
+                **Steps to enable Gemini AI:**
+                1. **Get FREE API Key**:
+                   - Visit [Google AI Studio](https://aistudio.google.com/app/apikey)
+                   - Sign in with Google account
+                   - Create a new API Key (FREE)
+                2. **Set API Key**:
+                   ```bash
+                   # Environment variable
+                   export GOOGLE_API_KEY="your_api_key_here"
+                   ```
+                3. **Free Tier Limits**:
+                   - ✅ Gemini 1.5 Flash: 15 requests/minute (FREE)
+                   - ⚠️ Gemini 1.5 Pro: May require billing
+                   - 📊 Daily quota: Generous for testing
+                4. **Troubleshooting 403 Errors**:
+                   - Check API Key is correctly copied
+                   - Ensure no extra spaces in key
+                   - Verify Google AI Studio access
+                   - Try creating a new API Key
+                   - Check if region is supported
+                """)
+                # Simplified API Key testing
+                st.markdown("**🧪 Quick API Test**")
+                test_key = st.text_input("Paste API Key to test:", type="password", key="api_test")
+                if st.button("Test Key", key="test_btn") and test_key:
+                    with st.spinner("Testing..."):
+                        try:
+                            import google.generativeai as genai
+                            genai.configure(api_key=test_key.strip())
+                            test_model = genai.GenerativeModel('gemini-1.5-flash-latest')
+                            response = test_model.generate_content("Hello")
+                            if response.text:
+                                st.success("✅ API Key works!")
+                            else:
+                                st.error("❌ No response")
+                        except Exception as e:
+                            if "403" in str(e):
+                                st.error("❌ Access denied - Check key validity")
+                            elif "quota" in str(e).lower():
+                                st.error("❌ Quota exceeded - Try again later")
+                            else:
+                                st.error(f"❌ Error: {str(e)}")
+    if uploaded_file is not None:
+        # Load and process data
+        df = load_data(uploaded_file)
+        if df is not None:
+            try:
+                # Validate required columns before processing, so a missing
+                # column yields one clear message instead of a processing
+                # error followed by the validation error.
+                required_cols = ['date', 'weight_kg', 'material_type']
+                missing_cols = [col for col in required_cols if col not in df.columns]
+                if missing_cols:
+                    st.error(f"Missing required columns: {missing_cols}")
+                    # Column labels are not guaranteed to be strings.
+                    st.info("Available columns: " + ", ".join(map(str, df.columns.tolist())))
+                    return
+                df = process_data(df)
+                # Generate analysis
+                summary = generate_summary(df)
+                if not summary:
+                    return
+                anomalies = detect_anomalies(df)
+                plots = create_plots(df)
+                # Display results
+                col1, col2, col3, col4 = st.columns(4)
+                with col1:
+                    st.metric("Total Production", f"{summary['total_production']:,.0f} kg")
+                with col2:
+                    st.metric("Daily Average", f"{summary['daily_avg']:,.0f} kg")
+                with col3:
+                    st.metric("Production Days", summary['production_days'])
+                with col4:
+                    st.metric("Material Types", len(summary['materials']))
+                # Charts
+                st.subheader("📈 Production Trends")
+                col1, col2 = st.columns(2)
+                with col1:
+                    if 'overview' in plots:
+                        st.plotly_chart(plots['overview'], use_container_width=True)
+                with col2:
+                    if 'materials' in plots:
+                        st.plotly_chart(plots['materials'], use_container_width=True)
+                col3, col4 = st.columns(2)
+                with col3:
+                    if 'weekly' in plots:
+                        st.plotly_chart(plots['weekly'], use_container_width=True)
+                with col4:
+                    if 'correlation' in plots:
+                        st.plotly_chart(plots['correlation'], use_container_width=True)
+                # Material breakdown
+                st.subheader("📋 Material Analysis")
+                for material, stats in summary['materials'].items():
+                    with st.expander(f"{material.title()} - {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"):
+                        col1, col2, col3 = st.columns(3)
+                        with col1:
+                            st.metric("Total Weight", f"{stats['total']:,.0f} kg")
+                        with col2:
+                            st.metric("Percentage", f"{stats['percentage']:.1f}%")
+                        with col3:
+                            st.metric("Records", stats['count'])
+                # Anomaly detection
+                st.subheader("⚠️ Anomaly Detection")
+                for material, anom in anomalies.items():
+                    if anom['count'] > 0:
+                        st.warning(f"**{material.title()}**: {anom['count']} anomalies detected")
+                        st.caption(f"Normal range: {anom['normal_range']}")
+                        if anom['dates']:
+                            st.caption(f"Recent anomaly dates: {', '.join(anom['dates'])}")
+                    else:
+                        st.success(f"**{material.title()}**: No anomalies detected")
+                # AI Chat Interface
+                if model:
+                    st.subheader("🤖 AI Production Assistant")
+                    # Predefined questions
+                    st.markdown("**Quick Questions:**")
+                    quick_questions = [
+                        "What are the key production trends?",
+                        "Which material type shows the best consistency?",
+                        "Are there any concerning patterns in the data?",
+                        "What recommendations do you have for optimization?"
+                    ]
+                    cols = st.columns(2)
+                    for i, question in enumerate(quick_questions):
+                        with cols[i % 2]:
+                            if st.button(question, key=f"q_{i}"):
+                                with st.spinner("AI analyzing..."):
+                                    answer = query_llm(model, summary, question)
+                                    st.success(f"**Q:** {question}")
+                                    st.write(f"**A:** {answer}")
+                    # Custom question
+                    st.markdown("**Ask a Custom Question:**")
+                    user_question = st.text_input("Your question about the production data:")
+                    if user_question and st.button("Get AI Answer"):
+                        with st.spinner("AI analyzing..."):
+                            answer = query_llm(model, summary, user_question)
+                            st.success(f"**Q:** {user_question}")
+                            st.write(f"**A:** {answer}")
+            except Exception as e:
+                st.error(f"Error processing file: {str(e)}")
+                st.info("Please ensure your CSV file has the required format.")
+    else:
+        st.info("👆 Please upload a CSV file to begin analysis")
+        st.markdown("""
+        ### 📋 Data Format Requirements
+        Your CSV file should contain:
+        - `date`: Date in MM/DD/YYYY format
+        - `weight_kg`: Production weight in kilograms
+        - `material_type`: Type of material (liquid, solid, waste_water, etc.)
+        - `shift`: Shift number (optional)
+        The file should be tab-separated (TSV format with .csv extension).
+        """)
+# Script entry point: build the dashboard only when this file is run as the
+# main module, not when it is imported.
+if __name__ == "__main__":
+    main()