Spaces:

entropy25
/

production-data-analysis

Running

App Files Files Community

entropy25 committed on Aug 13, 2025

Commit

dc6daaa

verified ·

1 Parent(s): 4a7ad8a

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -229

app.py CHANGED Viewed

@@ -3,280 +3,201 @@ import pandas as pd
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
-from datetime import datetime
 import google.generativeai as genai
-import json
 # Page config
-st.set_page_config(
-    page_title="Production Data Analysis",
-    page_icon="🏭",
-    layout="wide"
-)
-# Initialize Gemini
 @st.cache_resource
-def init_gemini():
     api_key = st.secrets.get("GOOGLE_API_KEY", "")
     if api_key:
         genai.configure(api_key=api_key)
         return genai.GenerativeModel('gemini-1.5-flash')
     return None
-# Data processing functions
 @st.cache_data
-def process_data(df):
-    """Process and analyze production data"""
     df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
-    df['day_of_week'] = df['date'].dt.day_name()
-    df['week'] = df['date'].dt.isocalendar().week
-    df['month'] = df['date'].dt.month
-    df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])
     return df
-def generate_summary(df):
-    """Generate summary statistics"""
-    total_production = df['weight_kg'].sum()
-    total_items = len(df)
-    daily_avg = df.groupby('date')['weight_kg'].sum().mean()
-    summary = {
-        'total_production': total_production,
-        'total_items': total_items,
-        'daily_avg': daily_avg,
-        'date_range': f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}",
-        'production_days': df['date'].nunique()
-    }
-    # Material breakdown
-    material_stats = {}
     for material in df['material_type'].unique():
-        mat_data = df[df['material_type'] == material]
-        material_stats[material] = {
-            'total': mat_data['weight_kg'].sum(),
-            'percentage': mat_data['weight_kg'].sum() / total_production * 100,
-            'count': len(mat_data)
         }
-    summary['materials'] = material_stats
-    return summary
-def detect_anomalies(df):
-    """Detect production anomalies"""
-    anomalies = {}
     for material in df['material_type'].unique():
-        mat_data = df[df['material_type'] == material]
-        Q1 = mat_data['weight_kg'].quantile(0.25)
-        Q3 = mat_data['weight_kg'].quantile(0.75)
         IQR = Q3 - Q1
-        lower_bound = Q1 - 1.5 * IQR
-        upper_bound = Q3 + 1.5 * IQR
-        outliers = mat_data[(mat_data['weight_kg'] < lower_bound) |
-                           (mat_data['weight_kg'] > upper_bound)]
-        anomalies[material] = {
-            'count': len(outliers),
-            'normal_range': f"{lower_bound:.1f} - {upper_bound:.1f} kg",
-            'dates': outliers['date'].dt.strftime('%Y-%m-%d').tolist()[:5]
         }
-    return anomalies
-def create_plots(df):
-    """Create all visualization plots"""
-    plots = {}
-    # Daily production trend
-    daily_total = df.groupby('date')['weight_kg'].sum().reset_index()
-    plots['overview'] = px.line(
-        daily_total, x='date', y='weight_kg',
-        title='Daily Production Trend',
-        labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'}
-    )
-    # Material comparison
-    daily_by_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
-    plots['materials'] = px.line(
-        daily_by_material, x='date', y='weight_kg', color='material_type',
-        title='Production by Material Type'
-    )
-    # Weekly pattern
-    weekly_pattern = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
-    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
-    weekly_pattern['day_of_week'] = pd.Categorical(weekly_pattern['day_of_week'], categories=day_order, ordered=True)
-    weekly_pattern = weekly_pattern.sort_values('day_of_week')
-    plots['weekly'] = px.bar(
-        weekly_pattern, x='day_of_week', y='weight_kg', color='material_type',
-        title='Weekly Production Pattern'
-    )
-    # Correlation matrix
-    daily_pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
-    if len(daily_pivot.columns) > 1:
-        corr_matrix = daily_pivot.corr()
-        plots['correlation'] = px.imshow(
-            corr_matrix, title='Material Type Correlation Matrix',
-            color_continuous_scale='RdBu'
-        )
-    return plots
-def query_llm(model, data_summary, user_question):
-    """Query Gemini with production data context"""
-    context = f"""
-    You are a production data analyst. Here's the current production data summary:
-    Production Overview:
-    - Total Production: {data_summary['total_production']:,.0f} kg
-    - Production Period: {data_summary['date_range']}
-    - Daily Average: {data_summary['daily_avg']:,.0f} kg
-    - Production Days: {data_summary['production_days']}
-    Material Breakdown:
-    """
-    for material, stats in data_summary['materials'].items():
-        context += f"- {material.title()}: {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)\n"
-    context += f"\nUser Question: {user_question}\n\nPlease provide a concise, data-driven answer based on this production data."
     try:
         response = model.generate_content(context)
         return response.text
-    except Exception as e:
-        return f"Error querying AI: {str(e)}"
 # Main app
 def main():
-    st.title("🏭 Production Data Analysis Dashboard")
-    st.markdown("Upload your production data and get AI-powered insights")
-    # Initialize Gemini
-    model = init_gemini()
-    # Sidebar
     with st.sidebar:
-        st.header("📊 Data Upload")
-        uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])
         if model:
-            st.success("🤖 AI Assistant Ready")
         else:
-            st.warning("⚠️ AI Assistant unavailable (API key needed)")
-    if uploaded_file is not None:
-        # Load and process data
-        try:
-            df = pd.read_csv(uploaded_file, sep='\t')
-            df = process_data(df)
-            # Generate analysis
-            summary = generate_summary(df)
-            anomalies = detect_anomalies(df)
-            plots = create_plots(df)
-            # Display results
-            col1, col2, col3, col4 = st.columns(4)
-            with col1:
-                st.metric("Total Production", f"{summary['total_production']:,.0f} kg")
-            with col2:
-                st.metric("Daily Average", f"{summary['daily_avg']:,.0f} kg")
-            with col3:
-                st.metric("Production Days", summary['production_days'])
-            with col4:
-                st.metric("Material Types", len(summary['materials']))
-            # Charts
-            st.subheader("📈 Production Trends")
-            col1, col2 = st.columns(2)
-            with col1:
-                st.plotly_chart(plots['overview'], use_container_width=True)
-            with col2:
-                st.plotly_chart(plots['materials'], use_container_width=True)
-            col3, col4 = st.columns(2)
-            with col3:
-                st.plotly_chart(plots['weekly'], use_container_width=True)
-            with col4:
-                if 'correlation' in plots:
-                    st.plotly_chart(plots['correlation'], use_container_width=True)
-            # Material breakdown
-            st.subheader("📋 Material Analysis")
-            for material, stats in summary['materials'].items():
-                with st.expander(f"{material.title()} - {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"):
-                    col1, col2, col3 = st.columns(3)
-                    with col1:
-                        st.metric("Total Weight", f"{stats['total']:,.0f} kg")
-                    with col2:
-                        st.metric("Percentage", f"{stats['percentage']:.1f}%")
-                    with col3:
-                        st.metric("Records", stats['count'])
-            # Anomaly detection
-            st.subheader("⚠️ Anomaly Detection")
-            for material, anom in anomalies.items():
-                if anom['count'] > 0:
-                    st.warning(f"**{material.title()}**: {anom['count']} anomalies detected")
-                    st.caption(f"Normal range: {anom['normal_range']}")
-                    if anom['dates']:
-                        st.caption(f"Recent anomaly dates: {', '.join(anom['dates'])}")
                 else:
-                    st.success(f"**{material.title()}**: No anomalies detected")
-            # AI Chat Interface
-            if model:
-                st.subheader("🤖 AI Production Assistant")
-                # Predefined questions
-                st.markdown("**Quick Questions:**")
-                quick_questions = [
-                    "What are the key production trends?",
-                    "Which material type shows the best consistency?",
-                    "Are there any concerning patterns in the data?",
-                    "What recommendations do you have for optimization?"
-                ]
-                cols = st.columns(2)
-                for i, question in enumerate(quick_questions):
-                    with cols[i % 2]:
-                        if st.button(question, key=f"q_{i}"):
-                            with st.spinner("AI analyzing..."):
-                                answer = query_llm(model, summary, question)
-                                st.success(f"**Q:** {question}")
-                                st.write(f"**A:** {answer}")
-                # Custom question
-                st.markdown("**Ask a Custom Question:**")
-                user_question = st.text_input("Your question about the production data:")
-                if user_question and st.button("Get AI Answer"):
-                    with st.spinner("AI analyzing..."):
-                        answer = query_llm(model, summary, user_question)
-                        st.success(f"**Q:** {user_question}")
-                        st.write(f"**A:** {answer}")
-        except Exception as e:
-            st.error(f"Error processing file: {str(e)}")
-            st.info("Please ensure your CSV file has columns: date, weight_kg, material_type")
     else:
-        st.info("👆 Please upload a CSV file to begin analysis")
         st.markdown("""
-        ### 📋 Data Format Requirements
-        Your CSV file should contain:
-        - `date`: Date in MM/DD/YYYY format
-        - `weight_kg`: Production weight in kilograms
-        - `material_type`: Type of material (liquid, solid, waste_water, etc.)
-        - `shift`: Shift number (optional)
-        The file should be tab-separated (TSV format with .csv extension).
         """)
 if __name__ == "__main__":

 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
+from datetime import datetime, timedelta
 import google.generativeai as genai
 # Page config
+st.set_page_config(page_title="Production Monitor", page_icon="🏭", layout="wide")
 @st.cache_resource
+def init_ai():
     api_key = st.secrets.get("GOOGLE_API_KEY", "")
     if api_key:
         genai.configure(api_key=api_key)
         return genai.GenerativeModel('gemini-1.5-flash')
     return None
 @st.cache_data
+def load_data(file):
+    df = pd.read_csv(file, sep='\t')
     df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
+    df['day_name'] = df['date'].dt.day_name()
     return df
def get_material_stats(df):
    """Summarise production weight per material type.

    Args:
        df: DataFrame with ``material_type``, ``weight_kg`` and ``date``
            columns.

    Returns:
        A dict keyed by material type, in order of first appearance. Each
        value is a dict with:

        - ``'total'``: summed ``weight_kg`` for the material,
        - ``'percentage'``: share of the overall ``weight_kg`` sum, in percent
          (``0.0`` when the overall sum is zero),
        - ``'daily_avg'``: mean of the per-date weight sums,
        - ``'records'``: number of rows for the material.

        Rows whose ``material_type`` is missing (NaN/None) get no entry, but
        their weight still counts towards the overall total.
    """
    stats = {}
    total = df['weight_kg'].sum()
    # sort=False keeps first-appearance order; groupby drops NaN keys, which
    # could not be used as display labels anyway.
    for material, data in df.groupby('material_type', sort=False):
        material_total = data['weight_kg'].sum()
        stats[material] = {
            'total': material_total,
            # Guard the division so an all-zero dataset yields 0% rather
            # than NaN.
            'percentage': (material_total / total) * 100 if total else 0.0,
            'daily_avg': data.groupby('date')['weight_kg'].sum().mean(),
            'records': len(data),
        }
    return stats
def create_trend_chart(df, time_period='daily', material_filter=None):
    """Build a Plotly figure of production weight over time.

    Args:
        df: Production records with ``date``, ``material_type`` and
            ``weight_kg`` columns. The ``'shift'`` view also needs a
            ``shift`` column, and the weekly view needs ``date`` to be
            datetime-like. The frame is not modified.
        time_period: ``'daily'`` gives a line chart per day and material;
            ``'shift'`` gives bars per date coloured by shift and faceted by
            material; any other value gives bars per ISO week number.
        material_filter: Optional collection of material types to keep. A
            falsy value (``None`` or empty) keeps every material.

    Returns:
        The Plotly figure, with its layout height set to 400.

    Raises:
        KeyError: If the ``'shift'`` view is requested but ``df`` has no
            ``shift`` column.
    """
    if material_filter:
        df = df[df['material_type'].isin(material_filter)]

    if time_period == 'daily':
        grouped = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
        fig = px.line(grouped, x='date', y='weight_kg', color='material_type',
                      title='Daily Production Trend')
    elif time_period == 'shift':
        if 'shift' not in df.columns:
            # Same exception type pandas would raise, with an actionable message.
            raise KeyError("shift view requires a 'shift' column in the data")
        grouped = df.groupby(['date', 'shift', 'material_type'])['weight_kg'].sum().reset_index()
        fig = px.bar(grouped, x='date', y='weight_kg', color='shift',
                     facet_col='material_type', title='Production by Shift')
    else:  # weekly
        # assign() returns a new frame, so neither the caller's DataFrame nor
        # a filtered slice of it is written to.
        weekly = df.assign(week=df['date'].dt.isocalendar().week)
        # NOTE(review): grouping by ISO week number alone merges same-numbered
        # weeks from different years; confirm the data spans a single year.
        grouped = weekly.groupby(['week', 'material_type'])['weight_kg'].sum().reset_index()
        fig = px.bar(grouped, x='week', y='weight_kg', color='material_type',
                     title='Weekly Production')

    fig.update_layout(height=400)
    return fig
def detect_outliers(df, iqr_factor=1.5):
    """Count weight outliers per material using the IQR fence rule.

    For each material, a weight is an outlier when it lies below
    ``Q1 - iqr_factor * IQR`` or above ``Q3 + iqr_factor * IQR``.

    Args:
        df: DataFrame with ``material_type`` and ``weight_kg`` columns.
        iqr_factor: Multiplier applied to the interquartile range when
            building the fences. Defaults to ``1.5``.

    Returns:
        A dict keyed by material type, in order of first appearance. Each
        value is a dict with ``'count'`` (number of outlying rows) and
        ``'range'`` (the fences formatted as ``"<lower> - <upper> kg"``).
        Rows whose ``material_type`` is missing get no entry.
    """
    outliers = {}
    # groupby drops NaN material labels, which have no usable display name.
    for material, weights in df.groupby('material_type', sort=False)['weight_kg']:
        q1, q3 = weights.quantile(0.25), weights.quantile(0.75)
        spread = q3 - q1
        lower, upper = q1 - iqr_factor * spread, q3 + iqr_factor * spread
        outside = (weights < lower) | (weights > upper)
        outliers[material] = {
            'count': int(outside.sum()),
            'range': f"{lower:.0f} - {upper:.0f} kg",
        }
    return outliers
def query_ai(model, stats, question):
    """Ask the AI model a question about the per-material production totals.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has
            a ``text`` attribute, or a falsy value when no model is available.
        stats: Mapping of material name to a dict with ``'total'`` and
            ``'percentage'`` entries.
        question: The user's question, appended to the prompt.

    Returns:
        The model's response text; ``"AI assistant not available"`` when
        ``model`` is falsy; ``"Error getting AI response"`` when the model
        call raises an ``Exception``.
    """
    if not model:
        return "AI assistant not available"

    material_lines = "\n".join(
        f"- {mat.title()}: {info['total']:,.0f}kg ({info['percentage']:.1f}%)"
        for mat, info in stats.items()
    )
    context = (
        "Production Data Summary:\n"
        f"    {material_lines}\n"
        f"    Question: {question}\n"
        "    Answer concisely based on the data:"
    )

    try:
        response = model.generate_content(context)
        return response.text
    except Exception:
        # Only ordinary errors become a user-facing message. BaseException
        # subclasses (KeyboardInterrupt, SystemExit, framework control-flow
        # signals) must propagate.
        return "Error getting AI response"
 # Main app
 def main():
+    st.title("🏭 Production Monitor")
+    st.markdown("*Real-time production analysis dashboard*")
+    model = init_ai()
+    # Sidebar controls
     with st.sidebar:
+        st.header("📊 Controls")
+        uploaded_file = st.file_uploader("Upload Data", type=['csv'])
         if model:
+            st.success("🤖 AI Ready")
         else:
+            st.warning("⚠️ AI Unavailable")
+    if uploaded_file:
+        df = load_data(uploaded_file)
+        stats = get_material_stats(df)
+        # Material cards
+        st.subheader("📋 Material Overview")
+        cols = st.columns(len(stats))
+        for i, (material, info) in enumerate(stats.items()):
+            with cols[i]:
+                st.metric(
+                    label=material.replace('_', ' ').title(),
+                    value=f"{info['total']:,.0f} kg",
+                    delta=f"{info['percentage']:.1f}% of total"
+                )
+                st.caption(f"Daily avg: {info['daily_avg']:,.0f} kg")
+        # Chart controls
+        st.subheader("📈 Trends")
+        col1, col2 = st.columns([3, 1])
+        with col2:
+            time_view = st.selectbox("Time View", ["daily", "weekly", "shift"])
+            materials = st.multiselect(
+                "Materials",
+                options=list(stats.keys()),
+                default=list(stats.keys())
+            )
+        with col1:
+            if materials:
+                chart = create_trend_chart(df, time_view, materials)
+                st.plotly_chart(chart, use_container_width=True)
+        # Shift analysis
+        if 'shift' in df.columns:
+            st.subheader("🌓 Shift Analysis")
+            shift_data = df.groupby(['shift', 'material_type'])['weight_kg'].sum().reset_index()
+            shift_chart = px.bar(shift_data, x='shift', y='weight_kg', color='material_type',
+                               title='Production by Shift')
+            st.plotly_chart(shift_chart, use_container_width=True)
+        # Anomaly detection
+        st.subheader("⚠️ Quality Check")
+        outliers = detect_outliers(df)
+        alert_cols = st.columns(len(outliers))
+        for i, (material, info) in enumerate(outliers.items()):
+            with alert_cols[i]:
+                if info['count'] > 0:
+                    st.warning(f"**{material.title()}**: {info['count']} outliers")
+                    st.caption(f"Normal: {info['range']}")
                 else:
+                    st.success(f"**{material.title()}**: All normal")
+        # AI Assistant
+        if model:
+            st.subheader("🤖 AI Insights")
+            # Quick questions
+            quick_q = [
+                "What's the production trend?",
+                "Which material is most consistent?",
+                "Any efficiency recommendations?"
+            ]
+            cols = st.columns(len(quick_q))
+            for i, q in enumerate(quick_q):
+                with cols[i]:
+                    if st.button(q, key=f"q{i}"):
+                        answer = query_ai(model, stats, q)
+                        st.info(answer)
+            # Custom question
+            custom_q = st.text_input("Ask anything about your data:")
+            if custom_q:
+                if st.button("Ask"):
+                    answer = query_ai(model, stats, custom_q)
+                    st.success(f"**Q:** {custom_q}")
+                    st.write(f"**A:** {answer}")
     else:
+        st.info("📁 Upload your production data to start")
         st.markdown("""
+        **Expected format (TSV):**
+        - `date`: MM/DD/YYYY
+        - `weight_kg`: Production weight
+        - `material_type`: Material category
+        - `shift`: day/night (optional)
         """)
 if __name__ == "__main__":