entropy25 committed on
Commit
49f0473
Β·
verified Β·
1 Parent(s): 3c88859

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +247 -177
app.py CHANGED
@@ -1,213 +1,283 @@
1
- import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import plotly.graph_objects as go
5
  import plotly.express as px
 
6
  from datetime import datetime
7
- import io
 
8
 
9
def process_data(file):
    """Process an uploaded tab-separated CSV file and generate all analyses.

    Parameters
    ----------
    file : uploaded-file wrapper or None
        Only ``file.name`` (a filesystem path) is used to read the data.

    Returns
    -------
    tuple
        ``(summary_md, overview_fig, material_fig, correlation_fig,
        time_fig, anomaly_md)``; on any failure the first element is an
        error message and the remaining five are ``None`` so each UI
        output slot still receives a value.
    """
    if file is None:
        return "Please upload a CSV file", None, None, None, None, None

    try:
        # Read the uploaded file.
        # NOTE: despite the .csv extension, the data is expected to be tab-separated.
        df = pd.read_csv(file.name, sep='\t')

        # Data preprocessing: 'date' is parsed strictly as MM/DD/YYYY so
        # malformed rows surface as an error instead of silently shifting.
        df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
        if 'original_date' in df.columns:
            # errors='ignore' leaves the column unparsed on failure —
            # NOTE(review): presumably intentional best-effort parsing; confirm.
            df['original_date'] = pd.to_datetime(df['original_date'], format='%d/%m/%Y', errors='ignore')

        # Derived calendar features consumed by the downstream plots.
        df['day_of_week'] = df['date'].dt.day_name()
        df['week'] = df['date'].dt.isocalendar().week
        df['month'] = df['date'].dt.month
        df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])

        # Generate all analyses
        summary_text = generate_summary(df)
        overview_plot = create_overview_plot(df)
        material_plot = create_material_analysis(df)
        correlation_plot = create_correlation_analysis(df)
        time_analysis_plot = create_time_analysis(df)
        anomaly_report = detect_anomalies_report(df)

        return summary_text, overview_plot, material_plot, correlation_plot, time_analysis_plot, anomaly_report

    except Exception as e:
        # Broad catch is deliberate: any read/parse/plot failure becomes a
        # user-facing message rather than a stack trace in the UI.
        error_msg = f"Error processing file: {str(e)}\n\nPlease ensure your CSV file has the required columns: date, weight_kg, material_type"
        return error_msg, None, None, None, None, None
41
 
42
def generate_summary(df):
    """Render the markdown summary report for the production dataset.

    Expects 'date' (datetime64), 'weight_kg' and 'material_type' columns;
    a 'shift' column, when present, adds a shift-performance section.
    """
    total_production = df['weight_kg'].sum()
    total_items = len(df)
    daily_avg = df.groupby('date')['weight_kg'].sum().mean()

    first_day = df['date'].min().strftime('%Y-%m-%d')
    last_day = df['date'].max().strftime('%Y-%m-%d')

    # Assemble the report as parts and join once at the end.
    parts = [f"""# Production Data Analysis Report
Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Dataset Overview
- **Total Records**: {total_items:,}
- **Date Range**: {first_day} to {last_day}
- **Production Days**: {df['date'].nunique()}
- **Total Production**: {total_production:,.0f} kg
- **Daily Average**: {daily_avg:,.0f} kg

## Material Type Breakdown"""]

    for material in df['material_type'].unique():
        subset = df[df['material_type'] == material]
        subtotal = subset['weight_kg'].sum()
        share = subtotal / total_production * 100
        parts.append(f"\n- **{material.title()}**: {subtotal:,.0f} kg ({share:.1f}%) - {len(subset):,} records")

    # Shift analysis (optional column)
    if 'shift' in df.columns:
        shift_stats = df.groupby('shift')['weight_kg'].agg(['sum', 'mean', 'count'])
        parts.append("\n\n## Shift Performance")
        for shift in shift_stats.index:
            parts.append(f"\n- **Shift {shift}**: {shift_stats.loc[shift, 'sum']:,.0f} kg total, {shift_stats.loc[shift, 'mean']:.1f} kg avg")

    return "".join(parts)
75
 
76
def create_overview_plot(df):
    """Line chart of total production weight per day."""
    per_day = df.groupby('date')['weight_kg'].sum().reset_index()

    fig = px.line(
        per_day,
        x='date',
        y='weight_kg',
        title='Daily Production Trend',
        labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'},
        template='plotly_white',
    )
    # Single trace — the legend adds nothing here.
    fig.update_layout(height=400, showlegend=False)
    return fig
87
 
88
def create_material_analysis(df):
    """Line chart comparing daily production totals across material types."""
    per_day_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()

    fig = px.line(
        per_day_material,
        x='date',
        y='weight_kg',
        color='material_type',
        title='Daily Production by Material Type',
        labels={'weight_kg': 'Weight (kg)', 'date': 'Date'},
        template='plotly_white',
    )
    fig.update_layout(height=400)
    return fig
99
-
100
def create_correlation_analysis(df):
    """Heatmap of correlations between the material types' daily totals."""
    pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)

    # Guard clause: with a single material there is nothing to correlate,
    # so show an annotated placeholder figure instead.
    if len(pivot.columns) <= 1:
        fig = go.Figure()
        fig.add_annotation(text="Only one material type - correlation analysis not applicable",
                           xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
        fig.update_layout(title="Material Type Correlation Matrix", height=400)
        return fig

    fig = px.imshow(pivot.corr(),
                    title='Material Type Correlation Matrix',
                    template='plotly_white',
                    color_continuous_scale='RdBu',
                    aspect='auto')
    fig.update_layout(height=400)
    return fig
120
-
121
def create_time_analysis(df):
    """Bar chart of average production per weekday, split by material type."""
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    pattern = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
    # Order weekdays chronologically rather than alphabetically.
    pattern['day_of_week'] = pd.Categorical(pattern['day_of_week'], categories=day_order, ordered=True)
    pattern = pattern.sort_values('day_of_week')

    fig = px.bar(pattern,
                 x='day_of_week',
                 y='weight_kg',
                 color='material_type',
                 title='Weekly Production Pattern (Average by Day)',
                 labels={'weight_kg': 'Average Weight (kg)', 'day_of_week': 'Day of Week'},
                 template='plotly_white')
    fig.update_layout(height=400)
    return fig
136
 
137
def detect_anomalies_report(df):
    """Build a markdown report of per-material weight outliers (1.5*IQR rule)."""
    report = "# Anomaly Detection Report\n\n"

    for material in df['material_type'].unique():
        subset = df[df['material_type'] == material]

        # Classic Tukey fence around the middle 50% of the weights.
        q1 = subset['weight_kg'].quantile(0.25)
        q3 = subset['weight_kg'].quantile(0.75)
        spread = q3 - q1
        lower = q1 - 1.5 * spread
        upper = q3 + 1.5 * spread
        anomalies = subset[(subset['weight_kg'] < lower) | (subset['weight_kg'] > upper)]

        report += f"## {material.title()} Material\n"
        report += f"- **Normal Range**: {lower:.1f} - {upper:.1f} kg\n"
        report += f"- **Anomalies Detected**: {len(anomalies)}\n"

        if len(anomalies) > 0:
            # Only the first ten dates are listed explicitly.
            dates_list = anomalies['date'].dt.strftime('%Y-%m-%d').head(10).tolist()
            report += f"- **Anomaly Dates**: {', '.join(dates_list)}\n"
            if len(anomalies) > 10:
                report += f" ... and {len(anomalies) - 10} more\n"
        report += "\n"

    return report
166
 
167
# Create Gradio interface
# Top-level UI definition: widgets are declared inside the Blocks context and
# the analyze button is wired to process_data below.
with gr.Blocks(title="Production Data Analysis", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏭 Production Data Analysis Dashboard")
    gr.Markdown("Upload your production data CSV file to generate comprehensive analysis reports and visualizations.")

    with gr.Row():
        # type="filepath" means process_data receives an object whose .name
        # is a path on disk.
        file_input = gr.File(
            label="Upload CSV File",
            file_types=[".csv"],
            type="filepath"
        )

    analyze_btn = gr.Button("Analyze Data", variant="primary", size="lg")

    # Text reports (markdown) on top...
    with gr.Row():
        with gr.Column(scale=1):
            summary_output = gr.Markdown(label="Summary Report")
            anomaly_output = gr.Markdown(label="Anomaly Report")

    # ...and the four plots in a 2x2 grid below.
    with gr.Row():
        with gr.Column():
            overview_plot = gr.Plot(label="Production Overview")
            correlation_plot = gr.Plot(label="Correlation Analysis")
        with gr.Column():
            material_plot = gr.Plot(label="Material Analysis")
            time_plot = gr.Plot(label="Time Pattern Analysis")

    # The outputs list order must match the 6-tuple returned by process_data.
    analyze_btn.click(
        fn=process_data,
        inputs=[file_input],
        outputs=[summary_output, overview_plot, material_plot, correlation_plot, time_plot, anomaly_output]
    )

    gr.Markdown("""
## Data Format Requirements
Your CSV file should contain the following columns:
- `date`: Date in MM/DD/YYYY format
- `weight_kg`: Production weight in kilograms
- `material_type`: Type of material (e.g., liquid, solid, waste_water)
- `shift`: Shift number (optional)
- `number`: Item number (optional)

The file should be tab-separated (TSV format with .csv extension).
""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
if __name__ == "__main__":
    # Launch the Gradio server only when run as a script, not when imported.
    demo.launch()
 
1
+ import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
4
  import plotly.express as px
5
+ import plotly.graph_objects as go
6
  from datetime import datetime
7
+ import google.generativeai as genai
8
+ import json
9
 
10
# Page config
# NOTE: st.set_page_config must be the first Streamlit call in the script;
# layout="wide" uses the full browser width for the dashboard.
st.set_page_config(
    page_title="Production Data Analysis",
    page_icon="🏭",
    layout="wide"
)
16
+
17
# Initialize Gemini
@st.cache_resource
def init_gemini():
    """Configure the Gemini client once per server process.

    Returns
    -------
    genai.GenerativeModel or None
        A model handle when GOOGLE_API_KEY is present in Streamlit secrets;
        None otherwise, so the UI can degrade gracefully without AI.
    """
    try:
        api_key = st.secrets.get("GOOGLE_API_KEY", "")
    except FileNotFoundError:
        # st.secrets raises FileNotFoundError when no secrets.toml exists at
        # all; treat that the same as a missing key instead of crashing the
        # whole app at startup.
        return None
    if api_key:
        genai.configure(api_key=api_key)
        return genai.GenerativeModel('gemini-1.5-flash')
    return None
25
+
26
# Data processing functions
@st.cache_data
def process_data(df):
    """Parse dates and derive the calendar features used by the analyses.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw upload with 'date' strings in MM/DD/YYYY format.

    Returns
    -------
    pandas.DataFrame
        A new frame with parsed 'date' plus day_of_week/week/month/is_weekend.
    """
    # Work on a copy: mutating the argument of an @st.cache_data function
    # makes behavior differ between cache hits and misses, and silently
    # alters the caller's frame.
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
    df['day_of_week'] = df['date'].dt.day_name()
    df['week'] = df['date'].dt.isocalendar().week
    df['month'] = df['date'].dt.month
    df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])
    return df
 
 
 
 
 
 
36
 
37
def generate_summary(df):
    """Compute summary statistics for the processed production DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have 'date' (datetime64), 'weight_kg' and 'material_type' columns.

    Returns
    -------
    dict
        Keys: total_production, total_items, daily_avg, date_range,
        production_days, and 'materials' mapping each material type to its
        total / percentage / count.
    """
    total_production = df['weight_kg'].sum()
    total_items = len(df)
    daily_avg = df.groupby('date')['weight_kg'].sum().mean()

    summary = {
        'total_production': total_production,
        'total_items': total_items,
        'daily_avg': daily_avg,
        'date_range': f"{df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}",
        'production_days': df['date'].nunique()
    }

    # Material breakdown — compute each subtotal once instead of twice.
    material_stats = {}
    for material in df['material_type'].unique():
        mat_data = df[df['material_type'] == material]
        mat_total = mat_data['weight_kg'].sum()
        material_stats[material] = {
            'total': mat_total,
            'percentage': mat_total / total_production * 100,
            'count': len(mat_data)
        }

    summary['materials'] = material_stats
    return summary
63
 
64
def detect_anomalies(df):
    """Flag per-material production outliers using the 1.5*IQR fence."""
    findings = {}
    for material in df['material_type'].unique():
        subset = df[df['material_type'] == material]

        q1 = subset['weight_kg'].quantile(0.25)
        q3 = subset['weight_kg'].quantile(0.75)
        fence = 1.5 * (q3 - q1)
        lower_bound = q1 - fence
        upper_bound = q3 + fence

        flagged = subset[(subset['weight_kg'] < lower_bound) |
                         (subset['weight_kg'] > upper_bound)]

        findings[material] = {
            'count': len(flagged),
            'normal_range': f"{lower_bound:.1f} - {upper_bound:.1f} kg",
            # The UI only surfaces the first five anomaly dates.
            'dates': flagged['date'].dt.strftime('%Y-%m-%d').tolist()[:5]
        }

    return findings
 
85
 
86
def create_plots(df):
    """Build the dashboard's plotly figures, keyed by section name."""
    plots = {}

    # Daily production trend
    per_day = df.groupby('date')['weight_kg'].sum().reset_index()
    plots['overview'] = px.line(
        per_day, x='date', y='weight_kg',
        title='Daily Production Trend',
        labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'}
    )

    # Material comparison
    per_day_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
    plots['materials'] = px.line(
        per_day_material, x='date', y='weight_kg', color='material_type',
        title='Production by Material Type'
    )

    # Weekly pattern — weekdays ordered chronologically, not alphabetically.
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    weekday_avg = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
    weekday_avg['day_of_week'] = pd.Categorical(weekday_avg['day_of_week'], categories=day_order, ordered=True)
    weekday_avg = weekday_avg.sort_values('day_of_week')
    plots['weekly'] = px.bar(
        weekday_avg, x='day_of_week', y='weight_kg', color='material_type',
        title='Weekly Production Pattern'
    )

    # Correlation matrix — only meaningful with 2+ material types, so the
    # 'correlation' key may be absent (callers must check for it).
    pivot = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
    if len(pivot.columns) > 1:
        plots['correlation'] = px.imshow(
            pivot.corr(), title='Material Type Correlation Matrix',
            color_continuous_scale='RdBu'
        )

    return plots
 
126
 
127
def query_llm(model, data_summary, user_question):
    """Ask the Gemini model a question, grounded in the data summary.

    Returns the model's text answer, or an error string if the call fails.
    """
    # Build the prompt as parts and join once.
    parts = [f"""
You are a production data analyst. Here's the current production data summary:

Production Overview:
- Total Production: {data_summary['total_production']:,.0f} kg
- Production Period: {data_summary['date_range']}
- Daily Average: {data_summary['daily_avg']:,.0f} kg
- Production Days: {data_summary['production_days']}

Material Breakdown:
"""]

    for material, stats in data_summary['materials'].items():
        parts.append(f"- {material.title()}: {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)\n")

    parts.append(f"\nUser Question: {user_question}\n\nPlease provide a concise, data-driven answer based on this production data.")
    prompt = "".join(parts)

    try:
        return model.generate_content(prompt).text
    except Exception as e:
        # Surface provider errors as plain text instead of crashing the UI.
        return f"Error querying AI: {str(e)}"
 
 
 
151
 
152
# Main app
def main():
    """Render the Streamlit dashboard: upload, metrics, charts, anomalies, AI chat."""
    st.title("🏭 Production Data Analysis Dashboard")
    st.markdown("Upload your production data and get AI-powered insights")

    # Initialize Gemini (None when no API key is configured)
    model = init_gemini()

    # Sidebar: file upload plus AI availability indicator
    with st.sidebar:
        st.header("📊 Data Upload")
        uploaded_file = st.file_uploader("Choose CSV file", type=['csv'])

        if model:
            st.success("🤖 AI Assistant Ready")
        else:
            st.warning("⚠️ AI Assistant unavailable (API key needed)")

    if uploaded_file is not None:
        # Load and process data
        try:
            # NOTE: despite the .csv extension the upload is read as tab-separated.
            df = pd.read_csv(uploaded_file, sep='\t')
            df = process_data(df)

            # Generate analysis
            summary = generate_summary(df)
            anomalies = detect_anomalies(df)
            plots = create_plots(df)

            # Display results: headline metrics in a 4-column row
            col1, col2, col3, col4 = st.columns(4)

            with col1:
                st.metric("Total Production", f"{summary['total_production']:,.0f} kg")
            with col2:
                st.metric("Daily Average", f"{summary['daily_avg']:,.0f} kg")
            with col3:
                st.metric("Production Days", summary['production_days'])
            with col4:
                st.metric("Material Types", len(summary['materials']))

            # Charts in a 2x2 grid
            st.subheader("📈 Production Trends")
            col1, col2 = st.columns(2)

            with col1:
                st.plotly_chart(plots['overview'], use_container_width=True)
            with col2:
                st.plotly_chart(plots['materials'], use_container_width=True)

            col3, col4 = st.columns(2)
            with col3:
                st.plotly_chart(plots['weekly'], use_container_width=True)
            with col4:
                # create_plots omits 'correlation' when only one material exists.
                if 'correlation' in plots:
                    st.plotly_chart(plots['correlation'], use_container_width=True)

            # Material breakdown: one expander per material type
            st.subheader("📋 Material Analysis")
            for material, stats in summary['materials'].items():
                with st.expander(f"{material.title()} - {stats['total']:,.0f} kg ({stats['percentage']:.1f}%)"):
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("Total Weight", f"{stats['total']:,.0f} kg")
                    with col2:
                        st.metric("Percentage", f"{stats['percentage']:.1f}%")
                    with col3:
                        st.metric("Records", stats['count'])

            # Anomaly detection results
            st.subheader("⚠️ Anomaly Detection")
            for material, anom in anomalies.items():
                if anom['count'] > 0:
                    st.warning(f"**{material.title()}**: {anom['count']} anomalies detected")
                    st.caption(f"Normal range: {anom['normal_range']}")
                    if anom['dates']:
                        st.caption(f"Recent anomaly dates: {', '.join(anom['dates'])}")
                else:
                    st.success(f"**{material.title()}**: No anomalies detected")

            # AI Chat Interface (only shown when the model is configured)
            if model:
                st.subheader("🤖 AI Production Assistant")

                # Predefined questions rendered as buttons in two columns
                st.markdown("**Quick Questions:**")
                quick_questions = [
                    "What are the key production trends?",
                    "Which material type shows the best consistency?",
                    "Are there any concerning patterns in the data?",
                    "What recommendations do you have for optimization?"
                ]

                cols = st.columns(2)
                for i, question in enumerate(quick_questions):
                    with cols[i % 2]:
                        # key=f"q_{i}" keeps button widget keys unique.
                        if st.button(question, key=f"q_{i}"):
                            with st.spinner("AI analyzing..."):
                                answer = query_llm(model, summary, question)
                                st.success(f"**Q:** {question}")
                                st.write(f"**A:** {answer}")

                # Custom question
                st.markdown("**Ask a Custom Question:**")
                user_question = st.text_input("Your question about the production data:")

                if user_question and st.button("Get AI Answer"):
                    with st.spinner("AI analyzing..."):
                        answer = query_llm(model, summary, user_question)
                        st.success(f"**Q:** {user_question}")
                        st.write(f"**A:** {answer}")

        except Exception as e:
            # Any failure in read/process/render collapses to a friendly message.
            st.error(f"Error processing file: {str(e)}")
            st.info("Please ensure your CSV file has columns: date, weight_kg, material_type")

    else:
        st.info("👆 Please upload a CSV file to begin analysis")

    st.markdown("""
    ### 📋 Data Format Requirements
    Your CSV file should contain:
    - `date`: Date in MM/DD/YYYY format
    - `weight_kg`: Production weight in kilograms
    - `material_type`: Type of material (liquid, solid, waste_water, etc.)
    - `shift`: Shift number (optional)

    The file should be tab-separated (TSV format with .csv extension).
    """)
281
 
282
if __name__ == "__main__":
    # Streamlit re-runs this script top-to-bottom; the guard keeps imports side-effect free.
    main()