Spaces:

entropy25
/

production-data-analysis

Sleeping

App Files Files Community

entropy25 committed on Aug 13, 2025

Commit

e2d32d3

verified ·

1 Parent(s): 83f0e1a

Create app.py

Browse files

Files changed (1) hide show

app.py +225 -0

app.py ADDED Viewed

	@@ -0,0 +1,225 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+import plotly.graph_objects as go
+import plotly.express as px
+from plotly.subplots import make_subplots
+import warnings
+from datetime import datetime
+import io
+warnings.filterwarnings('ignore')
+def process_data(file):
+    """Process uploaded CSV file and generate comprehensive analysis"""
+    if file is None:
+        return "Please upload a CSV file", None, None, None, None, None
+    try:
+        # Read the uploaded file
+        df = pd.read_csv(file.name, sep='\t')
+        # Data preprocessing
+        df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
+        if 'original_date' in df.columns:
+            df['original_date'] = pd.to_datetime(df['original_date'], format='%d/%m/%Y')
+        df['day_of_week'] = df['date'].dt.day_name()
+        df['week'] = df['date'].dt.isocalendar().week
+        df['month'] = df['date'].dt.month
+        df['is_weekend'] = df['day_of_week'].isin(['Saturday', 'Sunday'])
+        # Generate all analyses
+        summary_text = generate_summary(df)
+        overview_plot = create_overview_plot(df)
+        material_plot = create_material_analysis(df)
+        correlation_plot = create_correlation_analysis(df)
+        time_analysis_plot = create_time_analysis(df)
+        anomaly_report = detect_anomalies_report(df)
+        return summary_text, overview_plot, material_plot, correlation_plot, time_analysis_plot, anomaly_report
+    except Exception as e:
+        return f"Error processing file: {str(e)}", None, None, None, None, None
+def generate_summary(df):
+    """Generate comprehensive summary statistics"""
+    total_production = df['weight_kg'].sum()
+    total_items = len(df)
+    daily_avg = df.groupby('date')['weight_kg'].sum().mean()
+    summary = f"""
+# Production Data Analysis Report
+Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
+## Dataset Overview
+- **Total Records**: {total_items:,}
+- **Date Range**: {df['date'].min().strftime('%Y-%m-%d')} to {df['date'].max().strftime('%Y-%m-%d')}
+- **Production Days**: {df['date'].nunique()}
+- **Total Production**: {total_production:,.0f} kg
+- **Daily Average**: {daily_avg:,.0f} kg
+## Material Type Breakdown
+"""
+    for material in df['material_type'].unique():
+        mat_data = df[df['material_type'] == material]
+        mat_total = mat_data['weight_kg'].sum()
+        mat_pct = mat_total / total_production * 100
+        mat_count = len(mat_data)
+        summary += f"- **{material.title()}**: {mat_total:,.0f} kg ({mat_pct:.1f}%) - {mat_count:,} records\n"
+    # Shift analysis
+    if 'shift' in df.columns:
+        shift_data = df.groupby('shift')['weight_kg'].agg(['sum', 'mean', 'count'])
+        summary += f"\n## Shift Performance\n"
+        for shift in shift_data.index:
+            summary += f"- **Shift {shift}**: {shift_data.loc[shift, 'sum']:,.0f} kg total, {shift_data.loc[shift, 'mean']:.1f} kg avg, {shift_data.loc[shift, 'count']} records\n"
+    return summary
+def create_overview_plot(df):
+    """Create overall production trend plot"""
+    daily_total = df.groupby('date')['weight_kg'].sum().reset_index()
+    fig = px.line(daily_total, x='date', y='weight_kg',
+                  title='Daily Production Trend',
+                  labels={'weight_kg': 'Total Weight (kg)', 'date': 'Date'},
+                  template='plotly_white')
+    fig.update_layout(height=400, showlegend=False)
+    return fig
+def create_material_analysis(df):
+    """Create material type comparison plots"""
+    # Daily production by material type
+    daily_by_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
+    fig = px.line(daily_by_material, x='date', y='weight_kg', color='material_type',
+                  title='Daily Production by Material Type',
+                  labels={'weight_kg': 'Weight (kg)', 'date': 'Date'},
+                  template='plotly_white')
+    fig.update_layout(height=400)
+    return fig
+def create_correlation_analysis(df):
+    """Create correlation matrix plot"""
+    daily_by_material = df.groupby(['date', 'material_type'])['weight_kg'].sum().unstack(fill_value=0)
+    if len(daily_by_material.columns) > 1:
+        correlation_matrix = daily_by_material.corr()
+        fig = px.imshow(correlation_matrix,
+                        title='Material Type Correlation Matrix',
+                        template='plotly_white',
+                        color_continuous_scale='RdBu',
+                        aspect='auto')
+        fig.update_layout(height=400)
+        return fig
+    else:
+        # Create empty plot if only one material type
+        fig = go.Figure()
+        fig.add_annotation(text="Only one material type - correlation analysis not applicable",
+                          xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
+        fig.update_layout(title="Material Type Correlation Matrix", height=400)
+        return fig
+def create_time_analysis(df):
+    """Create time pattern analysis"""
+    # Weekly pattern
+    weekly_pattern = df.groupby(['day_of_week', 'material_type'])['weight_kg'].mean().reset_index()
+    # Define day order
+    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
+    weekly_pattern['day_of_week'] = pd.Categorical(weekly_pattern['day_of_week'], categories=day_order, ordered=True)
+    weekly_pattern = weekly_pattern.sort_values('day_of_week')
+    fig = px.bar(weekly_pattern, x='day_of_week', y='weight_kg', color='material_type',
+                 title='Weekly Production Pattern (Average by Day)',
+                 labels={'weight_kg': 'Average Weight (kg)', 'day_of_week': 'Day of Week'},
+                 template='plotly_white')
+    fig.update_layout(height=400)
+    return fig
+def detect_anomalies_report(df):
+    """Generate anomaly detection report"""
+    def detect_outliers(data, column='weight_kg'):
+        Q1 = data[column].quantile(0.25)
+        Q3 = data[column].quantile(0.75)
+        IQR = Q3 - Q1
+        lower_bound = Q1 - 1.5 * IQR
+        upper_bound = Q3 + 1.5 * IQR
+        anomalies = data[(data[column] < lower_bound) | (data[column] > upper_bound)]
+        return anomalies, lower_bound, upper_bound
+    report = "# Anomaly Detection Report\n\n"
+    for material in df['material_type'].unique():
+        material_data = df[df['material_type'] == material]
+        anomalies, lower, upper = detect_outliers(material_data)
+        report += f"## {material.title()} Material\n"
+        report += f"- **Normal Range**: {lower:.1f} - {upper:.1f} kg\n"
+        report += f"- **Anomalies Detected**: {len(anomalies)}\n"
+        if len(anomalies) > 0:
+            report += f"- **Anomaly Dates**: {', '.join(anomalies['date'].dt.strftime('%Y-%m-%d').head(10).tolist())}\n"
+            if len(anomalies) > 10:
+                report += f"  ... and {len(anomalies) - 10} more\n"
+        report += "\n"
+    return report
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(title="Production Data Analysis", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🏭 Production Data Analysis Dashboard")
+        gr.Markdown("Upload your production data CSV file to generate comprehensive analysis reports and visualizations.")
+        with gr.Row():
+            file_input = gr.File(
+                label="Upload CSV File",
+                file_types=[".csv"],
+                type="filepath"
+            )
+        analyze_btn = gr.Button("Analyze Data", variant="primary", size="lg")
+        with gr.Row():
+            with gr.Column(scale=1):
+                summary_output = gr.Markdown(label="Summary Report")
+                anomaly_output = gr.Markdown(label="Anomaly Report")
+        with gr.Row():
+            with gr.Column():
+                overview_plot = gr.Plot(label="Production Overview")
+                correlation_plot = gr.Plot(label="Correlation Analysis")
+            with gr.Column():
+                material_plot = gr.Plot(label="Material Analysis")
+                time_plot = gr.Plot(label="Time Pattern Analysis")
+        analyze_btn.click(
+            fn=process_data,
+            inputs=[file_input],
+            outputs=[summary_output, overview_plot, material_plot, correlation_plot, time_plot, anomaly_output]
+        )
+        gr.Markdown("""
+        ## Data Format Requirements
+        Your CSV file should contain the following columns:
+        - `date`: Date in MM/DD/YYYY format
+        - `weight_kg`: Production weight in kilograms
+        - `material_type`: Type of material (e.g., liquid, solid, waste_water)
+        - `shift`: Shift number (optional)
+        - `number`: Item number (optional)
+        The file should be tab-separated (TSV format with .csv extension).
+        """)
+    return demo
+# Build and serve the dashboard only when this file is executed as a script;
+# importing the module defines the functions without starting a server.
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.launch()