Spaces:

entropy25
/

IndustrialProductionIntelligence

Running

App Files Files Community

entropy25 committed on Jan 4

Commit

463287e

verified ·

1 Parent(s): 0f3eca6

Update analytics.py

Browse files

Files changed (1) hide show

analytics.py +157 -96

analytics.py CHANGED Viewed

@@ -1,114 +1,175 @@
 import pandas as pd
-import numpy as np
-import io
-import requests
 import streamlit as st
-from datetime import datetime
-from typing import Optional
-from pydantic import BaseModel, validator, ValidationError
-from config import DATA_URLS, SAMPLE_MATERIALS, SAMPLE_SHIFTS, SAMPLE_BASE_WEIGHTS
-class ProductionRecord(BaseModel):
-    date: datetime
-    weight_kg: float
-    material_type: str
-    shift: Optional[str] = None
-    @validator('weight_kg')
-    def weight_must_be_positive(cls, v):
-        if v < 0:
-            raise ValueError('Negative weight detected: possible sensor malfunction')
-        return v
 @st.cache_data
-def load_preset_data(year: str) -> Optional[pd.DataFrame]:
-    try:
-        if year in DATA_URLS:
-            response = requests.get(DATA_URLS[year], timeout=10)
-            response.raise_for_status()
-            df = pd.read_csv(io.StringIO(response.text), sep='\t')
-            df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
-            df['day_name'] = df['date'].dt.day_name()
-            return validate_dataframe(df)
-        else:
-            return generate_sample_data(year)
-    except Exception as e:
-        st.warning(f"Could not load remote {year} data. Loading sample data instead.")
-        return generate_sample_data(year)
-def generate_sample_data(year: str) -> pd.DataFrame:
-    np.random.seed(42 if year == "2024" else 84)
-    start_date = f"01/01/{year}"
-    end_date = f"12/31/{year}"
-    dates = pd.date_range(start=start_date, end=end_date, freq='D')
-    weekdays = dates[dates.weekday < 5]
-    data = []
-    for date in weekdays:
-        for material in SAMPLE_MATERIALS:
-            for shift in SAMPLE_SHIFTS:
-                base_weight = SAMPLE_BASE_WEIGHTS[material]
-                weight = base_weight + np.random.normal(0, base_weight * 0.2)
-                weight = max(weight, base_weight * 0.3)
-                data.append({
-                    'date': date.strftime('%m/%d/%Y'),
-                    'weight_kg': round(weight, 1),
-                    'material_type': material,
-                    'shift': shift
-                })
-    df = pd.DataFrame(data)
-    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
-    df['day_name'] = df['date'].dt.day_name()
-    return df
 @st.cache_data
-def load_uploaded_data(file) -> pd.DataFrame:
-    max_size = 50 * 1024 * 1024
-    if file.size > max_size:
-        raise ValueError(f"File size {file.size / 1024 / 1024:.1f}MB exceeds limit of 50MB")
-    df = pd.read_csv(file, sep='\t')
-    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
-    df['day_name'] = df['date'].dt.day_name()
-    return validate_dataframe(df)
-def validate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
-    required_columns = ['date', 'weight_kg', 'material_type']
-    missing_columns = [col for col in required_columns if col not in df.columns]
-    if missing_columns:
-        raise ValueError(f"Missing required columns: {', '.join(missing_columns)}")
-    def validate_row(row):
-        try:
-            record_dict = row.to_dict()
-            if 'shift' not in record_dict or pd.isna(record_dict['shift']):
-                record_dict['shift'] = None
-            ProductionRecord(**record_dict)
-            return True
-        except ValidationError:
-            return False
-    valid_mask = df.apply(validate_row, axis=1)
-    invalid_count = (~valid_mask).sum()
-    if invalid_count > 0:
-        st.warning(f"Found {invalid_count} anomalous records, automatically filtered")
-        return df[valid_mask]
-    return df
-def data_health_check(df: pd.DataFrame) -> dict:
-    completeness = (1 - df.isnull().sum().sum() / df.size) * 100
-    time_span = (df['date'].max() - df['date'].min()).days
-    last_update = df['date'].max().strftime('%Y-%m-%d')
-    return {
-        "Completeness": f"{completeness:.1f}%",
-        "Time Span": f"{time_span} days",
-        "Last Update": last_update,
-        "Total Records": f"{len(df):,}"
-    }

 import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
 import streamlit as st
+from typing import Dict, Optional, List
+from config import get_chart_theme, DESIGN_SYSTEM, get_translation
 @st.cache_data
+def get_material_stats(df: pd.DataFrame) -> Dict:
+    """Summarise production weight per material, plus an overall '_total_' entry.
+
+    Args:
+        df: Production records; the 'date', 'weight_kg' and 'material_type'
+            columns are read.
+
+    Returns:
+        Dict keyed by each distinct material_type value and by '_total_'.
+        Every value is a dict with 'total' (summed weight), 'percentage'
+        (share of the grand total), 'daily_avg' (mean of per-date sums),
+        'work_days' (distinct dates) and 'records' (row count).
+    """
+    stats = {}
+    total = df['weight_kg'].sum()
+    for material in df['material_type'].unique():
+        data = df[df['material_type'] == material]
+        material_total = data['weight_kg'].sum()
+        stats[material] = {
+            'total': material_total,
+            # Guard the division: a zero grand total would otherwise give NaN.
+            'percentage': (material_total / total) * 100 if total else 0.0,
+            'daily_avg': data.groupby('date')['weight_kg'].sum().mean(),
+            'work_days': data['date'].nunique(),
+            'records': len(data)
+        }
+    stats['_total_'] = {
+        'total': total,
+        'percentage': 100.0,
+        'daily_avg': df.groupby('date')['weight_kg'].sum().mean(),
+        'work_days': df['date'].nunique(),
+        'records': len(df)
+    }
+    return stats
 @st.cache_data
+def detect_outliers(df: pd.DataFrame) -> Dict:
+    """Flag weight outliers for each material using 1.5 * IQR fences.
+
+    Args:
+        df: Production records; the 'material_type', 'weight_kg' and 'date'
+            columns are read, and 'date' is accessed through the .dt accessor.
+
+    Returns:
+        Dict keyed by material. Each value holds 'count' (number of flagged
+        rows), 'range' (the fence interval formatted as "<low> - <high> kg")
+        and 'dates' (flagged dates as 'YYYY-MM-DD' strings).
+    """
+    report = {}
+    for name in df['material_type'].unique():
+        rows = df[df['material_type'] == name]
+        weights = rows['weight_kg']
+        q1 = weights.quantile(0.25)
+        q3 = weights.quantile(0.75)
+        spread = q3 - q1
+        low_fence = q1 - 1.5 * spread
+        high_fence = q3 + 1.5 * spread
+        # A row is flagged when its weight falls strictly outside either fence.
+        is_flagged = weights.lt(low_fence) | weights.gt(high_fence)
+        flagged_dates = rows.loc[is_flagged, 'date'].dt.strftime('%Y-%m-%d').tolist()
+        report[name] = {
+            'count': len(flagged_dates),
+            'range': f"{low_fence:.0f} - {high_fence:.0f} kg",
+            'dates': flagged_dates
+        }
+    return report
+def create_total_production_chart(df: pd.DataFrame, time_period: str = 'daily', lang: str = 'English'):
+    """Build a chart of total production weight aggregated per day, week or month.
+
+    Args:
+        df: Production records; the 'date' and 'weight_kg' columns are read,
+            and 'date' is accessed through the .dt accessor.
+        time_period: 'daily' gives a line chart, 'weekly' a bar chart per ISO
+            week; any other value gives a bar chart per calendar month.
+        lang: Passed to get_translation() to look up titles and axis labels.
+
+    Returns:
+        The Plotly figure with the chart theme layout applied.
+    """
+    t = get_translation(lang)
+    weight_label = t.get('label_weight', 'Weight (kg)')
+    if time_period == 'daily':
+        grouped = df.groupby('date')['weight_kg'].sum().reset_index()
+        fig = px.line(grouped, x='date', y='weight_kg',
+                     title=t.get('chart_total_production', 'Total Production Trend'),
+                     labels={'weight_kg': weight_label, 'date': t.get('label_date', 'Date')})
+    elif time_period == 'weekly':
+        df_copy = df.copy()
+        # Take year and week from the same ISO calendar. Pairing the ISO week
+        # with the calendar year mislabels dates around New Year (2024-12-30
+        # is ISO week 1 of 2025, not week 1 of 2024).
+        iso = df_copy['date'].dt.isocalendar()
+        df_copy['week'] = iso['week']
+        df_copy['year'] = iso['year']
+        grouped = df_copy.groupby(['year', 'week'])['weight_kg'].sum().reset_index()
+        grouped['week_label'] = grouped['year'].astype(str) + '-W' + grouped['week'].astype(str)
+        fig = px.bar(grouped, x='week_label', y='weight_kg',
+                    title=t.get('chart_total_production_weekly', 'Total Production Trend (Weekly)'),
+                    labels={'weight_kg': weight_label, 'week_label': t.get('label_week', 'Week')})
+    else:
+        df_copy = df.copy()
+        df_copy['month'] = df_copy['date'].dt.to_period('M')
+        grouped = df_copy.groupby('month')['weight_kg'].sum().reset_index()
+        # Periods are converted to strings before being handed to Plotly.
+        grouped['month'] = grouped['month'].astype(str)
+        fig = px.bar(grouped, x='month', y='weight_kg',
+                    title=t.get('chart_total_production_monthly', 'Total Production Trend (Monthly)'),
+                    labels={'weight_kg': weight_label, 'month': t.get('label_month', 'Month')})
+    fig.update_layout(**get_chart_theme()['layout'], height=400, showlegend=False)
+    return fig
+def create_materials_trend_chart(df: pd.DataFrame, time_period: str = 'daily',
+                                selected_materials: Optional[List[str]] = None, lang: str = 'English'):
+    """Build a per-material production chart aggregated per day, week or month.
+
+    Args:
+        df: Production records; the 'date', 'weight_kg' and 'material_type'
+            columns are read, and 'date' is accessed through the .dt accessor.
+        time_period: 'daily' gives a line chart, 'weekly' a bar chart per ISO
+            week; any other value gives a bar chart per calendar month.
+        selected_materials: When non-empty, only these material_type values
+            are plotted; None or an empty list keeps every material.
+        lang: Passed to get_translation() to look up titles and axis labels.
+
+    Returns:
+        The Plotly figure, coloured by material, with the chart theme applied.
+    """
+    df_copy = df.copy()
+    t = get_translation(lang)
+    weight_label = t.get('label_weight', 'Weight (kg)')
+    material_label = t.get('label_material', 'Material')
+    if selected_materials:
+        df_copy = df_copy[df_copy['material_type'].isin(selected_materials)]
+    if time_period == 'daily':
+        grouped = df_copy.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
+        fig = px.line(grouped, x='date', y='weight_kg', color='material_type',
+                     title=t.get('chart_materials_trends', 'Materials Production Trends'),
+                     labels={'weight_kg': weight_label,
+                             'date': t.get('label_date', 'Date'),
+                             'material_type': material_label})
+    elif time_period == 'weekly':
+        # Take year and week from the same ISO calendar. Pairing the ISO week
+        # with the calendar year mislabels dates around New Year (2024-12-30
+        # is ISO week 1 of 2025, not week 1 of 2024).
+        iso = df_copy['date'].dt.isocalendar()
+        df_copy['week'] = iso['week']
+        df_copy['year'] = iso['year']
+        grouped = df_copy.groupby(['year', 'week', 'material_type'])['weight_kg'].sum().reset_index()
+        grouped['week_label'] = grouped['year'].astype(str) + '-W' + grouped['week'].astype(str)
+        fig = px.bar(grouped, x='week_label', y='weight_kg', color='material_type',
+                    title=t.get('chart_materials_trends_weekly', 'Materials Production Trends (Weekly)'),
+                    labels={'weight_kg': weight_label,
+                            'week_label': t.get('label_week', 'Week'),
+                            'material_type': material_label})
+    else:
+        df_copy['month'] = df_copy['date'].dt.to_period('M')
+        grouped = df_copy.groupby(['month', 'material_type'])['weight_kg'].sum().reset_index()
+        # Periods are converted to strings before being handed to Plotly.
+        grouped['month'] = grouped['month'].astype(str)
+        fig = px.bar(grouped, x='month', y='weight_kg', color='material_type',
+                    title=t.get('chart_materials_trends_monthly', 'Materials Production Trends (Monthly)'),
+                    labels={'weight_kg': weight_label,
+                            'month': t.get('label_month', 'Month'),
+                            'material_type': material_label})
+    fig.update_layout(**get_chart_theme()['layout'], height=400)
+    return fig
+def create_shift_trend_chart(df: pd.DataFrame, time_period: str = 'daily', lang: str = 'English'):
+    """Build a stacked bar chart of production weight split by shift.
+
+    Args:
+        df: Production records; the 'date', 'shift' and 'weight_kg' columns
+            are read, and 'date' is accessed through the .dt accessor for
+            the non-daily views.
+        time_period: 'daily' plots one bar per date using only the 'day' and
+            'night' shift values; 'weekly' aggregates per ISO week; any other
+            value aggregates per calendar month.
+        lang: Passed to get_translation() to look up titles and axis labels.
+
+    Returns:
+        The Plotly figure with the chart theme layout applied.
+    """
+    theme = get_chart_theme()
+    t = get_translation(lang)
+    weight_label = t.get('label_weight', 'Weight (kg)')
+    if time_period == 'daily':
+        grouped = df.groupby(['date', 'shift'])['weight_kg'].sum().reset_index()
+        pivot_data = grouped.pivot(index='date', columns='shift', values='weight_kg').fillna(0)
+        fig = go.Figure()
+        if 'day' in pivot_data.columns:
+            fig.add_trace(go.Bar(
+                x=pivot_data.index,
+                y=pivot_data['day'],
+                name=t.get('label_day_shift', 'Day Shift'),
+                marker_color=DESIGN_SYSTEM['colors']['warning'],
+                text=pivot_data['day'].round(0),
+                textposition='inside'
+            ))
+        if 'night' in pivot_data.columns:
+            fig.add_trace(go.Bar(
+                x=pivot_data.index,
+                y=pivot_data['night'],
+                name=t.get('label_night_shift', 'Night Shift'),
+                marker_color=DESIGN_SYSTEM['colors']['primary'],
+                # Night bars start where the day bars end (or at 0 without day data).
+                base=pivot_data['day'] if 'day' in pivot_data.columns else 0,
+                text=pivot_data['night'].round(0),
+                textposition='inside'
+            ))
+        fig.update_layout(
+            **theme['layout'],
+            title=t.get('chart_shift_trends', 'Daily Shift Production Trends (Stacked)'),
+            xaxis_title=t.get('label_date', 'Date'),
+            yaxis_title=weight_label,
+            barmode='stack',
+            height=400,
+            showlegend=True
+        )
+    else:
+        # Aggregate by the requested period. Grouping by raw date here would
+        # draw daily bars under a weekly/monthly title.
+        df_copy = df.copy()
+        if time_period == 'weekly':
+            # ISO year and ISO week are taken together so dates around
+            # New Year fall into the correct week bucket.
+            iso = df_copy['date'].dt.isocalendar()
+            df_copy['week'] = iso['week']
+            df_copy['year'] = iso['year']
+            grouped = df_copy.groupby(['year', 'week', 'shift'])['weight_kg'].sum().reset_index()
+            grouped['period'] = grouped['year'].astype(str) + '-W' + grouped['week'].astype(str)
+            period_label = t.get('label_week', 'Week')
+        else:
+            df_copy['month'] = df_copy['date'].dt.to_period('M')
+            grouped = df_copy.groupby(['month', 'shift'])['weight_kg'].sum().reset_index()
+            grouped['period'] = grouped['month'].astype(str)
+            period_label = t.get('label_month', 'Month')
+        fig = px.bar(grouped, x='period', y='weight_kg', color='shift',
+                    title=t.get('chart_shift_trends_period', f'{time_period.title()} Shift Production Trends'),
+                    labels={'weight_kg': weight_label,
+                            'period': period_label,
+                            'shift': t.get('label_shift', 'Shift')},
+                    barmode='stack')
+        fig.update_layout(**theme['layout'], height=400)
+    return fig