entropy25 committed on
Commit
463287e
·
verified ·
1 Parent(s): 0f3eca6

Update analytics.py

Browse files
Files changed (1) hide show
  1. analytics.py +157 -96
analytics.py CHANGED
@@ -1,114 +1,175 @@
1
  import pandas as pd
2
- import numpy as np
3
- import io
4
- import requests
5
  import streamlit as st
6
- from datetime import datetime
7
- from typing import Optional
8
- from pydantic import BaseModel, validator, ValidationError
9
 
10
- from config import DATA_URLS, SAMPLE_MATERIALS, SAMPLE_SHIFTS, SAMPLE_BASE_WEIGHTS
11
-
12
class ProductionRecord(BaseModel):
    """Schema used to validate a single production row.

    Fields:
        date: Timestamp of the record.
        weight_kg: Recorded weight; negative values are rejected.
        material_type: Name of the material.
        shift: Optional shift label; defaults to ``None``.
    """

    date: datetime
    weight_kg: float
    material_type: str
    shift: Optional[str] = None

    @validator('weight_kg')
    def weight_must_be_positive(cls, v):
        """Reject negative weights; zero is accepted."""
        if v < 0:
            raise ValueError('Negative weight detected: possible sensor malfunction')
        return v
23
 
24
  @st.cache_data
25
def load_preset_data(year: str) -> Optional[pd.DataFrame]:
    """Load the preset dataset for *year*, falling back to sample data.

    When *year* has an entry in ``DATA_URLS`` the tab-separated file is
    downloaded, its ``date`` column is parsed as ``%m/%d/%Y``, a
    ``day_name`` column is added and the frame is passed through
    ``validate_dataframe``.  If *year* has no URL, or anything goes wrong
    while downloading/parsing/validating, generated sample data is
    returned instead (with a Streamlit warning in the failure case).

    Args:
        year: Key into ``DATA_URLS`` and year used for sample data.

    Returns:
        The loaded (or generated) DataFrame.
    """
    # No remote source configured for this year: nothing to download.
    if year not in DATA_URLS:
        return generate_sample_data(year)

    try:
        response = requests.get(DATA_URLS[year], timeout=10)
        response.raise_for_status()
        df = pd.read_csv(io.StringIO(response.text), sep='\t')
        df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
        df['day_name'] = df['date'].dt.day_name()
        return validate_dataframe(df)
    except Exception:
        # Deliberate best-effort boundary: any network, parsing or
        # validation failure degrades to sample data instead of crashing.
        st.warning(f"Could not load remote {year} data. Loading sample data instead.")
        return generate_sample_data(year)
39
-
40
def generate_sample_data(year: str) -> pd.DataFrame:
    """Generate deterministic sample production data for *year*.

    One row is produced per weekday (Monday-Friday) of the year, per
    material in ``SAMPLE_MATERIALS`` and per shift in ``SAMPLE_SHIFTS``.
    Each weight is the material's base weight from ``SAMPLE_BASE_WEIGHTS``
    plus normal noise (standard deviation 20% of the base), floored at
    30% of the base and rounded to one decimal.

    Args:
        year: Four-digit year as a string; ``"2024"`` seeds the RNG with
            42, any other value with 84.

    Returns:
        DataFrame with columns ``date``, ``weight_kg``, ``material_type``,
        ``shift`` and ``day_name``.
    """
    # NOTE: this seeds NumPy's global RNG, as the original did, so the
    # generated values stay reproducible per year.
    np.random.seed(42 if year == "2024" else 84)

    dates = pd.date_range(start=f"01/01/{year}", end=f"12/31/{year}", freq='D')
    weekdays = dates[dates.weekday < 5]

    data = []
    # Loop order (date -> material -> shift) fixes the order in which
    # random numbers are drawn; keep it to preserve the generated values.
    for date in weekdays:
        for material in SAMPLE_MATERIALS:
            base_weight = SAMPLE_BASE_WEIGHTS[material]
            for shift in SAMPLE_SHIFTS:
                weight = base_weight + np.random.normal(0, base_weight * 0.2)
                weight = max(weight, base_weight * 0.3)
                data.append({
                    # Store the timestamp directly instead of formatting it
                    # to a string and parsing it back.
                    'date': date,
                    'weight_kg': round(weight, 1),
                    'material_type': material,
                    'shift': shift,
                })

    df = pd.DataFrame(data)
    df['date'] = pd.to_datetime(df['date'])
    df['day_name'] = df['date'].dt.day_name()
    return df
66
 
67
  @st.cache_data
68
def load_uploaded_data(file) -> pd.DataFrame:
    """Read an uploaded tab-separated file into a validated DataFrame.

    Args:
        file: Uploaded file object exposing ``size`` (bytes) and readable
            by ``pandas.read_csv``.

    Returns:
        The frame returned by ``validate_dataframe`` after the ``date``
        column has been parsed (``%m/%d/%Y``) and ``day_name`` added.

    Raises:
        ValueError: If the file is larger than 50MB, or the ``date``
            column is missing.
    """
    max_size = 50 * 1024 * 1024
    if file.size > max_size:
        raise ValueError(f"File size {file.size / 1024 / 1024:.1f}MB exceeds limit of 50MB")

    df = pd.read_csv(file, sep='\t')

    # Date parsing happens before validate_dataframe can report missing
    # columns, so check for 'date' here instead of failing with KeyError.
    if 'date' not in df.columns:
        raise ValueError("Missing required columns: date")

    df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
    df['day_name'] = df['date'].dt.day_name()
    return validate_dataframe(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
def validate_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows that fail ``ProductionRecord`` validation.

    Args:
        df: Frame that must contain ``date``, ``weight_kg`` and
            ``material_type`` columns.

    Returns:
        *df* itself when every row is valid (including when *df* has no
        rows); otherwise the subset of valid rows, after emitting a
        Streamlit warning with the number of rows removed.

    Raises:
        ValueError: If any required column is missing.
    """
    required_columns = ['date', 'weight_kg', 'material_type']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {', '.join(missing_columns)}")

    def validate_row(row) -> bool:
        """Return True when *row* satisfies the ProductionRecord schema."""
        record_dict = row.to_dict()
        # A missing or NaN shift is treated as "no shift".
        if 'shift' not in record_dict or pd.isna(record_dict['shift']):
            record_dict['shift'] = None
        try:
            ProductionRecord(**record_dict)
        except ValidationError:
            return False
        return True

    # Build the mask explicitly with bool dtype: DataFrame.apply(axis=1)
    # on an empty frame does not yield a boolean Series, which made the
    # `~valid_mask` step fail for empty input.
    valid_mask = pd.Series(
        [validate_row(row) for _, row in df.iterrows()],
        index=df.index,
        dtype=bool,
    )
    invalid_count = int((~valid_mask).sum())

    if invalid_count > 0:
        st.warning(f"Found {invalid_count} anomalous records, automatically filtered")
        return df[valid_mask]

    return df
 
103
 
104
def data_health_check(df: pd.DataFrame) -> dict:
    """Summarise completeness and date coverage of *df* as display strings.

    Args:
        df: Frame with a datetime ``date`` column.

    Returns:
        Dict with keys ``"Completeness"`` (percentage of non-null cells),
        ``"Time Span"`` (days between the earliest and latest date),
        ``"Last Update"`` (latest date as ``YYYY-MM-DD``) and
        ``"Total Records"`` (row count with thousands separators).
        For a frame with no cells completeness is ``"0.0%"``; when there
        is no valid date the span is ``"0 days"`` and the last update is
        ``"N/A"``.
    """
    total_cells = df.size
    if total_cells:
        completeness = (1 - df.isnull().sum().sum() / total_cells) * 100
    else:
        # Avoid dividing by zero on an empty frame.
        completeness = 0.0

    first_date = df['date'].min()
    last_date = df['date'].max()
    if pd.isna(last_date):
        # No usable dates: NaT cannot be formatted with strftime.
        time_span = 0
        last_update = "N/A"
    else:
        time_span = (last_date - first_date).days
        last_update = last_date.strftime('%Y-%m-%d')

    return {
        "Completeness": f"{completeness:.1f}%",
        "Time Span": f"{time_span} days",
        "Last Update": last_update,
        "Total Records": f"{len(df):,}",
    }
 
1
  import pandas as pd
2
+ import plotly.express as px
3
+ import plotly.graph_objects as go
 
4
  import streamlit as st
5
+ from typing import Dict, Optional, List
 
 
6
 
7
+ from config import get_chart_theme, DESIGN_SYSTEM, get_translation
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  @st.cache_data
10
def get_material_stats(df: pd.DataFrame) -> Dict:
    """Compute per-material and overall production statistics.

    Args:
        df: Frame with ``date``, ``weight_kg`` and ``material_type``
            columns.

    Returns:
        Dict keyed by material (in order of first appearance) plus a
        final ``'_total_'`` entry.  Each value is a dict with:
        ``total`` (summed weight), ``percentage`` (share of the overall
        weight, ``0.0`` when the overall weight is zero), ``daily_avg``
        (mean of the per-date weight sums), ``work_days`` (distinct dates)
        and ``records`` (row count).
    """
    stats = {}
    total = df['weight_kg'].sum()

    for material in df['material_type'].unique():
        data = df[df['material_type'] == material]
        material_total = data['weight_kg'].sum()

        stats[material] = {
            'total': material_total,
            # Guard against a zero overall total, which would give NaN.
            'percentage': (material_total / total) * 100 if total else 0.0,
            'daily_avg': data.groupby('date')['weight_kg'].sum().mean(),
            'work_days': data['date'].nunique(),
            'records': len(data),
        }

    stats['_total_'] = {
        'total': total,
        'percentage': 100.0,
        'daily_avg': df.groupby('date')['weight_kg'].sum().mean(),
        'work_days': df['date'].nunique(),
        'records': len(df),
    }

    return stats
 
 
 
37
 
38
  @st.cache_data
39
def detect_outliers(df: pd.DataFrame) -> Dict:
    """Flag per-material weight outliers using the 1.5 * IQR rule.

    For each material the quartiles of ``weight_kg`` are computed and any
    row below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` is counted as
    an outlier.

    Args:
        df: Frame with ``material_type``, ``weight_kg`` and a datetime
            ``date`` column.

    Returns:
        Dict keyed by material; each value holds ``count`` (number of
        outlier rows), ``range`` (the accepted interval formatted as
        ``"<lower> - <upper> kg"``) and ``dates`` (outlier dates as
        ``YYYY-MM-DD`` strings).
    """
    outliers = {}

    for material in df['material_type'].unique():
        material_data = df[df['material_type'] == material]
        weights = material_data['weight_kg']

        q1 = weights.quantile(0.25)
        q3 = weights.quantile(0.75)
        iqr = q3 - q1
        lower = q1 - 1.5 * iqr
        upper = q3 + 1.5 * iqr

        outlier_mask = (weights < lower) | (weights > upper)
        outlier_dates = (
            material_data.loc[outlier_mask, 'date']
            .dt.strftime('%Y-%m-%d')
            .tolist()
        )

        outliers[material] = {
            'count': len(outlier_dates),
            'range': f"{lower:.0f} - {upper:.0f} kg",
            'dates': outlier_dates,
        }

    return outliers
60
 
61
def create_total_production_chart(df: pd.DataFrame, time_period: str = 'daily', lang: str = 'English'):
    """Build the total-production chart aggregated by day, week or month.

    Args:
        df: Frame with a datetime ``date`` column and ``weight_kg``.
        time_period: ``'daily'`` gives a line chart of per-date sums,
            ``'weekly'`` a bar chart per ISO week; any other value gives
            a bar chart per calendar month.
        lang: Language passed to ``get_translation`` for titles/labels.

    Returns:
        The Plotly figure with the shared chart theme applied.
    """
    t = get_translation(lang)
    weight_label = t.get('label_weight', 'Weight (kg)')

    if time_period == 'daily':
        grouped = df.groupby('date')['weight_kg'].sum().reset_index()
        fig = px.line(grouped, x='date', y='weight_kg',
                      title=t.get('chart_total_production', 'Total Production Trend'),
                      labels={'weight_kg': weight_label, 'date': t.get('label_date', 'Date')})
    elif time_period == 'weekly':
        df_copy = df.copy()
        iso = df_copy['date'].dt.isocalendar()
        df_copy['week'] = iso['week']
        # Pair the ISO week with the ISO year, not the calendar year:
        # days around New Year can belong to a week of the adjacent year
        # and would otherwise be merged into the wrong bucket.
        df_copy['year'] = iso['year']
        grouped = df_copy.groupby(['year', 'week'])['weight_kg'].sum().reset_index()
        grouped['week_label'] = grouped['year'].astype(str) + '-W' + grouped['week'].astype(str)
        fig = px.bar(grouped, x='week_label', y='weight_kg',
                     title=t.get('chart_total_production_weekly', 'Total Production Trend (Weekly)'),
                     labels={'weight_kg': weight_label, 'week_label': t.get('label_week', 'Week')})
    else:
        df_copy = df.copy()
        df_copy['month'] = df_copy['date'].dt.to_period('M')
        grouped = df_copy.groupby('month')['weight_kg'].sum().reset_index()
        # Periods are converted to strings for use as the x axis.
        grouped['month'] = grouped['month'].astype(str)
        fig = px.bar(grouped, x='month', y='weight_kg',
                     title=t.get('chart_total_production_monthly', 'Total Production Trend (Monthly)'),
                     labels={'weight_kg': weight_label, 'month': t.get('label_month', 'Month')})

    fig.update_layout(**get_chart_theme()['layout'], height=400, showlegend=False)
    return fig
89
+
90
def create_materials_trend_chart(df: pd.DataFrame, time_period: str = 'daily',
                                 selected_materials: Optional[List[str]] = None, lang: str = 'English'):
    """Build the per-material production trend chart.

    Args:
        df: Frame with a datetime ``date`` column, ``weight_kg`` and
            ``material_type``.
        time_period: ``'daily'`` gives a line chart of per-date sums,
            ``'weekly'`` a bar chart per ISO week; any other value gives
            a bar chart per calendar month.
        selected_materials: Materials to keep; ``None`` or an empty list
            keeps every material.
        lang: Language passed to ``get_translation`` for titles/labels.

    Returns:
        The Plotly figure, coloured by material, with the shared chart
        theme applied.
    """
    df_copy = df.copy()
    t = get_translation(lang)
    weight_label = t.get('label_weight', 'Weight (kg)')
    material_label = t.get('label_material', 'Material')

    if selected_materials:
        df_copy = df_copy[df_copy['material_type'].isin(selected_materials)]

    if time_period == 'daily':
        grouped = df_copy.groupby(['date', 'material_type'])['weight_kg'].sum().reset_index()
        fig = px.line(grouped, x='date', y='weight_kg', color='material_type',
                      title=t.get('chart_materials_trends', 'Materials Production Trends'),
                      labels={'weight_kg': weight_label,
                              'date': t.get('label_date', 'Date'),
                              'material_type': material_label})
    elif time_period == 'weekly':
        iso = df_copy['date'].dt.isocalendar()
        df_copy['week'] = iso['week']
        # Pair the ISO week with the ISO year, not the calendar year:
        # days around New Year can belong to a week of the adjacent year
        # and would otherwise be merged into the wrong bucket.
        df_copy['year'] = iso['year']
        grouped = df_copy.groupby(['year', 'week', 'material_type'])['weight_kg'].sum().reset_index()
        grouped['week_label'] = grouped['year'].astype(str) + '-W' + grouped['week'].astype(str)
        fig = px.bar(grouped, x='week_label', y='weight_kg', color='material_type',
                     title=t.get('chart_materials_trends_weekly', 'Materials Production Trends (Weekly)'),
                     labels={'weight_kg': weight_label,
                             'week_label': t.get('label_week', 'Week'),
                             'material_type': material_label})
    else:
        df_copy['month'] = df_copy['date'].dt.to_period('M')
        grouped = df_copy.groupby(['month', 'material_type'])['weight_kg'].sum().reset_index()
        # Periods are converted to strings for use as the x axis.
        grouped['month'] = grouped['month'].astype(str)
        fig = px.bar(grouped, x='month', y='weight_kg', color='material_type',
                     title=t.get('chart_materials_trends_monthly', 'Materials Production Trends (Monthly)'),
                     labels={'weight_kg': weight_label,
                             'month': t.get('label_month', 'Month'),
                             'material_type': material_label})

    fig.update_layout(**get_chart_theme()['layout'], height=400)
    return fig
127
 
128
def create_shift_trend_chart(df: pd.DataFrame, time_period: str = 'daily', lang: str = 'English'):
    """Build the stacked day/night shift production chart.

    Args:
        df: Frame with a datetime ``date`` column, ``weight_kg`` and
            ``shift``.
        time_period: ``'daily'`` draws explicit ``'day'`` and ``'night'``
            bar traces per date; ``'weekly'`` aggregates per ISO week and
            any other value per calendar month, coloured by shift.
        lang: Language passed to ``get_translation`` for titles/labels.

    Returns:
        The Plotly figure with the shared chart theme applied.
    """
    theme = get_chart_theme()
    t = get_translation(lang)
    weight_label = t.get('label_weight', 'Weight (kg)')

    if time_period == 'daily':
        grouped = df.groupby(['date', 'shift'])['weight_kg'].sum().reset_index()
        # One column per shift; dates lacking a shift get 0.
        pivot_data = grouped.pivot(index='date', columns='shift', values='weight_kg').fillna(0)
        has_day = 'day' in pivot_data.columns

        fig = go.Figure()

        if has_day:
            fig.add_trace(go.Bar(
                x=pivot_data.index,
                y=pivot_data['day'],
                name=t.get('label_day_shift', 'Day Shift'),
                marker_color=DESIGN_SYSTEM['colors']['warning'],
                text=pivot_data['day'].round(0),
                textposition='inside'
            ))

        if 'night' in pivot_data.columns:
            fig.add_trace(go.Bar(
                x=pivot_data.index,
                y=pivot_data['night'],
                name=t.get('label_night_shift', 'Night Shift'),
                marker_color=DESIGN_SYSTEM['colors']['primary'],
                # Night bars start where the day bars end (or at 0).
                base=pivot_data['day'] if has_day else 0,
                text=pivot_data['night'].round(0),
                textposition='inside'
            ))

        fig.update_layout(
            **theme['layout'],
            title=t.get('chart_shift_trends', 'Daily Shift Production Trends (Stacked)'),
            xaxis_title=t.get('label_date', 'Date'),
            yaxis_title=weight_label,
            barmode='stack',
            height=400,
            showlegend=True
        )
    else:
        # Aggregate by the requested period; previously this branch still
        # grouped by date, so "weekly"/"monthly" charts showed daily bars.
        df_copy = df.copy()
        if time_period == 'weekly':
            iso = df_copy['date'].dt.isocalendar()
            # ISO week paired with ISO year so weeks spanning New Year
            # are not split or merged incorrectly.
            df_copy['year'] = iso['year']
            df_copy['week'] = iso['week']
            grouped = df_copy.groupby(['year', 'week', 'shift'])['weight_kg'].sum().reset_index()
            grouped['week_label'] = grouped['year'].astype(str) + '-W' + grouped['week'].astype(str)
            x_column = 'week_label'
            x_label = t.get('label_week', 'Week')
        else:
            df_copy['month'] = df_copy['date'].dt.to_period('M')
            grouped = df_copy.groupby(['month', 'shift'])['weight_kg'].sum().reset_index()
            # Periods are converted to strings for use as the x axis.
            grouped['month'] = grouped['month'].astype(str)
            x_column = 'month'
            x_label = t.get('label_month', 'Month')

        fig = px.bar(grouped, x=x_column, y='weight_kg', color='shift',
                     title=t.get('chart_shift_trends_period', f'{time_period.title()} Shift Production Trends'),
                     labels={'weight_kg': weight_label, x_column: x_label},
                     barmode='stack')
        fig.update_layout(**theme['layout'], height=400)

    return fig