SHELLAPANDIANGANHUNGING committed on
Commit
3ea3dfc
·
verified ·
1 Parent(s): 3c97078

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -79
app.py CHANGED
@@ -1187,142 +1187,168 @@ if WORDCLOUD_AVAILABLE:
1187
 
1188
  else:
1189
  st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
 
# =================== 5. Matrix (kept as-is) ===================
st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Divisions Move Slow?</h3>", unsafe_allow_html=True)

import math
import plotly.express as px
import pandas as pd

# Objective 5 risk matrix: one point per division ('nama'), with
#   x = average number of distinct findings ('kode_temuan') per month
#   y = average lead time in days (close_at - created_at)
# split into four quadrants around fixed thresholds.
try:
    df_local_matrix = df.copy()

    # 1. Exclude Positive findings
    if 'temuan_kategori' in df_local_matrix.columns:
        df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]

    # 2. Ensure datetime columns (unparseable values become NaT)
    df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
    df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')

    # 3. Compute lead time in days
    # NOTE(review): rows with a missing close/created date are counted as a
    # 0-day lead time, which lowers the division average - confirm intended.
    df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
    df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)

    # 4. Average monthly finding count per operator
    if 'nama' not in df_local_matrix.columns:
        st.error("❌ Kolom 'nama' (operator) tidak ditemukan.")
    else:
        # Month bucket (YYYY-MM)
        df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))

        # Number of distinct findings per operator per month
        monthly_counts = (
            df_local_matrix
            .groupby(['nama', 'month'])['kode_temuan']
            .nunique()
            .reset_index(name='monthly_count')
        )

        # Monthly average per operator (an average, not a total)
        operator_avg = (
            monthly_counts
            .groupby('nama')['monthly_count']
            .mean()
            .reset_index(name='Finding Count')
        )

        # 5. Average lead time per operator.
        # The column must be called 'Average Lead Time': every later step
        # (fillna, quadrant logic, plot) reads it under that name.
        operator_lead = (
            df_local_matrix
            .groupby('nama')['lead_time_days']
            .mean()
            .reset_index(name='Average Lead Time')
        )

        # 6. Merge into the risk matrix
        risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
        risk_matrix = risk_matrix.rename(columns={'nama': 'Division'})

        # Operators without a lead time get 0
        risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0)

        # 7. Quadrant logic
        X_LIMIT = 20  # threshold on 'Finding Count'
        Y_LIMIT = 3   # threshold on 'Average Lead Time'

        # Single source of truth for the labels so the classification and
        # the per-quadrant count lookups cannot drift apart.
        Q1_LABEL = "Quadrant I – High Leadtime & High Count"
        Q2_LABEL = "Quadrant II – High Leadtime but Low Count"
        Q3_LABEL = "Quadrant III – Low Leadtime but High Count"
        Q4_LABEL = "Quadrant IV – Low Leadtime & Low Count"

        def assign_quadrant(row):
            """Return the quadrant label for one risk-matrix row."""
            high_count = row['Finding Count'] >= X_LIMIT
            high_lead = row['Average Lead Time'] >= Y_LIMIT
            if high_count and high_lead:
                return Q1_LABEL
            if high_lead:
                return Q2_LABEL
            if high_count:
                return Q3_LABEL
            return Q4_LABEL

        if risk_matrix.empty:
            # Nothing to plot; max() of an empty column is NaN and would
            # produce NaN axis bounds below.
            st.info("No findings available to build the risk matrix.")
        else:
            risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
            quadrant_count = risk_matrix['quadrant'].value_counts()

            # 8. Scatter plot
            # Bounds always extend past the thresholds so the quadrant
            # rectangles are never inverted when all points are below them.
            max_x = max(risk_matrix['Finding Count'].max() + 1, X_LIMIT + 5)
            max_y = max(risk_matrix['Average Lead Time'].max() + 5, Y_LIMIT + 5)

            fig = px.scatter(
                risk_matrix,
                x='Finding Count',
                y='Average Lead Time',
                hover_name="Division",
                size=[12] * len(risk_matrix),
                size_max=15,
            )

            # Quadrant backgrounds
            fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
                          fillcolor="rgba(255,0,0,0.25)", line_width=0)  # Q1
            fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
                          fillcolor="rgba(255,150,50,0.25)", line_width=0)  # Q2
            fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
                          fillcolor="rgba(255,200,200,0.25)", line_width=0)  # Q3
            fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
                          fillcolor="rgba(0,120,255,0.15)", line_width=0)  # Q4

            fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="black")
            fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="black")

            # Quadrant count annotations, centred in each quadrant
            right_x = X_LIMIT + (max_x - X_LIMIT) / 2
            left_x = X_LIMIT / 2
            upper_y = Y_LIMIT + (max_y - Y_LIMIT) / 2
            lower_y = Y_LIMIT / 2
            quadrant_annotations = [
                (right_x, upper_y, Q1_LABEL, "darkred"),
                (left_x, upper_y, Q2_LABEL, "orange"),
                (right_x, lower_y, Q3_LABEL, "red"),
                (left_x, lower_y, Q4_LABEL, "green"),
            ]
            for pos_x, pos_y, quadrant_label, font_color in quadrant_annotations:
                fig.add_annotation(
                    x=pos_x,
                    y=pos_y,
                    text=f"<b>{quadrant_count.get(quadrant_label, 0)}</b>",
                    showarrow=False,
                    font=dict(size=22, color=font_color),
                )

            st.plotly_chart(fig, use_container_width=True)

            # 9. Summary table
            st.dataframe(
                risk_matrix.sort_values("Finding Count", ascending=False),
                use_container_width=True
            )

except Exception as e:
    # UI boundary: surface the failure in the page instead of crashing the app.
    st.error(f"⚠️ Error Risk Matrix: {e}")
 
 
1326
 
1327
  import streamlit as st
1328
  import plotly.graph_objects as go
 
1187
 
1188
  else:
1189
  st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
1190
+
# =================== 5. Matrix (kept as-is) ===================
st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Divisions Move Slow?</h3>", unsafe_allow_html=True)

import math
import plotly.express as px
import pandas as pd

# Objective 5 risk matrix: one point per division ('nama'), with
#   x = average number of distinct findings ('kode_temuan') per month
#   y = average lead time in days (close_at - created_at)
# coloured and shaded into four quadrants around fixed thresholds.
try:
    df_local_matrix = df.copy()

    # 1. Exclude Positive findings
    if 'temuan_kategori' in df_local_matrix.columns:
        df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]

    # 2. Ensure datetime columns (unparseable values become NaT)
    df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
    df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')

    # 3. Compute LEAD TIME in days
    # NOTE(review): rows with a missing close/created date are counted as a
    # 0-day lead time, which lowers the division average - confirm intended.
    df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
    df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)

    # 4. Avg Monthly Finding Count per Division ('nama')
    if 'nama' not in df_local_matrix.columns:
        st.error("❌ Kolom 'nama' (Division/Operator) tidak ditemukan.")
    else:
        # Create YYYY-MM month column
        df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))

        # Count unique findings per division per month
        monthly_counts = (
            df_local_matrix
            .groupby(['nama', 'month'])['kode_temuan']
            .nunique()
            .reset_index(name='monthly_count')
        )

        # Average findings per month per division
        operator_avg = (
            monthly_counts
            .groupby('nama')['monthly_count']
            .mean()
            .reset_index(name='Finding Count')
        )

        # 5. Average Lead Time per Division
        operator_lead = (
            df_local_matrix
            .groupby('nama')['lead_time_days']
            .mean()
            .reset_index(name='Average Lead Time')
        )

        # 6. Merge Risk Matrix
        risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
        risk_matrix = risk_matrix.rename(columns={'nama': 'Division'})

        # Divisions without a lead time get 0; both metrics are rounded to
        # 2 decimals before classification, so the table and the quadrant
        # are derived from the same displayed values.
        risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0).round(2)
        risk_matrix['Finding Count'] = risk_matrix['Finding Count'].round(2)

        # 7. Quadrant Logic
        X_LIMIT = 20  # avg findings/month threshold
        Y_LIMIT = 3   # avg lead time (days) threshold

        # Single source of truth for the labels: the classifier, the colour
        # map and the per-quadrant count lookup must all use the exact same
        # strings (including the en dash) or the lookups silently miss.
        Q1_LABEL = "Quadrant I – High Leadtime & High Count"
        Q2_LABEL = "Quadrant II – High Leadtime but Low Count"
        Q3_LABEL = "Quadrant III – Low Leadtime but High Count"
        Q4_LABEL = "Quadrant IV – Low Leadtime & Low Count"

        def assign_quadrant(row):
            """Return the quadrant label for one risk-matrix row."""
            high_count = row['Finding Count'] >= X_LIMIT
            high_lead = row['Average Lead Time'] >= Y_LIMIT
            if high_count and high_lead:
                return Q1_LABEL
            if high_lead:
                return Q2_LABEL
            if high_count:
                return Q3_LABEL
            return Q4_LABEL

        if risk_matrix.empty:
            # Nothing to plot; max() of an empty column is NaN and would
            # produce NaN axis bounds below.
            st.info("No findings available to build the risk matrix.")
        else:
            risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
            quadrant_count = risk_matrix['quadrant'].value_counts()

            # 8. Plot Scatter with Quadrant Backgrounds
            # Bounds always extend past the thresholds so every quadrant
            # has a visible, non-inverted area.
            max_x = max(risk_matrix['Finding Count'].max() + 1, X_LIMIT + 5)
            max_y = max(risk_matrix['Average Lead Time'].max() + 5, Y_LIMIT + 5)

            fig = px.scatter(
                risk_matrix,
                x='Finding Count',
                y='Average Lead Time',
                hover_name='Division',
                color='quadrant',
                color_discrete_map={
                    Q1_LABEL: "darkred",
                    Q2_LABEL: "orange",
                    Q3_LABEL: "tomato",
                    Q4_LABEL: "green",
                },
                height=500,
            )

            # Quadrant background shading (drawn below the markers)
            fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
                          fillcolor="rgba(255,0,0,0.15)", line_width=0, layer="below")
            fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
                          fillcolor="rgba(255,150,50,0.15)", line_width=0, layer="below")
            fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
                          fillcolor="rgba(255,200,200,0.15)", line_width=0, layer="below")
            fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
                          fillcolor="rgba(0,120,255,0.10)", line_width=0, layer="below")

            # Threshold lines
            fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="gray", opacity=0.7)
            fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="gray", opacity=0.7)

            # Quadrant labels & counts, centred in each quadrant.
            # Each entry carries the full label so the count is looked up
            # by the exact value_counts() key, not by the short "Qn" tag.
            right_x = X_LIMIT + (max_x - X_LIMIT) / 2
            left_x = X_LIMIT / 2
            upper_y = Y_LIMIT + (max_y - Y_LIMIT) / 2
            lower_y = Y_LIMIT / 2
            quadrant_annotations = [
                (right_x, upper_y, "Q1", Q1_LABEL, "darkred"),
                (left_x, upper_y, "Q2", Q2_LABEL, "orange"),
                (right_x, lower_y, "Q3", Q3_LABEL, "tomato"),
                (left_x, lower_y, "Q4", Q4_LABEL, "green"),
            ]

            for pos_x, pos_y, label, quadrant_label, color in quadrant_annotations:
                cnt = quadrant_count.get(quadrant_label, 0)
                fig.add_annotation(
                    x=pos_x, y=pos_y,
                    text=f"<b>{label}<br>{int(cnt)}</b>",
                    showarrow=False,
                    font=dict(size=16, color=color),
                    bgcolor="white",
                    opacity=0.8
                )

            # Axis & layout tuning
            fig.update_layout(
                xaxis_title="Average Monthly Findings per Division",
                yaxis_title="Average Lead Time (Days)",
                legend_title="Quadrant",
                margin=dict(t=40, b=40, l=40, r=40)
            )

            st.plotly_chart(fig, use_container_width=True)

            # 9. Summary Table (sorted descending by Finding Count).
            # Both metrics were already rounded to 2 decimals above.
            st.markdown("<h4 style='text-align:center;'>Division Performance Summary</h4>", unsafe_allow_html=True)
            st.dataframe(
                risk_matrix[
                    ['Division', 'Finding Count', 'Average Lead Time', 'quadrant']
                ].sort_values("Finding Count", ascending=False),
                use_container_width=True,
                hide_index=True
            )

except Exception as e:
    # UI boundary: surface the failure in the page instead of crashing the app.
    st.error(f"⚠️ Error in Risk Matrix (Objective 5): {e}")
    st.exception(e)  # full traceback, useful during development
1352
 
1353
  import streamlit as st
1354
  import plotly.graph_objects as go