SHELLAPANDIANGANHUNGING committed on
Commit
e45b23a
·
verified ·
1 Parent(s): 41220fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -106
app.py CHANGED
@@ -1174,165 +1174,141 @@ else:
1174
  st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
1175
 
1176
  # =================== 5. Matrix (Tetap Dipertahankan) ===================
1177
- st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Divisions Move Slow?</h3>", unsafe_allow_html=True)
 
1178
 
1179
  import math
1180
  import plotly.express as px
1181
  import pandas as pd
1182
-
1183
  try:
1184
  df_local_matrix = df.copy()
1185
-
 
 
 
 
1186
  # 1. Exclude Positive findings
 
1187
  if 'temuan_kategori' in df_local_matrix.columns:
1188
  df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]
1189
-
1190
  # 2. Ensure datetime columns
 
1191
  df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
1192
  df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')
1193
-
1194
- # 3. Compute LEAD TIME in days
 
1195
  df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
1196
  df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)
1197
-
1198
- # 4. Avg Monthly Finding Count per Division ('nama')
 
1199
  if 'nama' not in df_local_matrix.columns:
1200
- st.error("❌ Kolom 'nama' (Division/Operator) tidak ditemukan.")
 
1201
  else:
1202
- # Create YYYY-MM month column
1203
  df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))
1204
-
1205
- # Count unique findings per division per month
1206
  monthly_counts = (
1207
  df_local_matrix
1208
  .groupby(['nama', 'month'])['kode_temuan']
1209
  .nunique()
1210
  .reset_index(name='monthly_count')
1211
  )
1212
-
1213
- # Average findings per month per division
1214
  operator_avg = (
1215
  monthly_counts
1216
  .groupby('nama')['monthly_count']
1217
- .mean()
1218
  .reset_index(name='Finding Count')
1219
  )
1220
-
1221
- # 5. Average Lead Time per Division
 
1222
  operator_lead = (
1223
- df_local_matrix
1224
- .groupby('nama')['lead_time_days']
1225
  .mean()
1226
- .reset_index(name='Average Lead Time') # 🔑 Renamed here!
1227
  )
1228
-
1229
- # 6. Merge Risk Matrix
 
1230
  risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
1231
- risk_matrix = risk_matrix.rename(columns={'nama': 'Division'})
1232
-
1233
- # Fill missing lead time (e.g., no closed findings) with 0
1234
- risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0).round(2)
1235
- risk_matrix['Finding Count'] = risk_matrix['Finding Count'].round(2)
1236
-
1237
- # 7. Quadrant Logic
1238
- X_LIMIT = 20 # avg findings/month threshold
1239
- Y_LIMIT = 3 # avg lead time (days) threshold
1240
-
1241
  def assign_quadrant(row):
1242
- fc = row['Finding Count']
1243
- lt = row['Average Lead Time']
1244
- if fc >= X_LIMIT and lt >= Y_LIMIT:
1245
  return "Quadrant I – High Leadtime & High Count"
1246
- elif fc < X_LIMIT and lt >= Y_LIMIT:
1247
  return "Quadrant II – High Leadtime but Low Count"
1248
- elif fc >= X_LIMIT and lt < Y_LIMIT:
1249
  return "Quadrant III – Low Leadtime but High Count"
1250
  else:
1251
  return "Quadrant IV – Low Leadtime & Low Count"
1252
-
1253
  risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
1254
  quadrant_count = risk_matrix['quadrant'].value_counts()
1255
-
1256
- # 8. Plot Scatter with Quadrant Backgrounds
1257
- max_x = max(risk_matrix['Finding Count'].max() + 1, X_LIMIT + 5)
1258
- max_y = max(risk_matrix['Average Lead Time'].max() + 5, Y_LIMIT + 5)
1259
-
1260
  fig = px.scatter(
1261
  risk_matrix,
1262
  x='Finding Count',
1263
  y='Average Lead Time',
1264
- hover_name='Division',
1265
- color='quadrant',
1266
- color_discrete_map={
1267
- "Quadrant I High Leadtime & High Count": "darkred",
1268
- "Quadrant II – High Leadtime but Low Count": "orange",
1269
- "Quadrant III – Low Leadtime but High Count": "tomato",
1270
- "Quadrant IV – Low Leadtime & Low Count": "green"
1271
- },
1272
- height=500
1273
  )
1274
-
1275
- # Quadrant background shading
1276
  fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
1277
- fillcolor="rgba(255,0,0,0.15)", line_width=0, layer="below")
1278
  fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
1279
- fillcolor="rgba(255,150,50,0.15)", line_width=0, layer="below")
1280
  fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
1281
- fillcolor="rgba(255,200,200,0.15)", line_width=0, layer="below")
1282
  fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
1283
- fillcolor="rgba(0,120,255,0.10)", line_width=0, layer="below")
1284
-
1285
- # Threshold lines
1286
- fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="gray", opacity=0.7)
1287
- fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="gray", opacity=0.7)
1288
-
1289
- # Quadrant labels & counts
1290
- annotations = [
1291
- (X_LIMIT + (max_x - X_LIMIT)/2, Y_LIMIT + (max_y - Y_LIMIT)/2, "Q1", "darkred"),
1292
- (X_LIMIT/2, Y_LIMIT + (max_y - Y_LIMIT)/2, "Q2", "orange"),
1293
- (X_LIMIT + (max_x - X_LIMIT)/2, Y_LIMIT/2, "Q3", "tomato"),
1294
- (X_LIMIT/2, Y_LIMIT/2, "Q4", "green"),
1295
- ]
1296
-
1297
- for x, y, label, color in annotations:
1298
- count = quadrant_count.get(f"Quadrant {'I' if label=='Q1' else 'II' if label=='Q2' else 'III' if label=='Q3' else 'IV'}– ...",0)
1299
-
1300
- # Safer lookup: reconstruct key
1301
- key = next((k for k in quadrant_count.index if label in k), None)
1302
- cnt = quadrant_count.get(key, 0) if key else 0
1303
- fig.add_annotation(
1304
- x=x, y=y,
1305
- text=f"<b>{label}<br>{int(cnt)}</b>",
1306
- showarrow=False,
1307
- font=dict(size=16, color=color),
1308
- bgcolor="white",
1309
- opacity=0.8
1310
- )
1311
-
1312
- # Axis & layout tuning
1313
- fig.update_layout(
1314
- xaxis_title="Average Monthly Findings per Division",
1315
- yaxis_title="Average Lead Time (Days)",
1316
- legend_title="Quadrant",
1317
- margin=dict(t=40, b=40, l=40, r=40)
1318
- )
1319
-
1320
  st.plotly_chart(fig, use_container_width=True)
1321
-
1322
- # 9. Summary Table (sorted descending by Finding Count)
1323
- st.markdown("<h4 style='text-align:center;'>Division Performance Summary</h4>", unsafe_allow_html=True)
 
1324
  st.dataframe(
1325
- risk_matrix[
1326
- ['Division', 'Finding Count', 'Average Lead Time', 'quadrant']
1327
- ].sort_values("Finding Count", ascending=False)
1328
- .round(2),
1329
- use_container_width=True,
1330
- hide_index=True
1331
  )
1332
-
1333
  except Exception as e:
1334
- st.error(f"⚠️ Error in Risk Matrix (Objective 5): {e}")
1335
- st.exception(e) # Optional: show full traceback during dev
1336
 
1337
  # st.exception(e) # Uncomment for debugging
1338
 
 
1174
  st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
1175
 
1176
  # =================== 5. Matrix (Tetap Dipertahankan) ===================
1177
+ # =================== 5. Matrix (Tetap Dipertahankan) ===================
1178
+ st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True)
1179
 
1180
  import math
1181
  import plotly.express as px
1182
  import pandas as pd
 
1183
  try:
1184
  df_local_matrix = df.copy()
1185
+ # ============================
1186
+ # 0. Filter: ONLY 1 COMPANY & 1 PROFILE (if applicable)
1187
+ # ============================
1188
+ # (Skipped for general dashboard view)
1189
+ # ============================
1190
  # 1. Exclude Positive findings
1191
+ # ============================
1192
  if 'temuan_kategori' in df_local_matrix.columns:
1193
  df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]
1194
+ # ============================
1195
  # 2. Ensure datetime columns
1196
+ # ============================
1197
  df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
1198
  df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')
1199
+ # ============================
1200
+ # 3. Compute LEAD TIME
1201
+ # ============================
1202
  df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
1203
  df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)
1204
+ # ============================
1205
+ # 4. Average Monthly Finding Count per Operator
1206
+ # ============================
1207
  if 'nama' not in df_local_matrix.columns:
1208
+ st.error("❌ Kolom 'nama' (operator) tidak ditemukan.")
1209
+ # st.stop() # Stop bisa dihilangkan agar script tetap jalan
1210
  else:
1211
+ # Buat kolom bulan (YYYY-MM)
1212
  df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))
1213
+ # Hitung jumlah temuan per operator per bulan
 
1214
  monthly_counts = (
1215
  df_local_matrix
1216
  .groupby(['nama', 'month'])['kode_temuan']
1217
  .nunique()
1218
  .reset_index(name='monthly_count')
1219
  )
1220
+ # Hitung rata-rata bulanan per operator
 
1221
  operator_avg = (
1222
  monthly_counts
1223
  .groupby('nama')['monthly_count']
1224
+ .mean() # <-- RATA-RATA per bulan (bukan total!)
1225
  .reset_index(name='Finding Count')
1226
  )
1227
+ # ============================
1228
+ # 5. Average Lead Time per Operator
1229
+ # ============================
1230
  operator_lead = (
1231
+ df_local_matrix.groupby('nama')['lead_time_days']
 
1232
  .mean()
1233
+ .reset_index(name='Average Lead Time')
1234
  )
1235
+ # ============================
1236
+ # 6. Merge Risk Matrix
1237
+ # ============================
1238
  risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
1239
+ risk_matrix = risk_matrix.rename(columns={'nama': 'Operator Name'})
1240
+ # Handle operator tanpa lead time (e.g., belum closed)
1241
+ risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0)
1242
+ # ============================
1243
+ # 7. Quadrant Logic (unchanged)
1244
+ # ============================
1245
+ X_LIMIT = 20
1246
+ Y_LIMIT = 3
 
 
1247
  def assign_quadrant(row):
1248
+ if row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
 
 
1249
  return "Quadrant I – High Leadtime & High Count"
1250
+ elif row['Finding Count'] < X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
1251
  return "Quadrant II – High Leadtime but Low Count"
1252
+ elif row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] < Y_LIMIT:
1253
  return "Quadrant III – Low Leadtime but High Count"
1254
  else:
1255
  return "Quadrant IV – Low Leadtime & Low Count"
 
1256
  risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
1257
  quadrant_count = risk_matrix['quadrant'].value_counts()
1258
+ # ============================
1259
+ # 8. Scatter Plot (format visual tetap sama persis)
1260
+ # ============================
1261
# Upper bounds of the plotted area, used for the quadrant rectangles and
# for centring the count labels.  Each bound is clamped to sit at least 5
# units past its threshold: without the clamp, data that stays entirely
# below X_LIMIT gives max_x < X_LIMIT, which inverts the Q1/Q3 rectangles
# and places their labels on the wrong side of the threshold line.
# The constant is passed first so that a NaN maximum (empty risk_matrix)
# falls back to the constant instead of propagating NaN.
max_x = max(X_LIMIT + 5, risk_matrix['Finding Count'].max() + 1)
max_y = max(Y_LIMIT + 5, risk_matrix['Average Lead Time'].max() + 5)
1263
  fig = px.scatter(
1264
  risk_matrix,
1265
  x='Finding Count',
1266
  y='Average Lead Time',
1267
+ hover_name="Operator Name",
1268
+ size=[12] * len(risk_matrix),
1269
+ size_max=15,
1270
+ title="Audit Findings Risk Matrix: Avg Monthly Count vs Lead Time"
 
 
 
 
 
1271
  )
1272
+ # Background quadrant (same as original)
 
1273
  fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
1274
+ fillcolor="rgba(255,0,0,0.25)", line_width=0) # Q1
1275
  fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
1276
+ fillcolor="rgba(255,150,50,0.25)", line_width=0) # Q2
1277
  fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
1278
+ fillcolor="rgba(255,200,200,0.25)", line_width=0) # Q3
1279
  fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
1280
+ fillcolor="rgba(0,120,255,0.15)", line_width=0) # Q4
1281
+ fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="black")
1282
+ fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="black")
1283
+ # Quadrant count annotations (same positions & style)
1284
# One count label per quadrant, centred inside that quadrant's rectangle.
# The lookup keys must match the strings returned by assign_quadrant
# character for character, including the en dash after the Roman numeral;
# a key without it never matches and the label silently shows 0.
fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2,
                   y=Y_LIMIT + (max_y - Y_LIMIT)/2,
                   text=f"<b>{quadrant_count.get('Quadrant I – High Leadtime & High Count',0)}</b>",
                   showarrow=False, font=dict(size=22, color="darkred"))
fig.add_annotation(x=X_LIMIT/2,
                   y=Y_LIMIT + (max_y - Y_LIMIT)/2,
                   text=f"<b>{quadrant_count.get('Quadrant II – High Leadtime but Low Count',0)}</b>",
                   showarrow=False, font=dict(size=22, color="orange"))
fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2,
                   y=Y_LIMIT/2,
                   text=f"<b>{quadrant_count.get('Quadrant III – Low Leadtime but High Count',0)}</b>",
                   showarrow=False, font=dict(size=22, color="red"))
fig.add_annotation(x=X_LIMIT/2,
                   y=Y_LIMIT/2,
                   text=f"<b>{quadrant_count.get('Quadrant IV – Low Leadtime & Low Count',0)}</b>",
                   showarrow=False, font=dict(size=22, color="green"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1300
  st.plotly_chart(fig, use_container_width=True)
1301
+ # ============================
1302
+ # 9. Summary Table
1303
+ # ============================
1304
+ st.subheader("Summary (Avg Monthly Count vs Avg Lead Time)")
1305
  st.dataframe(
1306
+ risk_matrix.sort_values("Finding Count", ascending=False),
1307
+ use_container_width=True
 
 
 
 
1308
  )
 
1309
  except Exception as e:
1310
+ st.error(f"⚠️ Error Risk Matrix: {e}")
1311
+ # st.exception(e) # Uncomment for debugging
1312
 
1313
  # st.exception(e) # Uncomment for debugging
1314