Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1174,165 +1174,141 @@ else:
|
|
| 1174 |
st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
|
| 1175 |
|
| 1176 |
# =================== 5. Matrix (Tetap Dipertahankan) ===================
|
| 1177 |
-
|
|
|
|
| 1178 |
|
| 1179 |
import math
|
| 1180 |
import plotly.express as px
|
| 1181 |
import pandas as pd
|
| 1182 |
-
|
| 1183 |
try:
|
| 1184 |
df_local_matrix = df.copy()
|
| 1185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
# 1. Exclude Positive findings
|
|
|
|
| 1187 |
if 'temuan_kategori' in df_local_matrix.columns:
|
| 1188 |
df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]
|
| 1189 |
-
|
| 1190 |
# 2. Ensure datetime columns
|
|
|
|
| 1191 |
df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
|
| 1192 |
df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')
|
| 1193 |
-
|
| 1194 |
-
# 3. Compute LEAD TIME
|
|
|
|
| 1195 |
df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
|
| 1196 |
df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)
|
| 1197 |
-
|
| 1198 |
-
# 4.
|
|
|
|
| 1199 |
if 'nama' not in df_local_matrix.columns:
|
| 1200 |
-
st.error("❌ Kolom 'nama' (
|
|
|
|
| 1201 |
else:
|
| 1202 |
-
#
|
| 1203 |
df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))
|
| 1204 |
-
|
| 1205 |
-
# Count unique findings per division per month
|
| 1206 |
monthly_counts = (
|
| 1207 |
df_local_matrix
|
| 1208 |
.groupby(['nama', 'month'])['kode_temuan']
|
| 1209 |
.nunique()
|
| 1210 |
.reset_index(name='monthly_count')
|
| 1211 |
)
|
| 1212 |
-
|
| 1213 |
-
# Average findings per month per division
|
| 1214 |
operator_avg = (
|
| 1215 |
monthly_counts
|
| 1216 |
.groupby('nama')['monthly_count']
|
| 1217 |
-
.mean()
|
| 1218 |
.reset_index(name='Finding Count')
|
| 1219 |
)
|
| 1220 |
-
|
| 1221 |
-
# 5. Average Lead Time per
|
|
|
|
| 1222 |
operator_lead = (
|
| 1223 |
-
df_local_matrix
|
| 1224 |
-
.groupby('nama')['lead_time_days']
|
| 1225 |
.mean()
|
| 1226 |
-
.reset_index(name='Average Lead Time')
|
| 1227 |
)
|
| 1228 |
-
|
| 1229 |
-
# 6. Merge
|
|
|
|
| 1230 |
risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
|
| 1231 |
-
risk_matrix = risk_matrix.rename(columns={'nama': '
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
|
| 1236 |
-
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
-
Y_LIMIT = 3 # avg lead time (days) threshold
|
| 1240 |
-
|
| 1241 |
def assign_quadrant(row):
|
| 1242 |
-
|
| 1243 |
-
lt = row['Average Lead Time']
|
| 1244 |
-
if fc >= X_LIMIT and lt >= Y_LIMIT:
|
| 1245 |
return "Quadrant I – High Leadtime & High Count"
|
| 1246 |
-
elif
|
| 1247 |
return "Quadrant II – High Leadtime but Low Count"
|
| 1248 |
-
elif
|
| 1249 |
return "Quadrant III – Low Leadtime but High Count"
|
| 1250 |
else:
|
| 1251 |
return "Quadrant IV – Low Leadtime & Low Count"
|
| 1252 |
-
|
| 1253 |
risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
|
| 1254 |
quadrant_count = risk_matrix['quadrant'].value_counts()
|
| 1255 |
-
|
| 1256 |
-
# 8. Plot
|
| 1257 |
-
|
| 1258 |
-
|
| 1259 |
-
|
| 1260 |
fig = px.scatter(
|
| 1261 |
risk_matrix,
|
| 1262 |
x='Finding Count',
|
| 1263 |
y='Average Lead Time',
|
| 1264 |
-
hover_name=
|
| 1265 |
-
|
| 1266 |
-
|
| 1267 |
-
|
| 1268 |
-
"Quadrant II – High Leadtime but Low Count": "orange",
|
| 1269 |
-
"Quadrant III – Low Leadtime but High Count": "tomato",
|
| 1270 |
-
"Quadrant IV – Low Leadtime & Low Count": "green"
|
| 1271 |
-
},
|
| 1272 |
-
height=500
|
| 1273 |
)
|
| 1274 |
-
|
| 1275 |
-
# Quadrant background shading
|
| 1276 |
fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
|
| 1277 |
-
fillcolor="rgba(255,0,0,0.
|
| 1278 |
fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
|
| 1279 |
-
fillcolor="rgba(255,150,50,0.
|
| 1280 |
fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
|
| 1281 |
-
fillcolor="rgba(255,200,200,0.
|
| 1282 |
fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
|
| 1283 |
-
fillcolor="rgba(0,120,255,0.
|
| 1284 |
-
|
| 1285 |
-
|
| 1286 |
-
|
| 1287 |
-
fig.
|
| 1288 |
-
|
| 1289 |
-
|
| 1290 |
-
|
| 1291 |
-
|
| 1292 |
-
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
fig.add_annotation(
|
| 1304 |
-
x=x, y=y,
|
| 1305 |
-
text=f"<b>{label}<br>{int(cnt)}</b>",
|
| 1306 |
-
showarrow=False,
|
| 1307 |
-
font=dict(size=16, color=color),
|
| 1308 |
-
bgcolor="white",
|
| 1309 |
-
opacity=0.8
|
| 1310 |
-
)
|
| 1311 |
-
|
| 1312 |
-
# Axis & layout tuning
|
| 1313 |
-
fig.update_layout(
|
| 1314 |
-
xaxis_title="Average Monthly Findings per Division",
|
| 1315 |
-
yaxis_title="Average Lead Time (Days)",
|
| 1316 |
-
legend_title="Quadrant",
|
| 1317 |
-
margin=dict(t=40, b=40, l=40, r=40)
|
| 1318 |
-
)
|
| 1319 |
-
|
| 1320 |
st.plotly_chart(fig, use_container_width=True)
|
| 1321 |
-
|
| 1322 |
-
# 9. Summary Table
|
| 1323 |
-
|
|
|
|
| 1324 |
st.dataframe(
|
| 1325 |
-
risk_matrix
|
| 1326 |
-
|
| 1327 |
-
].sort_values("Finding Count", ascending=False)
|
| 1328 |
-
.round(2),
|
| 1329 |
-
use_container_width=True,
|
| 1330 |
-
hide_index=True
|
| 1331 |
)
|
| 1332 |
-
|
| 1333 |
except Exception as e:
|
| 1334 |
-
st.error(f"⚠️ Error
|
| 1335 |
-
st.exception(e)
|
| 1336 |
|
| 1337 |
# st.exception(e) # Uncomment for debugging
|
| 1338 |
|
|
|
|
| 1174 |
st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
|
| 1175 |
|
| 1176 |
# =================== 5. Matrix (Tetap Dipertahankan) ===================
|
| 1177 |
+
# =================== 5. Matrix (Tetap Dipertahankan) ===================
|
| 1178 |
+
st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True)
|
| 1179 |
|
| 1180 |
import math
|
| 1181 |
import plotly.express as px
|
| 1182 |
import pandas as pd
|
|
|
|
| 1183 |
try:
    # Objective 5: plot each operator's average monthly finding count against
    # its average lead time and classify the operator into one of four risk
    # quadrants. Work on a private copy so the shared `df` is never mutated.
    df_local_matrix = df.copy()

    # 0. Company/profile filtering is intentionally skipped for the general
    #    dashboard view.

    # 1. Exclude Positive findings. The explicit .copy() keeps the column
    #    assignments below from triggering pandas' SettingWithCopyWarning.
    if 'temuan_kategori' in df_local_matrix.columns:
        df_local_matrix = df_local_matrix[
            df_local_matrix["temuan_kategori"] != "Positive"
        ].copy()

    # 2. Ensure datetime columns; unparseable values are coerced to NaT.
    df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
    df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')

    # 3. Lead time in whole days between creation and closing.
    df_local_matrix['lead_time_days'] = (
        df_local_matrix['close_at'] - df_local_matrix['created_at']
    ).dt.days
    # NOTE(review): rows with a missing/unparseable date get a lead time of 0,
    # so they lower the operator's average and make the per-operator fillna
    # further down effectively a no-op. Confirm this is the intended metric.
    df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)

    # 4. Average monthly finding count per operator.
    if 'nama' not in df_local_matrix.columns:
        # Report the problem but let the rest of the page keep rendering.
        st.error("❌ Kolom 'nama' (operator) tidak ditemukan.")
    else:
        # Month bucket (YYYY-MM) derived from the creation date.
        df_local_matrix = df_local_matrix.assign(
            month=df_local_matrix['created_at'].dt.to_period('M').astype(str)
        )

        # Number of distinct finding codes per operator per month.
        monthly_counts = (
            df_local_matrix
            .groupby(['nama', 'month'])['kode_temuan']
            .nunique()
            .reset_index(name='monthly_count')
        )

        # Mean of the monthly counts per operator (an average, not a total).
        operator_avg = (
            monthly_counts
            .groupby('nama')['monthly_count']
            .mean()
            .reset_index(name='Finding Count')
        )

        # 5. Average lead time per operator.
        operator_lead = (
            df_local_matrix
            .groupby('nama')['lead_time_days']
            .mean()
            .reset_index(name='Average Lead Time')
        )

        # 6. Merge both metrics into the risk matrix.
        risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
        risk_matrix = risk_matrix.rename(columns={'nama': 'Operator Name'})
        # Operators without any lead time value are plotted at 0 days.
        risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0)

        # 7. Quadrant thresholds and labels.
        X_LIMIT = 20  # average monthly findings threshold
        Y_LIMIT = 3   # average lead time (days) threshold

        Q1_LABEL = "Quadrant I – High Leadtime & High Count"
        Q2_LABEL = "Quadrant II – High Leadtime but Low Count"
        Q3_LABEL = "Quadrant III – Low Leadtime but High Count"
        Q4_LABEL = "Quadrant IV – Low Leadtime & Low Count"

        def assign_quadrant(row):
            """Return the quadrant label for one risk-matrix row.

            The row's 'Finding Count' is compared with X_LIMIT and its
            'Average Lead Time' with Y_LIMIT; values equal to a threshold
            count as "high".
            """
            if row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
                return Q1_LABEL
            elif row['Finding Count'] < X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
                return Q2_LABEL
            elif row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] < Y_LIMIT:
                return Q3_LABEL
            else:
                return Q4_LABEL

        if risk_matrix.empty:
            # Nothing left after filtering: a row-wise apply on an empty frame
            # and NaN axis extents would otherwise surface as a generic error.
            st.info("No findings available to build the risk matrix.")
        else:
            risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
            quadrant_count = risk_matrix['quadrant'].value_counts()

            # 8. Scatter plot.
            # The shaded area must always extend past both thresholds.
            # Otherwise, when every operator sits below a threshold, the
            # "high" rectangles get inverted bounds, overlap the "low" ones,
            # and their count labels land on the wrong side of the line.
            max_x = max(float(risk_matrix['Finding Count'].max()) + 1, X_LIMIT * 1.5)
            max_y = max(float(risk_matrix['Average Lead Time'].max()) + 5, Y_LIMIT * 1.5)

            fig = px.scatter(
                risk_matrix,
                x='Finding Count',
                y='Average Lead Time',
                hover_name="Operator Name",
                size=[12] * len(risk_matrix),  # constant marker size
                size_max=15,
                title="Audit Findings Risk Matrix: Avg Monthly Count vs Lead Time"
            )

            # One entry per quadrant:
            # (label, x0, x1, y0, y1, background fill, count-label colour).
            quadrant_specs = [
                (Q1_LABEL, X_LIMIT, max_x, Y_LIMIT, max_y, "rgba(255,0,0,0.25)", "darkred"),
                (Q2_LABEL, 0, X_LIMIT, Y_LIMIT, max_y, "rgba(255,150,50,0.25)", "orange"),
                (Q3_LABEL, X_LIMIT, max_x, 0, Y_LIMIT, "rgba(255,200,200,0.25)", "red"),
                (Q4_LABEL, 0, X_LIMIT, 0, Y_LIMIT, "rgba(0,120,255,0.15)", "green"),
            ]

            # Background shading for each quadrant.
            for _label, x0, x1, y0, y1, fill, _font_color in quadrant_specs:
                fig.add_shape(type="rect", x0=x0, x1=x1, y0=y0, y1=y1,
                              fillcolor=fill, line_width=0)

            # Dashed lines marking the two thresholds.
            fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="black")
            fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="black")

            # Operator count per quadrant, centred in its rectangle.
            for label, x0, x1, y0, y1, _fill, font_color in quadrant_specs:
                fig.add_annotation(x=x0 + (x1 - x0) / 2,
                                   y=y0 + (y1 - y0) / 2,
                                   text=f"<b>{quadrant_count.get(label, 0)}</b>",
                                   showarrow=False, font=dict(size=22, color=font_color))

            st.plotly_chart(fig, use_container_width=True)

            # 9. Summary table, busiest operators first.
            st.subheader("Summary (Avg Monthly Count vs Avg Lead Time)")
            st.dataframe(
                risk_matrix.sort_values("Finding Count", ascending=False),
                use_container_width=True
            )
except Exception as e:
    # UI boundary: show the failure in the page instead of crashing the app.
    st.error(f"⚠️ Error Risk Matrix: {e}")
|
| 1312 |
|
| 1313 |
# st.exception(e) # Uncomment for debugging
|
| 1314 |
|