Spaces:

bukittechnology
/

pln

Sleeping

App Files Community

SHELLAPANDIANGANHUNGING committed on Dec 11, 2025

Commit

e45b23a

verified ·

1 Parent(s): 41220fe

Update app.py

Browse files

Files changed (1) hide show

app.py +82 -106

app.py CHANGED Viewed

@@ -1174,165 +1174,141 @@ else:
     st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
 # =================== 5. Matrix (Tetap Dipertahankan) ===================
-st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Divisions Move Slow?</h3>", unsafe_allow_html=True)
 import math
 import plotly.express as px
 import pandas as pd
 try:
     df_local_matrix = df.copy()
     # 1. Exclude Positive findings
     if 'temuan_kategori' in df_local_matrix.columns:
         df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]
     # 2. Ensure datetime columns
     df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
     df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')
-    # 3. Compute LEAD TIME in days
     df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
     df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)
-    # 4. Avg Monthly Finding Count per Division ('nama')
     if 'nama' not in df_local_matrix.columns:
-        st.error("❌ Kolom 'nama' (Division/Operator) tidak ditemukan.")
     else:
-        # Create YYYY-MM month column
         df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))
-        # Count unique findings per division per month
         monthly_counts = (
             df_local_matrix
             .groupby(['nama', 'month'])['kode_temuan']
             .nunique()
             .reset_index(name='monthly_count')
         )
-        # Average findings per month per division
         operator_avg = (
             monthly_counts
             .groupby('nama')['monthly_count']
-            .mean()
             .reset_index(name='Finding Count')
         )
-        # 5. Average Lead Time per Division
         operator_lead = (
-            df_local_matrix
-            .groupby('nama')['lead_time_days']
             .mean()
-            .reset_index(name='Average Lead Time')  # 🔑 Renamed here!
         )
-        # 6. Merge → Risk Matrix
         risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
-        risk_matrix = risk_matrix.rename(columns={'nama': 'Division'})
-        # Fill missing lead time (e.g., no closed findings) with 0
-        risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0).round(2)
-        risk_matrix['Finding Count'] = risk_matrix['Finding Count'].round(2)
-        # 7. Quadrant Logic
-        X_LIMIT = 20  # avg findings/month threshold
-        Y_LIMIT = 3   # avg lead time (days) threshold
         def assign_quadrant(row):
-            fc = row['Finding Count']
-            lt = row['Average Lead Time']
-            if fc >= X_LIMIT and lt >= Y_LIMIT:
                 return "Quadrant I – High Leadtime & High Count"
-            elif fc < X_LIMIT and lt >= Y_LIMIT:
                 return "Quadrant II – High Leadtime but Low Count"
-            elif fc >= X_LIMIT and lt < Y_LIMIT:
                 return "Quadrant III – Low Leadtime but High Count"
             else:
                 return "Quadrant IV – Low Leadtime & Low Count"
         risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
         quadrant_count = risk_matrix['quadrant'].value_counts()
-        # 8. Plot Scatter with Quadrant Backgrounds
-        max_x = max(risk_matrix['Finding Count'].max() + 1, X_LIMIT + 5)
-        max_y = max(risk_matrix['Average Lead Time'].max() + 5, Y_LIMIT + 5)
         fig = px.scatter(
             risk_matrix,
             x='Finding Count',
             y='Average Lead Time',
-            hover_name='Division',
-            color='quadrant',
-            color_discrete_map={
-                "Quadrant I – High Leadtime & High Count": "darkred",
-                "Quadrant II – High Leadtime but Low Count": "orange",
-                "Quadrant III – Low Leadtime but High Count": "tomato",
-                "Quadrant IV – Low Leadtime & Low Count": "green"
-            },
-            height=500
         )
-        # Quadrant background shading
         fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
-                      fillcolor="rgba(255,0,0,0.15)", line_width=0, layer="below")
         fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
-                      fillcolor="rgba(255,150,50,0.15)", line_width=0, layer="below")
         fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
-                      fillcolor="rgba(255,200,200,0.15)", line_width=0, layer="below")
         fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
-                      fillcolor="rgba(0,120,255,0.10)", line_width=0, layer="below")
-        # Threshold lines
-        fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="gray", opacity=0.7)
-        fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="gray", opacity=0.7)
-        # Quadrant labels & counts
-        annotations = [
-            (X_LIMIT + (max_x - X_LIMIT)/2, Y_LIMIT + (max_y - Y_LIMIT)/2, "Q1", "darkred"),
-            (X_LIMIT/2, Y_LIMIT + (max_y - Y_LIMIT)/2, "Q2", "orange"),
-            (X_LIMIT + (max_x - X_LIMIT)/2, Y_LIMIT/2, "Q3", "tomato"),
-            (X_LIMIT/2, Y_LIMIT/2, "Q4", "green"),
-        ]
-        for x, y, label, color in annotations:
-            count = quadrant_count.get(f"Quadrant {'I' if label=='Q1' else 'II' if label=='Q2' else 'III' if label=='Q3' else 'IV'}– ...",0)
-            # Safer lookup: reconstruct key
-            key = next((k for k in quadrant_count.index if label in k), None)
-            cnt = quadrant_count.get(key, 0) if key else 0
-            fig.add_annotation(
-                x=x, y=y,
-                text=f"<b>{label}<br>{int(cnt)}</b>",
-                showarrow=False,
-                font=dict(size=16, color=color),
-                bgcolor="white",
-                opacity=0.8
-            )
-        # Axis & layout tuning
-        fig.update_layout(
-            xaxis_title="Average Monthly Findings per Division",
-            yaxis_title="Average Lead Time (Days)",
-            legend_title="Quadrant",
-            margin=dict(t=40, b=40, l=40, r=40)
-        )
         st.plotly_chart(fig, use_container_width=True)
-        # 9. Summary Table (sorted descending by Finding Count)
-        st.markdown("<h4 style='text-align:center;'>Division Performance Summary</h4>", unsafe_allow_html=True)
         st.dataframe(
-            risk_matrix[
-                ['Division', 'Finding Count', 'Average Lead Time', 'quadrant']
-            ].sort_values("Finding Count", ascending=False)
-            .round(2),
-            use_container_width=True,
-            hide_index=True
         )
 except Exception as e:
-    st.error(f"⚠️ Error in Risk Matrix (Objective 5): {e}")
-    st.exception(e)  # Optional: show full traceback during dev
 # st.exception(e) # Uncomment for debugging

     st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
 # =================== 5. Matrix (Tetap Dipertahankan) ===================
+# =================== 5. Matrix (Tetap Dipertahankan) ===================
+st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True)
 import math
 import plotly.express as px
 import pandas as pd
 try:
     df_local_matrix = df.copy()
+    # ============================
+    # 0. Filter: ONLY 1 COMPANY & 1 PROFILE (if applicable)
+    # ============================
+    # (Skipped for general dashboard view)
+    # ============================
     # 1. Exclude Positive findings
+    # ============================
     if 'temuan_kategori' in df_local_matrix.columns:
         df_local_matrix = df_local_matrix[df_local_matrix["temuan_kategori"] != "Positive"]
+    # ============================
     # 2. Ensure datetime columns
+    # ============================
     df_local_matrix['created_at'] = pd.to_datetime(df_local_matrix['created_at'], errors='coerce')
     df_local_matrix['close_at'] = pd.to_datetime(df_local_matrix['close_at'], errors='coerce')
+    # ============================
+    # 3. Compute LEAD TIME
+    # ============================
     df_local_matrix['lead_time_days'] = (df_local_matrix['close_at'] - df_local_matrix['created_at']).dt.days
     df_local_matrix['lead_time_days'] = df_local_matrix['lead_time_days'].fillna(0)
+    # ============================
+    # 4. Average Monthly Finding Count per Operator
+    # ============================
     if 'nama' not in df_local_matrix.columns:
+        st.error("❌ Kolom 'nama' (operator) tidak ditemukan.")
+        # st.stop() # Stop bisa dihilangkan agar script tetap jalan
     else:
+        # Buat kolom bulan (YYYY-MM)
         df_local_matrix = df_local_matrix.assign(month=df_local_matrix['created_at'].dt.to_period('M').astype(str))
+        # Hitung jumlah temuan per operator per bulan
         monthly_counts = (
             df_local_matrix
             .groupby(['nama', 'month'])['kode_temuan']
             .nunique()
             .reset_index(name='monthly_count')
         )
+        # Hitung rata-rata bulanan per operator
         operator_avg = (
             monthly_counts
             .groupby('nama')['monthly_count']
+            .mean()  # <-- RATA-RATA per bulan (bukan total!)
             .reset_index(name='Finding Count')
         )
+        # ============================
+        # 5. Average Lead Time per Operator
+        # ============================
         operator_lead = (
+            df_local_matrix.groupby('nama')['lead_time_days']
             .mean()
+            .reset_index(name='Average Lead Time')
         )
+        # ============================
+        # 6. Merge Risk Matrix
+        # ============================
         risk_matrix = operator_avg.merge(operator_lead, on='nama', how='left')
+        risk_matrix = risk_matrix.rename(columns={'nama': 'Operator Name'})
+        # Handle operator tanpa lead time (e.g., belum closed)
+        risk_matrix['Average Lead Time'] = risk_matrix['Average Lead Time'].fillna(0)
+        # ============================
+        # 7. Quadrant Logic (unchanged)
+        # ============================
+        X_LIMIT = 20
+        Y_LIMIT = 3
         def assign_quadrant(row):
+            if row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
                 return "Quadrant I – High Leadtime & High Count"
+            elif row['Finding Count'] < X_LIMIT and row['Average Lead Time'] >= Y_LIMIT:
                 return "Quadrant II – High Leadtime but Low Count"
+            elif row['Finding Count'] >= X_LIMIT and row['Average Lead Time'] < Y_LIMIT:
                 return "Quadrant III – Low Leadtime but High Count"
             else:
                 return "Quadrant IV – Low Leadtime & Low Count"
         risk_matrix['quadrant'] = risk_matrix.apply(assign_quadrant, axis=1)
         quadrant_count = risk_matrix['quadrant'].value_counts()
+        # ============================
+        # 8. Scatter Plot (format visual tetap sama persis)
+        # ============================
+        max_x = risk_matrix['Finding Count'].max() + 1
+        max_y = risk_matrix['Average Lead Time'].max() + 5
         fig = px.scatter(
             risk_matrix,
             x='Finding Count',
             y='Average Lead Time',
+            hover_name="Operator Name",
+            size=[12] * len(risk_matrix),
+            size_max=15,
+            title="Audit Findings Risk Matrix: Avg Monthly Count vs Lead Time"
         )
+        # Background quadrant (same as original)
         fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=Y_LIMIT, y1=max_y,
+                      fillcolor="rgba(255,0,0,0.25)", line_width=0)      # Q1
         fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=Y_LIMIT, y1=max_y,
+                      fillcolor="rgba(255,150,50,0.25)", line_width=0)  # Q2
         fig.add_shape(type="rect", x0=X_LIMIT, x1=max_x, y0=0, y1=Y_LIMIT,
+                      fillcolor="rgba(255,200,200,0.25)", line_width=0) # Q3
         fig.add_shape(type="rect", x0=0, x1=X_LIMIT, y0=0, y1=Y_LIMIT,
+                      fillcolor="rgba(0,120,255,0.15)", line_width=0)   # Q4
+        fig.add_vline(x=X_LIMIT, line_dash="dash", line_color="black")
+        fig.add_hline(y=Y_LIMIT, line_dash="dash", line_color="black")
+        # Quadrant count annotations (same positions & style)
+        fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2,
+                           y=Y_LIMIT + (max_y - Y_LIMIT)/2,
+                           text=f"<b>{quadrant_count.get('Quadrant I – High Leadtime & High Count',0)}</b>",
+                           showarrow=False, font=dict(size=22, color="darkred"))
+        fig.add_annotation(x=X_LIMIT/2,
+                           y=Y_LIMIT + (max_y - Y_LIMIT)/2,
+                           text=f"<b>{quadrant_count.get('Quadrant II – High Leadtime but Low Count',0)}</b>",
+                           showarrow=False, font=dict(size=22, color="orange"))
+        fig.add_annotation(x=X_LIMIT + (max_x - X_LIMIT)/2,
+                           y=Y_LIMIT/2,
+                           text=f"<b>{quadrant_count.get('Quadrant III – Low Leadtime but High Count',0)}</b>",
+                           showarrow=False, font=dict(size=22, color="red"))
+        fig.add_annotation(x=X_LIMIT/2,
+                           y=Y_LIMIT/2,
+                           text=f"<b>{quadrant_count.get('Quadrant IV – Low Leadtime & Low Count',0)}</b>",
+                           showarrow=False, font=dict(size=22, color="green"))
         st.plotly_chart(fig, use_container_width=True)
+        # ============================
+        # 9. Summary Table
+        # ============================
+        st.subheader("Summary (Avg Monthly Count vs Avg Lead Time)")
         st.dataframe(
+            risk_matrix.sort_values("Finding Count", ascending=False),
+            use_container_width=True
         )
 except Exception as e:
+    st.error(f"⚠️ Error Risk Matrix: {e}")
+    # st.exception(e) # Uncomment for debugging
 # st.exception(e) # Uncomment for debugging