SHELLAPANDIANGANHUNGING committed on
Commit
a70f541
·
verified ·
1 Parent(s): 7457555

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +228 -68
app.py CHANGED
@@ -1260,15 +1260,6 @@ except Exception as e:
1260
  st.error(f"⚠️ Error Risk Matrix: {e}")
1261
  # st.exception(e) # Uncomment for debugging
1262
 
1263
- # =================== 7. PREDICTIVE INSIGHTS (FINAL — PLN BLUE EDITION v2) ===================
1264
- # ✅ Panel 1: ONLY Coverage < 90% AND Slope < 0
1265
- # ✅ Panel 3: ONLY Coverage = 100% AND Slope > 0 → Avg/Month
1266
- # ✅ Estetik: Sortable, Hover, Zebra, PLN Blue, No Emoticons
1267
- import streamlit as st
1268
- import plotly.graph_objects as go
1269
- import numpy as np
1270
- import pandas as pd
1271
-
1272
  import streamlit as st
1273
  import plotly.graph_objects as go
1274
  import numpy as np
@@ -1413,7 +1404,9 @@ function makeSortable(tableId) {
1413
  }
1414
  setTimeout(() => {
1415
  makeSortable('tbl-creators');
1416
- makeSortable('tbl-issues');
 
 
1417
  }, 800);
1418
  </script>
1419
  """, unsafe_allow_html=True)
@@ -1435,11 +1428,9 @@ def ascii_sparkline_pln(data):
1435
  except:
1436
  return "<span class='spark' style='color:#999;'>▁▁▁</span>"
1437
 
1438
- # ——————— 1. Creators: ONLY Coverage < 90% AND Slope < 0 (Non-Positive Only) ———————
1439
  def predict_creators(df):
1440
- # 🔥 Filter: Hanya yang bukan 'Positive'
1441
- df = df[df['temuan_kategori'] != 'Positive'].copy() # ✅ Filter non-Positive
1442
-
1443
  if 'creator_name' not in df.columns or df.empty:
1444
  return pd.DataFrame()
1445
 
@@ -1485,13 +1476,109 @@ def predict_creators(df):
1485
  # ✅ Ambil 10 creator dengan slope paling negatif (paling turun)
1486
  return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
1487
 
1488
- # ——————— 3. Issues: ONLY Coverage=100% & Trend Slope > 0 → Avg/Month ———————
1489
- def predict_issues(df):
1490
- if 'kategori' not in df.columns or df.empty:
 
1491
  return pd.DataFrame()
1492
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1493
  # 🔥 Filter: Hanya yang bukan 'Positive'
1494
- df = df[df['kategori'] != 'Positive'].copy() # ✅ Filter non-Positive
 
 
 
1495
 
1496
  start_month = df['created_at'].min().to_period('M')
1497
  end_month = df['created_at'].max().to_period('M')
@@ -1499,7 +1586,7 @@ def predict_issues(df):
1499
  n_months = len(all_months)
1500
 
1501
  results = []
1502
- for cat, group in df.groupby('kategori'):
1503
  ts_data = (
1504
  group.groupby(group['created_at'].dt.to_period('M'))
1505
  .size()
@@ -1540,13 +1627,15 @@ def predict_issues(df):
1540
  )
1541
  df_res = df_res.sort_values('Trend Slope', ascending=False)
1542
 
1543
- return df_res.reset_index(drop=True)
1544
 
1545
  # ——————— RUN ———————
1546
  df_creator = predict_creators(df_filtered)
1547
- df_issue = predict_issues(df_filtered)
 
 
1548
 
1549
- # 🎯 PANEL 1: Creators (FILTERED: Coverage < 90% & Slope < 0) — Non-Positive Only
1550
  st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
1551
  st.markdown("<div class='predictive-header'>1. Which Reporters Are Predicted to Have No Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
1552
  if not df_creator.empty:
@@ -1582,11 +1671,83 @@ if not df_creator.empty:
1582
  # )
1583
  st.markdown("</div>", unsafe_allow_html=True)
1584
 
1585
- # 🎯 PANEL 3: Issues (FILTERED: Coverage=100% & Rising) Hanya Non-Positive
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1586
  st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
1587
  st.markdown(
1588
  "<div class='predictive-header'>"
1589
- "2. Which Issue Categories Are Likely to Appear in the Next 3 Months (Non Positive Only)"
1590
  "<span style='font-size:0.75em; font-weight:400; color:#003DA5;'>"
1591
  " &nbsp;&nbsp;(* Categorization uses NLP — Natural Language Processing from random text)"
1592
  "</span>"
@@ -1594,48 +1755,48 @@ st.markdown(
1594
  unsafe_allow_html=True
1595
  )
1596
 
1597
- # if not df_issue.empty:
1598
- # cols = ['Category', 'Avg/Month', 'Coverage (%)', 'Trend Slope', 'Status', 'Trend']
1599
-
1600
- # # 🔵 Rename ONLY for display
1601
- # df_display = df_issue[cols].rename(columns={
1602
- # "Status": "Status Issue for Next Month"
1603
- # })
1604
-
1605
- # html = df_display.to_html(escape=False, index=False, table_id="tbl-issues")
1606
- # st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
1607
-
1608
- # # st.markdown(
1609
- # # "<div class='predictive-note'>"
1610
- # # "<strong>Filtered:</strong> Reported every month (100% coverage) with increasing trend. "
1611
- # # "<strong>Avg/Month</strong> = total ÷ months. "
1612
- # # "<span class='trend-rising'>High-Risk Rising</span> = slope > 0.2."
1613
- # # "</div>",
1614
- # # unsafe_allow_html=True
1615
- # # )
1616
-
1617
- # # else:
1618
- # # st.markdown(
1619
- # # "<div class='predictive-table-wrapper'>"
1620
- # # "<p style='text-align:center; color:#c62828; padding:24px; font-weight:500;'>"
1621
- # # "⚠️ No rising categories with 100% monthly coverage."
1622
- # # "</p>"
1623
- # # "<p style='text-align:center; color:#666; font-size:0.9em;'>"
1624
- # # "Consider relaxing coverage filter if data is sparse."
1625
- # # "</p></div>",
1626
- # # unsafe_allow_html=True
1627
- # # )
1628
-
1629
- # st.markdown("</div>", unsafe_allow_html=True)
1630
-
1631
-
1632
- # # =================== WHITEBOARD STYLE CHART FOR PANEL 3 ===================
1633
- # st.markdown("<h4 style='text-align: center; color: #2c3e50;'>Whiteboard Insight: Trend vs Frequency</h4>", unsafe_allow_html=True)
1634
-
1635
- # # Buat chart scatter dengan gaya whiteboard
1636
- if not df_issue.empty:
1637
  # Ambil data untuk scatter
1638
- df_plot = df_issue.copy()
1639
  df_plot['Size'] = df_plot['Avg/Month'] # Ukuran lingkaran = frekuensi (Avg/Month)
1640
  df_plot['Y'] = df_plot['Trend Slope'] # Y = Trend Slope
1641
 
@@ -1661,7 +1822,7 @@ if not df_issue.empty:
1661
 
1662
  # Layout
1663
  fig.update_layout(
1664
- title=dict(text="<b>Issue Trend vs Frequency (Non-Positive)</b>", x=0.5, y=0.95),
1665
  xaxis=dict(
1666
  title="Category",
1667
  tickangle=45,
@@ -1733,8 +1894,7 @@ if not df_issue.empty:
1733
  )
1734
  st.markdown(insight_text, unsafe_allow_html=True)
1735
  else:
1736
- st.info("No data available for non-positive issues with 100% coverage and positive trend.")
1737
- # =================== 6. ✅ AI INSIGHT ENGINE (BARU - BERDASARKAN DATA & RATIO) ===================
1738
 
1739
  st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
1740
 
 
1260
  st.error(f"⚠️ Error Risk Matrix: {e}")
1261
  # st.exception(e) # Uncomment for debugging
1262
 
 
 
 
 
 
 
 
 
 
1263
  import streamlit as st
1264
  import plotly.graph_objects as go
1265
  import numpy as np
 
1404
  }
1405
  setTimeout(() => {
1406
  makeSortable('tbl-creators');
1407
+ makeSortable('tbl-locations');
1408
+ makeSortable('tbl-divisions');
1409
+ makeSortable('tbl-categories');
1410
  }, 800);
1411
  </script>
1412
  """, unsafe_allow_html=True)
 
1428
  except:
1429
  return "<span class='spark' style='color:#999;'>▁▁▁</span>"
1430
 
1431
+ # ——————— 1. Creators: ONLY Coverage < 90% AND Slope < 0 ———————
1432
  def predict_creators(df):
1433
+ # Tidak ada filter Non-Positive
 
 
1434
  if 'creator_name' not in df.columns or df.empty:
1435
  return pd.DataFrame()
1436
 
 
1476
  # ✅ Ambil 10 creator dengan slope paling negatif (paling turun)
1477
  return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
1478
 
1479
+ # ——————— 2. Locations: ONLY Coverage < 90% AND Slope < 0 ———————
1480
def predict_locations(df):
    """Rank locations whose monthly report counts are declining.

    Reports are counted per location and calendar month across the whole
    span between the earliest and latest ``created_at`` value in ``df``;
    months without any report count as 0.  A location is kept only when the
    slope of a straight-line fit through its monthly counts is negative AND
    it reported in fewer than 90% of the months.  No Non-Positive filter is
    applied here.

    Args:
        df: Report-level DataFrame.  It needs a ``nama_lokasi_full`` column
            and a ``created_at`` column that supports the ``.dt`` accessor.

    Returns:
        DataFrame with the columns ``Location``, ``Avg Reports/Month``,
        ``Coverage (%)``, ``Trend Slope``, ``Trend`` and ``Reason``, holding
        at most the 10 most negative slopes in ascending slope order.  An
        empty DataFrame is returned when a required column is missing, no
        row carries a date, the data spans fewer than two months, or no
        location matches the filter.
    """
    required = ('nama_lokasi_full', 'created_at')
    if df.empty or any(col not in df.columns for col in required):
        return pd.DataFrame()

    # An all-NaT date column would make period_range() fail further down.
    dates = df['created_at'].dropna()
    if dates.empty:
        return pd.DataFrame()

    all_months = pd.period_range(
        start=dates.min().to_period('M'),
        end=dates.max().to_period('M'),
        freq='M',
    )
    n_months = len(all_months)
    if n_months < 2:
        # A trend slope needs at least two monthly points.
        return pd.DataFrame()

    # Rows = locations, columns = every month of the span, values = counts.
    monthly = (
        df.groupby(['nama_lokasi_full', df['created_at'].dt.to_period('M')])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=all_months, fill_value=0)
    )

    x = np.arange(n_months)
    results = []
    for lokasi, counts in zip(monthly.index, monthly.to_numpy()):
        try:
            slope = np.polyfit(x, counts, 1)[0]
        except (np.linalg.LinAlgError, TypeError, ValueError):
            # Best effort: a location whose fit fails is left out.
            continue

        coverage = (counts > 0).sum() / n_months
        # FILTER: Coverage < 90% AND Slope < 0
        if not (slope < 0 and coverage < 0.9):
            continue

        avg_rate = counts.mean()
        results.append({
            'Location': lokasi,
            'Avg Reports/Month': round(avg_rate, 2),
            'Coverage (%)': round(coverage * 100, 1),
            'Trend Slope': round(slope, 3),
            'Trend': ascii_sparkline_pln(counts.tolist()),
            'Reason': f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo.",
        })

    df_res = pd.DataFrame(results)
    # Keep the 10 locations with the most negative slope (steepest decline).
    return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
1526
+
1527
+ # ——————— 3. Divisions: ONLY Coverage < 90% AND Slope < 0 ———————
1528
def predict_divisions(df):
    """Rank divisions whose monthly report counts are declining.

    Reports are counted per division (the ``nama`` column) and calendar
    month across the whole span between the earliest and latest
    ``created_at`` value in ``df``; months without any report count as 0.
    A division is kept only when the slope of a straight-line fit through
    its monthly counts is negative AND it reported in fewer than 90% of the
    months.  No Non-Positive filter is applied here.

    Args:
        df: Report-level DataFrame.  It needs a ``nama`` column and a
            ``created_at`` column that supports the ``.dt`` accessor.

    Returns:
        DataFrame with the columns ``Division``, ``Avg Reports/Month``,
        ``Coverage (%)``, ``Trend Slope``, ``Trend`` and ``Reason``, holding
        at most the 10 most negative slopes in ascending slope order.  An
        empty DataFrame is returned when a required column is missing, no
        row carries a date, the data spans fewer than two months, or no
        division matches the filter.
    """
    required = ('nama', 'created_at')
    if df.empty or any(col not in df.columns for col in required):
        return pd.DataFrame()

    # An all-NaT date column would make period_range() fail further down.
    dates = df['created_at'].dropna()
    if dates.empty:
        return pd.DataFrame()

    all_months = pd.period_range(
        start=dates.min().to_period('M'),
        end=dates.max().to_period('M'),
        freq='M',
    )
    n_months = len(all_months)
    if n_months < 2:
        # A trend slope needs at least two monthly points.
        return pd.DataFrame()

    # Rows = divisions, columns = every month of the span, values = counts.
    monthly = (
        df.groupby(['nama', df['created_at'].dt.to_period('M')])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=all_months, fill_value=0)
    )

    x = np.arange(n_months)
    results = []
    for div, counts in zip(monthly.index, monthly.to_numpy()):
        try:
            slope = np.polyfit(x, counts, 1)[0]
        except (np.linalg.LinAlgError, TypeError, ValueError):
            # Best effort: a division whose fit fails is left out.
            continue

        coverage = (counts > 0).sum() / n_months
        # FILTER: Coverage < 90% AND Slope < 0
        if not (slope < 0 and coverage < 0.9):
            continue

        avg_rate = counts.mean()
        results.append({
            'Division': div,
            'Avg Reports/Month': round(avg_rate, 2),
            'Coverage (%)': round(coverage * 100, 1),
            'Trend Slope': round(slope, 3),
            'Trend': ascii_sparkline_pln(counts.tolist()),
            'Reason': f"Slope = {slope:.3f}, Coverage = {coverage*100:.1f}%. Avg: {avg_rate:.2f}/mo.",
        })

    df_res = pd.DataFrame(results)
    # Keep the 10 divisions with the most negative slope (steepest decline).
    return df_res.sort_values('Trend Slope', ascending=True).head(10) if not df_res.empty else df_res
1574
+
1575
+ # ——————— 4. Categories: ONLY Non-Positive + Coverage=100% & Trend Slope > 0 ———————
1576
+ def predict_categories(df):
1577
  # 🔥 Filter: Hanya yang bukan 'Positive'
1578
+ df = df[df['temuan_kategori'] != 'Positive'].copy() # ✅ Filter non-Positive
1579
+
1580
+ if 'temuan_kategori' not in df.columns or df.empty:
1581
+ return pd.DataFrame()
1582
 
1583
  start_month = df['created_at'].min().to_period('M')
1584
  end_month = df['created_at'].max().to_period('M')
 
1586
  n_months = len(all_months)
1587
 
1588
  results = []
1589
+ for cat, group in df.groupby('temuan_kategori'):
1590
  ts_data = (
1591
  group.groupby(group['created_at'].dt.to_period('M'))
1592
  .size()
 
1627
  )
1628
  df_res = df_res.sort_values('Trend Slope', ascending=False)
1629
 
1630
+ return df_res.reset_index(drop=True).head(10) if not df_res.empty else df_res
1631
 
1632
  # ——————— RUN ———————
1633
  df_creator = predict_creators(df_filtered)
1634
+ df_location = predict_locations(df_filtered)
1635
+ df_division = predict_divisions(df_filtered)
1636
+ df_category = predict_categories(df_filtered)
1637
 
1638
+ # 🎯 PANEL 1: Creators (FILTERED: Coverage < 90% & Slope < 0)
1639
  st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
1640
  st.markdown("<div class='predictive-header'>1. Which Reporters Are Predicted to Have No Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
1641
  if not df_creator.empty:
 
1671
  # )
1672
  st.markdown("</div>", unsafe_allow_html=True)
1673
 
1674
# PANEL 2: Locations (filtered: Coverage < 90% and Slope < 0).
# The panel <div> is opened here and closed by the last statement of the block.
st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
st.markdown("<div class='predictive-header'>2. Which Locations Are Predicted to Have No Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
if not df_location.empty:
    cols = ['Location', 'Avg Reports/Month', 'Coverage (%)', 'Trend Slope', 'Trend', 'Reason']

    # The rename affects the displayed copy only; df_location is left untouched.
    df_display = df_location[cols].rename(columns={"Reason": "Reason Forecast"})

    # escape=False lets the markup stored in the table cells render as HTML;
    # the table id matches the one passed to makeSortable() in the page script.
    html = df_display.to_html(escape=False, index=False, table_id="tbl-locations")
    st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# Nothing is rendered in place of the table when df_location is empty
# (the criteria note and the empty-state message are currently disabled).
st.markdown("</div>", unsafe_allow_html=True)
1709
+
1710
# PANEL 3: Divisions (filtered: Coverage < 90% and Slope < 0).
# The panel <div> is opened here and closed by the last statement of the block.
st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
st.markdown("<div class='predictive-header'>3. Which Divisions Are Predicted to Have No Future Inspections? (Top 10 Most Declining)</div>", unsafe_allow_html=True)
if not df_division.empty:
    cols = ['Division', 'Avg Reports/Month', 'Coverage (%)', 'Trend Slope', 'Trend', 'Reason']

    # The rename affects the displayed copy only; df_division is left untouched.
    df_display = df_division[cols].rename(columns={"Reason": "Reason Forecast"})

    # escape=False lets the markup stored in the table cells render as HTML;
    # the table id matches the one passed to makeSortable() in the page script.
    html = df_display.to_html(escape=False, index=False, table_id="tbl-divisions")
    st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# Nothing is rendered in place of the table when df_division is empty
# (the criteria note and the empty-state message are currently disabled).
st.markdown("</div>", unsafe_allow_html=True)
1745
+
1746
+ # 🎯 PANEL 4: Categories (FILTERED: Non-Positive + Coverage=100% & Rising)
1747
  st.markdown("<div class='predictive-panel'>", unsafe_allow_html=True)
1748
  st.markdown(
1749
  "<div class='predictive-header'>"
1750
+ "4. Which Issue Categories Are Likely to Appear in the Next 3 Months (Non-Positive Only)"
1751
  "<span style='font-size:0.75em; font-weight:400; color:#003DA5;'>"
1752
  " &nbsp;&nbsp;(* Categorization uses NLP — Natural Language Processing from random text)"
1753
  "</span>"
 
1755
  unsafe_allow_html=True
1756
  )
1757
 
1758
# Panel 4 table: rendered only when predict_categories() returned rows.
if not df_category.empty:
    cols = ['Category', 'Avg/Month', 'Coverage (%)', 'Trend Slope', 'Status', 'Trend']

    # The rename affects the displayed copy only; df_category keeps 'Status'.
    df_display = df_category[cols].rename(columns={"Status": "Status Issue for Next Month"})

    # escape=False lets the markup stored in the table cells render as HTML;
    # the table id matches the one passed to makeSortable() in the page script.
    html = df_display.to_html(escape=False, index=False, table_id="tbl-categories")
    st.markdown(f"<div class='predictive-table-wrapper'>{html}</div>", unsafe_allow_html=True)
# Nothing is rendered in place of the table when df_category is empty
# (the filter note and the empty-state message are currently disabled).

# Closing </div>; presumably pairs with the 'predictive-panel' div opened
# before the Panel 4 header — confirm against the full file.
st.markdown("</div>", unsafe_allow_html=True)
1791
+
1792
+
1793
+ # =================== WHITEBOARD STYLE CHART FOR PANEL 4 ===================
1794
+ st.markdown("<h4 style='text-align: center; color: #2c3e50;'>Whiteboard Insight: Trend vs Frequency</h4>", unsafe_allow_html=True)
1795
+
1796
+ # Buat chart scatter dengan gaya whiteboard
1797
+ if not df_category.empty:
1798
  # Ambil data untuk scatter
1799
+ df_plot = df_category.copy()
1800
  df_plot['Size'] = df_plot['Avg/Month'] # Ukuran lingkaran = frekuensi (Avg/Month)
1801
  df_plot['Y'] = df_plot['Trend Slope'] # Y = Trend Slope
1802
 
 
1822
 
1823
  # Layout
1824
  fig.update_layout(
1825
+ title=dict(text="<b>Issue Category Trend vs Frequency (Non-Positive)</b>", x=0.5, y=0.95),
1826
  xaxis=dict(
1827
  title="Category",
1828
  tickangle=45,
 
1894
  )
1895
  st.markdown(insight_text, unsafe_allow_html=True)
1896
  else:
1897
+ st.info("No data available for non-positive issue categories with 100% coverage and positive trend.")
 
1898
 
1899
  st.markdown("<h3 class='section-title'>OBJECTIVE 7 - Insight and Recommendation</h3>", unsafe_allow_html=True)
1900