SHELLAPANDIANGANHUNGING committed on
Commit
91c6c39
·
verified ·
1 Parent(s): 52d12ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -122
app.py CHANGED
@@ -375,8 +375,6 @@ st.markdown(
375
  )
376
 
377
  df_local = df_filtered.copy()
378
-
379
- # Tambah kolom bulan
380
  df_local['created_month'] = df_local['created_at'].dt.to_period('M')
381
 
382
  if 'temuan_kode_distrik' in df_local.columns:
@@ -386,146 +384,130 @@ if 'temuan_kode_distrik' in df_local.columns:
386
  else 'Other'
387
  )
388
 
389
- # Otomatis bagi dataset berdasarkan Area_Type
390
  df_pg = df_local[df_local['Area_Type'] == 'PG'].copy()
391
  df_um = df_local[df_local['Area_Type'] == 'UM'].copy()
392
 
393
- # --- Fungsi untuk menghitung rasio perusahaan ---
394
  def calculate_avg_ratio_per_company(df_area):
395
  if df_area.empty:
396
  return pd.DataFrame()
397
- # Hitung temuan per bulan per perusahaan
398
- findings_by_company_month = df_area.groupby(['created_month', 'nama_perusahaan']).size().reset_index(name='findings_count')
399
- # Hitung jumlah orang unik per bulan per perusahaan
400
- creators_by_company_month = df_area.groupby(['created_month', 'nama_perusahaan'])['creator_nid'].nunique().reset_index(name='unique_creators')
401
- # Gabung
402
- merged = findings_by_company_month.merge(creators_by_company_month, on=['created_month', 'nama_perusahaan'], how='outer')
403
  merged = merged.fillna({'findings_count': 0, 'unique_creators': 0})
404
- # Filter untuk menghindari pembagian dengan nol
405
  merged = merged[merged['unique_creators'] > 0]
406
- # Hitung rasio (ignore NaN)
407
  merged['ratio'] = merged['findings_count'] / merged['unique_creators']
408
  merged['ratio'] = merged['ratio'].replace([np.inf, -np.inf], np.nan)
409
 
410
- # Jika tidak ada baris valid setelah filter, kembalikan DataFrame kosong
411
- if merged.empty:
412
- return pd.DataFrame()
413
-
414
- # Rata-rata bulanan per perusahaan
415
  avg_ratio = merged.groupby('nama_perusahaan')['ratio'].mean().reset_index(name='avg_monthly_ratio')
 
416
 
417
- # Jika hasil akhirnya hanya NaN, kembalikan DataFrame kosong
418
- if avg_ratio['avg_monthly_ratio'].isna().all():
419
- return pd.DataFrame()
420
-
421
- return avg_ratio
422
-
423
- # Hitung untuk masing-masing area
424
  avg_ratio_pg = calculate_avg_ratio_per_company(df_pg)
425
  avg_ratio_um = calculate_avg_ratio_per_company(df_um)
426
 
427
- # Palet biru pastel konsisten (soft, harmonis)
428
- pastel_blues = [
429
- "#A8DADC", # light cyan
430
- "#E2ECE9", # pale mint
431
- "#CCE4E7", # soft sky
432
- "#B5D9D9", # muted aqua
433
- "#98C8D1", # gentle teal
434
- "#7FB9C1", # calm blue
435
- "#6BA9B3" # deep pastel teal
436
- ]
437
- pln_color = "#FFD700" # Kuning PLN
438
-
439
- def assign_colors(df):
440
- colors = []
441
- blue_idx = 0
442
- for company in df['nama_perusahaan']:
443
- if 'PLN' in str(company).upper():
444
- colors.append(pln_color)
445
- else:
446
- colors.append(pastel_blues[blue_idx % len(pastel_blues)])
447
- blue_idx += 1
448
- return colors
449
 
450
- # Fungsi untuk membuat polar bar chart
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  def create_polar_bar_chart(df, area_name):
452
  if df.empty:
453
  return None
454
 
455
- # Urutkan berdasarkan rasio untuk tampilan konsisten
456
- df = df.sort_values('avg_monthly_ratio', ascending=True)
457
  companies = df['nama_perusahaan'].tolist()
458
  ratios = df['avg_monthly_ratio'].tolist()
459
- colors = assign_colors(df)
460
-
461
- # Hitung total temuan untuk menghitung sudut (proporsi)
462
  total_findings = df_local[df_local['Area_Type'] == area_name].groupby('nama_perusahaan').size()
463
- angles = []
464
- total_angle = 0
465
- for comp in companies:
466
- count = total_findings.get(comp, 0)
467
- angle = (count / total_findings.sum()) * 360 if total_findings.sum() > 0 else 0
468
- angles.append(angle)
469
-
470
- # Hitung sudut tengah untuk setiap bar
471
  mid_angles = []
472
- current_angle = 0
473
  for a in angles:
474
- mid_angles.append(current_angle + a / 2)
475
- current_angle += a
476
-
477
  fig = go.Figure()
478
-
479
- # 🔥 Tambahkan satu trace untuk setiap perusahaan agar muncul di legend
480
- for i, (comp, ratio, color, angle) in enumerate(zip(companies, ratios, colors, angles)):
481
- fig.add_trace(go.Barpolar(
482
- r=[ratio],
483
- theta=[mid_angles[i]],
484
- width=[angle],
485
- marker_color=[color],
486
- marker_line_color="white",
487
- marker_line_width=1.2,
488
- opacity=0.9,
489
- hovertemplate="<b>%{text}</b><br>Avg Ratio: %{r:.2f}<extra></extra>", # 🔥 Hapus [0]
490
- text=[comp],
491
- name=comp,
 
 
 
 
 
 
 
 
 
 
492
  showlegend=True
493
  ))
494
 
495
  fig.update_layout(
496
  title=f'{area_name} Area',
497
  polar=dict(
498
- bgcolor="white",
499
  radialaxis=dict(
500
  visible=True,
501
- tickfont=dict(size=9, color="gray"), # 🔥 Warna angka radial jadi putih
502
  gridcolor='lightgray',
503
- title=dict(text='Avg Finding/Person', font=dict(size=10, color="white")) # 🔥 Warna judul radial
504
  ),
505
  angularaxis=dict(
506
  visible=True,
507
  direction='clockwise',
508
- tickfont=dict(size=9, color="white"), # 🔥 Warna angka derajat jadi putih
509
  showline=False,
510
- linecolor="lightgray", # 🔥 Warna garis derajat jadi putih
511
- gridcolor="rgba(255,255,255,0.2)" # 🔥 Warna grid derajat jadi transparan putih
512
  ),
513
  ),
514
- showlegend=True, # 🔥 Aktifkan legend
515
  legend=dict(
516
  orientation="v",
517
- yanchor="top", # 🔥 Atas
518
  y=1,
519
- xanchor="right", # 🔥 Kanan
520
  x=1.02,
521
  font=dict(size=10)
522
  ),
523
- margin=dict(t=40, b=20, l=20, r=20),
524
- height=400,
525
- paper_bgcolor="rgba(0,0,0,0)",
526
- plot_bgcolor="rgba(0,0,0,0)"
527
  )
528
-
529
  return fig
530
 
531
  # Plot
@@ -536,54 +518,43 @@ if 'temuan_kode_distrik' in df_local.columns:
536
  fig_pg = create_polar_bar_chart(avg_ratio_pg, 'PG')
537
  if fig_pg:
538
  st.plotly_chart(fig_pg, use_container_width=True)
539
-
540
- # AI Insight untuk PG
541
  if not avg_ratio_pg.empty:
542
- top_company_pg = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmax()]
543
- low_company_pg = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmin()]
544
-
545
  st.markdown("### Insight")
546
- insight_text = (
547
  f"<div class='ai-insight'>"
548
- f"In PG Area, <strong>{top_company_pg['nama_perusahaan']}</strong> has the highest average finding-to-person ratio "
549
- f"(<strong>{top_company_pg['avg_monthly_ratio']:.2f}</strong>), indicating potentially high exposure or active reporting. "
550
- f"Consider reviewing their operational procedures. "
551
- f"Conversely, <strong>{low_company_pg['nama_perusahaan']}</strong> has the lowest ratio "
552
- f"(<strong>{low_company_pg['avg_monthly_ratio']:.2f}</strong>), suggesting the need to actively improve reporting frequency."
553
- f"</div>"
554
  )
555
- st.markdown(insight_text, unsafe_allow_html=True)
556
  else:
557
- st.warning("No data for PG area or all ratios are NaN.")
558
 
559
  with col2:
560
  st.markdown("<h5>Unit Maintenance: Avg Monthly Finding by Company</h5>", unsafe_allow_html=True)
561
  fig_um = create_polar_bar_chart(avg_ratio_um, 'UM')
562
  if fig_um:
563
  st.plotly_chart(fig_um, use_container_width=True)
564
-
565
- # AI Insight untuk UM
566
  if not avg_ratio_um.empty:
567
- top_company_um = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmax()]
568
- low_company_um = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmin()]
569
-
570
  st.markdown("### Insight")
571
- insight_text = (
572
  f"<div class='ai-insight'>"
573
- f"In UM Area, all companies show almost the same average finding per person ratio"
574
- # f"(<strong>{top_company_um['avg_monthly_ratio']:.2f}</strong>), warranting a focused safety audit. "
575
- # f"<strong>{low_company_um['nama_perusahaan']}</strong> shows the lowest ratio "
576
- # f"(<strong>{low_company_um['avg_monthly_ratio']:.2f}</strong>), which could reflect strong safety practices or requires verification of reporting completeness."
577
- f"</div>"
578
  )
579
- st.markdown(insight_text, unsafe_allow_html=True)
580
  else:
581
- st.warning("No data for UM area or all ratios are NaN.")
582
  else:
583
- st.error("Column 'temuan_kode_distrik' not found in the data. Cannot determine PG/UM areas.")
584
  st.stop()
585
-
586
-
587
  # =================== OBJECTIVE 2 — Active vs Inactive Locations (Treemap with Color Gradient) ===================
588
  st.markdown(
589
  """
 
375
  )
376
 
377
  df_local = df_filtered.copy()
 
 
378
  df_local['created_month'] = df_local['created_at'].dt.to_period('M')
379
 
380
  if 'temuan_kode_distrik' in df_local.columns:
 
384
  else 'Other'
385
  )
386
 
 
387
  df_pg = df_local[df_local['Area_Type'] == 'PG'].copy()
388
  df_um = df_local[df_local['Area_Type'] == 'UM'].copy()
389
 
390
+ # --- Hitung rasio per perusahaan ---
391
  def calculate_avg_ratio_per_company(df_area):
392
  if df_area.empty:
393
  return pd.DataFrame()
394
+ findings = df_area.groupby(['created_month', 'nama_perusahaan']).size().reset_index(name='findings_count')
395
+ creators = df_area.groupby(['created_month', 'nama_perusahaan'])['creator_nid'].nunique().reset_index(name='unique_creators')
396
+ merged = findings.merge(creators, on=['created_month', 'nama_perusahaan'], how='outer')
 
 
 
397
  merged = merged.fillna({'findings_count': 0, 'unique_creators': 0})
 
398
  merged = merged[merged['unique_creators'] > 0]
 
399
  merged['ratio'] = merged['findings_count'] / merged['unique_creators']
400
  merged['ratio'] = merged['ratio'].replace([np.inf, -np.inf], np.nan)
401
 
 
 
 
 
 
402
  avg_ratio = merged.groupby('nama_perusahaan')['ratio'].mean().reset_index(name='avg_monthly_ratio')
403
+ return avg_ratio.dropna(subset=['avg_monthly_ratio'])
404
 
 
 
 
 
 
 
 
405
  avg_ratio_pg = calculate_avg_ratio_per_company(df_pg)
406
  avg_ratio_um = calculate_avg_ratio_per_company(df_um)
407
 
408
+ # 🔑 PALET & COLOR MAPPING GLOBAL — KONSISTEN ANTAR CHART
409
+ PLN_COLOR = "#FFD700"
410
+ PASTEL_BLUES = [
411
+ "#A8DADC", "#E2ECE9", "#CCE4E7", "#B5D9D9",
412
+ "#98C8D1", "#7FB9C1", "#6BA9B3", "#5A9CB5", "#4A8FA7", "#3A8399"
413
+ ] # diperpanjang sedikit untuk antisipasi banyak kontraktor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
 
415
+ # 🔥 Dapatkan daftar *semua* perusahaan non-PLN unik (dari seluruh data), urut alfabetis → assign warna deterministik
416
+ all_companies = pd.concat([avg_ratio_pg, avg_ratio_um])['nama_perusahaan'].dropna().unique()
417
+ non_pln_companies = sorted([c for c in all_companies if 'PLN' not in str(c).upper()])
418
+
419
+ # Mapping: company → color (PLN khusus, lainnya dari palet biru berurut)
420
+ COMPANY_COLOR_MAP = {}
421
+ for i, company in enumerate(non_pln_companies):
422
+ COMPANY_COLOR_MAP[company] = PASTEL_BLUES[i % len(PASTEL_BLUES)]
423
+ # Pastikan PLN juga masuk mapping (jika muncul)
424
+ for company in all_companies:
425
+ if 'PLN' in str(company).upper():
426
+ COMPANY_COLOR_MAP[company] = PLN_COLOR
427
+
428
+ # 🔥 Fungsi helper: dapatkan warna berdasarkan nama perusahaan (konsisten!)
429
+ def get_color(company_name):
430
+ return COMPANY_COLOR_MAP.get(company_name, "#CCCCCC") # fallback abu-abu
431
+
432
+ # 🔥 Fungsi chart — pakai mapping global
433
  def create_polar_bar_chart(df, area_name):
434
  if df.empty:
435
  return None
436
 
437
+ # Urutkan untuk stabilitas visual (misal: ascending ratio)
438
+ df = df.sort_values('avg_monthly_ratio', ascending=True).reset_index(drop=True)
439
  companies = df['nama_perusahaan'].tolist()
440
  ratios = df['avg_monthly_ratio'].tolist()
441
+ colors = [get_color(comp) for comp in companies]
442
+
443
+ # Hitung proporsi temuan (untuk lebar bar)
444
  total_findings = df_local[df_local['Area_Type'] == area_name].groupby('nama_perusahaan').size()
445
+ angles = [total_findings.get(comp, 0) / total_findings.sum() * 360 if total_findings.sum() > 0 else 0 for comp in companies]
446
+
447
+ # Hitung posisi tengah
 
 
 
 
 
448
  mid_angles = []
449
+ current = 0
450
  for a in angles:
451
+ mid_angles.append(current + a / 2)
452
+ current += a
453
+
454
  fig = go.Figure()
455
+
456
+ # Satu trace saja lebih clean, legend bisa diatur manual jika perlu
457
+ fig.add_trace(go.Barpolar(
458
+ r=ratios,
459
+ theta=mid_angles,
460
+ width=angles,
461
+ marker_color=colors,
462
+ marker_line_color="white",
463
+ marker_line_width=1.2,
464
+ opacity=0.9,
465
+ hovertemplate="<b>%{text}</b><br>Avg Ratio: %{r:.2f}<extra></extra>",
466
+ text=companies,
467
+ showlegend=False # Kita akan buat legend manual yang rapi
468
+ ))
469
+
470
+ # 🔥 LEGEND MANUAL — hanya tampilkan setiap perusahaan sekali, dengan warna konsisten
471
+ # Tambahkan satu scatter "dummy" per perusahaan untuk legend
472
+ for company in sorted(set(companies)):
473
+ color = get_color(company)
474
+ fig.add_trace(go.Scatterpolar(
475
+ r=[None], theta=[None], # invisible
476
+ mode='markers',
477
+ marker=dict(color=color, size=10),
478
+ name=company,
479
  showlegend=True
480
  ))
481
 
482
  fig.update_layout(
483
  title=f'{area_name} Area',
484
  polar=dict(
 
485
  radialaxis=dict(
486
  visible=True,
487
+ tickfont=dict(size=9, color="gray"),
488
  gridcolor='lightgray',
489
+ title=dict(text='Avg Finding/Person', font=dict(size=10, color="gray"))
490
  ),
491
  angularaxis=dict(
492
  visible=True,
493
  direction='clockwise',
494
+ tickfont=dict(size=9, color="gray"),
495
  showline=False,
496
+ gridcolor="lightgray"
 
497
  ),
498
  ),
499
+ showlegend=True,
500
  legend=dict(
501
  orientation="v",
502
+ yanchor="top",
503
  y=1,
504
+ xanchor="right",
505
  x=1.02,
506
  font=dict(size=10)
507
  ),
508
+ height=450,
509
+ margin=dict(t=40, b=20, l=20, r=40)
 
 
510
  )
 
511
  return fig
512
 
513
  # Plot
 
518
  fig_pg = create_polar_bar_chart(avg_ratio_pg, 'PG')
519
  if fig_pg:
520
  st.plotly_chart(fig_pg, use_container_width=True)
 
 
521
  if not avg_ratio_pg.empty:
522
+ top = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmax()]
523
+ low = avg_ratio_pg.loc[avg_ratio_pg['avg_monthly_ratio'].idxmin()]
 
524
  st.markdown("### Insight")
525
+ st.markdown(
526
  f"<div class='ai-insight'>"
527
+ f"In PG Area, <strong>{top['nama_perusahaan']}</strong> has the highest ratio (<strong>{top['avg_monthly_ratio']:.2f}</strong>), "
528
+ f"while <strong>{low['nama_perusahaan']}</strong> has the lowest (<strong>{low['avg_monthly_ratio']:.2f}</strong>). "
529
+ f"Consider cross-learning between them to standardize reporting culture."
530
+ f"</div>",
531
+ unsafe_allow_html=True
 
532
  )
 
533
  else:
534
+ st.warning("No data for PG area.")
535
 
536
  with col2:
537
  st.markdown("<h5>Unit Maintenance: Avg Monthly Finding by Company</h5>", unsafe_allow_html=True)
538
  fig_um = create_polar_bar_chart(avg_ratio_um, 'UM')
539
  if fig_um:
540
  st.plotly_chart(fig_um, use_container_width=True)
 
 
541
  if not avg_ratio_um.empty:
542
+ top = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmax()]
543
+ low = avg_ratio_um.loc[avg_ratio_um['avg_monthly_ratio'].idxmin()]
 
544
  st.markdown("### Insight")
545
+ st.markdown(
546
  f"<div class='ai-insight'>"
547
+ f"In UM Area, <strong>{top['nama_perusahaan']}</strong> leads with ratio <strong>{top['avg_monthly_ratio']:.2f}</strong>, "
548
+ f"and <strong>{low['nama_perusahaan']}</strong> reports least (<strong>{low['avg_monthly_ratio']:.2f}</strong>). "
549
+ f"Check if low reporters have sufficient access to reporting tools or training."
550
+ f"</div>",
551
+ unsafe_allow_html=True
552
  )
 
553
  else:
554
+ st.warning("No data for UM area.")
555
  else:
556
+ st.error("Column 'temuan_kode_distrik' not found.")
557
  st.stop()
 
 
558
  # =================== OBJECTIVE 2 — Active vs Inactive Locations (Treemap with Color Gradient) ===================
559
  st.markdown(
560
  """