SHELLAPANDIANGANHUNGING committed on
Commit
d6e30c9
·
verified ·
1 Parent(s): 6f69822

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -38
app.py CHANGED
@@ -1058,73 +1058,78 @@ with col_3d:
1058
 
1059
 
1060
  ####OBJECTIVE 4
 
1061
  try:
1062
  from wordcloud import WordCloud
1063
  import matplotlib.pyplot as plt
1064
  WORDCLOUD_AVAILABLE = True
1065
  except ImportError:
1066
  WORDCLOUD_AVAILABLE = False
1067
- # Tidak perlu warning di sini, karena akan ditampilkan di bawah jika digunakan
1068
 
1069
- st.markdown("<h3 class='section-title'>4. Global Text Insights (Word Clouds)</h3>", unsafe_allow_html=True)
 
1070
 
1071
  if WORDCLOUD_AVAILABLE:
1072
- # Filter data: hanya yang temuan_kategori != 'Positive'
1073
- df_filtered_kategori = df_local[df_local['temuan_kategori'] != 'Positive'] if 'temuan_kategori' in df_local.columns else df_local
 
1074
 
1075
  if df_filtered_kategori.empty:
1076
  st.warning("No data available after filtering out 'Positive' category.")
1077
  else:
 
1078
  col_wc1 = st.columns(1)
1079
 
1080
- # Fungsi untuk membuat dan menampilkan wordcloud
1081
  def generate_wordcloud(text_data, title, col):
1082
- # Periksa apakah text_data adalah Series kosong atau None
1083
  if text_data is None or text_data.empty:
1084
- col.warning(f"No data available in series for {title}.")
1085
  return
1086
- # Periksa apakah semua nilai adalah NaN
1087
  if text_data.isna().all():
1088
  col.warning(f"All data is NaN for {title}.")
1089
  return
1090
- # Gabung semua teks menjadi satu string
1091
- text = ' '.join(text_data.dropna().astype(str))
1092
- # Bersihkan teks dari karakter non-alfanumerik (opsional)
1093
  import re
 
1094
  text = re.sub(r'[^a-zA-Z\s]', ' ', text)
1095
- if text.strip(): # Pastikan teks tidak kosong setelah pembersihan
1096
- # Buat WordCloud
1097
- wordcloud = WordCloud(
1098
- width=100,
1099
- height=50,
1100
- background_color='white',
1101
- colormap='viridis',
1102
- max_words=1000,
1103
- relative_scaling=0.5,
1104
- random_state=42
1105
- ).generate(text)
1106
-
1107
- # Plot menggunakan matplotlib
1108
- fig, ax = plt.subplots(figsize=(3, 2))
1109
- ax.imshow(wordcloud, interpolation='bilinear')
1110
- ax.axis('off')
1111
- ax.set_title(title, fontsize=12)
1112
- plt.tight_layout()
1113
-
1114
- # Tampilkan di Streamlit
1115
- col.pyplot(fig, use_container_width=True)
1116
- else:
1117
- col.warning(f"No valid text data for {title} after cleaning.")
1118
 
1119
- # Kolom Temuan Nama (setelah filter)
1120
- with col_wc1[0]: # 🔥 Perbaikan: akses kolom dengan indeks [0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
  if 'keyword_kategori' in df_filtered_kategori.columns:
1122
- generate_wordcloud(df_filtered_kategori['keyword_kategori'], col_wc1[0])
 
 
1123
  else:
1124
- col_wc1[0].warning("Column 'temuan_nama' not available.")
1125
 
1126
  else:
1127
  st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
 
1128
  # =================== 5. Matrix (Tetap Dipertahankan) ===================
1129
  st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True)
1130
 
 
1058
 
1059
 
1060
  ####OBJECTIVE 4
1061
+ # ================= WORDCLOUD (HIGH QUALITY) ==================
1062
  try:
1063
  from wordcloud import WordCloud
1064
  import matplotlib.pyplot as plt
1065
  WORDCLOUD_AVAILABLE = True
1066
  except ImportError:
1067
  WORDCLOUD_AVAILABLE = False
 
1068
 
1069
+ st.markdown("<h3 class='section-title'>4. Global Text Insights (Word Clouds)</h3>",
1070
+ unsafe_allow_html=True)
1071
 
1072
  if WORDCLOUD_AVAILABLE:
1073
+
1074
+ df_filtered_kategori = df_local[df_local['temuan_kategori'] != 'Positive'] \
1075
+ if 'temuan_kategori' in df_local.columns else df_local
1076
 
1077
  if df_filtered_kategori.empty:
1078
  st.warning("No data available after filtering out 'Positive' category.")
1079
  else:
1080
+
1081
  col_wc1 = st.columns(1)
1082
 
1083
+ # === FUNCTION: GENERATE HIGH QUALITY WORDCLOUD ===
1084
  def generate_wordcloud(text_data, title, col):
1085
+
1086
  if text_data is None or text_data.empty:
1087
+ col.warning(f"No data available for {title}.")
1088
  return
1089
+
1090
  if text_data.isna().all():
1091
  col.warning(f"All data is NaN for {title}.")
1092
  return
1093
+
1094
+ # Join all non-null text values into a single string
 
1095
  import re
1096
+ text = ' '.join(text_data.dropna().astype(str))
1097
  text = re.sub(r'[^a-zA-Z\s]', ' ', text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
 
1099
+ if not text.strip():
1100
+ col.warning(f"No valid text remaining for {title}.")
1101
+ return
1102
+
1103
+ # === HIGH RESOLUTION WORDCLOUD ===
1104
+ wordcloud = WordCloud(
1105
+ width=1600, # high resolution
1106
+ height=800,
1107
+ background_color='white',
1108
+ colormap='viridis',
1109
+ max_words=1000,
1110
+ random_state=42
1111
+ ).generate(text)
1112
+
1113
+ fig, ax = plt.subplots(figsize=(5, 3), dpi=200) # sharp rendering
1114
+ ax.imshow(wordcloud, interpolation='bilinear')
1115
+ ax.axis('off')
1116
+ ax.set_title(title, fontsize=16)
1117
+ plt.tight_layout()
1118
+
1119
+ col.pyplot(fig, use_container_width=True)
1120
+
1121
+ # === CALL WORDCLOUD ===
1122
+ with col_wc1[0]:
1123
  if 'keyword_kategori' in df_filtered_kategori.columns:
1124
+ generate_wordcloud(df_filtered_kategori['keyword_kategori'],
1125
+ "WordCloud Temuan",
1126
+ col_wc1[0])
1127
  else:
1128
+ col_wc1[0].warning("Column 'keyword_kategori' not available.")
1129
 
1130
  else:
1131
  st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
1132
+
1133
  # =================== 5. Matrix (Tetap Dipertahankan) ===================
1134
  st.markdown("<h3 class='section-title'>OBJECTIVE 5 - Findings vs Lead Time: Which Companies Move Slow?</h3>", unsafe_allow_html=True)
1135