Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1050,11 +1050,10 @@ with col_3d:
|
|
| 1050 |
|
| 1051 |
####OBJECTIVE 4
|
| 1052 |
# ================= WORDCLOUD (HIGH QUALITY) ==================
|
| 1053 |
-
# ================= WORDCLOUD (HIGH QUALITY) ==================
|
| 1054 |
-
# ================= WORDCLOUD (HIGH QUALITY) ==================
|
| 1055 |
# Optional plotting dependencies: record whether they can be imported so the
# word cloud section can degrade to an informational message instead of
# crashing the app.
try:
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt
except ImportError:
    WORDCLOUD_AVAILABLE = False
else:
    # Reached only when every import above succeeded.
    WORDCLOUD_AVAILABLE = True
|
|
@@ -1064,60 +1063,71 @@ st.markdown("<h3 class='section-title'>OBJECTIVE 4 - What Unsafe Issues Appear M
|
|
| 1064 |
|
| 1065 |
if WORDCLOUD_AVAILABLE:
|
| 1066 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1067 |
df_filtered_kategori = df_local[df_local['temuan_kategori'] != 'Positive'] \
|
| 1068 |
if 'temuan_kategori' in df_local.columns else df_local
|
| 1069 |
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
else:
|
| 1073 |
-
|
| 1074 |
-
col_wc1 = st.columns(1)
|
| 1075 |
-
|
| 1076 |
-
# === FUNCTION: GENERATE HIGH QUALITY WORDCLOUD ===
|
| 1077 |
-
def generate_wordcloud(text_data, col):
|
| 1078 |
-
|
| 1079 |
-
if text_data is None or text_data.empty:
|
| 1080 |
-
col.warning("No data available for WordCloud.")
|
| 1081 |
-
return
|
| 1082 |
-
|
| 1083 |
-
if text_data.isna().all():
|
| 1084 |
-
col.warning("All data is NaN.")
|
| 1085 |
-
return
|
| 1086 |
-
|
| 1087 |
-
# Gabungkan text
|
| 1088 |
-
import re
|
| 1089 |
-
text = ' '.join(text_data.dropna().astype(str))
|
| 1090 |
-
text = re.sub(r'[^a-zA-Z\s]', ' ', text)
|
| 1091 |
-
|
| 1092 |
-
if not text.strip():
|
| 1093 |
-
col.warning("No valid text remaining after cleaning.")
|
| 1094 |
-
return
|
| 1095 |
-
|
| 1096 |
-
# === HIGH RESOLUTION WORDCLOUD ===
|
| 1097 |
-
wordcloud = WordCloud(
|
| 1098 |
-
width=1600, # resolusi besar agar HD
|
| 1099 |
-
height=800,
|
| 1100 |
-
background_color='white',
|
| 1101 |
-
colormap='viridis',
|
| 1102 |
-
max_words=1000,
|
| 1103 |
-
random_state=42
|
| 1104 |
-
).generate(text)
|
| 1105 |
-
|
| 1106 |
-
# === SMALL BUT SHARP OUTPUT ===
|
| 1107 |
-
fig, ax = plt.subplots(figsize=(3, 2), dpi=200)
|
| 1108 |
-
ax.imshow(wordcloud, interpolation='bilinear')
|
| 1109 |
-
ax.axis('off')
|
| 1110 |
-
plt.tight_layout()
|
| 1111 |
|
| 1112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1113 |
|
| 1114 |
-
|
| 1115 |
-
|
|
|
|
|
|
|
|
|
|
| 1116 |
if 'keyword_kategori' in df_filtered_kategori.columns:
|
| 1117 |
-
|
| 1118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1119 |
else:
|
| 1120 |
-
|
| 1121 |
|
| 1122 |
else:
|
| 1123 |
st.info("WordCloud library not installed. Install `wordcloud` and `matplotlib` to enable this feature.")
|
|
|
|
| 1050 |
|
| 1051 |
####OBJECTIVE 4
|
| 1052 |
# ================= WORDCLOUD (HIGH QUALITY) ==================
|
|
|
|
|
|
|
| 1053 |
# Optional plotting dependencies for this section. The flag is True only when
# wordcloud, matplotlib AND plotly all import successfully; a single missing
# package disables the whole section below.
try:
    from wordcloud import WordCloud
    import matplotlib.pyplot as plt
    import plotly.express as px
except ImportError:
    WORDCLOUD_AVAILABLE = False
else:
    # Reached only when every import above succeeded.
    WORDCLOUD_AVAILABLE = True
|
|
|
|
| 1063 |
|
| 1064 |
if WORDCLOUD_AVAILABLE:
    # Local import: `re` is only needed for the keyword clean-up below.
    import re

    # Pie chart input: all rows, regardless of category.
    df_all_kategori = df_local.copy()

    # Word cloud input: only rows whose category is not 'Positive'. When the
    # category column is missing, fall back to the unfiltered frame.
    if 'temuan_kategori' in df_local.columns:
        df_filtered_kategori = df_local[df_local['temuan_kategori'] != 'Positive']
    else:
        df_filtered_kategori = df_local

    # Two side-by-side columns: pie chart (left), word cloud (right).
    col1, col2 = st.columns(2)

    # === PIE CHART: distribution of every temuan_kategori value ===
    with col1:
        if 'temuan_kategori' in df_all_kategori.columns:
            # Number of rows per category.
            category_counts = df_all_kategori['temuan_kategori'].value_counts()

            if not category_counts.empty:
                fig_pie = px.pie(
                    names=category_counts.index,
                    values=category_counts.values,
                    title="Distribution of All Issue Categories",
                    color_discrete_sequence=px.colors.sequential.Viridis,
                )
                fig_pie.update_traces(textposition='inside', textinfo='percent+label')
                fig_pie.update_layout(height=500)
                st.plotly_chart(fig_pie, use_container_width=True)
            else:
                st.warning("No data available for pie chart.")
        else:
            st.warning("Column 'temuan_kategori' not available.")

    # === WORDCLOUD: non-'Positive' rows only, built from keyword_kategori ===
    with col2:
        if df_filtered_kategori.empty:
            st.warning("No data available after filtering out 'Positive' category.")
        elif 'keyword_kategori' not in df_filtered_kategori.columns:
            st.warning("Column 'keyword_kategori' not available.")
        else:
            # Join all non-null keywords into one string, then replace every
            # character that is not an ASCII letter or whitespace with a space.
            text = ' '.join(df_filtered_kategori['keyword_kategori'].dropna().astype(str))
            text = re.sub(r'[^a-zA-Z\s]', ' ', text)

            if text.strip():
                wordcloud = WordCloud(
                    width=1600,  # large canvas so the rendered image stays sharp
                    height=800,
                    background_color='white',
                    colormap='viridis',
                    max_words=1000,
                    random_state=42,  # fixed seed for a reproducible layout
                ).generate(text)

                # Small figure at high DPI: compact on the page but still crisp.
                fig, ax = plt.subplots(figsize=(3, 2), dpi=200)
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis('off')
                fig.tight_layout()

                st.pyplot(fig, use_container_width=True)
                # pyplot keeps a reference to every figure it creates; close
                # it once rendered so figures do not pile up across Streamlit
                # reruns.
                plt.close(fig)
            else:
                st.warning("No valid text remaining after cleaning.")
else:
    # The availability flag is set by a single import guard that also imports
    # plotly, so all three packages are named here.
    st.info("WordCloud dependencies not installed. Install `wordcloud`, `matplotlib` and `plotly` to enable this feature.")
|