Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,6 +9,7 @@ import numpy as np
|
|
| 9 |
import networkx as nx
|
| 10 |
from lime.lime_text import LimeTextExplainer
|
| 11 |
import shap
|
|
|
|
| 12 |
|
| 13 |
# ----------------- Streamlit Page Config -----------------
|
| 14 |
st.set_page_config(
|
|
@@ -38,7 +39,6 @@ class UrduTextSummarizer:
|
|
| 38 |
'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
|
| 39 |
|
| 40 |
def tokenize(self, sentence: str) -> List[str]:
|
| 41 |
-
# Handle both string and bytes inputs
|
| 42 |
if isinstance(sentence, bytes):
|
| 43 |
sentence = sentence.decode('utf-8', errors='ignore')
|
| 44 |
elif not isinstance(sentence, str):
|
|
@@ -178,12 +178,12 @@ class UrduTextSummarizer:
|
|
| 178 |
norm = np.linalg.norm(temp_tfidf[0])
|
| 179 |
scores.append([norm])
|
| 180 |
except Exception as e:
|
| 181 |
-
scores.append([0.0])
|
| 182 |
return np.array(scores)
|
| 183 |
|
| 184 |
# LIME Explainer
|
| 185 |
lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
|
| 186 |
-
top_indices = [s['position'] - 1 for s in sorted_scores[:2]] #
|
| 187 |
for idx in top_indices:
|
| 188 |
try:
|
| 189 |
exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
|
|
@@ -227,7 +227,7 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
| 227 |
# --- Sentences Tab ---
|
| 228 |
with tabs[1]:
|
| 229 |
st.subheader("جملوں کی اہمیت")
|
| 230 |
-
sorted_sents = sorted(result['sentences'], key=lambda x:x['score'], reverse=True)
|
| 231 |
for s in sorted_sents:
|
| 232 |
bar_width = min(int(s['score']*100), 100)
|
| 233 |
st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
|
|
@@ -239,7 +239,7 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
| 239 |
st.dataframe(df_kw)
|
| 240 |
fig_bubble = px.scatter(df_kw, x='word', y='frequency', size='frequency', color='importance',
|
| 241 |
size_max=60, color_continuous_scale='Viridis', title='Keywords by Frequency & Importance')
|
| 242 |
-
st.plotly_chart(fig_bubble, use_container_width=True)
|
| 243 |
|
| 244 |
# --- Graphs Tab ---
|
| 245 |
with tabs[3]:
|
|
@@ -252,30 +252,30 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
| 252 |
colorscale='Viridis'
|
| 253 |
))
|
| 254 |
heatmap.update_layout(title="Sentence Score Intensity")
|
| 255 |
-
st.plotly_chart(heatmap, use_container_width=True)
|
| 256 |
|
| 257 |
word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
|
| 258 |
fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
|
| 259 |
labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
|
| 260 |
-
st.plotly_chart(fig_words, use_container_width=True)
|
| 261 |
|
| 262 |
# --- Explainability Tab ---
|
| 263 |
with tabs[4]:
|
| 264 |
st.subheader("LIME Explanations (Word Contributions)")
|
| 265 |
-
for exp in result['explanations']['lime']:
|
| 266 |
-
st.markdown(f"**جملہ:** {exp['sentence']}")
|
| 267 |
df_lime = pd.DataFrame(exp['exp'], columns=['Word', 'Contribution'])
|
| 268 |
-
fig_lime = px.bar(df_lime, x='Contribution', y='Word', orientation='h', title='LIME Feature Contributions')
|
| 269 |
-
st.plotly_chart(fig_lime)
|
| 270 |
|
| 271 |
st.subheader("SHAP Explanations (Shapley Values)")
|
| 272 |
-
for exp in result['explanations']['shap']:
|
| 273 |
-
st.markdown(f"**جملہ:** {exp['sentence']}")
|
| 274 |
if 'error' in exp:
|
| 275 |
st.error(f"SHAP failed: {exp['error']}")
|
| 276 |
else:
|
| 277 |
df_shap = pd.DataFrame({'Word': exp['sentence'].split()[:len(exp['shap_values'])], 'SHAP Value': exp['shap_values']})
|
| 278 |
-
fig_shap = px.bar(df_shap, x='SHAP Value', y='Word', orientation='h', title='SHAP Feature Contributions')
|
| 279 |
-
st.plotly_chart(fig_shap)
|
| 280 |
|
| 281 |
st.info(f"⏱ خلاصہ تیار کرنے میں وقت: {result['duration']:.2f} سیکنڈ")
|
|
|
|
| 9 |
import networkx as nx
|
| 10 |
from lime.lime_text import LimeTextExplainer
|
| 11 |
import shap
|
| 12 |
+
import uuid
|
| 13 |
|
| 14 |
# ----------------- Streamlit Page Config -----------------
|
| 15 |
st.set_page_config(
|
|
|
|
| 39 |
'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
|
| 40 |
|
| 41 |
def tokenize(self, sentence: str) -> List[str]:
|
|
|
|
| 42 |
if isinstance(sentence, bytes):
|
| 43 |
sentence = sentence.decode('utf-8', errors='ignore')
|
| 44 |
elif not isinstance(sentence, str):
|
|
|
|
| 178 |
norm = np.linalg.norm(temp_tfidf[0])
|
| 179 |
scores.append([norm])
|
| 180 |
except Exception as e:
|
| 181 |
+
scores.append([0.0])
|
| 182 |
return np.array(scores)
|
| 183 |
|
| 184 |
# LIME Explainer
|
| 185 |
lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
|
| 186 |
+
top_indices = [s['position'] - 1 for s in sorted_scores[:2]] # Top 2 sentences
|
| 187 |
for idx in top_indices:
|
| 188 |
try:
|
| 189 |
exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
|
|
|
|
| 227 |
# --- Sentences Tab ---
|
| 228 |
with tabs[1]:
|
| 229 |
st.subheader("جملوں کی اہمیت")
|
| 230 |
+
sorted_sents = sorted(result['sentences'], key=lambda x: x['score'], reverse=True)
|
| 231 |
for s in sorted_sents:
|
| 232 |
bar_width = min(int(s['score']*100), 100)
|
| 233 |
st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
|
|
|
|
| 239 |
st.dataframe(df_kw)
|
| 240 |
fig_bubble = px.scatter(df_kw, x='word', y='frequency', size='frequency', color='importance',
|
| 241 |
size_max=60, color_continuous_scale='Viridis', title='Keywords by Frequency & Importance')
|
| 242 |
+
st.plotly_chart(fig_bubble, use_container_width=True, key="keyword_bubble")
|
| 243 |
|
| 244 |
# --- Graphs Tab ---
|
| 245 |
with tabs[3]:
|
|
|
|
| 252 |
colorscale='Viridis'
|
| 253 |
))
|
| 254 |
heatmap.update_layout(title="Sentence Score Intensity")
|
| 255 |
+
st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
|
| 256 |
|
| 257 |
word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
|
| 258 |
fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
|
| 259 |
labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
|
| 260 |
+
st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
|
| 261 |
|
| 262 |
# --- Explainability Tab ---
|
| 263 |
with tabs[4]:
|
| 264 |
st.subheader("LIME Explanations (Word Contributions)")
|
| 265 |
+
for i, exp in enumerate(result['explanations']['lime']):
|
| 266 |
+
st.markdown(f"**جملہ {i+1}:** {exp['sentence']}")
|
| 267 |
df_lime = pd.DataFrame(exp['exp'], columns=['Word', 'Contribution'])
|
| 268 |
+
fig_lime = px.bar(df_lime, x='Contribution', y='Word', orientation='h', title=f'LIME Feature Contributions (Sentence {i+1})')
|
| 269 |
+
st.plotly_chart(fig_lime, use_container_width=True, key=f"lime_bar_{i}")
|
| 270 |
|
| 271 |
st.subheader("SHAP Explanations (Shapley Values)")
|
| 272 |
+
for i, exp in enumerate(result['explanations']['shap']):
|
| 273 |
+
st.markdown(f"**جملہ {i+1}:** {exp['sentence']}")
|
| 274 |
if 'error' in exp:
|
| 275 |
st.error(f"SHAP failed: {exp['error']}")
|
| 276 |
else:
|
| 277 |
df_shap = pd.DataFrame({'Word': exp['sentence'].split()[:len(exp['shap_values'])], 'SHAP Value': exp['shap_values']})
|
| 278 |
+
fig_shap = px.bar(df_shap, x='SHAP Value', y='Word', orientation='h', title=f'SHAP Feature Contributions (Sentence {i+1})')
|
| 279 |
+
st.plotly_chart(fig_shap, use_container_width=True, key=f"shap_bar_{i}")
|
| 280 |
|
| 281 |
st.info(f"⏱ خلاصہ تیار کرنے میں وقت: {result['duration']:.2f} سیکنڈ")
|