Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,12 +4,10 @@ import time
|
|
| 4 |
from typing import List, Dict
|
| 5 |
import pandas as pd
|
| 6 |
import plotly.express as px
|
| 7 |
-
import plotly.graph_objects as go
|
| 8 |
import numpy as np
|
| 9 |
import networkx as nx
|
| 10 |
from lime.lime_text import LimeTextExplainer
|
| 11 |
import shap
|
| 12 |
-
import uuid
|
| 13 |
|
| 14 |
# ----------------- Streamlit Page Config -----------------
|
| 15 |
st.set_page_config(
|
|
@@ -28,6 +26,7 @@ st.markdown("""
|
|
| 28 |
.summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
|
| 29 |
.sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
|
| 30 |
.sentence-text { font-size: 16px; margin-bottom:3px; }
|
|
|
|
| 31 |
</style>
|
| 32 |
""", unsafe_allow_html=True)
|
| 33 |
|
|
@@ -35,8 +34,8 @@ st.markdown("""
|
|
| 35 |
class UrduTextSummarizer:
|
| 36 |
def __init__(self):
|
| 37 |
self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
|
| 38 |
-
|
| 39 |
-
|
| 40 |
|
| 41 |
def tokenize(self, sentence: str) -> List[str]:
|
| 42 |
if isinstance(sentence, bytes):
|
|
@@ -163,7 +162,7 @@ class UrduTextSummarizer:
|
|
| 163 |
try:
|
| 164 |
words = self.tokenize(t)
|
| 165 |
if not words:
|
| 166 |
-
scores.append([0.0])
|
| 167 |
continue
|
| 168 |
temp_sent_words = [words]
|
| 169 |
temp_all_words = list(set(words))
|
|
@@ -176,27 +175,28 @@ class UrduTextSummarizer:
|
|
| 176 |
temp_idf = np.log(1 / (1 + temp_df))
|
| 177 |
temp_tfidf = temp_tf * temp_idf
|
| 178 |
norm = np.linalg.norm(temp_tfidf[0])
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
|
|
|
| 182 |
return np.array(scores)
|
| 183 |
|
| 184 |
# LIME Explainer
|
| 185 |
-
lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
|
| 186 |
-
top_indices = [s['position'] - 1 for s in sorted_scores[:2]]
|
| 187 |
for idx in top_indices:
|
| 188 |
try:
|
| 189 |
exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
|
| 190 |
-
explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list()})
|
| 191 |
except Exception as e:
|
| 192 |
explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
|
| 193 |
|
| 194 |
# SHAP Explainer
|
| 195 |
-
|
| 196 |
-
shap_explainer = shap.KernelExplainer(
|
| 197 |
for idx in top_indices:
|
| 198 |
try:
|
| 199 |
-
shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)
|
| 200 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
|
| 201 |
except Exception as e:
|
| 202 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
|
|
@@ -231,7 +231,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
| 231 |
for s in sorted_sents:
|
| 232 |
bar_width = min(int(s['score']*100), 100)
|
| 233 |
st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
|
| 234 |
-
f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>"
|
|
|
|
| 235 |
|
| 236 |
# --- Keywords Tab ---
|
| 237 |
with tabs[2]:
|
|
@@ -243,22 +244,8 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
|
|
| 243 |
|
| 244 |
# --- Graphs Tab ---
|
| 245 |
with tabs[3]:
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
heatmap = go.Figure(data=go.Heatmap(
|
| 249 |
-
z=[df_sent['score'].tolist()],
|
| 250 |
-
x=[f"جملہ {i}" for i in df_sent['position']],
|
| 251 |
-
y=["Score"],
|
| 252 |
-
colorscale='Viridis'
|
| 253 |
-
))
|
| 254 |
-
heatmap.update_layout(title="Sentence Score Intensity")
|
| 255 |
-
st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
|
| 256 |
-
|
| 257 |
-
word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
|
| 258 |
-
fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
|
| 259 |
-
labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
|
| 260 |
-
st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
|
| 261 |
-
|
| 262 |
# --- Explainability Tab ---
|
| 263 |
with tabs[4]:
|
| 264 |
st.subheader("LIME Explanations (Word Contributions)")
|
|
|
|
| 4 |
from typing import List, Dict
|
| 5 |
import pandas as pd
|
| 6 |
import plotly.express as px
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
import networkx as nx
|
| 9 |
from lime.lime_text import LimeTextExplainer
|
| 10 |
import shap
|
|
|
|
| 11 |
|
| 12 |
# ----------------- Streamlit Page Config -----------------
|
| 13 |
st.set_page_config(
|
|
|
|
| 26 |
.summary-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 10px 0; }
|
| 27 |
.sentence-bar { height: 20px; border-radius: 5px; margin-bottom:5px; }
|
| 28 |
.sentence-text { font-size: 16px; margin-bottom:3px; }
|
| 29 |
+
.score-text { font-size: 14px; color: #333; margin-top: 5px; direction: rtl; }
|
| 30 |
</style>
|
| 31 |
""", unsafe_allow_html=True)
|
| 32 |
|
|
|
|
| 34 |
class UrduTextSummarizer:
|
| 35 |
def __init__(self):
|
| 36 |
self.urdu_stop_words = {'اور','کا','کی','کے','میں','سے','کو','نے','ہے','ہیں','تھا','تھی','تھے',
|
| 37 |
+
'گا','گی','گے','کہ','جو','یہ','وہ','اس','ان','پر','کر','کرنا','کیا',
|
| 38 |
+
'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
|
| 39 |
|
| 40 |
def tokenize(self, sentence: str) -> List[str]:
|
| 41 |
if isinstance(sentence, bytes):
|
|
|
|
| 162 |
try:
|
| 163 |
words = self.tokenize(t)
|
| 164 |
if not words:
|
| 165 |
+
scores.append([0.0, 1.0])
|
| 166 |
continue
|
| 167 |
temp_sent_words = [words]
|
| 168 |
temp_all_words = list(set(words))
|
|
|
|
| 175 |
temp_idf = np.log(1 / (1 + temp_df))
|
| 176 |
temp_tfidf = temp_tf * temp_idf
|
| 177 |
norm = np.linalg.norm(temp_tfidf[0])
|
| 178 |
+
score = min(max(norm, 0.0), 1.0)
|
| 179 |
+
scores.append([score, 1.0 - score])
|
| 180 |
+
except Exception:
|
| 181 |
+
scores.append([0.0, 1.0])
|
| 182 |
return np.array(scores)
|
| 183 |
|
| 184 |
# LIME Explainer
|
| 185 |
+
lime_explainer = LimeTextExplainer(class_names=["Score", "Not Score"], bow=False)
|
| 186 |
+
top_indices = [s['position'] - 1 for s in sorted_scores[:2]]
|
| 187 |
for idx in top_indices:
|
| 188 |
try:
|
| 189 |
exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
|
| 190 |
+
explanations['lime'].append({'sentence': sentences[idx], 'exp': exp.as_list(label=0)})
|
| 191 |
except Exception as e:
|
| 192 |
explanations['lime'].append({'sentence': sentences[idx], 'exp': [('Error', f'LIME failed: {str(e)}')]})
|
| 193 |
|
| 194 |
# SHAP Explainer
|
| 195 |
+
background_texts = sentences[:min(10, len(sentences))]
|
| 196 |
+
shap_explainer = shap.KernelExplainer(predictor, background_texts)
|
| 197 |
for idx in top_indices:
|
| 198 |
try:
|
| 199 |
+
shap_values = shap_explainer.shap_values(sentences[idx], nsamples=100)[0]
|
| 200 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': shap_values})
|
| 201 |
except Exception as e:
|
| 202 |
explanations['shap'].append({'sentence': sentences[idx], 'shap_values': [0.0], 'error': str(e)})
|
|
|
|
| 231 |
for s in sorted_sents:
|
| 232 |
bar_width = min(int(s['score']*100), 100)
|
| 233 |
st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
|
| 234 |
+
f"<div class='sentence-bar' style='width:{bar_width}%; background-color:#667eea'></div>"
|
| 235 |
+
f"<div class='score-text'>اسکور: {s['score']:.2f}</div>", unsafe_allow_html=True)
|
| 236 |
|
| 237 |
# --- Keywords Tab ---
|
| 238 |
with tabs[2]:
|
|
|
|
| 244 |
|
| 245 |
# --- Graphs Tab ---
|
| 246 |
with tabs[3]:
|
| 247 |
+
st.info("اس ٹیب میں فی الحال کوئی گراف شامل نہیں ہے۔")
|
| 248 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
# --- Explainability Tab ---
|
| 250 |
with tabs[4]:
|
| 251 |
st.subheader("LIME Explanations (Word Contributions)")
|