Subayyal committed on
Commit
b621a50
·
verified ·
1 Parent(s): 04c64a8

Update app.py: give each Plotly chart a unique key and number the LIME/SHAP explanations

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  import networkx as nx
10
  from lime.lime_text import LimeTextExplainer
11
  import shap
 
12
 
13
  # ----------------- Streamlit Page Config -----------------
14
  st.set_page_config(
@@ -38,7 +39,6 @@ class UrduTextSummarizer:
38
  'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
39
 
40
  def tokenize(self, sentence: str) -> List[str]:
41
- # Handle both string and bytes inputs
42
  if isinstance(sentence, bytes):
43
  sentence = sentence.decode('utf-8', errors='ignore')
44
  elif not isinstance(sentence, str):
@@ -178,12 +178,12 @@ class UrduTextSummarizer:
178
  norm = np.linalg.norm(temp_tfidf[0])
179
  scores.append([norm])
180
  except Exception as e:
181
- scores.append([0.0]) # Fallback for errors
182
  return np.array(scores)
183
 
184
  # LIME Explainer
185
  lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
186
- top_indices = [s['position'] - 1 for s in sorted_scores[:2]] # Reduced to 2 for performance
187
  for idx in top_indices:
188
  try:
189
  exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
@@ -227,7 +227,7 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
227
  # --- Sentences Tab ---
228
  with tabs[1]:
229
  st.subheader("جملوں کی اہمیت")
230
- sorted_sents = sorted(result['sentences'], key=lambda x:x['score'], reverse=True)
231
  for s in sorted_sents:
232
  bar_width = min(int(s['score']*100), 100)
233
  st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
@@ -239,7 +239,7 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
239
  st.dataframe(df_kw)
240
  fig_bubble = px.scatter(df_kw, x='word', y='frequency', size='frequency', color='importance',
241
  size_max=60, color_continuous_scale='Viridis', title='Keywords by Frequency & Importance')
242
- st.plotly_chart(fig_bubble, use_container_width=True)
243
 
244
  # --- Graphs Tab ---
245
  with tabs[3]:
@@ -252,30 +252,30 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
252
  colorscale='Viridis'
253
  ))
254
  heatmap.update_layout(title="Sentence Score Intensity")
255
- st.plotly_chart(heatmap, use_container_width=True)
256
 
257
  word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
258
  fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
259
  labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
260
- st.plotly_chart(fig_words, use_container_width=True)
261
 
262
  # --- Explainability Tab ---
263
  with tabs[4]:
264
  st.subheader("LIME Explanations (Word Contributions)")
265
- for exp in result['explanations']['lime']:
266
- st.markdown(f"**جملہ:** {exp['sentence']}")
267
  df_lime = pd.DataFrame(exp['exp'], columns=['Word', 'Contribution'])
268
- fig_lime = px.bar(df_lime, x='Contribution', y='Word', orientation='h', title='LIME Feature Contributions')
269
- st.plotly_chart(fig_lime)
270
 
271
  st.subheader("SHAP Explanations (Shapley Values)")
272
- for exp in result['explanations']['shap']:
273
- st.markdown(f"**جملہ:** {exp['sentence']}")
274
  if 'error' in exp:
275
  st.error(f"SHAP failed: {exp['error']}")
276
  else:
277
  df_shap = pd.DataFrame({'Word': exp['sentence'].split()[:len(exp['shap_values'])], 'SHAP Value': exp['shap_values']})
278
- fig_shap = px.bar(df_shap, x='SHAP Value', y='Word', orientation='h', title='SHAP Feature Contributions')
279
- st.plotly_chart(fig_shap)
280
 
281
  st.info(f"⏱ خلاصہ تیار کرنے میں وقت: {result['duration']:.2f} سیکنڈ")
 
9
  import networkx as nx
10
  from lime.lime_text import LimeTextExplainer
11
  import shap
12
+ import uuid
13
 
14
  # ----------------- Streamlit Page Config -----------------
15
  st.set_page_config(
 
39
  'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
40
 
41
  def tokenize(self, sentence: str) -> List[str]:
 
42
  if isinstance(sentence, bytes):
43
  sentence = sentence.decode('utf-8', errors='ignore')
44
  elif not isinstance(sentence, str):
 
178
  norm = np.linalg.norm(temp_tfidf[0])
179
  scores.append([norm])
180
  except Exception as e:
181
+ scores.append([0.0])
182
  return np.array(scores)
183
 
184
  # LIME Explainer
185
  lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
186
+ top_indices = [s['position'] - 1 for s in sorted_scores[:2]] # Top 2 sentences
187
  for idx in top_indices:
188
  try:
189
  exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
 
227
  # --- Sentences Tab ---
228
  with tabs[1]:
229
  st.subheader("جملوں کی اہمیت")
230
+ sorted_sents = sorted(result['sentences'], key=lambda x: x['score'], reverse=True)
231
  for s in sorted_sents:
232
  bar_width = min(int(s['score']*100), 100)
233
  st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
 
239
  st.dataframe(df_kw)
240
  fig_bubble = px.scatter(df_kw, x='word', y='frequency', size='frequency', color='importance',
241
  size_max=60, color_continuous_scale='Viridis', title='Keywords by Frequency & Importance')
242
+ st.plotly_chart(fig_bubble, use_container_width=True, key="keyword_bubble")
243
 
244
  # --- Graphs Tab ---
245
  with tabs[3]:
 
252
  colorscale='Viridis'
253
  ))
254
  heatmap.update_layout(title="Sentence Score Intensity")
255
+ st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
256
 
257
  word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
258
  fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
259
  labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
260
+ st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
261
 
262
  # --- Explainability Tab ---
263
  with tabs[4]:
264
  st.subheader("LIME Explanations (Word Contributions)")
265
+ for i, exp in enumerate(result['explanations']['lime']):
266
+ st.markdown(f"**جملہ {i+1}:** {exp['sentence']}")
267
  df_lime = pd.DataFrame(exp['exp'], columns=['Word', 'Contribution'])
268
+ fig_lime = px.bar(df_lime, x='Contribution', y='Word', orientation='h', title=f'LIME Feature Contributions (Sentence {i+1})')
269
+ st.plotly_chart(fig_lime, use_container_width=True, key=f"lime_bar_{i}")
270
 
271
  st.subheader("SHAP Explanations (Shapley Values)")
272
+ for i, exp in enumerate(result['explanations']['shap']):
273
+ st.markdown(f"**جملہ {i+1}:** {exp['sentence']}")
274
  if 'error' in exp:
275
  st.error(f"SHAP failed: {exp['error']}")
276
  else:
277
  df_shap = pd.DataFrame({'Word': exp['sentence'].split()[:len(exp['shap_values'])], 'SHAP Value': exp['shap_values']})
278
+ fig_shap = px.bar(df_shap, x='SHAP Value', y='Word', orientation='h', title=f'SHAP Feature Contributions (Sentence {i+1})')
279
+ st.plotly_chart(fig_shap, use_container_width=True, key=f"shap_bar_{i}")
280
 
281
  st.info(f"⏱ خلاصہ تیار کرنے میں وقت: {result['duration']:.2f} سیکنڈ")