Spaces:

Subayyal
/

Urdu-Summarizer

Sleeping

App Files Community

Subayyal committed on Sep 2, 2025

Commit

b621a50

verified ·

1 Parent(s): 04c64a8

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -15

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import numpy as np
 import networkx as nx
 from lime.lime_text import LimeTextExplainer
 import shap
 # ----------------- Streamlit Page Config -----------------
 st.set_page_config(
@@ -38,7 +39,6 @@ class UrduTextSummarizer:
                                 'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
     def tokenize(self, sentence: str) -> List[str]:
-        # Handle both string and bytes inputs
         if isinstance(sentence, bytes):
             sentence = sentence.decode('utf-8', errors='ignore')
         elif not isinstance(sentence, str):
@@ -178,12 +178,12 @@ class UrduTextSummarizer:
                     norm = np.linalg.norm(temp_tfidf[0])
                     scores.append([norm])
                 except Exception as e:
-                    scores.append([0.0])  # Fallback for errors
             return np.array(scores)
         # LIME Explainer
         lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
-        top_indices = [s['position'] - 1 for s in sorted_scores[:2]]  # Reduced to 2 for performance
         for idx in top_indices:
             try:
                 exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
@@ -227,7 +227,7 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
     # --- Sentences Tab ---
     with tabs[1]:
         st.subheader("جملوں کی اہمیت")
-        sorted_sents = sorted(result['sentences'], key=lambda x:x['score'], reverse=True)
         for s in sorted_sents:
             bar_width = min(int(s['score']*100), 100)
             st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
@@ -239,7 +239,7 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
         st.dataframe(df_kw)
         fig_bubble = px.scatter(df_kw, x='word', y='frequency', size='frequency', color='importance',
                                 size_max=60, color_continuous_scale='Viridis', title='Keywords by Frequency & Importance')
-        st.plotly_chart(fig_bubble, use_container_width=True)
     # --- Graphs Tab ---
     with tabs[3]:
@@ -252,30 +252,30 @@ if st.button("خلاصہ تیار کریں") and user_input.strip():
             colorscale='Viridis'
         ))
         heatmap.update_layout(title="Sentence Score Intensity")
-        st.plotly_chart(heatmap, use_container_width=True)
         word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
         fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
                                  labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
-        st.plotly_chart(fig_words, use_container_width=True)
     # --- Explainability Tab ---
     with tabs[4]:
         st.subheader("LIME Explanations (Word Contributions)")
-        for exp in result['explanations']['lime']:
-            st.markdown(f"**جملہ:** {exp['sentence']}")
             df_lime = pd.DataFrame(exp['exp'], columns=['Word', 'Contribution'])
-            fig_lime = px.bar(df_lime, x='Contribution', y='Word', orientation='h', title='LIME Feature Contributions')
-            st.plotly_chart(fig_lime)
         st.subheader("SHAP Explanations (Shapley Values)")
-        for exp in result['explanations']['shap']:
-            st.markdown(f"**جملہ:** {exp['sentence']}")
             if 'error' in exp:
                 st.error(f"SHAP failed: {exp['error']}")
             else:
                 df_shap = pd.DataFrame({'Word': exp['sentence'].split()[:len(exp['shap_values'])], 'SHAP Value': exp['shap_values']})
-                fig_shap = px.bar(df_shap, x='SHAP Value', y='Word', orientation='h', title='SHAP Feature Contributions')
-                st.plotly_chart(fig_shap)
     st.info(f"⏱ خلاصہ تیار کرنے میں وقت: {result['duration']:.2f} سیکنڈ")

 import networkx as nx
 from lime.lime_text import LimeTextExplainer
 import shap
+import uuid
 # ----------------- Streamlit Page Config -----------------
 st.set_page_config(
                                 'ہو','ہوا','ہوئی','ہوئے','بھی','تو','ہی','لیے','ساتھ','بعد','پہلے'}
     def tokenize(self, sentence: str) -> List[str]:
         if isinstance(sentence, bytes):
             sentence = sentence.decode('utf-8', errors='ignore')
         elif not isinstance(sentence, str):
                     norm = np.linalg.norm(temp_tfidf[0])
                     scores.append([norm])
                 except Exception as e:
+                    scores.append([0.0])
             return np.array(scores)
         # LIME Explainer
         lime_explainer = LimeTextExplainer(class_names=["Score"], bow=False)
+        top_indices = [s['position'] - 1 for s in sorted_scores[:2]]  # Top 2 sentences
         for idx in top_indices:
             try:
                 exp = lime_explainer.explain_instance(sentences[idx], predictor, num_features=10, num_samples=100)
     # --- Sentences Tab ---
     with tabs[1]:
         st.subheader("جملوں کی اہمیت")
+        sorted_sents = sorted(result['sentences'], key=lambda x: x['score'], reverse=True)
         for s in sorted_sents:
             bar_width = min(int(s['score']*100), 100)
             st.markdown(f"<div class='sentence-text'>{s['sentence']}</div>"
         st.dataframe(df_kw)
         fig_bubble = px.scatter(df_kw, x='word', y='frequency', size='frequency', color='importance',
                                 size_max=60, color_continuous_scale='Viridis', title='Keywords by Frequency & Importance')
+        st.plotly_chart(fig_bubble, use_container_width=True, key="keyword_bubble")
     # --- Graphs Tab ---
     with tabs[3]:
             colorscale='Viridis'
         ))
         heatmap.update_layout(title="Sentence Score Intensity")
+        st.plotly_chart(heatmap, use_container_width=True, key="sentence_heatmap")
         word_lengths = [len(w) for w in re.sub(r'[۔،؟!؛:]', '', user_input).split()]
         fig_words = px.histogram(word_lengths, nbins=20, title="الفاظ کی لمبائی کی تقسیم",
                                  labels={'value':'الفاظ کی لمبائی','count':'تعداد'})
+        st.plotly_chart(fig_words, use_container_width=True, key="word_histogram")
     # --- Explainability Tab ---
     with tabs[4]:
         st.subheader("LIME Explanations (Word Contributions)")
+        for i, exp in enumerate(result['explanations']['lime']):
+            st.markdown(f"**جملہ {i+1}:** {exp['sentence']}")
             df_lime = pd.DataFrame(exp['exp'], columns=['Word', 'Contribution'])
+            fig_lime = px.bar(df_lime, x='Contribution', y='Word', orientation='h', title=f'LIME Feature Contributions (Sentence {i+1})')
+            st.plotly_chart(fig_lime, use_container_width=True, key=f"lime_bar_{i}")
         st.subheader("SHAP Explanations (Shapley Values)")
+        for i, exp in enumerate(result['explanations']['shap']):
+            st.markdown(f"**جملہ {i+1}:** {exp['sentence']}")
             if 'error' in exp:
                 st.error(f"SHAP failed: {exp['error']}")
             else:
                 df_shap = pd.DataFrame({'Word': exp['sentence'].split()[:len(exp['shap_values'])], 'SHAP Value': exp['shap_values']})
+                fig_shap = px.bar(df_shap, x='SHAP Value', y='Word', orientation='h', title=f'SHAP Feature Contributions (Sentence {i+1})')
+                st.plotly_chart(fig_shap, use_container_width=True, key=f"shap_bar_{i}")
     st.info(f"⏱ خلاصہ تیار کرنے میں وقت: {result['duration']:.2f} سیکنڈ")