FarhinSadia committed on
Commit
79b4249
·
1 Parent(s): 3452303
Files changed (3) hide show
  1. app.py +24 -10
  2. src/data_processor.py +20 -9
  3. src/insights_generator.py +12 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  # app.py
 
2
 
3
  import streamlit as st
4
  import pandas as pd
@@ -32,11 +33,20 @@ def load_and_process_data():
32
  dfs = []
33
  for f in files_list:
34
  try:
35
- df = pd.read_csv(f) if f.endswith('.csv') else pd.DataFrame({'text': [p.strip() for p in open(f, 'r', encoding='utf-8').read().split('\n') if p.strip()]})
 
 
 
 
 
 
 
 
36
  df['source_file'] = os.path.basename(f)
37
  dfs.append(df)
38
  except Exception as e:
39
  st.error(f"Error reading {os.path.basename(f)}: {e}")
 
40
  return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
41
 
42
  raw_posts_df = read_files_to_dataframe(post_files)
@@ -47,8 +57,7 @@ def load_and_process_data():
47
  processed_posts_df = processor.process_all_data(raw_posts_df) if not raw_posts_df.empty else pd.DataFrame()
48
  processed_comments_df = processor.process_all_data(raw_comments_df) if not raw_comments_df.empty else pd.DataFrame()
49
 
50
- # --- SECONDARY DEFENSIVE CHECK ---
51
- # Ensure the columns were actually added. If not, revert to an empty DataFrame.
52
  if not processed_posts_df.empty and 'prime_mentions' not in processed_posts_df.columns:
53
  st.warning("Could not process 'posts' data correctly. Check data format.")
54
  processed_posts_df = pd.DataFrame()
@@ -56,7 +65,6 @@ def load_and_process_data():
56
  if not processed_comments_df.empty and 'prime_mentions' not in processed_comments_df.columns:
57
  st.warning("Could not process 'comments' data correctly. Check data format.")
58
  processed_comments_df = pd.DataFrame()
59
- # --- END OF CHECK ---
60
 
61
  all_text_df = pd.concat([processed_posts_df, processed_comments_df], ignore_index=True)
62
 
@@ -83,9 +91,6 @@ if all_text_df.empty or insights is None:
83
  prime_posts_df = posts_df[posts_df['prime_mentions'] > 0].copy() if not posts_df.empty and 'prime_mentions' in posts_df else pd.DataFrame()
84
  prime_all_text_df = all_text_df[all_text_df['prime_mentions'] > 0].copy() if not all_text_df.empty and 'prime_mentions' in all_text_df else pd.DataFrame()
85
 
86
- # The rest of the app.py file remains the same as the previous version...
87
- # ... (KPI Section, Tabbed Interface, etc.) ...
88
-
89
  # --- KPI Section ---
90
  st.header("📈 Prime Bank Mention KPIs")
91
  kpi1, kpi2 = st.columns(2)
@@ -141,10 +146,15 @@ with tab2:
141
  with st.expander("Read Emotion Insights"):
142
  emotion_insight = insights.get('emotion', {})
143
  st.markdown(f"**Summary:** {emotion_insight.get('summary', 'N/A')}")
 
 
144
  for emotion, data in emotion_insight.get('details', {}).items():
145
  st.markdown(f"**{emotion} is often about:** {data['themes']}")
146
- st.write(f"Example:")
147
- st.info(f"- \"{data['example'][:150]}...\"")
 
 
 
148
 
149
  with col2:
150
  st.subheader("Post & Comment Categories")
@@ -163,6 +173,10 @@ with tab3:
163
  if not posts_df.empty:
164
  st.subheader("Processed Posts Data")
165
  st.dataframe(posts_df)
 
166
  if not all_text_df.empty and len(all_text_df) > len(posts_df):
167
  st.subheader("Processed Comments & Reviews Data")
168
- st.dataframe(all_text_df.iloc[len(posts_df):].reset_index(drop=True))
 
 
 
 
1
  # app.py
2
+ # THIS IS THE START OF THE FILE
3
 
4
  import streamlit as st
5
  import pandas as pd
 
33
  dfs = []
34
  for f in files_list:
35
  try:
36
+ # Use different readers for different file types
37
+ if f.endswith('.csv'):
38
+ df = pd.read_csv(f)
39
+ else: # for .txt files
40
+ with open(f, 'r', encoding='utf-8') as file:
41
+ content = file.read()
42
+ posts = content.split('\n')
43
+ df = pd.DataFrame({'text': [p.strip() for p in posts if p.strip()]})
44
+
45
  df['source_file'] = os.path.basename(f)
46
  dfs.append(df)
47
  except Exception as e:
48
  st.error(f"Error reading {os.path.basename(f)}: {e}")
49
+
50
  return pd.concat(dfs, ignore_index=True) if dfs else pd.DataFrame()
51
 
52
  raw_posts_df = read_files_to_dataframe(post_files)
 
57
  processed_posts_df = processor.process_all_data(raw_posts_df) if not raw_posts_df.empty else pd.DataFrame()
58
  processed_comments_df = processor.process_all_data(raw_comments_df) if not raw_comments_df.empty else pd.DataFrame()
59
 
60
+ # Secondary defensive check to ensure columns were added.
 
61
  if not processed_posts_df.empty and 'prime_mentions' not in processed_posts_df.columns:
62
  st.warning("Could not process 'posts' data correctly. Check data format.")
63
  processed_posts_df = pd.DataFrame()
 
65
  if not processed_comments_df.empty and 'prime_mentions' not in processed_comments_df.columns:
66
  st.warning("Could not process 'comments' data correctly. Check data format.")
67
  processed_comments_df = pd.DataFrame()
 
68
 
69
  all_text_df = pd.concat([processed_posts_df, processed_comments_df], ignore_index=True)
70
 
 
91
  prime_posts_df = posts_df[posts_df['prime_mentions'] > 0].copy() if not posts_df.empty and 'prime_mentions' in posts_df else pd.DataFrame()
92
  prime_all_text_df = all_text_df[all_text_df['prime_mentions'] > 0].copy() if not all_text_df.empty and 'prime_mentions' in all_text_df else pd.DataFrame()
93
 
 
 
 
94
  # --- KPI Section ---
95
  st.header("📈 Prime Bank Mention KPIs")
96
  kpi1, kpi2 = st.columns(2)
 
146
  with st.expander("Read Emotion Insights"):
147
  emotion_insight = insights.get('emotion', {})
148
  st.markdown(f"**Summary:** {emotion_insight.get('summary', 'N/A')}")
149
+
150
+ # --- THIS IS THE CORRECTED CODE BLOCK ---
151
  for emotion, data in emotion_insight.get('details', {}).items():
152
  st.markdown(f"**{emotion} is often about:** {data['themes']}")
153
+ # Only show the example box if an example exists and is valid
154
+ if data.get('example') and data['example'] != "N/A":
155
+ st.write("Example:")
156
+ st.info(f"- \"{data['example'][:150]}...\"")
157
+ # --- END OF CORRECTED CODE BLOCK ---
158
 
159
  with col2:
160
  st.subheader("Post & Comment Categories")
 
173
  if not posts_df.empty:
174
  st.subheader("Processed Posts Data")
175
  st.dataframe(posts_df)
176
+ # Check if there are any comments to display
177
  if not all_text_df.empty and len(all_text_df) > len(posts_df):
178
  st.subheader("Processed Comments & Reviews Data")
179
+ # Correctly slice the comments from the combined dataframe
180
+ st.dataframe(all_text_df.iloc[len(posts_df):].reset_index(drop=True))
181
+
182
+ # THIS IS THE END OF THE FILE
src/data_processor.py CHANGED
@@ -219,10 +219,16 @@ class DataProcessor:
219
  else:
220
  return 'Other', 'General discussion or observation'
221
 
 
 
222
  def process_all_data(self, df):
223
  """Apply all processing to dataframe"""
224
- # Find text column
225
- text_columns = ['text', 'content', 'message', 'review', 'comment', 'post', 'Text', 'Content']
 
 
 
 
226
  text_col = None
227
 
228
  for col in text_columns:
@@ -230,11 +236,15 @@ class DataProcessor:
230
  text_col = col
231
  break
232
 
233
- if text_col and text_col != 'text':
234
- df['text'] = df[text_col]
235
-
236
- if 'text' not in df.columns:
237
- return df
 
 
 
 
238
 
239
  # Identify which bank each post is about
240
  df[['primary_bank', 'all_banks_mentioned']] = df['text'].apply(
@@ -265,10 +275,11 @@ class DataProcessor:
265
  df['viral_score'] += df['likes'].fillna(0)
266
  if 'shares' in df.columns:
267
  df['viral_score'] += df['shares'].fillna(0) * 2
268
- if 'comments' in df.columns:
269
  df['viral_score'] += df['comments'].fillna(0) * 1.5
270
 
271
  # Add Prime Bank specific viral score boost
272
- df.loc[df['prime_mentions'] > 0, 'viral_score'] *= 1.2
 
273
 
274
  return df
 
219
  else:
220
  return 'Other', 'General discussion or observation'
221
 
222
+ # In src/data_processor.py
223
+
224
  def process_all_data(self, df):
225
  """Apply all processing to dataframe"""
226
+ # --- NEW, MORE ROBUST TEXT COLUMN FINDER ---
227
+ # If the dataframe is empty, return it immediately.
228
+ if df.empty:
229
+ return df
230
+
231
+ text_columns = ['text', 'content', 'message', 'review', 'comment', 'post', 'Text', 'Content', 'Post', 'Review Text']
232
  text_col = None
233
 
234
  for col in text_columns:
 
236
  text_col = col
237
  break
238
 
239
+ # If no text column is found, we cannot proceed. Return the empty shell.
240
+ if not text_col:
241
+ st.warning(f"Could not find a text column in one of the data sources.")
242
+ return pd.DataFrame(columns=df.columns) # Return with columns but no data
243
+
244
+ # If the found column is not 'text', rename it to 'text' for consistency.
245
+ if text_col != 'text':
246
+ df.rename(columns={text_col: 'text'}, inplace=True)
247
+ # --- END OF FIX ---
248
 
249
  # Identify which bank each post is about
250
  df[['primary_bank', 'all_banks_mentioned']] = df['text'].apply(
 
275
  df['viral_score'] += df['likes'].fillna(0)
276
  if 'shares' in df.columns:
277
  df['viral_score'] += df['shares'].fillna(0) * 2
278
+ if 'comments' in df.columns: # This column name was missing from your old code
279
  df['viral_score'] += df['comments'].fillna(0) * 1.5
280
 
281
  # Add Prime Bank specific viral score boost
282
+ if not df.empty and 'prime_mentions' in df.columns:
283
+ df.loc[df['prime_mentions'] > 0, 'viral_score'] *= 1.2
284
 
285
  return df
src/insights_generator.py CHANGED
@@ -39,12 +39,23 @@ class InsightsGenerator:
39
 
40
  return self.insights
41
 
 
 
42
  def _get_common_words(self, text_series, top_n=5):
43
  """Helper function to find common keywords in a series of text."""
44
  if text_series.empty:
45
  return "No data"
46
 
47
- stop_words = {'the', 'a', 'an', 'is', 'i', 'to', 'for', 'in', 'it', 'and', 'my', 'of', 'prime', 'bank'}
 
 
 
 
 
 
 
 
 
48
  all_text = ' '.join(text_series.astype(str).tolist()).lower()
49
  words = re.findall(r'\b[a-z]{4,}\b', all_text) # Find words with 4+ letters
50
  filtered_words = [word for word in words if word not in stop_words]
 
39
 
40
  return self.insights
41
 
42
+ # In src/insights_generator.py
43
+
44
  def _get_common_words(self, text_series, top_n=5):
45
  """Helper function to find common keywords in a series of text."""
46
  if text_series.empty:
47
  return "No data"
48
 
49
+ # --- NEW, IMPROVED STOP WORD LIST ---
50
+ stop_words = {
51
+ 'the', 'a', 'an', 'is', 'i', 'to', 'for', 'in', 'it', 'and', 'my', 'of', 'prime', 'bank', 'banker',
52
+ 'was', 'do', 'with', 'that', 'this', 'have', 'has', 'are', 'not',
53
+ # Common "Banglish" and filler words
54
+ 'er', 'ta', 'ki', 'ami', 'amar', 'kore', 'hocche', 'bhalo', 'asholei', 'onek', 'apnar',
55
+ 'sir', 'bro', 'please', 'help', 'need', 'know', 'want'
56
+ }
57
+ # --- END OF FIX ---
58
+
59
  all_text = ' '.join(text_series.astype(str).tolist()).lower()
60
  words = re.findall(r'\b[a-z]{4,}\b', all_text) # Find words with 4+ letters
61
  filtered_words = [word for word in words if word not in stop_words]