Spaces:

Harshb11
/

mca_comment_analyzer

Sleeping

App Files Community

Harshb11 committed on Sep 14, 2025

Commit

2d8c6ff

verified ·

1 Parent(s): d785ac6

Update mca_comment_analyzer.py

Browse files

Files changed (1) hide show

mca_comment_analyzer.py +44 -37

mca_comment_analyzer.py CHANGED Viewed

@@ -10,51 +10,45 @@ import nltk
 from nltk.corpus import stopwords
 import random
 from datetime import datetime, timedelta
-from langdetect import detect
-from deep_translator import GoogleTranslator
 # ---- Config
-st.set_option('browser.gatherUsageStats', False)  # Disable usage stats
-os.environ["MPLCONFIGDIR"] = "/tmp/.matplotlib"  # Fix matplotlib cache warning
-st.set_page_config(page_title="MCA Comment Analyzer", layout="wide")
-# ---- NLTK setup
 nltk.download('stopwords', quiet=True)
 STOPWORDS = set(stopwords.words('english'))
-# ---- MCA Analyzer Class
 class MCACommentAnalyzer:
     def __init__(self):
         device = 0 if torch.cuda.is_available() else -1
         print("Using device:", "GPU" if device==0 else "CPU")
         self.sentiment_model = pipeline(
             "sentiment-analysis",
             model="distilbert-base-uncased-finetuned-sst-2-english",
             device=device
         )
         self.summarizer = pipeline(
             "summarization",
-            model="sshleifer/distilbart-cnn-12-6",
             device=device
         )
         self.stop_words = STOPWORDS
-    def translate_to_english(self, text):
-        try:
-            lang = detect(text)
-            if lang != "en":
-                return GoogleTranslator(source='auto', target='en').translate(text)
-            return text
-        except:
-            return text
     def map_sentiment(self, pred, text):
         text_lower = text.lower()
-        violation_keywords = ["violation", "violates", "illegal", "non-compliant", "breach", "unlawful", "risk", "penalty"]
-        suggestion_keywords = ["should", "recommend", "suggest", "advise", "better if", "could", "need to"]
         positive_keywords = ["clear", "helpful", "good", "appreciate", "support"]
-        negative_keywords = ["confusing", "unclear", "bad", "problem", "needs clarification"]
         if any(w in text_lower for w in violation_keywords):
             return "Violation"
@@ -74,24 +68,20 @@ class MCACommentAnalyzer:
             return "Neutral"
     def process_comment(self, comment):
-        translated_comment = self.translate_to_english(comment)
-        pred = self.sentiment_model(translated_comment)[0]
-        sentiment = self.map_sentiment(pred, translated_comment)
-        # Summary
-        if len(translated_comment.split()) < 10:
-            summary_text = " ".join(translated_comment.split()[:10])
         else:
             try:
-                summary_text = self.summarizer(translated_comment, max_length=30, min_length=5, do_sample=False)[0]['summary_text']
             except:
-                summary_text = translated_comment
-        # Keywords
-        words = [w for w in translated_comment.lower().split() if w.isalpha() and w not in self.stop_words]
         keywords = list(Counter(words).keys())
         top_keywords = ", ".join(keywords[:3])
         return sentiment, summary_text, keywords, top_keywords
     def process_comments(self, comments_list):
@@ -104,7 +94,7 @@ class MCACommentAnalyzer:
             summaries.append(summary)
             all_keywords.extend(keywords)
             top_keywords_list.append(top_kw)
-            timestamps.append(start_date + timedelta(days=random.randint(0, 30)))
         df = pd.DataFrame({
             "Timestamp": timestamps,
@@ -133,14 +123,31 @@ class MCACommentAnalyzer:
         return plt
 # ---- Streamlit UI
-st.title("📊 MCA eConsultation Comment Analyzer")
 st.sidebar.header("Upload or Enter Comments")
-upload_file = st.sidebar.file_uploader("Upload a text file with comments", type=["txt"])
-manual_input = st.sidebar.text_area("Or enter comments (one per line):")
 comments = []
 if upload_file:
-    comments = upload_file.read().decode("utf-8").splitlines()
 elif manual_input.strip():
     comments = manual_input.strip().split("\n")
@@ -159,4 +166,4 @@ if st.sidebar.button("Analyze"):
         plt_obj = analyzer.generate_wordcloud(keyword_freq)
         st.pyplot(plt_obj)
     else:
-        st.warning("⚠️ Please provide comments to analyze.")

 from nltk.corpus import stopwords
 import random
 from datetime import datetime, timedelta
 # ---- Config
+st.set_option('browser.gatherUsageStats', False)
+os.environ["MPLCONFIGDIR"] = "/tmp/.matplotlib"
+st.set_page_config(page_title="MCA Demo Comment Analyzer", layout="wide")
+# ---- NLTK
 nltk.download('stopwords', quiet=True)
 STOPWORDS = set(stopwords.words('english'))
+# ---- Lightweight MCA Analyzer
 class MCACommentAnalyzer:
     def __init__(self):
         device = 0 if torch.cuda.is_available() else -1
         print("Using device:", "GPU" if device==0 else "CPU")
+        # Lightweight sentiment model
         self.sentiment_model = pipeline(
             "sentiment-analysis",
             model="distilbert-base-uncased-finetuned-sst-2-english",
             device=device
         )
+        # Lightweight summarizer
         self.summarizer = pipeline(
             "summarization",
+            model="t5-small",
             device=device
         )
         self.stop_words = STOPWORDS
     def map_sentiment(self, pred, text):
         text_lower = text.lower()
+        violation_keywords = ["violation", "violates", "illegal", "non-compliant"]
+        suggestion_keywords = ["should", "recommend", "suggest", "advise", "better if"]
         positive_keywords = ["clear", "helpful", "good", "appreciate", "support"]
+        negative_keywords = ["confusing", "unclear", "bad", "problem"]
         if any(w in text_lower for w in violation_keywords):
             return "Violation"
             return "Neutral"
     def process_comment(self, comment):
+        pred = self.sentiment_model(comment)[0]
+        sentiment = self.map_sentiment(pred, comment)
+        if len(comment.split()) < 10:
+            summary_text = " ".join(comment.split()[:10])
         else:
             try:
+                summary_text = self.summarizer(comment, max_length=30, min_length=5, do_sample=False)[0]['summary_text']
             except:
+                summary_text = comment
+        words = [w for w in comment.lower().split() if w.isalpha() and w not in self.stop_words]
         keywords = list(Counter(words).keys())
         top_keywords = ", ".join(keywords[:3])
         return sentiment, summary_text, keywords, top_keywords
     def process_comments(self, comments_list):
             summaries.append(summary)
             all_keywords.extend(keywords)
             top_keywords_list.append(top_kw)
+            timestamps.append(start_date + timedelta(days=random.randint(0,30)))
         df = pd.DataFrame({
             "Timestamp": timestamps,
         return plt
 # ---- Streamlit UI
+st.title("📊 MCA Demo Comment Analyzer")
 st.sidebar.header("Upload or Enter Comments")
+upload_file = st.sidebar.file_uploader("Upload CSV/Excel/TXT", type=["csv","xlsx","txt"])
+manual_input = st.sidebar.text_area("Or enter comments manually (one per line)")
 comments = []
 if upload_file:
+    try:
+        if upload_file.name.endswith(".csv"):
+            df_file = pd.read_csv(upload_file)
+            if 'comment' in df_file.columns:
+                comments = df_file['comment'].astype(str).tolist()
+            else:
+                comments = df_file.iloc[:,0].astype(str).tolist()
+        elif upload_file.name.endswith(".xlsx"):
+            df_file = pd.read_excel(upload_file)
+            if 'comment' in df_file.columns:
+                comments = df_file['comment'].astype(str).tolist()
+            else:
+                comments = df_file.iloc[:,0].astype(str).tolist()
+        else:
+            comments = upload_file.read().decode("utf-8").splitlines()
+    except Exception as e:
+        st.error(f"File format not supported or corrupted. {e}")
 elif manual_input.strip():
     comments = manual_input.strip().split("\n")
         plt_obj = analyzer.generate_wordcloud(keyword_freq)
         st.pyplot(plt_obj)
     else:
+        st.warning("⚠️ Provide comments manually or upload a supported file.")