Harshb11 committed on
Commit
03c0ebe
·
verified ·
1 Parent(s): 405c478

Update mca_comment_analyzer.py

Browse files
Files changed (1) hide show
  1. mca_comment_analyzer.py +17 -60
mca_comment_analyzer.py CHANGED
@@ -1,5 +1,6 @@
 
 
1
  import os
2
- import streamlit as st
3
  import pandas as pd
4
  import torch
5
  from transformers import pipeline
@@ -7,40 +8,42 @@ from wordcloud import WordCloud
7
  import matplotlib.pyplot as plt
8
  from collections import Counter
9
  import nltk
10
- from nltk.corpus import stopwords
11
  import random
12
  from datetime import datetime, timedelta
13
 
14
- # ---- Config
15
-
 
16
  os.environ["MPLCONFIGDIR"] = "/tmp/.matplotlib"
 
17
 
18
- st.set_page_config(page_title="MCA Demo Comment Analyzer", layout="wide")
19
-
20
- # ---- NLTK
21
- nltk.download('stopwords', quiet=True)
22
  STOPWORDS = set(stopwords.words('english'))
23
 
24
- # ---- Lightweight MCA Analyzer
 
 
25
  class MCACommentAnalyzer:
26
  def __init__(self):
27
  device = 0 if torch.cuda.is_available() else -1
28
  print("Using device:", "GPU" if device==0 else "CPU")
29
-
30
- # Lightweight sentiment model
31
  self.sentiment_model = pipeline(
32
  "sentiment-analysis",
33
  model="distilbert-base-uncased-finetuned-sst-2-english",
34
  device=device
35
  )
36
-
37
- # Lightweight summarizer
38
  self.summarizer = pipeline(
39
  "summarization",
40
  model="t5-small",
41
  device=device
42
  )
43
-
44
  self.stop_words = STOPWORDS
45
 
46
  def map_sentiment(self, pred, text):
@@ -121,49 +124,3 @@ class MCACommentAnalyzer:
121
  if filename:
122
  plt.savefig(filename, bbox_inches='tight')
123
  return plt
124
-
125
- # ---- Streamlit UI
126
- st.title("📊 MCA Demo Comment Analyzer")
127
- st.sidebar.header("Upload or Enter Comments")
128
-
129
- upload_file = st.sidebar.file_uploader("Upload CSV/Excel/TXT", type=["csv","xlsx","txt"])
130
- manual_input = st.sidebar.text_area("Or enter comments manually (one per line)")
131
-
132
- comments = []
133
- if upload_file:
134
- try:
135
- if upload_file.name.endswith(".csv"):
136
- df_file = pd.read_csv(upload_file)
137
- if 'comment' in df_file.columns:
138
- comments = df_file['comment'].astype(str).tolist()
139
- else:
140
- comments = df_file.iloc[:,0].astype(str).tolist()
141
- elif upload_file.name.endswith(".xlsx"):
142
- df_file = pd.read_excel(upload_file)
143
- if 'comment' in df_file.columns:
144
- comments = df_file['comment'].astype(str).tolist()
145
- else:
146
- comments = df_file.iloc[:,0].astype(str).tolist()
147
- else:
148
- comments = upload_file.read().decode("utf-8").splitlines()
149
- except Exception as e:
150
- st.error(f"File format not supported or corrupted. {e}")
151
- elif manual_input.strip():
152
- comments = manual_input.strip().split("\n")
153
-
154
- if st.sidebar.button("Analyze"):
155
- if comments:
156
- analyzer = MCACommentAnalyzer()
157
- df, keyword_freq = analyzer.process_comments(comments)
158
-
159
- st.subheader("📌 Analysis Results")
160
- st.dataframe(df, use_container_width=True)
161
-
162
- st.subheader("📊 Sentiment Distribution")
163
- st.bar_chart(df["Sentiment"].value_counts())
164
-
165
- st.subheader("☁️ Word Cloud")
166
- plt_obj = analyzer.generate_wordcloud(keyword_freq)
167
- st.pyplot(plt_obj)
168
- else:
169
- st.warning("⚠️ Provide comments manually or upload a supported file.")
 
1
+ # mca_comment_analyzer.py
2
+
3
  import os
 
4
  import pandas as pd
5
  import torch
6
  from transformers import pipeline
 
8
  import matplotlib.pyplot as plt
9
  from collections import Counter
10
  import nltk
 
11
  import random
12
  from datetime import datetime, timedelta
13
 
14
+ # -----------------------------
15
+ # Configs
16
+ # -----------------------------
17
  os.environ["MPLCONFIGDIR"] = "/tmp/.matplotlib"
18
+ os.environ["NLTK_DATA"] = "/tmp/nltk_data"
19
 
20
+ # NLTK Stopwords
21
+ nltk.download('stopwords', download_dir="/tmp/nltk_data", quiet=True)
22
+ from nltk.corpus import stopwords
 
23
  STOPWORDS = set(stopwords.words('english'))
24
 
25
+ # -----------------------------
26
+ # MCA Comment Analyzer
27
+ # -----------------------------
28
  class MCACommentAnalyzer:
29
  def __init__(self):
30
  device = 0 if torch.cuda.is_available() else -1
31
  print("Using device:", "GPU" if device==0 else "CPU")
32
+
33
+ # Sentiment model
34
  self.sentiment_model = pipeline(
35
  "sentiment-analysis",
36
  model="distilbert-base-uncased-finetuned-sst-2-english",
37
  device=device
38
  )
39
+
40
+ # Summarizer
41
  self.summarizer = pipeline(
42
  "summarization",
43
  model="t5-small",
44
  device=device
45
  )
46
+
47
  self.stop_words = STOPWORDS
48
 
49
  def map_sentiment(self, pred, text):
 
124
  if filename:
125
  plt.savefig(filename, bbox_inches='tight')
126
  return plt