Arjon07CSE committed on
Commit
8367aa6
·
verified ·
1 Parent(s): 93ad989

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +280 -38
src/streamlit_app.py CHANGED
@@ -1,40 +1,282 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
15
-
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
+ import json
6
+ import plotly.express as px
7
+ import re
8
+
9
+ # --- CONFIG & SETUP ---
10
+ st.set_page_config(
11
+ page_title="BD Political Sentinel AI",
12
+ page_icon="🇧🇩",
13
+ layout="wide"
14
+ )
15
+
16
+ # --- KEYWORD DATABASE (To make the AI Smarter) ---
17
+ # This dictionary helps the AI explicitly understand symbols associated with parties.
18
+ POLITICAL_CONTEXT = {
19
+ "BNP": {
20
+ "keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy",
21
+ "rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ"
22
+ },
23
+ "Awami League": {
24
+ "keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat",
25
+ "rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত"
26
+ },
27
+ "Jamaat-e-Islami": {
28
+ "keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul",
29
+ "rival_keywords": "নাস্তিক, লীগ, শাহবাগ"
30
+ },
31
+ "General/Interim Govt": {
32
+ "keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ",
33
+ "rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা"
34
+ }
35
+ }
36
+
37
+ # --- MODEL LOADER ---
38
@st.cache_resource
def load_model():
    """Build the text-generation pipeline used by every analysis in the app.

    The tokenizer and causal-LM weights for ``hishab/titulm-llama-3.2-3b-v2.0``
    are loaded and wrapped in a ``transformers`` text-generation pipeline
    configured for short, low-temperature output (the app expects a small JSON
    object back).

    Returns:
        The configured pipeline, or ``None`` if anything went wrong while
        loading. Callers are expected to check for ``None``.
    """
    # Local import keeps this block self-contained; the file does not import
    # logging at module level.
    import logging

    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_id)

        # Half precision when a CUDA device is present, full precision
        # otherwise. No quantization is applied here.
        dtype = torch.float16 if torch.cuda.is_available() else torch.float32

        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto",
        )

        return pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=150,  # keep it short: only a small JSON object is expected
            do_sample=True,
            temperature=0.2,  # low temperature -> more deterministic answers
            top_p=0.9,
        )
    except Exception:
        # Top-level boundary: keep the "return None on failure" contract, but
        # record the traceback so the cause is not lost.
        # NOTE(review): st.cache_resource presumably caches this None as well,
        # so a failed load may persist until the cache is cleared - confirm.
        logging.getLogger(__name__).exception("Failed to load model %s", model_id)
        return None
66
+
67
+ # Load Model
68
# Sidebar: branding, hardware notice, and the (cached) model load.
with st.sidebar:
    st.image("https://cdn-icons-png.flaticon.com/512/6656/6656046.png", width=50)
    st.title("AI Settings")
    # Tell the user up front whether inference will run on GPU or CPU.
    if torch.cuda.is_available():
        st.success("🚀 GPU Detected! Inference will be fast.")
    else:
        st.warning("⚠️ Running on CPU. Inference might be slow.")

    # NOTE(review): the spinner is assumed to live inside the sidebar block;
    # confirm against the original indentation.
    with st.spinner("Waking up the Neural Network..."):
        llm = load_model()

# load_model() returns None on failure; nothing below can work without a
# model, so halt the script run here.
if not llm:
    st.error("Model failed to load.")
    st.stop()
82
+
83
+ # --- HELPER FUNCTIONS ---
84
def clean_json_output(text):
    """Extract the last valid JSON object embedded in free-form model output.

    The text is scanned left to right; at every ``{`` a real JSON decode is
    attempted. This handles nested objects and braces inside string values,
    which a simple ``{...}`` regex cannot, and a malformed trailing fragment
    no longer hides an earlier well-formed answer.

    Args:
        text: Raw text produced by the model.

    Returns:
        The last top-level JSON object found, as a ``dict``, or ``None`` when
        ``text`` is not a string or contains no decodable object.
    """
    if not isinstance(text, str):
        return None

    decoder = json.JSONDecoder()
    last_object = None
    start = text.find("{")
    while start != -1:
        try:
            candidate, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a JSON object at this brace; try the next one.
            start = text.find("{", start + 1)
            continue
        if isinstance(candidate, dict):
            last_object = candidate
        # Resume after the decoded object so nested braces are not revisited.
        start = text.find("{", end)
    return last_object
97
+
98
+ # --- PROMPT GENERATORS ---
99
+
100
def generate_news_prompt(news_text, target):
    """Build the chat messages that ask the model to rate a news item.

    Args:
        news_text: The headline or news text to analyze.
        target: Name of the person the impact is judged for.

    Returns:
        A two-element list of chat messages: a system message with the task,
        label definitions and required JSON format, then a user message
        carrying the news text.
    """
    system_message = {
        "role": "system",
        "content": f"""You are a Political Analyst for Bangladesh.
Task: Analyze if the news is FAVOURABLE or UNFAVORABLE for: {target}.

DEFINITIONS:
- FAVOURABLE: Positive news, legal wins, return to power, praise.
- UNFAVORABLE: Negative news, arrest, criticism, loss.
- NEUTRAL: Factual news with no clear bias.

Response Format: JSON only -> {{"label": "FAVOURABLE"|"UNFAVORABLE"|"NEUTRAL", "reasoning": "Bangla sentence"}}
""",
    }
    user_message = {"role": "user", "content": f"News: {news_text}"}
    return [system_message, user_message]
114
+
115
def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
    """Build the chat messages that ask the model to label one comment.

    Args:
        comment_text: The public comment to classify.
        target: Person the sentiment is measured towards.
        party: Political affiliation used as context for slogans and symbols.
        keywords: Comma-separated symbols/slogans associated with ``party``.
        rival_keywords: Comma-separated symbols/slogans of the party's rivals.

    Returns:
        A two-element list of chat messages: a system message with the rules,
        examples and required JSON format, then a user message carrying the
        comment.
    """
    system_message = {
        "role": "system",
        "content": f"""You are an Expert Bangla Sentiment Analyzer.
Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).

RULES:
1. If comment mentions {party} symbols ({keywords}) or praises {target} -> POSITIVE.
2. If comment supports {party}'s rivals ({rival_keywords}) or attacks {target} -> NEGATIVE.
3. If comment is sarcastic (mocking praise) -> NEGATIVE.

Examples:
- Comment: "Zindabad!" (Context: {party}) -> POSITIVE
- Comment: "Chor!" (Context: {party}) -> NEGATIVE

Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short Bangla explanation"}}
""",
    }
    user_message = {"role": "user", "content": f"Comment: {comment_text}"}
    return [system_message, user_message]
133
+
134
+ # --- MAIN UI ---
135
+
136
+ st.title("🇧🇩 Smart Political Sentiment Analyzer")
137
+ st.markdown("Context-Aware Analysis for Bangladesh Politics")
138
+
139
+ # Tabs for the two sections
140
+ tab_news, tab_comments = st.tabs(["📰 Political News Analysis", "📣 Public Sentiment (Comments)"])
141
+
142
+ # =======================
143
+ # SECTION 1: NEWS
144
+ # =======================
145
def _analyze_news(text, target):
    """Run one news item through the model; return (parsed JSON or None, raw reply)."""
    reply = llm(generate_news_prompt(text, target))
    raw_reply = reply[0]['generated_text'][-1]['content']
    return clean_json_output(raw_reply), raw_reply


with tab_news:
    st.header("Is this news Good or Bad for the Candidate?")

    col1, col2 = st.columns(2)
    with col1:
        target_name_news = st.text_input("Candidate Name (Who is this about?)", "তারেক রহমান")
    with col2:
        news_input_method = st.radio("Input Method", ["Paste Text", "Upload CSV"])

    if news_input_method == "Paste Text":
        news_text = st.text_area("Paste News Headline:", height=100)
        if st.button("Analyze News Impact", type="primary"):
            if not (news_text or "").strip():
                # Previously an empty submission did nothing at all.
                st.warning("Please paste a news headline first.")
            else:
                data, output_text = None, ""
                with st.spinner("Analyzing impact..."):
                    try:
                        data, output_text = _analyze_news(news_text, target_name_news)
                    except Exception as exc:
                        # Show the failure instead of crashing the script run.
                        output_text = f"Inference failed: {exc}"

                if data:
                    st.subheader(f"Result: {data.get('label', 'ERROR')}")
                    st.write(f"**Reasoning:** {data.get('reasoning', '')}")
                else:
                    st.error("Could not parse AI response.")
                    st.code(output_text)

    elif news_input_method == "Upload CSV":
        uploaded_news = st.file_uploader("Upload News CSV", type=["csv"])
        if uploaded_news:
            df_news = pd.read_csv(uploaded_news)
            text_col = st.selectbox("Select Headline Column", df_news.columns)

            if st.button("Analyze Batch News"):
                results = []
                prog_bar = st.progress(0)
                total_rows = len(df_news)

                # Progress is driven by the row position, not the DataFrame
                # index label, which is not guaranteed to be 0..n-1.
                for position, (_, row) in enumerate(df_news.iterrows(), start=1):
                    headline = row[text_col]
                    try:
                        data, _ = _analyze_news(str(headline), target_name_news)
                    except Exception as exc:
                        # One failing row must not abort the whole batch.
                        data = {"label": "ERROR", "reasoning": str(exc)}

                    results.append({
                        "News": headline,
                        # .get(): the model may return JSON without these keys.
                        "Impact": data.get('label', "ERROR") if data else "ERROR",
                        "Reasoning": data.get('reasoning', "") if data else "",
                    })
                    prog_bar.progress(position / total_rows)

                if results:
                    res_df = pd.DataFrame(results)
                    st.dataframe(res_df)

                    # Chart
                    fig = px.pie(res_df, names="Impact", title=f"Media Sentiment for {target_name_news}")
                    st.plotly_chart(fig)
                else:
                    # An empty frame has no "Impact" column for px.pie to use.
                    st.warning("The uploaded CSV has no rows to analyze.")
199
 
200
+ # =======================
201
+ # SECTION 2: COMMENTS
202
+ # =======================
203
def _label_comment(text, target, party, keywords, rival_keywords):
    """Label one comment; return (label, explanation). Never raises."""
    try:
        reply = llm(generate_comment_prompt(text, target, party, keywords, rival_keywords))
        raw_reply = reply[0]['generated_text'][-1]['content']
        parsed = clean_json_output(raw_reply)
    except Exception as exc:
        # Best-effort per row: record the failure and keep the batch going.
        return "ERROR", str(exc)
    if not parsed:
        # Keep the raw reply so the user can see what the model produced.
        return "ERROR", raw_reply
    return parsed.get("label", "NEUTRAL"), parsed.get("reasoning", "Parse Fail")


with tab_comments:
    st.header("Context-Aware Comment Labeling")
    st.info("The AI uses the 'Target Party' to understand slogans like 'Dhaner Sheesh' or 'Nouka'.")

    # 1. ESTABLISH CONTEXT
    c1, c2 = st.columns(2)
    with c1:
        target_entity_cmt = st.text_input("Target Person (e.g., Khaleda Zia)", "Khaleda Zia")
    with c2:
        party_context = st.selectbox("Political Affiliation (Defines Symbols)", list(POLITICAL_CONTEXT.keys()))

    # Get keywords based on selection
    selected_keywords = POLITICAL_CONTEXT[party_context]["keywords"]
    selected_rivals = POLITICAL_CONTEXT[party_context]["rival_keywords"]

    st.caption(f"**AI Context Memory:** Positive Keywords = [{selected_keywords}] | Negative Keywords = [{selected_rivals}]")

    # 2. INPUT
    uploaded_comments = st.file_uploader("Upload Comments CSV", type=["csv"], key="cmt_up")

    if uploaded_comments:
        df_cmt = pd.read_csv(uploaded_comments)
        st.write("Preview:", df_cmt.head(3))
        comment_col = st.selectbox("Which column contains the comments?", df_cmt.columns)

        # Identifies the inputs a result set belongs to, so results stored in
        # session state are only shown for the same file, column and context.
        run_key = (
            getattr(uploaded_comments, "name", None),
            getattr(uploaded_comments, "size", None),
            str(comment_col),
            target_entity_cmt,
            party_context,
        )

        if st.button("Start Intelligent Labeling", type="primary"):
            final_data = []
            bar = st.progress(0)

            total = len(df_cmt)
            # Progress is driven by the row position (not the index label) and
            # is advanced for skipped rows too.
            for position, (_, row) in enumerate(df_cmt.iterrows(), start=1):
                value = row[comment_col]
                # Missing cells would otherwise become the string "nan",
                # which is exactly long enough to pass the length filter.
                txt = "" if pd.isna(value) else str(value)

                # Skip empty or very short comments
                if len(txt.strip()) >= 3:
                    label, reason = _label_comment(
                        txt, target_entity_cmt, party_context, selected_keywords, selected_rivals
                    )
                    final_data.append({
                        "Original Comment": txt,
                        "Sentiment": label,
                        "Why?": reason
                    })
                bar.progress(position / total)

            # Persist the results: clicking the download button reruns the
            # script, and on that rerun st.button() above is False again.
            st.session_state["cmt_results"] = (run_key, pd.DataFrame(final_data))
            st.success("Analysis Complete!")

        # RESULTS (rendered outside the button branch so they survive reruns)
        stored = st.session_state.get("cmt_results")
        if stored is not None and stored[0] == run_key:
            res_df_cmt = stored[1]

            if res_df_cmt.empty:
                # px.pie would fail: an empty frame has no "Sentiment" column.
                st.warning("No comments long enough to label were found.")
            else:
                # Visualization
                row1, row2 = st.columns([2, 1])
                with row1:
                    st.dataframe(res_df_cmt)
                with row2:
                    # Custom colors for politics
                    color_map = {
                        "POSITIVE": "#00CC96",  # Green
                        "NEGATIVE": "#EF553B",  # Red
                        "NEUTRAL": "#636EFA",  # Blue
                        "ERROR": "#000000"
                    }
                    fig = px.pie(res_df_cmt, names="Sentiment", title="Public Sentiment", color="Sentiment", color_discrete_map=color_map)
                    st.plotly_chart(fig)

                # Download
                csv_dl = res_df_cmt.to_csv(index=False).encode('utf-8')
                st.download_button("Download Labeled Data", csv_dl, "analyzed_comments.csv", "text/csv")