Spaces:

Arjon07CSE
/

spf_sentiment

Sleeping

App Files Files Community

spf_sentiment / src /streamlit_app.py

Arjon07CSE

Update src/streamlit_app.py

33c09e3 verified 21 days ago

raw

history blame contribute delete

10.1 kB

	import streamlit as st
	import pandas as pd
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	import json
	import plotly.express as px
	import re

	# --- CONFIG & SETUP ---
	# Page-level Streamlit settings, gathered in one mapping and applied in a
	# single call: browser-tab title, tab icon, and the wide page layout.
	_PAGE_SETTINGS = {
	    "page_title": "BD Political Sentinel AI",
	    "page_icon": "🇧🇩",
	    "layout": "wide",
	}
	st.set_page_config(**_PAGE_SETTINGS)

	# --- ADVANCED KEYWORD DATABASE (Tuned for your CSV Data) ---
	# Lookup table of prompt context, keyed by the party label offered in the
	# "Political Affiliation" select box.
	# Each entry holds two comma-separated strings (not lists):
	#   "keywords"       - interpolated into the system prompt as markers of
	#                      support for the party.
	#   "rival_keywords" - interpolated into the system prompt as names an
	#                      attacker would call the party.
	# Both strings are pasted verbatim into the prompt text, so they are read
	# by the language model rather than matched programmatically.
	POLITICAL_CONTEXT = {
	"BNP": {
	"keywords": "ধানের শীষ, জিন্দাবাদ, জিয়ার সৈনিক, দেশনেত্রী, তারেক, Sheaf of Paddy, BNP, 71 chetona",
	"rival_keywords": "নৌকা, ভোট চোর, হাসিনা, লীগ, চাঁদাবাজ, চান্দা, দুর্নীতি, terrorist, arson"
	},
	"Awami League": {
	"keywords": "নৌকা, জয় বাংলা, মুজিব, হাসিনা, শেখের বেটি, Boat, development, 71 er chetona",
	"rival_keywords": "ধানের শীষ, চোর, বিএনপি, জামায়াত, rajakar, killer, dictator, fascist"
	},
	"Jamaat-e-Islami": {
	"keywords": "দাড়িপাল্লা, আল্লাহ, নারায়ে তাকবির, দ্বীন, ইসলাম, Mamunul, Jammat, Shibir, Islamic",
	"rival_keywords": "নাস্তিক, লীগ, শাহবাগ, rajakar, war criminal, terrorist, jongi"
	},
	"General/Interim Govt": {
	"keywords": "ইউনূস, ছাত্র সমাজ, সংস্কার, জেনারেশন জেড, ইনসাফ, Yunus, Student Power",
	"rival_keywords": "স্বৈরাচার, ফ্যাসিস্ট, হাসিনা, anarchy, instability"
	}
	}

	# --- MODEL LOADER ---
	@st.cache_resource
	def _load_pipeline():
	    """Build the text-generation pipeline, raising on any failure.

	    Kept separate from ``load_model`` so that only a successfully built
	    pipeline is stored by ``st.cache_resource``; an exception propagates
	    out and leaves nothing cached, so a later rerun retries the load.
	    """
	    # Using the Llama-3.2-3B model which fits on Free Tier (CPU) or GPU
	    model_id = "hishab/titulm-llama-3.2-3b-v2.0"
	    tokenizer = AutoTokenizer.from_pretrained(model_id)
	    # Auto-detect device: use float32 for CPU stability, float16 for GPU speed
	    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

	    model = AutoModelForCausalLM.from_pretrained(
	        model_id,
	        torch_dtype=dtype,
	        device_map="auto",
	    )

	    return pipeline(
	        "text-generation",
	        model=model,
	        tokenizer=tokenizer,
	        max_new_tokens=150,
	        do_sample=True,
	        temperature=0.2,  # Low temp = Logic focused
	        top_p=0.9,
	    )


	def load_model():
	    """Return the cached text-generation pipeline, or ``None`` on failure.

	    Returns:
	        The ``transformers`` pipeline built by ``_load_pipeline``, or
	        ``None`` when the tokenizer/model could not be loaded. The failure
	        is written to the process log with its traceback, and is not
	        cached, so the next script run attempts the load again.
	    """
	    import logging

	    try:
	        return _load_pipeline()
	    except Exception:
	        # Top-level boundary: callers test the result for falsiness, so
	        # report the cause in the logs instead of discarding it.
	        logging.getLogger(__name__).exception("Failed to load language model")
	        return None


	# Keep the cache-clearing hook reachable under the public name.
	load_model.clear = _load_pipeline.clear

	# Sidebar Status
	# Status panel: report the compute device, load the model, and halt the
	# app if the model is unavailable.
	# NOTE(review): the original indentation is not visible here, so it is
	# unclear whether the spinner and the ready/failed messages are rendered
	# inside the sidebar or in the main page - confirm against the repository.
	with st.sidebar:
	st.title("⚙️ System Status")
	# The device message is informational only.
	if torch.cuda.is_available():
	st.success("🟢 GPU Active (Fast Mode)")
	else:
	st.warning("🟠 CPU Mode (Standard Speed)")

	# load_model() returns None when loading fails, which is what the
	# falsiness check below detects.
	with st.spinner("Initializing AI Engine..."):
	llm = load_model()

	if not llm:
	st.error("❌ Model Failed to Load. Check HuggingFace Logs.")
	# Stop the script run here so nothing below tries to use a missing model.
	st.stop()
	else:
	st.success("✅ AI Brain Ready")

	# --- HELPER FUNCTIONS ---
	def clean_json_output(text):
	    """Extract the last valid JSON object embedded in free-form LLM output.

	    Every ``{`` in ``text`` is tried as the start of a JSON value using the
	    real JSON parser, so objects that contain nested objects or braces
	    inside string values are recovered intact, and stray ``{...}``
	    fragments that are not JSON are skipped.

	    Args:
	        text: Raw model output; anything that is not a ``str`` yields None.

	    Returns:
	        The last JSON object found, as a ``dict``, or ``None`` when the
	        text contains no parseable JSON object.
	    """
	    if not isinstance(text, str):
	        return None

	    decoder = json.JSONDecoder()
	    last_object = None
	    start = text.find("{")
	    while start != -1:
	        try:
	            candidate, end = decoder.raw_decode(text, start)
	        except (ValueError, RecursionError):
	            # Not valid JSON from this brace; try the next one.
	            start = text.find("{", start + 1)
	            continue
	        if isinstance(candidate, dict):
	            last_object = candidate
	        # Resume after the parsed object so its nested braces are not
	        # re-examined as separate candidates.
	        start = text.find("{", end)
	    return last_object

	def generate_comment_prompt(comment_text, target, party, keywords, rival_keywords):
	    """Build the chat messages asking the model to classify one comment.

	    Args:
	        comment_text: The comment to analyse.
	        target: Name of the candidate/party the sentiment is measured towards.
	        party: Party label used in the rules of the system prompt.
	        keywords: Text inserted into the prompt as markers of support.
	        rival_keywords: Text inserted into the prompt as markers of attack.

	    Returns:
	        A two-element list of ``{"role", "content"}`` dicts: the system
	        message with the rules, examples and required JSON format, then
	        the user message carrying the comment.
	    """
	    # The label alternatives use plain '|' separators: a backslash before
	    # '|' is not a valid string escape and would end up in the prompt.
	    return [
	        {"role": "system", "content": f"""You are an Expert Bangla Sentiment Analyzer.
	Task: Analyze the sentiment of the comment TOWARDS the target: {target} ({party}).

	CRITICAL RULES:
	1. Support for {party} or '{keywords}' = POSITIVE.
	2. Attacks on {party}, calling them '{rival_keywords}' = NEGATIVE.
	3. Support for RIVAL parties = NEGATIVE.
	4. Mixed: "Hate X, Love {party}" = POSITIVE. "Love X, Hate {party}" = NEGATIVE.

	Examples:
	- Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: BNP) -> POSITIVE (Loves BNP)
	- Input: "Jammat shibir boycott ❌ Bnp 🥰" (Target: Jamaat) -> NEGATIVE (Hates Jamaat)
	- Input: "Chadabaz BNP" (Target: BNP) -> NEGATIVE

	Response Format: JSON only -> {{"label": "POSITIVE"|"NEGATIVE"|"NEUTRAL", "reasoning": "Short explanation"}}
	"""},
	        {"role": "user", "content": f"Comment: {comment_text}"},
	    ]

	# --- MAIN UI ---
	# Page heading and tagline.
	st.title("🇧🇩 Smart Political Sentiment Analyzer")
	st.markdown("Context-Aware Analysis for Bangla & Banglish Comments")

	# 1. SETUP CONTEXT
	# Two side-by-side inputs: the free-text target name and the party whose
	# keyword lists drive the prompt.
	st.subheader("1. Analysis Configuration")
	party_names = list(POLITICAL_CONTEXT)
	col1, col2 = st.columns(2)
	with col1:
	    target_entity = st.text_input("Target Candidate/Party Name", "BNP")
	with col2:
	    party_context = st.selectbox("Political Affiliation (Logic Mapping)", party_names)

	# Pull both keyword strings for the chosen party from a single lookup.
	context_entry = POLITICAL_CONTEXT[party_context]
	selected_keywords = context_entry["keywords"]
	selected_rivals = context_entry["rival_keywords"]

	# Show the user which keyword lists will be fed to the model.
	logic_summary = f"AI Logic: Detecting Support for {target_entity} using keywords: [{selected_keywords}] and flagging attacks like: [{selected_rivals}]"
	st.info(logic_summary)

	# 2. UPLOAD DATA
	st.subheader("2. Upload Data")
	uploaded_file = st.file_uploader(
	    "Upload CSV File (Must have 'Comment' column)",
	    type=["csv"],
	)

	if uploaded_file:
	    # Parse the upload on its own so that only genuine parse failures are
	    # reported as CSV read errors.
	    try:
	        df = pd.read_csv(uploaded_file)
	    except Exception as e:
	        df = None
	        st.error(f"Error reading CSV: {e}")

	    if df is not None:
	        try:
	            st.success(f"Loaded {len(df)} comments successfully!")

	            # Data Cleanup & Preview
	            st.dataframe(df.head(3))

	            # Column Auto-Detection: first column whose name contains
	            # "comment" / "date" (case-insensitive); the comment column
	            # falls back to the first column of the file.
	            cols = df.columns.tolist()
	            comment_col = next((c for c in cols if 'comment' in c.lower()), cols[0])
	            date_col = next((c for c in cols if 'date' in c.lower()), None)

	            col_sel1, col_sel2 = st.columns(2)
	            with col_sel1:
	                comment_col = st.selectbox("Select Comment Column", cols, index=cols.index(comment_col))
	            with col_sel2:
	                if date_col:
	                    date_col = st.selectbox("Select Date Column (Optional)", cols, index=cols.index(date_col))
	                else:
	                    st.write("No Date column detected.")

	            # 3. RUN ANALYSIS
	            # NOTE(review): st.button is only True on the run triggered by the
	            # click, so a rerun caused by another widget (e.g. the download
	            # button below) will presumably hide these results - consider
	            # keeping them in st.session_state.
	            if st.button("🚀 Start AI Analysis", type="primary"):
	                results = []
	                progress_bar = st.progress(0)
	                status_text = st.empty()

	                total = len(df)

	                # Count rows by position: the DataFrame index label is not
	                # guaranteed to run 0..n-1, and skipped rows must still
	                # advance the bar so it can reach 100%.
	                for position, (_, row) in enumerate(df.iterrows(), start=1):
	                    text = str(row[comment_col])

	                    # Basic filtering (empty cells arrive as the string "nan")
	                    if len(text) < 2 or text.lower() == "nan":
	                        progress_bar.progress(position / total)
	                        continue

	                    # Construct Prompt
	                    prompt = generate_comment_prompt(text, target_entity, party_context, selected_keywords, selected_rivals)

	                    # Run Inference
	                    try:
	                        out = llm(prompt)
	                        # Last chat message is the model's reply.
	                        raw_res = out[0]['generated_text'][-1]['content']
	                        data = clean_json_output(raw_res)

	                        # Normalise the label so that variants such as
	                        # "positive" still match the counts and colour map.
	                        label = str(data.get("label", "NEUTRAL")).strip().upper() if data else "ERROR"
	                        reason = data.get("reasoning", "Parse Error") if data else raw_res
	                    except Exception as e:
	                        # One failing comment must not abort the whole batch.
	                        label = "ERROR"
	                        reason = str(e)

	                    # Store Result
	                    results.append({
	                        "Date": row[date_col] if date_col else None,
	                        "Comment": text,
	                        "Sentiment": label,
	                        "Reasoning": reason,
	                    })

	                    # Update UI
	                    progress_bar.progress(position / total)
	                    status_text.text(f"Processing {position}/{total}: {label}")

	                # 4. VISUALIZATION
	                st.divider()
	                st.header("📊 Analysis Results")

	                if not results:
	                    # An empty frame has no "Sentiment" column to chart.
	                    st.warning("No analysable comments were found in the selected column.")
	                else:
	                    res_df = pd.DataFrame(results)

	                    # Layout: Pie Chart + Time Series
	                    row1_1, row1_2 = st.columns([1, 2])

	                    color_map = {"POSITIVE": "#00CC96", "NEGATIVE": "#EF553B", "NEUTRAL": "#636EFA", "ERROR": "grey"}

	                    with row1_1:
	                        fig_pie = px.pie(res_df, names="Sentiment", title="Overall Sentiment", color="Sentiment", color_discrete_map=color_map)
	                        st.plotly_chart(fig_pie, use_container_width=True)

	                        # Favourability = share of POSITIVE among comments that
	                        # took a side; 0 when no comment was POSITIVE/NEGATIVE.
	                        pos_count = int((res_df['Sentiment'] == 'POSITIVE').sum())
	                        neg_count = int((res_df['Sentiment'] == 'NEGATIVE').sum())
	                        decided = pos_count + neg_count
	                        favourability = (pos_count / decided) * 100 if decided else 0.0
	                        st.metric("Favourability Score", f"{favourability:.1f}%")

	                    with row1_2:
	                        if date_col:
	                            try:
	                                # Convert Date and Aggregate (unparseable dates become NaT)
	                                res_df['Date'] = pd.to_datetime(res_df['Date'], errors='coerce')
	                                time_df = res_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().reset_index(name='Count')

	                                fig_line = px.line(time_df, x='Date', y='Count', color='Sentiment',
	                                                   title="Sentiment Trends Over Time",
	                                                   color_discrete_map=color_map, markers=True)
	                                st.plotly_chart(fig_line, use_container_width=True)
	                            except Exception:
	                                st.warning("Could not create timeline chart (Date format issue).")

	                    # Data Table & Download
	                    st.dataframe(res_df)
	                    csv = res_df.to_csv(index=False).encode('utf-8')
	                    st.download_button("📥 Download Analysis Report", csv, "political_sentiment_report.csv", "text/csv")

	        except Exception as e:
	            # Top-level boundary for everything after the file was parsed.
	            st.error(f"Error during analysis: {e}")