AA_FinalFC / app.py
ahm14's picture
Update app.py
8106444 verified
import functools
import io
import re

import nltk
import streamlit as st
from docx import Document
from langdetect import detect
from transformers import pipeline
# Download required NLTK resources
# This is a module-level call, so it runs each time the script is executed.
# NOTE(review): no code visible in this file calls an NLTK API afterwards;
# confirm the 'punkt' resource is still needed before keeping the download.
nltk.download('punkt')
# Tone categories: maps each tone label to the lower-case keywords that
# extract_tone() searches for in a caption. Insertion order is the order in
# which detected tones are reported.
tone_categories = {
    "Emotional": [
        "urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern",
    ],
    "Harsh": [
        "corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian",
    ],
    "Somber": [
        "tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation",
    ],
    "Motivational": [
        "rise", "resist", "mobilize", "inspire", "courage", "change", "determination",
    ],
    "Informative": [
        "announcement", "event", "scheduled", "update", "details", "protest", "statement",
    ],
    "Positive": [
        "progress", "unity", "hope", "victory", "together", "solidarity", "uplifting",
    ],
    "Happy": [
        "joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony",
    ],
    "Angry": [
        "rage", "injustice", "fury", "resentment", "outrage", "betrayal",
    ],
    "Fearful": [
        "threat", "danger", "terror", "panic", "risk", "warning",
    ],
    "Sarcastic": [
        "brilliant", "great job", "amazing", "what a surprise", "well done", "as expected",
    ],
    "Hopeful": [
        "optimism", "better future", "faith", "confidence", "looking forward",
    ],
}
# Frame categories: maps each frame label to the lower-case keywords counted
# by categorize_frame_importance(). The labels also serve as the candidate
# labels handed to the zero-shot classifier in extract_frames().
frame_categories = {
    "Human Rights & Justice": [
        "rights", "law", "justice", "legal", "humanitarian",
    ],
    "Political & State Accountability": [
        "government", "policy", "state", "corruption", "accountability",
    ],
    "Gender & Patriarchy": [
        "gender", "women", "violence", "patriarchy", "equality",
    ],
    "Religious Freedom & Persecution": [
        "religion", "persecution", "minorities", "intolerance", "faith",
    ],
    "Grassroots Mobilization": [
        "activism", "community", "movement", "local", "mobilization",
    ],
    "Environmental Crisis & Activism": [
        "climate", "deforestation", "water", "pollution", "sustainability",
    ],
    "Anti-Extremism & Anti-Violence": [
        "extremism", "violence", "hate speech", "radicalism", "mob attack",
    ],
    "Social Inequality & Economic Disparities": [
        "class privilege", "labor rights", "economic", "discrimination",
    ],
    "Activism & Advocacy": [
        "justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech",
    ],
    "Systemic Oppression": [
        "discrimination", "oppression", "minorities", "marginalized", "exclusion",
    ],
    "Intersectionality": [
        "intersecting", "women", "minorities", "struggles", "multiple oppression",
    ],
    "Call to Action": [
        "join us", "sign petition", "take action", "mobilize", "support movement",
    ],
    "Empowerment & Resistance": [
        "empower", "resist", "challenge", "fight for", "stand up",
    ],
    "Climate Justice": [
        "environment", "climate change", "sustainability", "biodiversity", "pollution",
    ],
    "Human Rights Advocacy": [
        "human rights", "violations", "honor killing", "workplace discrimination", "law reform",
    ],
}
# Detect language
def detect_language(text):
    """Return the language code reported by ``detect`` for ``text``.

    Args:
        text: The caption to inspect.

    Returns:
        The value returned by ``detect(text)``, or ``"unknown"`` when the
        text is empty or whitespace-only, or when detection raises.
    """
    # Blank input has nothing to detect, so answer without calling the detector.
    if not text or (isinstance(text, str) and not text.strip()):
        return "unknown"
    try:
        return detect(text)
    except Exception:
        # Detection is deliberately best-effort. Catching Exception (rather
        # than a bare ``except:``) keeps that fallback while letting
        # KeyboardInterrupt and SystemExit propagate.
        return "unknown"
# Extract tone
def extract_tone(text, categories=None):
    """Return the tone labels whose keywords appear in ``text``.

    A keyword matches case-insensitively and only where it begins a word, so
    "protest" still matches "protests" but "rage" no longer matches inside
    "courage" and "unity" no longer matches inside "community".

    Args:
        text: The caption to inspect.
        categories: Optional mapping of tone label to keyword list. Defaults
            to the module-level ``tone_categories``.

    Returns:
        The matching labels in the mapping's iteration order, or
        ``["Neutral"]`` when no keyword matches.
    """
    if categories is None:
        categories = tone_categories

    # Lower-case once instead of once per keyword.
    lowered = text.lower()
    detected_tones = [
        category
        for category, keywords in categories.items()
        if any(re.search(r"\b" + re.escape(keyword), lowered) for keyword in keywords)
    ]
    return detected_tones or ["Neutral"]
# Categorize frames based on importance
def categorize_frame_importance(text, keywords):
    """Grade how prominently a frame's keywords feature in ``text``.

    Occurrences are counted as case-insensitive substring matches and summed
    over all keywords.

    Args:
        text: The caption to inspect.
        keywords: Lower-case keywords belonging to one frame.

    Returns:
        ``"Major Focus"`` for more than three occurrences,
        ``"Significant Focus"`` for two or three, otherwise
        ``"Minor Mention"`` (which therefore also covers zero occurrences).
    """
    occurrences = 0
    for keyword in keywords:
        occurrences += text.lower().count(keyword)

    if occurrences > 3:
        return "Major Focus"
    if occurrences in (2, 3):
        return "Significant Focus"
    return "Minor Mention"
@functools.lru_cache(maxsize=1)
def _get_frame_model():
    """Build the zero-shot classifier once and reuse it on later calls."""
    # Constructing the pipeline loads the model, so it must not be repeated
    # for every caption that reaches the fallback path.
    # NOTE(review): if the hosting framework re-executes this module on each
    # rerun, this cache is rebuilt too; a framework-level resource cache may
    # be worth considering - confirm.
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Extract frames with categorization
def extract_frames(text):
    """Return the frames present in ``text`` with their importance grade.

    Keyword counting is tried first. Only when no frame reaches at least
    "Significant Focus" is the zero-shot classifier consulted.

    Args:
        text: The caption to inspect.

    Returns:
        A dict mapping frame label to ``"Major Focus"`` or
        ``"Significant Focus"``. Empty or whitespace-only text yields an
        empty dict without invoking the classifier.
    """
    # Blank text has no keywords and gives the classifier nothing to judge.
    if not text or not text.strip():
        return {}

    detected_frames = {}
    for category, keywords in frame_categories.items():
        importance = categorize_frame_importance(text, keywords)
        if importance != "Minor Mention":
            detected_frames[category] = importance

    if not detected_frames:
        frame_model = _get_frame_model()
        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
        # Keep the first two labels the classifier returns (the pipeline
        # documents "labels" as ordered by likelihood).
        for label in model_result["labels"][:2]:
            detected_frames[label] = "Significant Focus"
    return detected_frames
# Extract hashtags
def extract_hashtags(text):
    """Return every hashtag in ``text``, in order of appearance.

    A hashtag is a ``#`` followed by one or more word characters; repeated
    hashtags are returned once per occurrence.
    """
    return [found.group(0) for found in re.finditer(r"#\w+", text)]
# Extract captions from DOCX
def extract_captions_from_docx(docx_file):
    """Group the paragraphs of a DOCX file under their "Post N" headings.

    A paragraph whose stripped text starts with "Post <digits>"
    (case-insensitive) opens a new post. Every following non-blank paragraph
    is added to that post until the next heading. Paragraphs before the first
    heading are ignored.

    Args:
        docx_file: Path or file-like object passed straight to ``Document``.

    Returns:
        A dict mapping heading text to the post's paragraphs joined with
        single spaces. Posts with no caption text are omitted.
    """
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Blank paragraphs would otherwise add stray spaces to the joined
            # caption and let blank-only posts slip past the final filter.
            continue
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            # setdefault keeps lines already collected if a heading repeats,
            # rather than silently discarding them.
            captions.setdefault(current_post, [])
        elif current_post is not None:
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}
# Generate a DOCX file
def generate_docx(output_data):
    """Render the analysis results as a Word document held in memory.

    Args:
        output_data: Mapping of caption title to its analysis dict. Each
            analysis dict is read for the keys 'Full Caption', 'Language',
            'Tone of Caption', 'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        An ``io.BytesIO`` containing the saved document, rewound to the start
        so it can be read immediately.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    position = 0
    for caption, result in output_data.items():
        position += 1  # sections are numbered from 1
        report.add_heading(f"{position}. {caption}", level=1)

        report.add_paragraph("Full Caption:")
        report.add_paragraph(result['Full Caption'], style="Quote")

        language = result['Language']
        report.add_paragraph(f"Language: {language}")

        tones = ', '.join(result['Tone of Caption'])
        report.add_paragraph(f"Tone of Caption: {tones}")

        hashtag_count = result['Hashtag Count']
        report.add_paragraph(f"Number of Hashtags: {hashtag_count}")

        hashtags = ', '.join(result['Hashtags'])
        report.add_paragraph(f"Hashtags Found: {hashtags}")

        report.add_heading('Frames:', level=2)
        frames = result['Frames']
        for frame_name in frames:
            report.add_paragraph(f"{frame_name}: {frames[frame_name]}")

    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)
    return buffer
def _analyze_caption(text):
    """Run every analysis step on one caption and collect the results."""
    hashtags = extract_hashtags(text)  # computed once, reused for the count
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': extract_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text),
    }


# Streamlit UI
st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
st.write("Enter text or upload a DOCX file for analysis:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Results keyed by caption title; manual input and DOCX posts share the dict.
output_data = {}

# Whitespace-only input has nothing to analyse, so treat it like empty input.
if input_text and input_text.strip():
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Text analysis completed.")

if uploaded_file:
    try:
        captions = extract_captions_from_docx(uploaded_file)
    except Exception as exc:
        # Top-level UI boundary: show an unreadable upload as a message
        # instead of a raw traceback, and keep any manual-input result.
        st.error(f"Could not read the uploaded DOCX file: {exc}")
    else:
        for caption, text in captions.items():
            output_data[caption] = _analyze_caption(text)
        st.success("DOCX file analysis completed.")

# Offer the report only when at least one caption was analysed.
if output_data:
    docx_file = generate_docx(output_data)
    st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")