"""Streamlit app that analyses activism captions.

For each caption (typed in, or read from "Post N" sections of a DOCX file)
the app reports the detected language, keyword-based tones, hashtags and
thematic frames, and offers the result as a downloadable DOCX report.
"""

import functools
import io
import re

import nltk
import streamlit as st
from docx import Document
from langdetect import detect
from transformers import pipeline

# Download required NLTK resources
nltk.download('punkt')

# Tone categories: tone label -> keywords whose presence signals that tone.
tone_categories = {
    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
    "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
    "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
    "Happy": ["joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony"],
    "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
    "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
    "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"],
}

# Frame categories: frame label -> keywords counted to judge the frame's weight.
frame_categories = {
    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"],
}

# Paragraphs starting with "Post <number>" open a new caption in the DOCX.
_POST_HEADING = re.compile(r"Post \d+", re.IGNORECASE)

# Prefer Streamlit's resource cache when this Streamlit version provides it;
# otherwise fall back to a plain in-process memo so the model is at least
# loaded only once per script run.
_cache_model = getattr(st, "cache_resource", None) or functools.lru_cache(maxsize=1)


@_cache_model
def _load_frame_model():
    """Build the zero-shot classifier used as the frame fallback (cached)."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Detect language
def detect_language(text):
    """Return the language code reported by ``langdetect`` for ``text``.

    Returns ``"unknown"`` when detection fails for any reason (for example
    on empty input), so callers never have to handle detector errors.
    """
    try:
        return detect(text)
    except Exception:
        # Best-effort: any detector failure degrades to "unknown".  Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return "unknown"


# Extract tone
def extract_tone(text):
    """Return the tone labels whose keywords occur in ``text``.

    Matching is a case-insensitive substring test against
    ``tone_categories``.  Returns ``["Neutral"]`` when nothing matches.
    """
    lowered = text.lower()  # lower-case once instead of once per keyword
    detected_tones = [
        category
        for category, keywords in tone_categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]
    return detected_tones or ["Neutral"]


# Categorize frames based on importance
def categorize_frame_importance(text, keywords):
    """Grade how prominent a frame is from its total keyword occurrences.

    Occurrences of every keyword are counted case-insensitively (substring
    counts) and summed: more than 3 is ``"Major Focus"``, 2 or 3 is
    ``"Significant Focus"``, anything less is ``"Minor Mention"``.
    """
    lowered = text.lower()
    keyword_count = sum(lowered.count(keyword) for keyword in keywords)
    if keyword_count > 3:
        return "Major Focus"
    if keyword_count >= 2:
        return "Significant Focus"
    return "Minor Mention"


# Extract frames with categorization
def extract_frames(text):
    """Return a ``{frame label: importance}`` dict for ``text``.

    Frames are first scored by keyword counts; only those above
    ``"Minor Mention"`` are kept.  If none qualify, the zero-shot model's
    top two labels are reported as ``"Significant Focus"``.  Blank text
    yields an empty dict without invoking the model.
    """
    detected_frames = {}
    for category, keywords in frame_categories.items():
        importance = categorize_frame_importance(text, keywords)
        if importance != "Minor Mention":
            detected_frames[category] = importance

    # Fall back to the classifier only when there is real text to classify.
    if not detected_frames and text.strip():
        frame_model = _load_frame_model()
        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
        for label in model_result["labels"][:2]:  # Top 2 frames
            detected_frames[label] = "Significant Focus"

    return detected_frames


# Extract hashtags
def extract_hashtags(text):
    """Return every ``#word`` token found in ``text``, in order of appearance."""
    return re.findall(r"#\w+", text)


# Extract captions from DOCX
def extract_captions_from_docx(docx_file):
    """Read captions grouped under "Post N" headings from a DOCX file.

    Args:
        docx_file: A path or file-like object accepted by ``docx.Document``.

    Returns:
        Dict mapping each heading paragraph's text to the space-joined text
        of the non-empty paragraphs that follow it.  Headings with no text
        under them are omitted; paragraphs before the first heading are
        ignored.
    """
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Blank paragraphs carry no caption text; keeping them produced
            # whitespace-only captions and stray spaces in joined text.
            continue
        if _POST_HEADING.match(text):
            current_post = text
            captions[current_post] = []
        elif current_post:
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}


# Generate a DOCX file
def generate_docx(output_data):
    """Render the analysis results into an in-memory DOCX report.

    Args:
        output_data: Dict mapping a caption title to its analysis dict with
            the keys 'Full Caption', 'Language', 'Tone of Caption',
            'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        ``io.BytesIO`` holding the saved document, rewound to position 0.
    """
    doc = Document()
    doc.add_heading('Activism Message Analysis', 0)
    for index, (caption, result) in enumerate(output_data.items(), start=1):
        doc.add_heading(f"{index}. {caption}", level=1)
        doc.add_paragraph("Full Caption:")
        doc.add_paragraph(result['Full Caption'], style="Quote")
        doc.add_paragraph(f"Language: {result['Language']}")
        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
        doc.add_heading('Frames:', level=2)
        for frame, importance in result['Frames'].items():
            doc.add_paragraph(f"{frame}: {importance}")
    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)  # rewind so the download button reads from the start
    return doc_io


def _analyze_text(text):
    """Run every analysis step on one caption and return the result dict."""
    hashtags = extract_hashtags(text)  # computed once, reused for the count
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': extract_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text),
    }


def main():
    """Draw the Streamlit UI, analyse the inputs and offer the DOCX report."""
    # Streamlit UI
    st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
    st.write("Enter text or upload a DOCX file for analysis:")

    # Text input
    input_text = st.text_area("Input Text", height=200)

    # File upload
    uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

    output_data = {}

    # Whitespace-only input has nothing to analyse.
    if input_text and input_text.strip():
        output_data["Manual Input"] = _analyze_text(input_text)
        st.success("Text analysis completed.")

    if uploaded_file:
        captions = extract_captions_from_docx(uploaded_file)
        for caption, text in captions.items():
            output_data[caption] = _analyze_text(text)
        if captions:
            st.success("DOCX file analysis completed.")
        else:
            st.warning("No 'Post <number>' sections with text were found in the DOCX file.")

    if output_data:
        docx_file = generate_docx(output_data)
        st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")


# The UI runs whenever the script executes, exactly as the original
# top-level statements did.
main()