| | import streamlit as st |
| | import re |
| | from langdetect import detect |
| | from transformers import pipeline |
| | import nltk |
| | from nltk.tokenize import word_tokenize |
| | from nltk.stem import WordNetLemmatizer |
| | from docx import Document |
| | import io |
| |
|
| | |
# Fetch the NLTK data needed by word_tokenize and WordNetLemmatizer.
# "punkt_tab" is the tokenizer resource that newer NLTK releases load in
# place of the pickled "punkt" models; "punkt" is still requested so older
# releases keep working.
# quiet=True stops the downloader from logging each time Streamlit reruns
# this script.
for _resource in ("punkt", "punkt_tab", "wordnet"):
    nltk.download(_resource, quiet=True)

# Single lemmatizer instance shared by every keyword check.
lemmatizer = WordNetLemmatizer()
| |
|
| | |
@st.cache_resource
def load_pipeline():
    """Create the zero-shot classification pipeline used for tone and frames.

    The function is decorated with ``st.cache_resource`` so the
    ``facebook/bart-large-mnli`` model is built once and then reused.
    """
    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
    )
    return classifier
| |
|
# One classifier handle for tone detection and one for frame detection; both
# are obtained from load_pipeline().
tone_model, frame_model = load_pipeline(), load_pipeline()
| |
|
| | |
# Tone label -> keywords whose presence in a caption signals that tone.
# The labels also serve as candidate labels for the zero-shot fallback.
tone_categories = {
    "Emotional": [
        "urgent", "violence", "disappearances", "forced", "killing", "crisis",
    ],
    "Critical": [
        "corrupt", "oppression", "failure", "repression", "unjust",
    ],
    "Somber": [
        "tragedy", "loss", "pain", "sorrow", "mourning", "grief",
    ],
    "Motivational": [
        "rise", "resist", "mobilize", "inspire", "courage", "change",
    ],
    "Informative": [
        "announcement", "event", "scheduled", "update", "details",
    ],
    "Positive": [
        "progress", "unity", "hope", "victory", "solidarity",
    ],
    "Urgent": [
        "urgent", "violence", "disappearances", "forced", "killing",
        "concern", "crisis",
    ],
    "Harsh": [
        "corrupt", "oppression", "failure", "repression", "exploit", "unjust",
    ],
    "Negative": [
        "tragedy", "loss", "pain", "sorrow", "mourning", "grief",
    ],
    "Empowering": [
        "rise", "resist", "mobilize", "inspire", "courage", "change",
    ],
    "Neutral": [
        "announcement", "event", "scheduled", "update", "details",
        "protest on",
    ],
    "Hopeful": [
        "progress", "unity", "hope", "victory", "together", "solidarity",
    ],
}
| |
|
| | |
# Frame label -> keywords or phrases whose presence in a caption signals
# that frame. The labels also serve as candidate labels for the zero-shot
# fallback.
frame_categories = {
    "Human Rights & Justice": [
        "rights", "law", "justice", "legal", "humanitarian",
    ],
    "Political & State Accountability": [
        "government", "policy", "state", "corruption", "accountability",
    ],
    "Gender & Patriarchy": [
        "gender", "women", "violence", "patriarchy", "equality",
    ],
    "Religious Freedom & Persecution": [
        "religion", "persecution", "minorities", "intolerance", "faith",
    ],
    "Grassroots Mobilization": [
        "activism", "community", "movement", "local", "mobilization",
    ],
    "Environmental Crisis & Activism": [
        "climate", "deforestation", "water", "pollution", "sustainability",
    ],
    "Anti-Extremism & Anti-Violence": [
        "extremism", "violence", "hate speech", "radicalism", "mob attack",
    ],
    "Social Inequality & Economic Disparities": [
        "class privilege", "labor rights", "economic", "discrimination",
    ],
    "Activism & Advocacy": [
        "justice", "rights", "demand", "protest", "march", "campaign",
        "freedom of speech",
    ],
    "Systemic Oppression": [
        "discrimination", "oppression", "minorities", "marginalized",
        "exclusion",
    ],
    "Intersectionality": [
        "intersecting", "women", "minorities", "struggles",
        "multiple oppression",
    ],
    "Call to Action": [
        "join us", "sign petition", "take action", "mobilize",
        "support movement",
    ],
    "Empowerment & Resistance": [
        "empower", "resist", "challenge", "fight for", "stand up",
    ],
    "Climate Justice": [
        "environment", "climate change", "sustainability", "biodiversity",
        "pollution",
    ],
    "Human Rights Advocacy": [
        "human rights", "violations", "honor killing",
        "workplace discrimination", "law reform",
    ],
}
| |
|
| | |
def detect_language(text):
    """Return what ``detect`` reports for *text*, or ``"unknown"`` on failure.

    Detection is best-effort: any exception raised by ``detect`` is
    swallowed and replaced by the string ``"unknown"``.
    """
    try:
        language = detect(text)
    except Exception:
        language = "unknown"
    return language
| |
|
| | |
def _contains_phrase(tokens, phrase):
    """Return True if the word list *phrase* occurs as a consecutive run in *tokens*."""
    width = len(phrase)
    return any(
        tokens[start:start + width] == phrase
        for start in range(len(tokens) - width + 1)
    )


def contains_keywords(text, keywords):
    """Return True if *text* mentions at least one entry of *keywords*.

    The text is lower-cased, tokenized and lemmatized. Each keyword is then
    tried in two forms, as written and with each of its words lemmatized,
    so that inflected keywords such as "rights" or "women" can still match
    the lemmatized text. Multi-word keywords such as "hate speech" are
    matched as a run of consecutive tokens; a plain membership test against
    single tokens could never succeed for them.

    Args:
        text: Caption text to search.
        keywords: Iterable of keyword strings (single words or phrases).

    Returns:
        bool: True on the first keyword found, False if none match or the
        text yields no tokens.
    """
    tokens = [
        lemmatizer.lemmatize(token) for token in word_tokenize(text.lower())
    ]
    if not tokens:
        return False
    # Set for O(1) single-word lookups; the list is kept for phrase matching.
    token_set = set(tokens)

    for keyword in keywords:
        words = keyword.lower().split()
        if not words:
            continue  # empty or whitespace-only keyword can never match
        lemma_words = [lemmatizer.lemmatize(word) for word in words]
        # Try the literal form first so every match the original
        # implementation produced is still produced.
        for candidate in (words, lemma_words):
            if len(candidate) == 1:
                if candidate[0] in token_set:
                    return True
            elif _contains_phrase(tokens, candidate):
                return True
    return False
| |
|
| | |
def analyze_tone(text):
    """Return the tone labels detected in *text*.

    Keyword matching is tried first. Only when no keyword category matches
    is the zero-shot model asked to rank every tone label, and its first
    two labels are returned.

    Results are collected in a list rather than a set, so the order is
    reproducible: declaration order of ``tone_categories`` for keyword
    hits, and the order returned by the model for the fallback. A set's
    iteration order for strings can differ from one process to the next.

    Args:
        text: Caption text to analyze.

    Returns:
        list[str]: Tone labels; never more than two when the model
        fallback is used.
    """
    detected_tones = [
        category
        for category, keywords in tone_categories.items()
        if contains_keywords(text, keywords)
    ]

    if not detected_tones:
        model_result = tone_model(text, candidate_labels=list(tone_categories))
        detected_tones = list(model_result["labels"][:2])

    return detected_tones
| |
|
| | |
def extract_frames(text):
    """Return at most four frame labels detected in *text*.

    Keyword matching is tried first. Only when no keyword category matches
    is the zero-shot model asked to rank every frame label, and its first
    four labels are used.

    Matches are kept in a list rather than a set. Truncating a set to four
    items keeps an arbitrary subset that can change between runs; with a
    list, the four frames kept are always the first four in declaration
    order of ``frame_categories`` (or in the order returned by the model).

    Args:
        text: Caption text to analyze.

    Returns:
        list[str]: Up to four frame labels.
    """
    detected_frames = [
        category
        for category, keywords in frame_categories.items()
        if contains_keywords(text, keywords)
    ]

    if not detected_frames:
        model_result = frame_model(text, candidate_labels=list(frame_categories))
        detected_frames = list(model_result["labels"])

    return detected_frames[:4]
| |
|
| | |
def extract_hashtags(text):
    """Return every hashtag found in *text*, in order of appearance.

    A hashtag is a ``#`` immediately followed by one or more word
    characters; the leading ``#`` is kept in each returned string.
    """
    hashtag_pattern = re.compile(r"#\w+")
    return [found.group(0) for found in hashtag_pattern.finditer(text)]
| |
|
| | |
def extract_captions_from_docx(docx_file):
    """Read a DOCX file and group its paragraphs into captions per post.

    A paragraph whose stripped text starts with "Post <number>"
    (case-insensitive) opens a new post and becomes its key. Every
    following non-blank paragraph is collected as part of that post's
    caption until the next heading. Paragraphs before the first heading
    are ignored. If the same heading text appears twice, the later
    occurrence replaces the earlier one.

    Blank paragraphs are skipped. This keeps empty strings out of the
    joined caption and ensures a post with only blank paragraphs is
    dropped instead of being returned with an empty caption.

    Args:
        docx_file: Path or file-like object accepted by ``Document``.

    Returns:
        dict[str, str]: Heading text mapped to its caption, the collected
        paragraphs joined by single spaces. Posts without any caption text
        are omitted.
    """
    heading_pattern = re.compile(r"Post \d+", re.IGNORECASE)
    captions = {}
    current_post = None

    for para in Document(docx_file).paragraphs:
        text = para.text.strip()
        if not text:
            continue  # spacer paragraph, carries no caption content
        if heading_pattern.match(text):
            current_post = text
            captions[current_post] = []
        elif current_post is not None:
            captions[current_post].append(text)

    return {post: " ".join(lines) for post, lines in captions.items() if lines}
| |
|
| | |
def generate_docx(output_data):
    """Render the analysis results into a DOCX held in memory.

    Args:
        output_data: Mapping of caption title to a result record with the
            keys 'Full Caption', 'Language', 'Tone of Caption',
            'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        io.BytesIO: The saved document, rewound to position 0 so it can be
        read immediately.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    for position, (title, analysis) in enumerate(output_data.items(), start=1):
        report.add_heading(f"{position}. {title}", level=1)
        report.add_paragraph("Full Caption:")
        report.add_paragraph(analysis['Full Caption'], style="Quote")

        tone_text = ', '.join(analysis['Tone of Caption'])
        hashtag_text = ', '.join(analysis['Hashtags'])
        summary_lines = (
            f"Language: {analysis['Language']}",
            f"Tone of Caption: {tone_text}",
            f"Number of Hashtags: {analysis['Hashtag Count']}",
            f"Hashtags Found: {hashtag_text}",
        )
        for summary_line in summary_lines:
            report.add_paragraph(summary_line)

        report.add_heading('Frames:', level=2)
        for frame_name in analysis['Frames']:
            report.add_paragraph(frame_name)

    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)  # rewind so the caller reads from the start

    return buffer
| |
|
| | |
# Page header and instructions.
st.title('AI-Powered Activism Message Analyzer')
st.write("Enter the text to analyze or upload a DOCX file containing captions:")

# Two input channels: free text typed into the page, and an uploaded DOCX.
input_text = st.text_area(label="Input Text", height=200)
uploaded_file = st.file_uploader(label="Upload a DOCX file", type=["docx"])

# Analysis results keyed by caption title; filled in further down the script.
output_data = {}
| |
|
def _analyze_caption(text):
    """Run every analysis step on one caption and return its result record.

    Shared by the manual-input and DOCX branches so both always produce
    records with the same keys.
    """
    hashtags = extract_hashtags(text)
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': analyze_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text),
    }


# Manual text input. Whitespace-only text is treated as "nothing entered"
# so a blank caption is not sent through the analysis.
if input_text and input_text.strip():
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Analysis completed for text input.")

# Uploaded DOCX: one result record per post found in the document.
if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        output_data[caption] = _analyze_caption(text)

    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
| |
|
| | |
# Show the results on the page and offer them as a DOCX download, but only
# when at least one caption was analyzed.
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for position, (title, analysis) in enumerate(output_data.items(), start=1):
            tone_text = ', '.join(analysis['Tone of Caption'])
            hashtag_text = ', '.join(analysis['Hashtags'])

            st.write(f"### {position}. {title}")
            st.write("**Full Caption:**")
            st.write(f"> {analysis['Full Caption']}")
            st.write(f"**Language**: {analysis['Language']}")
            st.write(f"**Tone of Caption**: {tone_text}")
            st.write(f"**Number of Hashtags**: {analysis['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {hashtag_text}")
            st.write("**Frames**:")
            for frame_name in analysis['Frames']:
                st.write(f"- {frame_name}")

    # Build the downloadable report from the same records shown above.
    docx_file = generate_docx(output_data)

    if docx_file:
        st.download_button(
            label="Download Analysis as DOCX",
            data=docx_file,
            file_name="activism_message_analysis.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        )
| |
|