Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import re | |
| from langdetect import detect | |
| from transformers import pipeline | |
| import nltk | |
| from docx import Document | |
| import io | |
# Download required NLTK resources.
# Streamlit re-executes this script on every interaction, so only call the
# downloader when the tokenizer data is not already available locally.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
# Tone categories
# Maps each tone label to the lowercase keywords/phrases that signal it.
tone_categories = {
    "Emotional": [
        "urgent", "violence", "disappearances", "forced",
        "killing", "crisis", "concern",
    ],
    "Harsh": [
        "corrupt", "oppression", "failure", "repression",
        "exploit", "unjust", "authoritarian",
    ],
    "Somber": [
        "tragedy", "loss", "pain", "sorrow",
        "mourning", "grief", "devastation",
    ],
    "Motivational": [
        "rise", "resist", "mobilize", "inspire",
        "courage", "change", "determination",
    ],
    "Informative": [
        "announcement", "event", "scheduled", "update",
        "details", "protest", "statement",
    ],
    "Positive": [
        "progress", "unity", "hope", "victory",
        "together", "solidarity", "uplifting",
    ],
    "Happy": [
        "joy", "celebration", "cheer", "success",
        "smile", "gratitude", "harmony",
    ],
    "Angry": [
        "rage", "injustice", "fury",
        "resentment", "outrage", "betrayal",
    ],
    "Fearful": [
        "threat", "danger", "terror",
        "panic", "risk", "warning",
    ],
    "Sarcastic": [
        "brilliant", "great job", "amazing",
        "what a surprise", "well done", "as expected",
    ],
    "Hopeful": [
        "optimism", "better future", "faith",
        "confidence", "looking forward",
    ],
}
# Frame categories
# Maps each frame label to the lowercase keywords/phrases counted for it.
frame_categories = {
    "Human Rights & Justice": [
        "rights", "law", "justice", "legal", "humanitarian",
    ],
    "Political & State Accountability": [
        "government", "policy", "state", "corruption", "accountability",
    ],
    "Gender & Patriarchy": [
        "gender", "women", "violence", "patriarchy", "equality",
    ],
    "Religious Freedom & Persecution": [
        "religion", "persecution", "minorities", "intolerance", "faith",
    ],
    "Grassroots Mobilization": [
        "activism", "community", "movement", "local", "mobilization",
    ],
    "Environmental Crisis & Activism": [
        "climate", "deforestation", "water", "pollution", "sustainability",
    ],
    "Anti-Extremism & Anti-Violence": [
        "extremism", "violence", "hate speech", "radicalism", "mob attack",
    ],
    "Social Inequality & Economic Disparities": [
        "class privilege", "labor rights", "economic", "discrimination",
    ],
    "Activism & Advocacy": [
        "justice", "rights", "demand", "protest",
        "march", "campaign", "freedom of speech",
    ],
    "Systemic Oppression": [
        "discrimination", "oppression", "minorities", "marginalized", "exclusion",
    ],
    "Intersectionality": [
        "intersecting", "women", "minorities", "struggles", "multiple oppression",
    ],
    "Call to Action": [
        "join us", "sign petition", "take action", "mobilize", "support movement",
    ],
    "Empowerment & Resistance": [
        "empower", "resist", "challenge", "fight for", "stand up",
    ],
    "Climate Justice": [
        "environment", "climate change", "sustainability", "biodiversity", "pollution",
    ],
    "Human Rights Advocacy": [
        "human rights", "violations", "honor killing",
        "workplace discrimination", "law reform",
    ],
}
# Detect language
def detect_language(text):
    """Return the language code reported by ``langdetect`` for ``text``.

    Args:
        text: The caption to inspect.

    Returns:
        The value returned by ``detect(text)``, or ``"unknown"`` when ``text``
        is empty/whitespace-only or detection fails.
    """
    # Nothing to analyse: answer directly instead of relying on the detector
    # to fail on featureless input.
    if not text or not text.strip():
        return "unknown"
    try:
        return detect(text)
    except Exception:
        # Deliberate best-effort fallback. Catching Exception (rather than a
        # bare ``except:``) lets KeyboardInterrupt/SystemExit propagate.
        return "unknown"
# Extract tone
def extract_tone(text):
    """Return the tone categories whose keywords occur in ``text``.

    Matching is case-insensitive and anchored at the start of a word, so
    inflected forms still count ("corruption" matches "corrupt") while
    keywords buried inside unrelated words do not ("surprise" no longer
    matches "rise", "campaign" no longer matches "pain").

    Args:
        text: The caption to inspect.

    Returns:
        A list of category names in ``tone_categories`` order, or
        ``["Neutral"]`` when no keyword matches.
    """
    # Lowercase once instead of once per keyword.
    lowered = text.lower()
    detected_tones = [
        category
        for category, keywords in tone_categories.items()
        if any(
            # (?<!\w) requires the keyword to begin at a word boundary.
            re.search(r"(?<!\w)" + re.escape(keyword.lower()), lowered)
            for keyword in keywords
        )
    ]
    return detected_tones or ["Neutral"]
# Categorize frames based on importance
def categorize_frame_importance(text, keywords):
    """Rate how prominent a frame is in ``text`` from its keyword hits.

    Occurrences are counted case-insensitively and must begin at the start
    of a word, so "corruption" counts for "corrupt" but "prevent" does not
    count for "event".

    Args:
        text: The caption to inspect.
        keywords: Iterable of keywords/phrases belonging to one frame.

    Returns:
        ``"Major Focus"`` for more than 3 hits, ``"Significant Focus"`` for
        2 or 3 hits, otherwise ``"Minor Mention"``.
    """
    # Lowercase once instead of once per keyword.
    lowered = text.lower()
    keyword_count = sum(
        len(re.findall(r"(?<!\w)" + re.escape(keyword.lower()), lowered))
        for keyword in keywords
    )
    if keyword_count > 3:
        return "Major Focus"
    if keyword_count >= 2:
        return "Significant Focus"
    return "Minor Mention"
# Extract frames with categorization
def _load_frame_model():
    """Build the zero-shot classification pipeline used as the fallback detector."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


def extract_frames(text):
    """Return the frames detected in ``text`` with their importance level.

    Keyword counting is tried first; only when no frame reaches at least
    "Significant Focus" is the zero-shot model consulted, and its top two
    labels are then reported as "Significant Focus".

    Args:
        text: The caption to inspect.

    Returns:
        A dict mapping frame name to importance label. Empty when ``text``
        is empty or whitespace-only.
    """
    # Blank text has no keywords, and the classifier cannot score it either.
    if not text or not text.strip():
        return {}

    detected_frames = {}
    for category, keywords in frame_categories.items():
        importance = categorize_frame_importance(text, keywords)
        if importance != "Minor Mention":
            detected_frames[category] = importance

    if not detected_frames:
        # Building the pipeline is expensive; st.cache_resource keeps a single
        # shared instance instead of rebuilding it for every caption.
        frame_model = st.cache_resource(_load_frame_model)()
        model_result = frame_model(text, candidate_labels=list(frame_categories))
        for label in model_result["labels"][:2]:  # Top 2 frames
            detected_frames[label] = "Significant Focus"
    return detected_frames
# Extract hashtags
def extract_hashtags(text):
    """Return every hashtag in ``text`` ('#' followed by word characters), in order."""
    hashtag_pattern = re.compile(r"#\w+")
    return [found.group(0) for found in hashtag_pattern.finditer(text)]
# Extract captions from DOCX
def _group_captions(paragraph_texts):
    """Group paragraph strings under the most recent "Post <number>" header."""
    captions = {}
    current_post = None
    for raw_text in paragraph_texts:
        text = raw_text.strip()
        if not text:
            # Blank paragraphs carry no caption text; keeping them would
            # produce whitespace-only captions for otherwise empty posts.
            continue
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            # setdefault: a repeated header keeps the text already collected
            # under it instead of silently discarding it.
            captions.setdefault(current_post, [])
        elif current_post is not None:
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}


def extract_captions_from_docx(docx_file):
    """Read a DOCX file and return its captions keyed by post header.

    A paragraph starting with "Post <number>" (case-insensitive) opens a new
    post; the non-blank paragraphs that follow are joined with single spaces
    to form that post's caption. Paragraphs before the first header are
    ignored, and posts without any caption text are omitted.

    Args:
        docx_file: Whatever ``Document`` accepts (here, the uploaded file).

    Returns:
        A dict mapping each header paragraph's text to its caption string.
    """
    doc = Document(docx_file)
    return _group_captions(para.text for para in doc.paragraphs)
# Generate a DOCX file
def generate_docx(output_data):
    """Render the analysis results into a DOCX report held in memory.

    Args:
        output_data: Mapping of caption title to its analysis record (keys
            'Full Caption', 'Language', 'Tone of Caption', 'Hashtags',
            'Hashtag Count' and 'Frames').

    Returns:
        An ``io.BytesIO`` containing the saved document, rewound to offset 0.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    position = 0
    for caption, analysis in output_data.items():
        position += 1
        report.add_heading(f"{position}. {caption}", level=1)
        report.add_paragraph("Full Caption:")
        report.add_paragraph(analysis['Full Caption'], style="Quote")

        tones = ', '.join(analysis['Tone of Caption'])
        report.add_paragraph(f"Language: {analysis['Language']}")
        report.add_paragraph(f"Tone of Caption: {tones}")
        report.add_paragraph(f"Number of Hashtags: {analysis['Hashtag Count']}")
        hashtags = ', '.join(analysis['Hashtags'])
        report.add_paragraph(f"Hashtags Found: {hashtags}")

        report.add_heading('Frames:', level=2)
        frames = analysis['Frames']
        for frame_name in frames:
            report.add_paragraph(f"{frame_name}: {frames[frame_name]}")

    # Save into memory and rewind so the caller can read from the start.
    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)
    return buffer
# Streamlit UI
def _analyze_caption(text):
    """Run every analysis step on one caption and return its result record."""
    # Extract hashtags once and reuse the list for the count.
    hashtags = extract_hashtags(text)
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': extract_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text),
    }


st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')
st.write("Enter text or upload a DOCX file for analysis:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

output_data = {}

# Ignore input that is only whitespace: there is nothing to analyse.
if input_text and input_text.strip():
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Text analysis completed.")

if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        output_data[caption] = _analyze_caption(text)
    if captions:
        st.success("DOCX file analysis completed.")
    else:
        # Do not report success when the file produced nothing to analyse.
        st.warning('No "Post <number>" captions were found in the uploaded DOCX file.')

if output_data:
    docx_file = generate_docx(output_data)
    st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")