File size: 7,152 Bytes
706fc89
 
 
 
 
 
 
 
 
 
 
773ca30
706fc89
5c3fa48
 
 
 
 
 
 
 
 
 
 
706fc89
 
773ca30
706fc89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773ca30
706fc89
 
773ca30
 
 
706fc89
773ca30
 
 
 
 
 
 
8106444
773ca30
8106444
773ca30
 
 
 
 
706fc89
773ca30
706fc89
773ca30
 
 
706fc89
 
 
 
773ca30
 
706fc89
773ca30
 
 
 
 
706fc89
773ca30
706fc89
 
 
 
 
 
 
 
 
 
 
 
 
 
773ca30
706fc89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773ca30
 
706fc89
 
 
 
 
 
773ca30
 
706fc89
773ca30
706fc89
773ca30
706fc89
 
773ca30
706fc89
 
 
 
 
 
 
773ca30
 
 
 
 
706fc89
773ca30
706fc89
 
 
 
 
 
773ca30
 
 
 
 
706fc89
773ca30
706fc89
 
 
773ca30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import streamlit as st
import re
from langdetect import detect
from transformers import pipeline
import nltk
from docx import Document
import io

# Make sure the NLTK 'punkt' tokenizer data is available.
# Streamlit re-executes this script on every widget interaction, so look for a
# local copy first and only go to the network when the data is really missing.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Tone categories
# Maps a tone label to the keywords/phrases that signal it. extract_tone()
# compares these against the lower-cased caption, so every entry must be
# written in lower case. Entries may be multi-word phrases ("great job").
# A caption can match several tones; the dict order is the order in which
# matched labels are reported.
tone_categories = {
    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
    "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
    "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
    "Happy": ["joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony"],
    "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
    "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
    "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
}

# Frame categories
# Maps a frame label to the lower-case keywords/phrases counted for it by
# categorize_frame_importance(). Some keywords appear under more than one
# frame (e.g. "violence", "rights", "minorities"), so a single word in a
# caption can count toward several frames at once.
# The dict keys are also passed as candidate labels to the zero-shot model in
# extract_frames(), so renaming a key changes what the model is asked.
frame_categories = {
    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
}

# Detect language
def detect_language(text):
    """Return the language code guessed for *text*, or ``"unknown"``.

    Args:
        text: Caption text to classify.

    Returns:
        The value returned by ``langdetect.detect`` for *text*, or the string
        ``"unknown"`` when *text* is not a string, is blank, or detection
        fails for any reason.
    """
    # Blank input carries nothing to detect; answer directly instead of
    # relying on the detector raising.
    if not isinstance(text, str) or not text.strip():
        return "unknown"
    try:
        return detect(text)
    except Exception:
        # Best-effort by design: a failed detection must not break the
        # analysis. `Exception` (not a bare except) lets KeyboardInterrupt
        # and SystemExit propagate.
        return "unknown"

# Extract tone
def extract_tone(text, categories=None):
    """Return the tone labels whose keywords occur in *text*.

    Matching is case-insensitive and a keyword only counts when it begins a
    word. Plain substring matching produced wrong labels: "courage" contains
    "rage" (Angry) and "surprise" contains "rise" (Motivational). Inflected
    forms still match ("protests" for "protest").

    NOTE: a keyword glued onto the end of another word no longer matches.
    This includes run-together hashtags such as "#humanrights".

    Args:
        text: Caption text to inspect.
        categories: Optional mapping of tone label -> iterable of keywords.
            Defaults to the module-level ``tone_categories``.

    Returns:
        A list of matching labels in the mapping's iteration order, or
        ``["Neutral"]`` when nothing matches.
    """
    if categories is None:
        categories = tone_categories

    lowered = text.lower()  # lower-case once instead of once per keyword

    def occurs(keyword):
        # (?<!\w) = "not preceded by a word character", i.e. start of a word.
        # Empty keywords are skipped; they would match everywhere.
        if not keyword:
            return False
        return re.search(r"(?<!\w)" + re.escape(keyword.lower()), lowered) is not None

    detected_tones = [
        category
        for category, keywords in categories.items()
        if any(occurs(keyword) for keyword in keywords)
    ]
    return detected_tones or ["Neutral"]

# Categorize frames based on importance
def categorize_frame_importance(text, keywords):
    """Grade how strongly *text* engages a frame from its keyword hits.

    Hits are counted case-insensitively and only where a keyword begins a
    word, so "flaw" or "outlaw" no longer count as "law". Inflected forms
    such as "rights" / "protests" still count.

    NOTE: a keyword glued onto the end of another word is not counted. This
    includes run-together hashtags such as "#humanrights".

    Args:
        text: Caption text to inspect.
        keywords: Iterable of keywords/phrases belonging to one frame.

    Returns:
        ``"Major Focus"`` for more than 3 hits, ``"Significant Focus"`` for
        2 or 3 hits, otherwise ``"Minor Mention"`` (which also covers zero
        hits).
    """
    lowered = text.lower()  # lower-case once instead of once per keyword
    keyword_count = sum(
        # (?<!\w) = "not preceded by a word character", i.e. start of a word.
        len(re.findall(r"(?<!\w)" + re.escape(keyword.lower()), lowered))
        for keyword in keywords
        if keyword  # an empty keyword would match at every position
    )
    if keyword_count > 3:
        return "Major Focus"
    if keyword_count >= 2:
        return "Significant Focus"
    return "Minor Mention"

# Extract frames with categorization
def _load_frame_classifier():
    """Build the zero-shot classifier used when no keyword frame is found."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


def extract_frames(text):
    """Return the frames detected in *text* with an importance grade.

    Keyword counting runs first. Every frame graded above "Minor Mention" by
    ``categorize_frame_importance`` is kept. Only when no frame qualifies is
    the zero-shot model asked, and the first two labels it returns (its
    top-ranked ones) are recorded as "Significant Focus".

    Args:
        text: Caption text to analyse.

    Returns:
        A dict mapping frame label -> importance string. The dict is empty
        when *text* is blank, because there is nothing for the model to
        classify.
    """
    detected_frames = {}
    for category, keywords in frame_categories.items():
        importance = categorize_frame_importance(text, keywords)
        if importance != "Minor Mention":
            detected_frames[category] = importance

    if detected_frames or not text.strip():
        return detected_frames

    # Loading the model is by far the most expensive step. Previously it was
    # rebuilt on every fallback call, i.e. once per caption and again on every
    # Streamlit rerun. st.cache_resource keeps one instance alive across calls
    # and reruns. It is looked up lazily so older Streamlit releases without
    # it still work (uncached).
    cache_resource = getattr(st, "cache_resource", None)
    load_classifier = (
        cache_resource(_load_frame_classifier)
        if cache_resource is not None
        else _load_frame_classifier
    )
    frame_model = load_classifier()

    model_result = frame_model(text, candidate_labels=list(frame_categories))
    for label in model_result["labels"][:2]:  # Top 2 frames
        detected_frames[label] = "Significant Focus"

    return detected_frames

# Extract hashtags
def extract_hashtags(text):
    """Collect every hashtag in *text*, in order of appearance.

    A hashtag is a ``#`` immediately followed by one or more word characters
    (letters, digits, underscore). Duplicates are kept and a lone ``#`` is
    ignored.
    """
    hashtag_pattern = re.compile(r"#\w+")
    return [found.group() for found in hashtag_pattern.finditer(text)]

# Extract captions from DOCX
def extract_captions_from_docx(docx_file):
    """Group the paragraphs of a DOCX file into one caption per post.

    A paragraph that starts with "Post <number>" (case-insensitive) opens a
    new post, and its full text becomes the post's key. Every following
    non-blank paragraph is added to that post until the next heading.
    Paragraphs before the first heading are ignored.

    Args:
        docx_file: Path or file-like object accepted by ``docx.Document``.

    Returns:
        A dict mapping post heading -> caption text (paragraphs joined with
        single spaces). Posts with no caption text are omitted.
    """
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Blank paragraphs used to be stored as "" entries. That made
            # heading-only posts look non-empty to the filter below and put
            # double spaces into the joined captions.
            continue
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            # A repeated heading restarts that post's caption.
            captions[current_post] = []
        elif current_post is not None:
            captions[current_post].append(text)

    return {post: " ".join(lines) for post, lines in captions.items() if lines}

# Generate a DOCX file
def generate_docx(output_data):
    """Render the analysis results as a Word document held in memory.

    Args:
        output_data: Mapping of post label -> analysis dict. Each analysis
            dict is read for the keys 'Full Caption', 'Language',
            'Tone of Caption', 'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        An ``io.BytesIO`` containing the saved document, rewound to the
        start so it can be read straight away.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    position = 0
    for label, analysis in output_data.items():
        position += 1

        # Numbered section heading, then the caption itself as a quote.
        report.add_heading(f"{position}. {label}", level=1)
        report.add_paragraph("Full Caption:")
        report.add_paragraph(analysis['Full Caption'], style="Quote")

        # One paragraph per summary field, in a fixed order.
        summary_lines = (
            f"Language: {analysis['Language']}",
            f"Tone of Caption: {', '.join(analysis['Tone of Caption'])}",
            f"Number of Hashtags: {analysis['Hashtag Count']}",
            f"Hashtags Found: {', '.join(analysis['Hashtags'])}",
        )
        for line in summary_lines:
            report.add_paragraph(line)

        report.add_heading('Frames:', level=2)
        for frame_name in analysis['Frames']:
            report.add_paragraph(f"{frame_name}: {analysis['Frames'][frame_name]}")

    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)  # rewind so the caller reads from the beginning
    return buffer

# Streamlit UI
def _analyze_caption(text):
    """Run every analysis step on one caption and bundle the results.

    The returned dict carries exactly the keys generate_docx() reads:
    'Full Caption', 'Language', 'Tone of Caption', 'Hashtags',
    'Hashtag Count' and 'Frames'.
    """
    hashtags = extract_hashtags(text)  # extracted once; the count reuses it
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': extract_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text),
    }


st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')

st.write("Enter text or upload a DOCX file for analysis:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Post label -> analysis dict, filled from whichever inputs were provided.
output_data = {}

# Whitespace-only input is treated as "nothing entered".
if input_text and input_text.strip():
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Text analysis completed.")

if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    for caption, text in captions.items():
        output_data[caption] = _analyze_caption(text)
    if captions:
        st.success("DOCX file analysis completed.")
    else:
        # Do not report success when the file held no recognisable posts.
        st.warning(
            "No captions were found in the DOCX file. "
            "Each post must start with a heading paragraph such as 'Post 1'."
        )

if output_data:
    docx_file = generate_docx(output_data)
    st.download_button(
        "Download Analysis as DOCX",
        data=docx_file,
        file_name="analysis.docx",
        # Explicit MIME type so browsers treat the download as a Word file.
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )