ahm14 committed on
Commit
defcae2
·
verified ·
1 Parent(s): 34cf74e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -109
app.py CHANGED
@@ -9,21 +9,27 @@ import io
9
  # Download required NLTK resources
10
  nltk.download('punkt')
11
 
12
- # Predefined tone categories
13
  tone_categories = {
14
- "Activism and Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign"],
15
- "Solidarity and Support": ["stand with", "support", "unite", "together", "solidarity"],
16
- "Critical and Urgent": ["shame", "oppression", "violence", "urgent", "repress"],
17
- "Empowerment and Resistance": ["empower", "resist", "challenge", "freedom", "independent"]
 
 
 
18
  }
19
 
20
- # Predefined frame categories
21
  frame_categories = {
22
- "Systemic Oppression": ["patriarchy", "repression", "violence", "oppression", "honor killing"],
23
- "Climate Justice": ["climate", "environment", "biodiversity", "mining", "farmers"],
24
- "Human Rights Advocacy": ["safety", "education", "freedom", "law reform", "rights"],
25
- "Call to Action": ["march", "protest", "mobilize", "join us", "rally"],
26
- "Empowerment and Resistance": ["women's rights", "aurat march", "feminism", "power"]
 
 
 
27
  }
28
 
29
  # Detect language
@@ -36,135 +42,148 @@ def detect_language(text):
36
 
37
  # Analyze tone based on predefined categories
38
  def analyze_tone(text):
39
- try:
40
- tone_model = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
41
- model_result = tone_model(text)[0]['label'].lower()
42
-
43
- # Match with predefined tone categories
44
- detected_tones = set()
45
- for category, keywords in tone_categories.items():
46
- if any(word in text.lower() for word in keywords):
47
- detected_tones.add(category)
48
 
49
- if not detected_tones:
50
- detected_tones.add(model_result.capitalize()) # Fallback to AI-predicted label
 
 
51
 
52
- return list(detected_tones)
53
- except Exception as e:
54
- st.write(f"Error analyzing tone: {e}")
55
- return ["Error"]
56
 
57
  # Extract hashtags
58
  def extract_hashtags(text):
59
- try:
60
- return re.findall(r"#\w+", text)
61
- except Exception as e:
62
- st.write(f"Error extracting hashtags: {e}")
63
- return []
64
 
65
  # Extract frames based on predefined categories
66
  def extract_frames(text):
67
- try:
 
 
 
 
 
68
  frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
69
  model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
70
-
71
- # Match with predefined frame categories
72
- detected_frames = set()
73
- for category, keywords in frame_categories.items():
74
- if any(word in text.lower() for word in keywords):
75
- detected_frames.add(category)
76
-
77
- # Combine with AI model predictions
78
- detected_frames.update(model_result["labels"][:2]) # Take top 2 predictions
79
-
80
- return list(detected_frames)
81
- except Exception as e:
82
- st.write(f"Error extracting frames: {e}")
83
- return []
84
-
85
- # Generate a DOCX file in-memory
86
- def generate_docx(output):
87
- try:
88
- doc = Document()
89
- doc.add_heading('Activism Message Analysis', 0)
90
-
91
- doc.add_heading('Generated Output:', level=1)
92
- doc.add_paragraph(f"Language: {output['Language']}")
93
- doc.add_paragraph(f"Tone of Caption: {', '.join(output['Tone of Caption'])}")
94
- doc.add_paragraph(f"Number of Hashtags: {output['Hashtag Count']}")
95
- doc.add_paragraph(f"Hashtags Found: {', '.join(output['Hashtags'])}")
 
 
 
 
 
 
 
96
 
97
  doc.add_heading('Frames:', level=2)
98
- for frame in output['Frames']:
99
  doc.add_paragraph(frame)
100
 
101
- # Save the document in-memory
102
- doc_io = io.BytesIO()
103
- doc.save(doc_io)
104
- doc_io.seek(0)
105
 
106
- return doc_io
107
- except Exception as e:
108
- st.write(f"Error generating DOCX file: {e}")
109
- return None
110
 
111
  # Streamlit app
112
  st.title('AI-Powered Activism Message Analyzer with Intersectionality')
113
 
114
- st.write("Enter the text to analyze and generate output:")
115
 
116
- # Input box for user to paste their text
117
  input_text = st.text_area("Input Text", height=200)
118
 
119
- if input_text:
120
- try:
121
- # Detect language
122
- language = detect_language(input_text)
123
 
124
- # Analyze tone
125
- tone = analyze_tone(input_text)
126
 
127
- # Extract hashtags
128
- hashtags = extract_hashtags(input_text)
129
- hashtag_count = len(hashtags)
130
-
131
- # Extract frames
132
- frames = extract_frames(input_text)
133
-
134
- # Prepare output
135
- output = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  'Language': language,
137
  'Tone of Caption': tone,
138
  'Hashtags': hashtags,
139
- 'Hashtag Count': hashtag_count,
140
  'Frames': frames
141
  }
142
 
143
- # Display results
144
- with st.expander("Generated Output"):
145
- st.subheader("Analysis Result")
146
- st.write(f"**Language**: {output['Language']}")
147
- st.write(f"**Tone of Caption**: {', '.join(output['Tone of Caption'])}")
148
- st.write(f"**Number of Hashtags**: {output['Hashtag Count']}")
149
- st.write(f"**Hashtags Found:**")
150
- for hashtag in output['Hashtags']:
151
- st.write(f"- {hashtag}")
 
 
 
 
 
152
  st.write("**Frames**:")
153
- for frame in output['Frames']:
154
  st.write(f"- {frame}")
155
 
156
- # Generate docx file
157
- docx_file = generate_docx(output)
158
 
159
- if docx_file:
160
- st.download_button(
161
- label="Download Analysis as DOCX",
162
- data=docx_file,
163
- file_name="activism_message_analysis.docx",
164
- mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
165
- )
166
-
167
- except Exception as e:
168
- st.write(f"Error during analysis: {e}")
169
- else:
170
- st.error("Please enter some text to analyze.")
 
# Fetch the NLTK "punkt" resource when the script starts.
nltk.download("punkt")
11
 
12
# Tone categories: each name maps to the keywords that signal that tone.
# analyze_tone() looks for these keywords as case-insensitive substrings.
tone_categories = {
    "Activism & Advocacy": [
        "justice",
        "rights",
        "demand",
        "protest",
        "march",
        "campaign",
        "freedom of speech",
    ],
    "Systemic Oppression": [
        "discrimination",
        "oppression",
        "minorities",
        "marginalized",
        "exclusion",
    ],
    "Intersectionality": [
        "intersecting",
        "women",
        "minorities",
        "struggles",
        "multiple oppression",
    ],
    "Call to Action": [
        "join us",
        "sign petition",
        "take action",
        "mobilize",
        "support movement",
    ],
    "Empowerment & Resistance": [
        "empower",
        "resist",
        "challenge",
        "fight for",
        "stand up",
    ],
    "Climate Justice": [
        "environment",
        "climate change",
        "sustainability",
        "biodiversity",
        "pollution",
    ],
    "Human Rights Advocacy": [
        "human rights",
        "violations",
        "honor killing",
        "workplace discrimination",
        "law reform",
    ],
}
22
 
23
# Frame categories: each name maps to the keywords that signal that frame.
# extract_frames() looks for these keywords as case-insensitive substrings.
frame_categories = {
    "Human Rights & Justice": [
        "rights",
        "law",
        "justice",
        "legal",
        "humanitarian",
    ],
    "Political & State Accountability": [
        "government",
        "policy",
        "state",
        "corruption",
        "accountability",
    ],
    "Gender & Patriarchy": [
        "gender",
        "women",
        "violence",
        "patriarchy",
        "equality",
    ],
    "Religious Freedom & Persecution": [
        "religion",
        "persecution",
        "minorities",
        "intolerance",
        "faith",
    ],
    "Grassroots Mobilization": [
        "activism",
        "community",
        "movement",
        "local",
        "mobilization",
    ],
    "Environmental Crisis & Activism": [
        "climate",
        "deforestation",
        "water",
        "pollution",
        "sustainability",
    ],
    "Anti-Extremism & Anti-Violence": [
        "extremism",
        "violence",
        "hate speech",
        "radicalism",
        "mob attack",
    ],
    "Social Inequality & Economic Disparities": [
        "class privilege",
        "labor rights",
        "economic",
        "discrimination",
    ],
}
34
 
35
  # Detect language
 
42
 
# Analyze tone based on predefined categories
def analyze_tone(text):
    """Return the tone categories detected in *text*.

    A category from ``tone_categories`` is reported when any of its keywords
    occurs as a case-insensitive substring of ``text``. Only when no keyword
    matches is the zero-shot classification model consulted; the first two
    labels of its result are returned instead.

    Args:
        text: Caption or message to analyze.

    Returns:
        A list of category names. Keyword matches follow the declaration
        order of ``tone_categories``; model fallbacks keep the order in which
        the classifier returned them. Blank input yields an empty list.
    """
    lowered = text.lower()  # lower-case once instead of once per keyword

    # A list built in dict order keeps the output stable between runs; going
    # through a set would shuffle the categories.
    detected_tones = [
        category
        for category, keywords in tone_categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]
    if detected_tones or not lowered.strip():
        # Keyword hits take precedence, and blank text has nothing to classify.
        return detected_tones

    # Building the pipeline loads the model, so create it once and reuse it
    # for later fallbacks made through this function object.
    classifier = getattr(analyze_tone, "_classifier", None)
    if classifier is None:
        classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        analyze_tone._classifier = classifier

    model_result = classifier(text, candidate_labels=list(tone_categories))
    return list(model_result["labels"][:2])
 
 
 
56
 
# Extract hashtags
def extract_hashtags(text):
    """Return every ``#`` followed by word characters in *text*, in order of appearance."""
    hashtag_matches = re.finditer(r"#\w+", text)
    return [match.group(0) for match in hashtag_matches]
 
 
 
 
60
 
# Extract frames based on predefined categories
def extract_frames(text):
    """Return the frame categories detected in *text*.

    A category from ``frame_categories`` is reported when any of its keywords
    occurs as a case-insensitive substring of ``text``. Only when no keyword
    matches is the zero-shot classification model consulted; the first two
    labels of its result are returned instead.

    Args:
        text: Caption or message to analyze.

    Returns:
        A list of category names. Keyword matches follow the declaration
        order of ``frame_categories``; model fallbacks keep the order in which
        the classifier returned them. Blank input yields an empty list.
    """
    lowered = text.lower()  # lower-case once instead of once per keyword

    # A list built in dict order keeps the output stable between runs; going
    # through a set would shuffle the categories.
    detected_frames = [
        category
        for category, keywords in frame_categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]
    if detected_frames or not lowered.strip():
        # Keyword hits take precedence, and blank text has nothing to classify.
        return detected_frames

    # Building the pipeline loads the model, so create it once and reuse it
    # for later fallbacks made through this function object.
    classifier = getattr(extract_frames, "_classifier", None)
    if classifier is None:
        classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        extract_frames._classifier = classifier

    model_result = classifier(text, candidate_labels=list(frame_categories))
    return list(model_result["labels"][:2])
74
+
75
# Extract captions from DOCX file based on "Post X"
def extract_captions_from_docx(docx_file):
    """Group the paragraphs of a DOCX file into captions keyed by post heading.

    A paragraph whose stripped text starts with "Post <digits>"
    (case-insensitive) opens a new post. The non-empty paragraphs that follow,
    up to the next such heading, make up that post's caption.

    Args:
        docx_file: Value handed straight to ``Document`` (the app passes the
            uploaded file object).

    Returns:
        A dict mapping each heading's text to its caption, with the caption's
        paragraphs joined by single spaces. Paragraphs before the first
        heading are ignored and posts without caption text are omitted. A
        heading text that repeats restarts that post's caption.
    """
    heading_pattern = re.compile(r"Post \d+", re.IGNORECASE)
    captions = {}
    current_post = None

    for para in Document(docx_file).paragraphs:
        text = para.text.strip()
        if not text:
            # Blank paragraphs are spacing only. Keeping them would let a post
            # made of empty lines through as an empty caption.
            continue
        if heading_pattern.match(text):
            current_post = text
            captions[current_post] = []
        elif current_post is not None:
            captions[current_post].append(text)

    return {post: " ".join(lines) for post, lines in captions.items() if lines}
89
+
90
# Generate a DOCX file in-memory with full captions
def generate_docx(output_data):
    """Build the analysis report as a DOCX document held in memory.

    Args:
        output_data: Mapping of caption label to its analysis result. Each
            result is read through the keys 'Full Caption', 'Language',
            'Tone of Caption', 'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        An ``io.BytesIO`` holding the saved document, rewound to the start.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    for position, caption in enumerate(output_data, start=1):
        entry = output_data[caption]

        # Numbered section heading followed by the quoted caption text.
        report.add_heading(f"{position}. {caption}", level=1)
        report.add_paragraph("Full Caption:")
        report.add_paragraph(entry['Full Caption'], style="Quote")

        # One summary paragraph per analysis field.
        summary_lines = (
            f"Language: {entry['Language']}",
            f"Tone of Caption: {', '.join(entry['Tone of Caption'])}",
            f"Number of Hashtags: {entry['Hashtag Count']}",
            f"Hashtags Found: {', '.join(entry['Hashtags'])}",
        )
        for summary_line in summary_lines:
            report.add_paragraph(summary_line)

        report.add_heading('Frames:', level=2)
        for frame_name in entry['Frames']:
            report.add_paragraph(frame_name)

    # Save into a buffer and rewind it so the caller reads from the beginning.
    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)
    return buffer
 
 
 
114
 
# Streamlit app
st.title('AI-Powered Activism Message Analyzer with Intersectionality')

st.write("Enter the text to analyze or upload a DOCX file containing captions:")


def _analyze_caption(text):
    """Run every analysis step on one caption and bundle the results in a dict."""
    language = detect_language(text)
    tone = analyze_tone(text)
    hashtags = extract_hashtags(text)
    frames = extract_frames(text)
    return {
        'Full Caption': text,
        'Language': language,
        'Tone of Caption': tone,
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': frames,
    }


# Text Input
input_text = st.text_area("Input Text", height=200)

# File Upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Results keyed by caption label, in the order they were analyzed
output_data = {}

# Whitespace-only input carries nothing to analyze, so treat it like no input.
if input_text and input_text.strip():
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Analysis completed for text input.")

if uploaded_file:
    try:
        captions = extract_captions_from_docx(uploaded_file)
    except Exception as exc:
        # UI boundary: report an unreadable upload instead of showing a traceback.
        captions = None
        st.error(f"Could not read the uploaded DOCX file: {exc}")

    if captions:
        for caption, text in captions.items():
            output_data[caption] = _analyze_caption(text)
        st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
    elif captions is not None:
        # The file was readable but contained no "Post <number>" sections.
        st.warning('No "Post <number>" captions were found in the DOCX file.')

# Display results
if output_data:
    with st.expander("Generated Output"):
        st.subheader("Analysis Results")
        for index, (caption, result) in enumerate(output_data.items(), start=1):
            st.write(f"### {index}. {caption}")
            st.write("**Full Caption:**")
            st.write(f"> {result['Full Caption']}")
            st.write(f"**Language**: {result['Language']}")
            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
            st.write("**Frames**:")
            for frame in result['Frames']:
                st.write(f"- {frame}")

    # Offer the report only when there is at least one analyzed caption.
    docx_file = generate_docx(output_data)

    if docx_file:
        st.download_button(
            label="Download Analysis as DOCX",
            data=docx_file,
            file_name="activism_message_analysis.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )