Spaces:

ahm14
/

AA_Adv

Build error

App Files Community

ahm14 committed on Jan 29, 2025

Commit

defcae2

verified ·

1 Parent(s): 34cf74e

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -109

app.py CHANGED Viewed

@@ -9,21 +9,27 @@ import io
 # Download required NLTK resources
 nltk.download('punkt')
-# Predefined tone categories
 tone_categories = {
-    "Activism and Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign"],
-    "Solidarity and Support": ["stand with", "support", "unite", "together", "solidarity"],
-    "Critical and Urgent": ["shame", "oppression", "violence", "urgent", "repress"],
-    "Empowerment and Resistance": ["empower", "resist", "challenge", "freedom", "independent"]
 }
-# Predefined frame categories
 frame_categories = {
-    "Systemic Oppression": ["patriarchy", "repression", "violence", "oppression", "honor killing"],
-    "Climate Justice": ["climate", "environment", "biodiversity", "mining", "farmers"],
-    "Human Rights Advocacy": ["safety", "education", "freedom", "law reform", "rights"],
-    "Call to Action": ["march", "protest", "mobilize", "join us", "rally"],
-    "Empowerment and Resistance": ["women's rights", "aurat march", "feminism", "power"]
 }
 # Detect language
@@ -36,135 +42,148 @@ def detect_language(text):
 # Analyze tone based on predefined categories
 def analyze_tone(text):
-    try:
-        tone_model = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
-        model_result = tone_model(text)[0]['label'].lower()
-        # Match with predefined tone categories
-        detected_tones = set()
-        for category, keywords in tone_categories.items():
-            if any(word in text.lower() for word in keywords):
-                detected_tones.add(category)
-        if not detected_tones:
-            detected_tones.add(model_result.capitalize())  # Fallback to AI-predicted label
-        return list(detected_tones)
-    except Exception as e:
-        st.write(f"Error analyzing tone: {e}")
-        return ["Error"]
 # Extract hashtags
 def extract_hashtags(text):
-    try:
-        return re.findall(r"#\w+", text)
-    except Exception as e:
-        st.write(f"Error extracting hashtags: {e}")
-        return []
 # Extract frames based on predefined categories
 def extract_frames(text):
-    try:
         frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
         model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
-        # Match with predefined frame categories
-        detected_frames = set()
-        for category, keywords in frame_categories.items():
-            if any(word in text.lower() for word in keywords):
-                detected_frames.add(category)
-        # Combine with AI model predictions
-        detected_frames.update(model_result["labels"][:2])  # Take top 2 predictions
-        return list(detected_frames)
-    except Exception as e:
-        st.write(f"Error extracting frames: {e}")
-        return []
-# Generate a DOCX file in-memory
-def generate_docx(output):
-    try:
-        doc = Document()
-        doc.add_heading('Activism Message Analysis', 0)
-        doc.add_heading('Generated Output:', level=1)
-        doc.add_paragraph(f"Language: {output['Language']}")
-        doc.add_paragraph(f"Tone of Caption: {', '.join(output['Tone of Caption'])}")
-        doc.add_paragraph(f"Number of Hashtags: {output['Hashtag Count']}")
-        doc.add_paragraph(f"Hashtags Found: {', '.join(output['Hashtags'])}")
         doc.add_heading('Frames:', level=2)
-        for frame in output['Frames']:
             doc.add_paragraph(frame)
-        # Save the document in-memory
-        doc_io = io.BytesIO()
-        doc.save(doc_io)
-        doc_io.seek(0)
-        return doc_io
-    except Exception as e:
-        st.write(f"Error generating DOCX file: {e}")
-        return None
 # Streamlit app
 st.title('AI-Powered Activism Message Analyzer with Intersectionality')
-st.write("Enter the text to analyze and generate output:")
-# Input box for user to paste their text
 input_text = st.text_area("Input Text", height=200)
-if input_text:
-    try:
-        # Detect language
-        language = detect_language(input_text)
-        # Analyze tone
-        tone = analyze_tone(input_text)
-        # Extract hashtags
-        hashtags = extract_hashtags(input_text)
-        hashtag_count = len(hashtags)
-        # Extract frames
-        frames = extract_frames(input_text)
-        # Prepare output
-        output = {
             'Language': language,
             'Tone of Caption': tone,
             'Hashtags': hashtags,
-            'Hashtag Count': hashtag_count,
             'Frames': frames
         }
-        # Display results
-        with st.expander("Generated Output"):
-            st.subheader("Analysis Result")
-            st.write(f"**Language**: {output['Language']}")
-            st.write(f"**Tone of Caption**: {', '.join(output['Tone of Caption'])}")
-            st.write(f"**Number of Hashtags**: {output['Hashtag Count']}")
-            st.write(f"**Hashtags Found:**")
-            for hashtag in output['Hashtags']:
-                st.write(f"- {hashtag}")
             st.write("**Frames**:")
-            for frame in output['Frames']:
                 st.write(f"- {frame}")
-        # Generate docx file
-        docx_file = generate_docx(output)
-        if docx_file:
-            st.download_button(
-                label="Download Analysis as DOCX",
-                data=docx_file,
-                file_name="activism_message_analysis.docx",
-                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-            )
-    except Exception as e:
-        st.write(f"Error during analysis: {e}")
-else:
-    st.error("Please enter some text to analyze.")

 # Download required NLTK resources
 nltk.download('punkt')
+# Updated tone categories
 tone_categories = {
+    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
+    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
+    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
+    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
+    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
+    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
+    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
 }
+# Updated frame categories
 frame_categories = {
+    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
+    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
+    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
+    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
+    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
+    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
+    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
+    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"]
 }
 # Detect language
 # Analyze tone based on predefined categories
 def analyze_tone(text):
+    detected_tones = set()
+    for category, keywords in tone_categories.items():
+        if any(word in text.lower() for word in keywords):
+            detected_tones.add(category)
+    if not detected_tones:
+        tone_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+        model_result = tone_model(text, candidate_labels=list(tone_categories.keys()))
+        detected_tones.update(model_result["labels"][:2])
+    return list(detected_tones)
 # Extract hashtags
 def extract_hashtags(text):
+    return re.findall(r"#\w+", text)
 # Extract frames based on predefined categories
 def extract_frames(text):
+    detected_frames = set()
+    for category, keywords in frame_categories.items():
+        if any(word in text.lower() for word in keywords):
+            detected_frames.add(category)
+    if not detected_frames:
         frame_model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
         model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
+        detected_frames.update(model_result["labels"][:2])
+    return list(detected_frames)
+# Extract captions from DOCX file based on "Post X"
+def extract_captions_from_docx(docx_file):
+    doc = Document(docx_file)
+    captions = {}
+    current_post = None
+    for para in doc.paragraphs:
+        text = para.text.strip()
+        if re.match(r"Post \d+", text, re.IGNORECASE):
+            current_post = text
+            captions[current_post] = []
+        elif current_post:
+            captions[current_post].append(text)
+    return {post: " ".join(lines) for post, lines in captions.items() if lines}
+# Generate a DOCX file in-memory with full captions
+def generate_docx(output_data):
+    doc = Document()
+    doc.add_heading('Activism Message Analysis', 0)
+    for index, (caption, result) in enumerate(output_data.items(), start=1):
+        doc.add_heading(f"{index}. {caption}", level=1)
+        doc.add_paragraph("Full Caption:")
+        doc.add_paragraph(result['Full Caption'], style="Quote")
+        doc.add_paragraph(f"Language: {result['Language']}")
+        doc.add_paragraph(f"Tone of Caption: {', '.join(result['Tone of Caption'])}")
+        doc.add_paragraph(f"Number of Hashtags: {result['Hashtag Count']}")
+        doc.add_paragraph(f"Hashtags Found: {', '.join(result['Hashtags'])}")
         doc.add_heading('Frames:', level=2)
+        for frame in result['Frames']:
             doc.add_paragraph(frame)
+    doc_io = io.BytesIO()
+    doc.save(doc_io)
+    doc_io.seek(0)
+    return doc_io
 # Streamlit app
 st.title('AI-Powered Activism Message Analyzer with Intersectionality')
+st.write("Enter the text to analyze or upload a DOCX file containing captions:")
+# Text Input
 input_text = st.text_area("Input Text", height=200)
+# File Upload
+uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])
+# Initialize output dictionary
+output_data = {}
+if input_text:
+    language = detect_language(input_text)
+    tone = analyze_tone(input_text)
+    hashtags = extract_hashtags(input_text)
+    frames = extract_frames(input_text)
+    output_data["Manual Input"] = {
+        'Full Caption': input_text,
+        'Language': language,
+        'Tone of Caption': tone,
+        'Hashtags': hashtags,
+        'Hashtag Count': len(hashtags),
+        'Frames': frames
+    }
+    st.success("Analysis completed for text input.")
+if uploaded_file:
+    captions = extract_captions_from_docx(uploaded_file)
+    for caption, text in captions.items():
+        language = detect_language(text)
+        tone = analyze_tone(text)
+        hashtags = extract_hashtags(text)
+        frames = extract_frames(text)
+        output_data[caption] = {
+            'Full Caption': text,
             'Language': language,
             'Tone of Caption': tone,
             'Hashtags': hashtags,
+            'Hashtag Count': len(hashtags),
             'Frames': frames
         }
+    st.success(f"Analysis completed for {len(captions)} posts from the DOCX file.")
+# Display results
+if output_data:
+    with st.expander("Generated Output"):
+        st.subheader("Analysis Results")
+        for index, (caption, result) in enumerate(output_data.items(), start=1):
+            st.write(f"### {index}. {caption}")
+            st.write("**Full Caption:**")
+            st.write(f"> {result['Full Caption']}")
+            st.write(f"**Language**: {result['Language']}")
+            st.write(f"**Tone of Caption**: {', '.join(result['Tone of Caption'])}")
+            st.write(f"**Number of Hashtags**: {result['Hashtag Count']}")
+            st.write(f"**Hashtags Found:** {', '.join(result['Hashtags'])}")
             st.write("**Frames**:")
+            for frame in result['Frames']:
                 st.write(f"- {frame}")
+    docx_file = generate_docx(output_data)
+    if docx_file:
+        st.download_button(
+            label="Download Analysis as DOCX",
+            data=docx_file,
+            file_name="activism_message_analysis.docx",
+            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+        )