"""AI-Powered Activism Message Analyzer.

Streamlit app that detects the language of an input message, categorizes its
tone and framing with keyword heuristics, extracts hashtags, and offers the
analysis as a downloadable .docx report.
"""

import io
import re

import nltk
import numpy as np  # noqa: F401  (kept: present in original file)
import streamlit as st
import torch  # noqa: F401  (kept: present in original file)
from docx import Document
from langdetect import detect
from transformers import pipeline

# Download required NLTK resources (sentence tokenizer data).
nltk.download('punkt')

# Initialize Hugging Face's pre-trained sentiment analysis pipeline.
# NOTE(review): tone_model is loaded but never used below — tone is currently
# determined by keyword matching in categorize_tone(). Kept for compatibility.
try:
    tone_model = pipeline("sentiment-analysis")
    st.write("Sentiment analysis model loaded successfully!")
except Exception as e:
    st.write(f"Error loading sentiment analysis model: {e}")

# Initialize Hugging Face's pre-trained zero-shot classification pipeline.
# NOTE(review): frame_model is likewise unused — frames come from keyword
# matching in categorize_frames(). Kept for compatibility.
try:
    frame_model = pipeline("zero-shot-classification")
    st.write("Frame classification model loaded successfully!")
except Exception as e:
    st.write(f"Error loading frame classification model: {e}")


def detect_language(text):
    """Return the ISO 639-1 language code of *text*, or "unknown" on failure."""
    try:
        return detect(text)
    except Exception as e:
        st.write(f"Error detecting language: {e}")
        return "unknown"


def categorize_tone(text):
    """Classify *text* into a tone category via keyword matching.

    Returns the first category (in declaration order) containing a keyword
    that appears in the text (case-insensitive), or "Neutral" if none match.
    """
    tone_categories = {
        "Activism and Advocacy": ["justice", "demand", "fight", "rights", "protest"],
        "Solidarity and Support": ["stand with", "support", "unity", "together", "community"],
        "Critical and Urgent": ["shame", "oppose", "urgent", "critical", "call for"],
        "Empowerment and Resistance": ["empower", "resistance", "challenge", "fight back", "strength"],
    }
    lowered = text.lower()  # hoisted: avoid re-lowering the text per keyword
    for category, keywords in tone_categories.items():
        if any(keyword.lower() in lowered for keyword in keywords):
            return category
    return "Neutral"


def extract_hashtags(text):
    """Return all #hashtags found in *text* (empty list on error)."""
    try:
        return re.findall(r"#\w+", text)
    except Exception as e:
        st.write(f"Error extracting hashtags: {e}")
        return []


def categorize_frames(text):
    """Return up to four frame categories whose keywords appear in *text*.

    Matching is case-insensitive; categories are checked in declaration order
    and the result is capped at four frames.
    """
    frame_categories = {
        # BUG FIX: "honor killings" previously contained a stray line break
        # inside the string literal, so it could never match.
        "Systemic Oppression": ["honor killings", "state violence", "patriarchy", "religious extremism"],
        "Intersectionality": ["women", "minorities", "marginalized", "Khwajasira", "Baloch"],
        "Climate Justice": ["environmental", "mining", "farmers", "biodiversity", "climate"],
        "Human Rights Advocacy": ["safety", "education", "freedom", "violence", "conversion laws"],
        "Call to Action": ["march", "protest", "mobilize", "join", "participate"],
        # TYPO FIX: "Aurad March" -> "Aurat March" (the Pakistani women's march).
        "Empowerment and Resistance": ["resilience", "empowerment", "strength", "Aurat March", "resist"],
    }
    lowered = text.lower()  # hoisted: avoid re-lowering the text per category
    frames = []
    for frame, keywords in frame_categories.items():
        if any(keyword.lower() in lowered for keyword in keywords):
            frames.append(frame)
        if len(frames) >= 4:  # limit the report to four frames
            break
    return frames


def generate_docx(output):
    """Render the analysis *output* dict as a .docx and return its bytes.

    Returns None (after reporting the error in the UI) if generation fails.

    BUG FIX: the original saved to a hard-coded /mnt/data path and returned
    the path *string*, which st.download_button would serve as literal text
    rather than the document. Building the file in memory fixes the download
    and removes the filesystem dependency.
    """
    try:
        doc = Document()
        doc.add_heading('Activism Message Analysis', 0)
        doc.add_heading('Generated Output:', level=1)
        doc.add_paragraph(f"Language: {output['Language']}")
        doc.add_paragraph(f"Tone of Caption: {output['Tone of Caption']}")
        doc.add_paragraph(f"Number of Hashtags: {output['Hashtag Count']}")
        doc.add_paragraph(f"Hashtags Found: {', '.join(output['Hashtags'])}")
        doc.add_heading('Frames:', level=2)
        for frame in output['Frames']:
            doc.add_paragraph(frame)
        # python-docx accepts a file-like object, so build the bytes in memory.
        buffer = io.BytesIO()
        doc.save(buffer)
        return buffer.getvalue()
    except Exception as e:
        st.write(f"Error generating DOCX file: {e}")
        return None


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title('AI-Powered Activism Message Analyzer with Intersectionality')
st.write("Enter the text to analyze and generate output:")

# Input box for user to paste their text.
input_text = st.text_area("Input Text", height=200)

if input_text:
    try:
        # Run the keyword-based analyses over the input text.
        language = detect_language(input_text)
        tone = categorize_tone(input_text)
        hashtags = extract_hashtags(input_text)
        hashtag_count = len(hashtags)
        frames = categorize_frames(input_text)

        # Prepare the output.
        output = {
            'Language': language,
            'Tone of Caption': tone,
            'Hashtags': hashtags,
            'Hashtag Count': hashtag_count,
            'Frames': frames,
        }

        # Display results in the output box.
        with st.expander("Generated Output"):
            st.subheader("Analysis Result")
            st.write(f"**Language**: {output['Language']}")
            st.write(f"**Tone of Caption**: {output['Tone of Caption']}")
            st.write(f"**Number of Hashtags**: {output['Hashtag Count']}")
            st.write(f"**Hashtags Found**:")
            for hashtag in output['Hashtags']:
                st.write(f"- {hashtag}")
            st.write("**Frames**:")
            for frame in output['Frames']:
                st.write(f"- {frame}")

        # BUG FIX: only offer the download when generation succeeded —
        # st.download_button rejects data=None.
        docx_bytes = generate_docx(output)
        if docx_bytes is not None:
            st.download_button(
                label="Download Analysis as DOCX",
                data=docx_bytes,
                file_name="activism_message_analysis.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )
    except Exception as e:
        st.write(f"Error during analysis: {e}")
else:
    st.error("Please enter some text to analyze.")