import streamlit as st
import re
from langdetect import detect
from transformers import pipeline
import nltk
from docx import Document
import io

# Download required NLTK resources.
# NOTE(review): this is a top-level statement, so it executes every time the
# script runs. No other code in the visible part of this file uses nltk —
# confirm the 'punkt' data is still needed before keeping this call.
nltk.download('punkt')

# Tone categories
# Maps each tone label to the lowercase keywords/phrases that signal it.
# extract_tone() lowercases the text and reports a tone when any of its
# keywords occurs as a plain substring, so a short keyword also matches inside
# longer words (e.g. "rise" is found inside "surprise").
tone_categories = {
    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
    "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
    "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
    "Happy": ["joy", "celebration", "cheer", "success", "smile", "gratitude", "harmony"],
    "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
    "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
    "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
}

# Frame categories
# Maps each frame label to the lowercase keywords/phrases that indicate it.
# categorize_frame_importance() sums the substring occurrences of a frame's
# keywords in the lowercased text; extract_frames() keeps frames whose total is
# at least 2 and, when none qualifies, passes these labels (the dict keys) to a
# zero-shot classifier as candidate labels.
# The same keyword may appear under several frames (e.g. "violence"), in which
# case one occurrence counts toward each of them.
frame_categories = {
    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
}

# Detect language
def detect_language(text):
    """Return the language detected for *text*, or "unknown".

    Args:
        text: The caption to identify.

    Returns:
        The value returned by ``langdetect.detect`` for *text*, or the string
        "unknown" when *text* is empty/whitespace-only or detection fails.
    """
    # Nothing to detect in blank input; answer directly instead of relying on
    # the detector raising.
    if not text or not text.strip():
        return "unknown"
    try:
        return detect(text)
    except Exception:
        # Detection is best-effort: any detector failure maps to "unknown".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return "unknown"

# Extract tone
def extract_tone(text, categories=None):
    """Return the tone labels whose keywords occur in *text*.

    Matching is a case-insensitive substring test, so a keyword is also found
    inside longer words and inside hashtags (e.g. "resist" in
    "#ResistOppression").

    Args:
        text: The caption to analyse.
        categories: Optional mapping of tone label to an iterable of lowercase
            keywords. Defaults to the module-level ``tone_categories``.

    Returns:
        A list of matching tone labels in the mapping's iteration order, or
        ``["Neutral"]`` when no keyword matches.
    """
    if categories is None:
        categories = tone_categories
    # Lowercase once instead of once per keyword.
    lowered = text.lower()
    detected_tones = [
        tone
        for tone, keywords in categories.items()
        if any(keyword in lowered for keyword in keywords)
    ]
    return detected_tones or ["Neutral"]

# Categorize frames based on importance
def categorize_frame_importance(text, keywords):
    """Rate how prominently a frame's keywords feature in *text*.

    Occurrences are counted case-insensitively as non-overlapping substrings
    and summed over all keywords.

    Args:
        text: The caption to analyse.
        keywords: Iterable of lowercase keywords/phrases for one frame.

    Returns:
        "Major Focus" for more than 3 occurrences, "Significant Focus" for
        2 or 3, and "Minor Mention" otherwise (including no occurrence).
    """
    # Lowercase once instead of once per keyword.
    lowered = text.lower()
    keyword_count = sum(lowered.count(keyword) for keyword in keywords)
    if keyword_count > 3:
        return "Major Focus"
    if keyword_count >= 2:
        return "Significant Focus"
    return "Minor Mention"

# Extract frames with categorization
def extract_frames(text):
    """Return the frames detected in *text* with their importance.

    Keyword counting is tried first: every frame in ``frame_categories`` rated
    above "Minor Mention" by ``categorize_frame_importance`` is kept. Only when
    no frame qualifies is the zero-shot classifier consulted, and the first
    two labels it returns are recorded as "Significant Focus".

    Args:
        text: The caption to analyse.

    Returns:
        A dict mapping frame label to "Major Focus" or "Significant Focus".
        Empty when *text* is empty or whitespace-only.
    """
    detected_frames = {}
    # Blank text has no keywords, and classifying it would only produce
    # arbitrary labels, so skip the (expensive) model entirely.
    if not text or not text.strip():
        return detected_frames

    for category, keywords in frame_categories.items():
        importance = categorize_frame_importance(text, keywords)
        if importance != "Minor Mention":
            detected_frames[category] = importance

    if not detected_frames:
        frame_model = _get_frame_model()
        model_result = frame_model(text, candidate_labels=list(frame_categories.keys()))
        for label in model_result["labels"][:2]:  # Top 2 frames
            detected_frames[label] = "Significant Focus"

    return detected_frames


def _get_frame_model():
    """Return the zero-shot classification pipeline, building it on first use.

    The pipeline is memoized on this function object so that analysing many
    captions in one script run constructs the model once instead of once per
    caption.
    """
    # NOTE(review): the memo lives only as long as this function object. If
    # the installed Streamlit provides st.cache_resource, wrapping the loader
    # with it should also keep the model across script reruns — confirm.
    model = getattr(_get_frame_model, "_model", None)
    if model is None:
        model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        _get_frame_model._model = model
    return model

# Extract hashtags
def extract_hashtags(text):
    """Collect every hashtag in *text*, in order of appearance.

    A hashtag is a ``#`` followed by one or more word characters; duplicates
    are kept.
    """
    return [found.group() for found in re.finditer(r"#\w+", text)]

# Extract captions from DOCX
def extract_captions_from_docx(docx_file):
    """Group the paragraphs of a DOCX file into captions keyed by post heading.

    A paragraph whose stripped text starts with "Post <number>"
    (case-insensitive) opens a new post; the non-blank paragraphs that follow
    it, up to the next such heading, form that post's caption. Paragraphs
    before the first heading are ignored. If the same heading text occurs
    again, the later occurrence replaces the earlier one.

    Args:
        docx_file: Whatever ``docx.Document`` accepts — the caller in this
            file passes the Streamlit uploaded-file object.

    Returns:
        A dict mapping heading text to the caption (its paragraphs joined with
        single spaces). Posts without any non-blank paragraph are omitted.
    """
    doc = Document(docx_file)
    captions = {}
    current_lines = None
    for para in doc.paragraphs:
        text = para.text.strip()
        # Skip blank paragraphs so they neither pad the caption with stray
        # spaces nor make an otherwise empty post look non-empty.
        if not text:
            continue
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_lines = captions[text] = []
        elif current_lines is not None:
            current_lines.append(text)

    return {post: " ".join(lines) for post, lines in captions.items() if lines}

# Generate a DOCX file
def generate_docx(output_data):
    """Render the analysis results as a Word document held in memory.

    Args:
        output_data: Mapping of caption title to its analysis record. Each
            record provides 'Full Caption', 'Language', 'Tone of Caption',
            'Hashtag Count', 'Hashtags' and 'Frames'.

    Returns:
        An ``io.BytesIO`` containing the saved document, rewound to the start.
    """
    report = Document()
    report.add_heading('Activism Message Analysis', 0)

    # One numbered section per analysed caption, in mapping order.
    for number, (title, analysis) in enumerate(output_data.items(), start=1):
        report.add_heading(f"{number}. {title}", level=1)
        report.add_paragraph("Full Caption:")
        report.add_paragraph(analysis['Full Caption'], style="Quote")

        report.add_paragraph(f"Language: {analysis['Language']}")
        tone_summary = ', '.join(analysis['Tone of Caption'])
        report.add_paragraph(f"Tone of Caption: {tone_summary}")
        report.add_paragraph(f"Number of Hashtags: {analysis['Hashtag Count']}")
        hashtag_summary = ', '.join(analysis['Hashtags'])
        report.add_paragraph(f"Hashtags Found: {hashtag_summary}")

        report.add_heading('Frames:', level=2)
        for frame_name, weight in analysis['Frames'].items():
            report.add_paragraph(f"{frame_name}: {weight}")

    # Save into a buffer and rewind it so the caller can read from the start.
    buffer = io.BytesIO()
    report.save(buffer)
    buffer.seek(0)
    return buffer

# Streamlit UI
def _analyze_caption(text):
    """Run every analysis on one caption and return its report record."""
    hashtags = extract_hashtags(text)  # extracted once, reused for the count
    return {
        'Full Caption': text,
        'Language': detect_language(text),
        'Tone of Caption': extract_tone(text),
        'Hashtags': hashtags,
        'Hashtag Count': len(hashtags),
        'Frames': extract_frames(text)
    }


st.title('AI-Powered Activism Message Analyzer with Tone & Frame Categorization')

st.write("Enter text or upload a DOCX file for analysis:")

# Text input
input_text = st.text_area("Input Text", height=200)

# File upload
uploaded_file = st.file_uploader("Upload a DOCX file", type=["docx"])

# Analysis records keyed by caption title, in the order they were analysed.
output_data = {}

if input_text:
    output_data["Manual Input"] = _analyze_caption(input_text)
    st.success("Text analysis completed.")

if uploaded_file:
    captions = extract_captions_from_docx(uploaded_file)
    if captions:
        for caption, text in captions.items():
            output_data[caption] = _analyze_caption(text)
        st.success("DOCX file analysis completed.")
    else:
        # Without "Post <number>" headings nothing was analysed, so do not
        # report success.
        st.warning('No posts found in the DOCX file. Each post must start with a paragraph like "Post 1".')

# Offer the download only when there is something to report.
if output_data:
    docx_file = generate_docx(output_data)
    st.download_button("Download Analysis as DOCX", data=docx_file, file_name="analysis.docx")