ahm14 commited on
Commit
d5c0fd1
·
verified ·
1 Parent(s): fe89b54

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +297 -0
app.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import pandas as pd
4
+ import streamlit as st
5
+ import re
6
+ import logging
7
+ import nltk
8
+ from docx import Document
9
+ from docx.enum.text import WD_ALIGN_PARAGRAPH
10
+ from docx.shared import Pt
11
+ import io
12
+ from langdetect import detect
13
+ from collections import Counter
14
+ from dotenv import load_dotenv
15
+ from langchain_groq import ChatGroq
16
+ from langchain_core.output_parsers import StrOutputParser
17
+ from langchain_core.prompts import ChatPromptTemplate
18
+ from transformers import pipeline
19
+
20
# Configure logging FIRST: logging.error() below would otherwise auto-install
# a default handler via basicConfig(), making this format a silent no-op.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Load environment variables from a local .env file, if present.
load_dotenv()

# Check if the Groq API key is available; surface the problem in the UI too.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    logging.error("Missing Groq API key. Please set the GROQ_API_KEY environment variable.")
    st.error("API key is missing. Please provide a valid API key.")

# Initialize the LLM client (Groq API). Created even when the key is missing so
# that extract_tone() fails into its keyword fallback instead of crashing at import.
llm = ChatGroq(temperature=0.5, groq_api_key=GROQ_API_KEY, model_name="llama3-8b-8192")

# Download required NLTK resources (quiet=True avoids console noise on Streamlit reruns).
nltk.download("punkt", quiet=True)
37
+
38
# Tone categories for fallback method.
# Maps a tone label -> keywords; extract_tone_fallback assigns the label when
# any keyword appears as a substring of the lower-cased text.
tone_categories = {
    "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
    "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
    "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
    "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
    "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
    "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
    "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
    "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
    "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
    "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
}
51
+
52
# Frame categories for fallback method.
# Maps a frame label -> keywords; get_frame_category_mapping counts keyword
# substring hits per frame (lower-cased text) to rank each frame's focus level.
frame_categories = {
    "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
    "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
    "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
    "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
    "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
    "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
    "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
    "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
    "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
    "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
    "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
    "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
    "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
    "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
    "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
}
70
+
71
# Detect language
def detect_language(text):
    """Return the language code langdetect reports for *text*, or "unknown" on failure."""
    try:
        language = detect(text)
    except Exception as e:
        logging.error(f"Error detecting language: {e}")
        return "unknown"
    return language
78
+
79
# Extract tone using Groq API (or fallback method)
def extract_tone(text):
    """Ask the Groq LLM for descriptive tone labels for *text*.

    Falls back to keyword matching (extract_tone_fallback) on any API error.

    Returns:
        list[str]: tone labels, e.g. ["Somber", "Informative"].
    """
    try:
        # ChatGroq is a LangChain chat model: it has no .chat() method and does
        # not return an OpenAI-style {"choices": ...} dict. Use .invoke(), which
        # returns an AIMessage whose text lives in .content.
        response = llm.invoke([
            ("system", "Analyze the tone of the following text and provide descriptive tone labels."),
            ("user", text),
        ])
        return response.content.split(", ")
    except Exception as e:
        logging.error(f"Groq API error: {e}")
        return extract_tone_fallback(text)
88
+
89
# Fallback method for tone extraction
def extract_tone_fallback(text):
    """Keyword-based tone detection used when the LLM call fails.

    A tone is detected when any of its keywords occurs as a substring of the
    lower-cased text; returns ["Neutral"] when nothing matches.
    """
    lowered = text.lower()
    matches = {
        tone
        for tone, keywords in tone_categories.items()
        if any(keyword in lowered for keyword in keywords)
    }
    return list(matches) if matches else ["Neutral"]
97
+
98
# Extract hashtags
def extract_hashtags(text):
    """Return every #hashtag token in *text*, in order of appearance."""
    hashtag_pattern = re.compile(r"#\w+")
    return hashtag_pattern.findall(text)
101
+
102
+ # -------------------------------------------------------------------
103
+ # New functions for frame categorization and display
104
+ # -------------------------------------------------------------------
105
+
106
def get_frame_category_mapping(text):
    """
    Returns a mapping of every frame (from frame_categories) to one of the four categories.
    Detected frames are assigned a focus level based on keyword frequency:
      - Top detected: "Major Focus"
      - Next up to two: "Significant Focus"
      - Remaining detected: "Minor Mention"
    Frames not detected get "Not Applicable".
    """
    lowered = text.lower()

    # Count keyword hits per frame (substring match on the lower-cased text).
    hits = {
        frame: sum(keyword in lowered for keyword in keywords)
        for frame, keywords in frame_categories.items()
    }

    # Frames with at least one hit, ranked by hit count (stable sort keeps
    # the frame_categories declaration order for ties).
    ranked = sorted(
        (frame for frame, count in hits.items() if count > 0),
        key=lambda frame: hits[frame],
        reverse=True,
    )

    mapping = {}
    for position, frame in enumerate(ranked):
        if position == 0:
            mapping[frame] = "Major Focus"
        elif position <= 2:
            mapping[frame] = "Significant Focus"
        else:
            mapping[frame] = "Minor Mention"

    # Everything not detected is explicitly marked Not Applicable.
    for frame in frame_categories:
        mapping.setdefault(frame, "Not Applicable")
    return mapping
141
+
142
def format_frame_categories_table(category_mapping):
    """
    Returns a markdown-formatted table displaying each frame with columns:
    Major Focus, Significant Focus, Minor Mention, and Not Applicable.
    A tick (✓) marks the assigned category.
    """
    columns = ("Major Focus", "Significant Focus", "Minor Mention", "Not Applicable")
    lines = [
        "| Frame | " + " | ".join(columns) + " |",
        "| " + " | ".join(["---"] * 5) + " |",
    ]
    for frame, assigned in category_mapping.items():
        marks = ["✓" if assigned == column else "" for column in columns]
        lines.append("| " + " | ".join([frame, *marks]) + " |")
    return "\n".join(lines) + "\n"
159
+
160
+ # -------------------------------------------------------------------
161
+ # Existing functions for file processing
162
+ # -------------------------------------------------------------------
163
+
164
def extract_captions_from_docx(docx_file):
    """Parse a DOCX whose paragraphs are grouped under "Post N" headings.

    Args:
        docx_file: path or file-like object accepted by docx.Document.

    Returns:
        dict: "Post N" heading -> caption text (paragraphs joined with spaces).
        Posts with no caption text are omitted.
    """
    doc = Document(docx_file)
    captions = {}
    current_post = None
    for para in doc.paragraphs:
        text = para.text.strip()
        if re.match(r"Post \d+", text, re.IGNORECASE):
            current_post = text
            captions[current_post] = []
        elif current_post and text:
            # Skip blank paragraphs: previously they were appended as empty
            # strings, producing doubled spaces in the joined caption and
            # letting caption-less posts slip past the `if lines` filter.
            captions[current_post].append(text)
    return {post: " ".join(lines) for post, lines in captions.items() if lines}
176
+
177
def extract_metadata_from_excel(excel_file):
    """Read an Excel sheet into a list of per-row dicts.

    Returns [] (and logs the error) when the file cannot be parsed.
    """
    try:
        records = pd.read_excel(excel_file).to_dict(orient="records")
    except Exception as e:
        logging.error(f"Error processing Excel file: {e}")
        return []
    return records
185
+
186
def merge_metadata_with_generated_data(generated_data, excel_metadata):
    """Fold spreadsheet rows into *generated_data*, keyed as "Post N".

    Rows carrying a "Post Number" merge into (or create) that post's entry;
    rows without one default to len(generated_data) + 1 at merge time.
    Mutates and returns *generated_data*.
    """
    for row in excel_metadata:
        fallback_number = len(generated_data) + 1
        key = f"Post {row.get('Post Number', fallback_number)}"
        if key in generated_data:
            generated_data[key].update(row)
        else:
            generated_data[key] = row
    return generated_data
194
+
195
def create_docx_from_data(extracted_data):
    """Build a python-docx Document summarizing each post's extracted fields.

    Args:
        extracted_data: mapping of post label -> dict of field name -> value.
            When a dict contains "FramesMapping" (frame -> focus category),
            frames are rendered as a real Word table; otherwise the "Frames"
            value is written as plain text.

    Returns:
        docx.Document ready to be saved.
    """
    doc = Document()
    for post_number, data in extracted_data.items():
        doc.add_heading(post_number, level=1)
        ordered_keys = [
            "Post Number", "Date of Post", "Media Type", "Number of Pictures",
            "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
            "Full Caption", "Language", "Tone", "Hashtags"
        ]
        for key in ordered_keys:
            value = data.get(key, "N/A")
            if key in ["Tone", "Hashtags"]:
                value = ", ".join(value) if isinstance(value, list) else value
            para = doc.add_paragraph()
            # Markdown "**" markers do not render in DOCX; use a real bold run.
            label = para.add_run(f"{key}: ")
            label.bold = True
            label.font.size = Pt(11)
            body = para.add_run(str(value))
            body.font.size = Pt(11)
        # Add a proper table for Frames if a mapping is available.
        if "FramesMapping" in data:
            doc.add_paragraph("Frames:")
            category_mapping = data["FramesMapping"]
            table = doc.add_table(rows=1, cols=5)
            table.style = "Light List Accent 1"
            hdr_cells = table.rows[0].cells
            hdr_cells[0].text = "Frame"
            hdr_cells[1].text = "Major Focus"
            hdr_cells[2].text = "Significant Focus"
            hdr_cells[3].text = "Minor Mention"
            hdr_cells[4].text = "Not Applicable"
            tick = "✓"
            for frame, category in category_mapping.items():
                row_cells = table.add_row().cells
                row_cells[0].text = frame
                row_cells[1].text = tick if category == "Major Focus" else ""
                row_cells[2].text = tick if category == "Significant Focus" else ""
                row_cells[3].text = tick if category == "Minor Mention" else ""
                row_cells[4].text = tick if category == "Not Applicable" else ""
        else:
            value = data.get("Frames", "N/A")
            para = doc.add_paragraph()
            label = para.add_run("Frames: ")
            label.bold = True
            para.add_run(str(value))
        doc.add_paragraph("\n")
    return doc
236
+
237
+
238
+
239
+ # -------------------------------------------------------------------
240
+ # Streamlit App UI
241
+ # -------------------------------------------------------------------
242
+
243
st.title("AI-Powered Coding Sheet Generator")
st.write("Enter text or upload a DOCX/Excel file for analysis:")

input_text = st.text_area("Input Text", height=200)
uploaded_docx = st.file_uploader("Upload a DOCX file", type=["docx"])
uploaded_excel = st.file_uploader("Upload an Excel file", type=["xlsx"])

# Accumulates one dict of analysis fields per post / manual entry.
output_data = {}

if input_text:
    # Process manual input text
    frame_mapping = get_frame_category_mapping(input_text)
    frames_table = format_frame_categories_table(frame_mapping)
    output_data["Manual Input"] = {
        "Full Caption": input_text,
        "Language": detect_language(input_text),
        "Tone": extract_tone(input_text),
        "Hashtags": extract_hashtags(input_text),
        "Frames": frames_table,  # Markdown table for on-screen display
        # Raw mapping so create_docx_from_data can build a real Word table;
        # its "FramesMapping" branch was previously unreachable because
        # nothing ever stored this key.
        "FramesMapping": frame_mapping,
    }

if uploaded_docx:
    captions = extract_captions_from_docx(uploaded_docx)
    for caption, text in captions.items():
        frame_mapping = get_frame_category_mapping(text)
        frames_table = format_frame_categories_table(frame_mapping)
        output_data[caption] = {
            "Full Caption": text,
            "Language": detect_language(text),
            "Tone": extract_tone(text),
            "Hashtags": extract_hashtags(text),
            "Frames": frames_table,
            "FramesMapping": frame_mapping,
        }

if uploaded_excel:
    excel_metadata = extract_metadata_from_excel(uploaded_excel)
    output_data = merge_metadata_with_generated_data(output_data, excel_metadata)

# Display results in collapsible sections
if output_data:
    for post_number, data in output_data.items():
        with st.expander(post_number):
            for key, value in data.items():
                if key == "FramesMapping":
                    continue  # internal detail; already shown via the "Frames" table
                if key == "Frames":
                    st.markdown(f"**{key}:**\n{value}")
                else:
                    st.write(f"**{key}:** {value}")

# Generate DOCX output for download
if output_data:
    docx_output = create_docx_from_data(output_data)
    docx_io = io.BytesIO()
    docx_output.save(docx_io)
    docx_io.seek(0)
    st.download_button(
        "Download Merged Analysis as DOCX",
        data=docx_io,
        file_name="coding_sheet.docx",
        # Explicit MIME type so browsers treat the payload as a Word document.
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )