PradAgrawal committed
Commit 68635d7 · verified · 1 Parent(s): 6557ad0

Upload 4 files
Files changed (4)
  1. README.md +66 -11
  2. app.py +232 -0
  3. coaching_voices.json +128 -0
  4. requirements.txt +11 -0
README.md CHANGED
@@ -1,14 +1,69 @@
 ---
-title: NeuroShieldApp
-emoji: 👀
-colorFrom: pink
-colorTo: pink
-sdk: streamlit
-sdk_version: 1.44.1
-app_file: app.py
-pinned: false
-license: mit
-short_description: An AI App for MultiModal Moderation & Rewrite Coaching
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# 🛡️ NeuroShield PoC (Enhanced Edition)
+
+A powerful AI-based moderation assistant built with Streamlit, Hugging Face Transformers, and the Groq API. Designed for nuanced, voice-guided responses to online toxicity.
+
+---
+
+## 🚀 Features
+
+- ✅ **14-label toxicity classification** (simulated Jigsaw + extended logic)
+- 🧠 **Coaching voice personas** (choose a tone: compassionate, assertive, reflective, etc.)
+- 🔥 **Visual indicators** (emoji SAFE/UNSAFE + toxicity heatmap)
+- 🎚️ **Tolerance control** for each toxicity category
+- 🧒 **Kids Mode** and **NSFW filters**
+- ✍️ **Groq LLM rewrites** in the selected tone/strategy
+
+---
+
+## 📦 Files Included
+
+- `app.py` — Streamlit frontend and logic
+- `requirements.txt` — Python dependencies
+- `coaching_voices.json` — Tone-guided response schema
+
 ---
+
+## 🧠 Coaching Voice Selector
+
+This system uses customizable tones such as:
+
+- The Boundary Setter
+- The Mirror
+- The Compassionate Reframer
+- The Challenger
+
+Add more in `coaching_voices.json`.
+
 ---
 
+## 💻 Local Setup
+
+```bash
+pip install -r requirements.txt
+streamlit run app.py
+```
+
+---
+
+## 🧠 Deployment on Hugging Face Spaces
+
+1. Create a new Space (Python + Streamlit)
+2. Upload:
+   - `app.py`
+   - `requirements.txt`
+   - `coaching_voices.json`
+3. Add `GROQ_API_KEY` in **Secrets** (Settings → Repository secrets)
+
+---
+
+## 🔐 Secrets Configuration
+
+Add the following in Hugging Face Spaces under `Repository secrets`:
+
+```
+GROQ_API_KEY=your-groq-api-key
+```
+
+---
+
+## 🌐 License
+
+MIT © 2025 — Built for research, teaching, and safe digital conversation.
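The deployment steps above depend on the key lookup order `app.py` uses: the environment variable (how Spaces exposes Repository secrets) is checked first, then a local secrets store. A minimal sketch of that order, with plain dicts standing in for `os.environ` and `st.secrets` (the helper name `resolve_groq_key` is illustrative, not part of the app):

```python
def resolve_groq_key(env: dict, local_secrets: dict):
    """Return the Groq API key, preferring the environment (HF Spaces
    Repository secrets) over a local secrets store such as
    .streamlit/secrets.toml; None if neither is configured."""
    return env.get("GROQ_API_KEY") or local_secrets.get("GROQ_API_KEY")

# On Spaces the secret arrives as an environment variable:
print(resolve_groq_key({"GROQ_API_KEY": "sk-env"}, {}))        # sk-env
# Locally it falls back to the secrets file:
print(resolve_groq_key({}, {"GROQ_API_KEY": "sk-local"}))      # sk-local
```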
app.py ADDED
@@ -0,0 +1,232 @@
+import streamlit as st
+import os
+import time
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from groq import Groq
+
+# --------------------------------------------------------------------------
+# Configuration & Model Loading (Cached for efficiency)
+# --------------------------------------------------------------------------
+CLASSIFIER_MODEL_NAME = "unitary/toxic-bert"
+LLM_MODEL_GROQ = "llama3-8b-8192"  # Or mixtral-8x7b-32768
+
+st.set_page_config(page_title="NeuroShield PoC", layout="wide")
+
+# Use Streamlit's caching for expensive operations like model loading
+@st.cache_resource
+def load_classifier_model():
+    """Loads the classifier model and tokenizer."""
+    print("Loading classifier model and tokenizer...")
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(CLASSIFIER_MODEL_NAME)
+        model = AutoModelForSequenceClassification.from_pretrained(CLASSIFIER_MODEL_NAME)
+        # Determine device (usually CPU on free HF Spaces, unless a GPU is assigned)
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+        model.eval()
+        print(f"Classifier model loaded on {device}.")
+        # Get labels from model config
+        model_labels = [model.config.id2label[i] for i in range(model.config.num_labels)]
+        return tokenizer, model, device, model_labels
+    except Exception as e:
+        st.error(f"Error loading classifier model: {e}")
+        print(f"Error loading classifier model: {e}")
+        return None, None, None, []
+
+@st.cache_resource
+def initialize_groq_client():
+    """Initializes the Groq client using the API key from secrets."""
+    print("Initializing Groq client...")
+    try:
+        # Use os.environ on HF Spaces, or st.secrets for Streamlit Community Cloud
+        groq_api_key = os.environ.get('GROQ_API_KEY')
+        if not groq_api_key:
+            # Fallback for local testing if using secrets.toml
+            try:
+                groq_api_key = st.secrets["GROQ_API_KEY"]
+            except Exception:
+                st.warning("GROQ_API_KEY not found in environment variables or st.secrets.")
+                return None
+
+        if not groq_api_key:
+            st.warning("Groq API Key not configured.")
+            return None
+        else:
+            client = Groq(api_key=groq_api_key)
+            print("Groq client initialized.")
+            return client
+    except Exception as e:
+        st.error(f"Error initializing Groq client: {e}")
+        print(f"Error initializing Groq client: {e}")
+        return None
+
+# --- Load models and clients ---
+tokenizer, model, device, model_labels = load_classifier_model()
+groq_client = initialize_groq_client()
+
+# --------------------------------------------------------------------------
+# Core Logic Functions
+# --------------------------------------------------------------------------
+def classify_text(text, threshold=0.5):
+    """Classifies input text using the loaded multi-label model."""
+    if model is None or tokenizer is None or device is None or not model_labels:
+        st.error("Classifier model/tokenizer not loaded properly.")
+        return None
+
+    start_time = time.time()
+    try:
+        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+
+        with torch.no_grad():
+            outputs = model(**inputs)
+
+        probabilities = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()
+        results = {}
+        for i, label in enumerate(model_labels):
+            if i < len(probabilities):
+                prob = probabilities[i]
+                if prob > threshold:
+                    results[label] = round(float(prob), 4)
+            else:
+                print(f"Warning: Index {i} out of bounds for probabilities")
+
+        end_time = time.time()
+        print(f"Classification took {end_time - start_time:.4f} seconds.")
+        return results
+
+    except Exception as e:
+        st.error(f"An error occurred during classification: {e}")
+        print(f"An error occurred during classification: {e}")
+        return None
+
+
+def rewrite_text_groq(original_text, detected_labels_dict, persona="helpful assistant", tone="neutral"):
+    """Rewrites the input text using the Groq API."""
+    if not groq_client:
+        st.error("Groq client not initialized. Cannot perform rewrite.")
+        return "Error: Groq client not initialized."
+
+    # Construct the prompt
+    if not detected_labels_dict:
+        detected_labels_list_str = "None relevant"
+        prompt_template = f"""You are a {persona}. A user wrote: "{original_text}"
+
+Rewrite the message in a {tone} tone while keeping its essential meaning intact. Since no specific problematic categories were flagged, focus on ensuring the tone is appropriate and constructive."""
+    else:
+        detected_labels_list_str = ", ".join(detected_labels_dict.keys())
+        prompt_template = f"""You are a {persona}. A user wrote: "{original_text}"
+
+Rewrite the message in a {tone} tone while keeping its essential meaning intact.
+
+Explain briefly why the original might be perceived as unsafe or negative, focusing on the potential impact rather than just listing labels.
+
+Ensure the rewritten message does NOT contain content related to the following categories: {detected_labels_list_str}. The goal is a safer, constructive alternative."""
+
+    print("\n--- Sending Request to Groq ---")
+    print(f"Model: {LLM_MODEL_GROQ}")
+    # print(f"Prompt:\n{prompt_template}\n" + "-"*20)  # Avoid printing long prompts in logs
+
+    start_time = time.time()
+    try:
+        chat_completion = groq_client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt_template}],
+            model=LLM_MODEL_GROQ,
+            temperature=0.6,
+            max_tokens=350,  # Increased slightly
+        )
+        end_time = time.time()
+        print(f"Groq response received in {end_time - start_time:.2f} seconds.")
+        rewritten_content = chat_completion.choices[0].message.content.strip()
+        return rewritten_content
+
+    except Exception as e:
+        st.error(f"Error interacting with Groq: {e}")
+        print(f"Error interacting with Groq: {e}")
+        return f"Error: Failed to get rewrite from Groq. {e}"
+
+
+def moderation_pipeline(input_text, classification_threshold=0.5):
+    """Runs the full classification and rewrite pipeline."""
+    print("\n--- Running Streamlit Pipeline for input ---")
+    pipeline_results = {
+        "original_text": input_text,
+        "detected_labels": {},
+        "rewrite_attempt": "(Not Attempted)",
+        "error": None
+    }
+
+    # 1. Classification
+    class_results = classify_text(input_text, threshold=classification_threshold)
+    if class_results is None:
+        pipeline_results["error"] = "Classification failed. Check logs."
+        return pipeline_results
+    pipeline_results["detected_labels"] = class_results
+    print(f"Classification Results: {class_results if class_results else 'None above threshold'}")
+
+    # 2. Rewrite (using Groq)
+    rewrite = rewrite_text_groq(input_text, class_results, persona="content moderator", tone="neutral and constructive")
+    pipeline_results["rewrite_attempt"] = rewrite
+
+    print("--- Pipeline Finished ---")
+    return pipeline_results
+
+# --------------------------------------------------------------------------
+# Streamlit UI Layout
+# --------------------------------------------------------------------------
+
+st.title("NeuroShield Proof-of-Concept")
+st.markdown("A demonstration using a pre-trained toxicity classifier (`unitary/toxic-bert`) and an LLM rewrite suggestion via the Groq API (`llama3-8b`). Enter text below and click 'Moderate'.")
+st.markdown("---")  # Separator
+
+# Initialize session state to hold results
+if 'pipeline_results' not in st.session_state:
+    st.session_state.pipeline_results = None
+
+# Input Text Area
+user_input = st.text_area("Enter text to moderate:", height=100, key="user_input_area")
+
+# Moderate Button
+if st.button("Moderate Text", key="moderate_button"):
+    if user_input:
+        # Show a spinner while processing
+        with st.spinner("Moderating..."):
+            # Check if prerequisites are loaded
+            if model and tokenizer and groq_client:
+                results = moderation_pipeline(user_input)
+                st.session_state.pipeline_results = results  # Store results in session state
+            else:
+                st.error("Models or API client failed to load. Cannot moderate.")
+                st.session_state.pipeline_results = {"error": "Models or API client failed to load."}
+    else:
+        st.warning("Please enter some text to moderate.")
+        st.session_state.pipeline_results = None  # Clear results if input is empty

+# Display Results (using columns for better layout)
+if st.session_state.pipeline_results:
+    results = st.session_state.pipeline_results
+    st.markdown("---")  # Separator
+    st.subheader("Moderation Results")
+
+    col1, col2 = st.columns(2)
+
+    with col1:
+        st.metric(label="Input Text Status", value="Processed")
+        st.markdown("**Detected Labels & Scores**")
+        if results.get("error"):
+            st.error(f"Pipeline Error: {results['error']}")
+        elif results.get("detected_labels"):
+            st.json(results["detected_labels"])
+        else:
+            st.success("No problematic labels detected above threshold.")
+
+    with col2:
+        st.markdown("**Rewrite Suggestion**")
+        rewrite_text = results.get("rewrite_attempt", "Rewrite not generated.")
+        # Use a text area to display the rewrite, making it copyable
+        st.text_area("Suggested Rewrite:", value=rewrite_text, height=250, disabled=True, key="rewrite_output_area")
+
+# Footer
+st.markdown("---")
+st.caption("Powered by Hugging Face Transformers and Groq API.")
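The multi-label scoring in `classify_text` above boils down to: sigmoid each logit independently, then keep only the labels whose probability clears the threshold. A dependency-free sketch of that step (the toy logits and six Jigsaw-style labels are made up for illustration):

```python
import math

def sigmoid(x: float) -> float:
    """Plain logistic function, as torch.sigmoid applies elementwise."""
    return 1.0 / (1.0 + math.exp(-x))

def labels_above_threshold(logits, labels, threshold=0.5):
    """Mimic classify_text: sigmoid each logit and keep labels whose
    probability exceeds the threshold, rounded to 4 decimals."""
    results = {}
    for label, logit in zip(labels, logits):
        prob = sigmoid(logit)
        if prob > threshold:
            results[label] = round(prob, 4)
    return results

# Toy logits for the six classic Jigsaw labels (made-up values).
labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
logits = [2.0, -3.0, 0.1, -5.0, 1.2, -4.0]
print(labels_above_threshold(logits, labels))
# → {'toxic': 0.8808, 'obscene': 0.525, 'insult': 0.7685}
```

Because each label is thresholded independently, a message can carry several labels at once, or none, which is exactly why the UI shows a dict of label→score rather than a single class.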
coaching_voices.json ADDED
@@ -0,0 +1,128 @@
+[
+  {
+    "voice_id": "boundary_setter",
+    "name": "The Boundary Setter",
+    "tone": "firm_respectful",
+    "response_strategy": "Name the behavior, assert limit, disengage",
+    "emotional_attitude": "assertive",
+    "communication_goal": "psychological safety, clear limits",
+    "example_response": "That comment crosses a line. I\u2019m not okay with this tone, and I won\u2019t engage further unless we can have a respectful conversation.",
+    "response_templates": [
+      "I hear what you said, and I want to be clear that [boundary]. I\u2019m stepping away from this.",
+      "Let\u2019s pause here. I won\u2019t engage in conversations that feel [emotionally unsafe/disrespectful].",
+      "This doesn\u2019t work for me. We can continue only if we shift the tone."
+    ],
+    "keywords_triggered_by": [
+      "stop",
+      "enough",
+      "crossed a line",
+      "disrespect",
+      "tone"
+    ],
+    "usage_contexts": [
+      "harassment",
+      "hate",
+      "violence"
+    ],
+    "applicable_toxicity_categories": [
+      "harassment",
+      "harassment threatening",
+      "hate",
+      "violence"
+    ],
+    "default_response_length": "short",
+    "escalation_sensitivity": 0.85,
+    "persona_notes": "Use when asserting boundaries is more important than reconciliation."
+  },
+  {
+    "voice_id": "mirror",
+    "name": "The Mirror",
+    "tone": "calm_reflective",
+    "response_strategy": "Restate the toxic statement in neutral terms to expose its nature",
+    "emotional_attitude": "dispassionate",
+    "communication_goal": "de-escalation and reflection",
+    "example_response": "You\u2019re saying I\u2019m stupid\u2014can you help me understand what you hoped that would accomplish?",
+    "response_templates": [
+      "You said '[quote]'. I\u2019m curious\u2014what were you hoping to achieve with that?",
+      "Let\u2019s look at what was just said: '[quote]'. That\u2019s worth reflecting on."
+    ],
+    "keywords_triggered_by": [
+      "idiot",
+      "stupid",
+      "dumb"
+    ],
+    "usage_contexts": [
+      "gaslighting",
+      "trolling",
+      "conflict"
+    ],
+    "applicable_toxicity_categories": [
+      "harassment",
+      "insult",
+      "hate"
+    ],
+    "default_response_length": "medium",
+    "escalation_sensitivity": 0.5,
+    "persona_notes": "Useful for showing people their behavior without adding emotional fuel."
+  },
+  {
+    "voice_id": "compassionate_reframer",
+    "name": "The Compassionate Reframer",
+    "tone": "gentle",
+    "response_strategy": "Acknowledge pain, redirect energy, invite empathy",
+    "emotional_attitude": "empathetic",
+    "communication_goal": "emotional repair and reconnection",
+    "example_response": "I can hear there\u2019s frustration behind your words. Maybe there\u2019s a better way to talk about what\u2019s bothering you?",
+    "response_templates": [
+      "Sounds like you\u2019re upset. Want to tell me what\u2019s really going on?",
+      "That felt harsh\u2014want to try again in a way that helps us understand each other?"
+    ],
+    "keywords_triggered_by": [
+      "shut up",
+      "annoying",
+      "angry"
+    ],
+    "usage_contexts": [
+      "emotional conflict",
+      "relational tension"
+    ],
+    "applicable_toxicity_categories": [
+      "harassment",
+      "insult",
+      "self harm intent"
+    ],
+    "default_response_length": "medium",
+    "escalation_sensitivity": 0.4,
+    "persona_notes": "For people who prefer to meet aggression with care and redirect the conversation."
+  },
+  {
+    "voice_id": "challenger",
+    "name": "The Challenger",
+    "tone": "bold",
+    "response_strategy": "Call out bad behavior directly, use logic or ethics",
+    "emotional_attitude": "provocative",
+    "communication_goal": "confrontation and accountability",
+    "example_response": "If you believe that\u2019s okay to say, let\u2019s examine that. What if someone said that to someone you care about?",
+    "response_templates": [
+      "That sounds wrong\u2014why do you believe that\u2019s acceptable?",
+      "Let\u2019s be honest: would you say that to someone in person?"
+    ],
+    "keywords_triggered_by": [
+      "you people",
+      "always",
+      "never"
+    ],
+    "usage_contexts": [
+      "hate",
+      "bullying"
+    ],
+    "applicable_toxicity_categories": [
+      "hate",
+      "hate instructions",
+      "violence"
+    ],
+    "default_response_length": "medium",
+    "escalation_sensitivity": 0.7,
+    "persona_notes": "Use when users want to stand their ground while staying thoughtful."
+  }
+]
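`app.py` does not show how a voice is chosen from this schema, but one plausible selector matches detected toxicity categories against `applicable_toxicity_categories`, breaking ties with `escalation_sensitivity`. A sketch under that assumption, with two trimmed entries inlined so it runs standalone (the `pick_voice` helper is hypothetical, not code from this commit):

```python
import json

# Trimmed, inline copies of two entries from coaching_voices.json so the
# sketch is self-contained; the real app would read the file from disk.
VOICES_JSON = """
[
  {"voice_id": "boundary_setter",
   "applicable_toxicity_categories": ["harassment", "hate", "violence"],
   "escalation_sensitivity": 0.85},
  {"voice_id": "mirror",
   "applicable_toxicity_categories": ["harassment", "insult", "hate"],
   "escalation_sensitivity": 0.5}
]
"""

def pick_voice(voices, detected_categories):
    """Return the voice_id whose applicable categories overlap the detected
    ones the most, breaking ties by higher escalation_sensitivity."""
    def score(voice):
        overlap = len(set(voice["applicable_toxicity_categories"]) & set(detected_categories))
        return (overlap, voice["escalation_sensitivity"])
    return max(voices, key=score)["voice_id"]

voices = json.loads(VOICES_JSON)
print(pick_voice(voices, ["insult"]))  # → mirror (only voice covering "insult")
print(pick_voice(voices, ["hate"]))    # → boundary_setter (tie broken by sensitivity)
```

Keeping selection data-driven like this is what lets new personas be added by editing the JSON alone, as the README suggests.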
requirements.txt ADDED
@@ -0,0 +1,11 @@
+transformers
+torch
+accelerate
+# ipywidgets is usually not needed for streamlit deployment
+streamlit
+groq
+# Pin versions if needed for stability, e.g.:
+# streamlit==1.32.0
+# transformers==4.38.0
+# torch==2.1.0  # Check compatibility with HF Spaces hardware/CUDA if needed
+# groq==0.5.0