moseleydev committed on
Commit
8514a3c
·
verified ·
1 Parent(s): e94fef1

Uploaded to Hugging Face

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. main.py +72 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python image
FROM python:3.11

# Set up a new user named "user" with user ID 1000
# (running as non-root limits blast radius; UID 1000 is the UID that
# Hugging Face Spaces expects containers to run as — TODO confirm for
# other deployment targets)
RUN useradd -m -u 1000 user

# Switch to the "user" context
USER user

# Set home directory and path variables
# ~/.local/bin must be on PATH so user-level pip installs are executable
ENV HOME=/home/user \
	PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy requirements.txt on its own first so the (slow) pip install layer
# is cached across rebuilds that only change application code
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt

# Copy the rest of the application source
COPY --chown=user . $HOME/app

# Port the uvicorn server below binds to (7860 is the default port for
# Hugging Face Spaces)
EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import re
import time

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline

8
# Module-level logger; replaces bare print() so deployments can control
# verbosity and destination via standard logging configuration.
logger = logging.getLogger(__name__)

app = FastAPI(
    title="Toxicity Classification API",
    description="A microservice for detecting hate speech in social media text."
)

# 1. ADD CORS MIDDLEWARE so browser frontends on other origins can call us.
# NOTE(review): per the CORS spec, browsers reject wildcard origins combined
# with allow_credentials=True — pin allow_origins to the real frontend
# origin(s) before relying on credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the Cardiff NLP Twitter RoBERTa model once at import time so every
# request reuses the same in-memory pipeline (no per-request model load).
logger.info("Loading RoBERTa Model... This may take a moment.")
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-hate-latest"
)
logger.info("Model loaded successfully!")
30
class TweetRequest(BaseModel):
    """Request body for the /api/detect endpoint: the raw tweet text to classify."""
    # Raw, unpreprocessed tweet text (URLs/mentions are cleaned server-side).
    text: str
33
def preprocess_tweet(text: str) -> str:
    """Normalize a tweet for the classifier.

    Strips URLs entirely, replaces every @mention with the generic '@user'
    placeholder, and trims surrounding whitespace.
    """
    without_urls = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    anonymized = re.sub(r'\@\w+', '@user', without_urls)
    return anonymized.strip()
38
@app.post("/api/detect")
def detect_toxicity(request: TweetRequest):
    """Classify a tweet as hate speech or not.

    Returns a payload with the model's label, confidence percentage, and
    timing metadata. If the tweet contains no readable text after
    preprocessing, returns an error payload instead (HTTP 200 — kept for
    backward compatibility with existing frontends; a 400 would be more
    RESTful). Raises HTTPException(500) when model inference fails.
    """
    # perf_counter is monotonic — unlike time.time() it cannot jump backwards
    # if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    clean_text = preprocess_tweet(request.text)

    # Edge case: If the tweet was only a link and is now empty
    if not clean_text:
        return {"error": "Tweet contains no readable text after preprocessing."}

    try:
        # 2. ADD TRUNCATION: Prevents the API from crashing if the text is over 512 tokens
        result = classifier(clean_text, truncation=True, max_length=512)[0]
    except Exception as e:
        # Catch any unexpected model errors cleanly instead of crashing the server;
        # chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}") from e

    # Clean up the output for the frontend
    label = result['label'].upper()
    confidence = round(result['score'] * 100, 2)
    process_time = round((time.perf_counter() - start_time) * 1000, 2)

    return {
        "original_tweet": request.text,
        "clean_text": clean_text,
        "prediction": {
            "label": label,
            "confidence_score": f"{confidence}%",
            # Model emits HATE / NOT-HATE style labels; only exact "HATE" is toxic.
            "is_toxic": label == "HATE"
        },
        "metadata": {
            "processing_time_ms": process_time,
            "model_used": "cardiffnlp/twitter-roberta-base-hate-latest"
        }
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Web framework and ASGI server
fastapi
uvicorn
# Request/response schema validation (also a fastapi dependency)
pydantic
# Model inference: transformers pipeline backed by torch
transformers
torch