moseleydev committed on
Commit
8514a3c
·
verified ·
1 Parent(s): e94fef1

Uploaded to Hugging Face

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. main.py +72 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use official Python image
FROM python:3.11

# Set up a new user named "user" with user ID 1000
# (running as non-root limits blast radius; UID 1000 is the UID that
# Hugging Face Spaces expects containers to run as — TODO confirm for
# other deployment targets)
RUN useradd -m -u 1000 user

# Switch to the "user" context
USER user

# Set home directory and path variables
# ~/.local/bin must be on PATH so user-level pip installs are executable
ENV HOME=/home/user \
	PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy requirements.txt on its own first so the (slow) pip install layer
# is cached across rebuilds that only change application code
COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r $HOME/app/requirements.txt

# Copy the rest of the application source
COPY --chown=user . $HOME/app

# Port the uvicorn server below binds to (7860 is the default port for
# Hugging Face Spaces)
EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import re
import time

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from transformers import pipeline

8
# Module-level logger; replaces bare print() so deployments can control
# verbosity and destination via standard logging configuration.
logger = logging.getLogger(__name__)

app = FastAPI(
    title="Toxicity Classification API",
    description="A microservice for detecting hate speech in social media text."
)

# 1. ADD CORS MIDDLEWARE so browser frontends on other origins can call us.
# NOTE(review): per the CORS spec, browsers reject wildcard origins combined
# with allow_credentials=True — pin allow_origins to the real frontend
# origin(s) before relying on credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the Cardiff NLP Twitter RoBERTa model once at import time so every
# request reuses the same in-memory pipeline (no per-request model load).
logger.info("Loading RoBERTa Model... This may take a moment.")
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-hate-latest"
)
logger.info("Model loaded successfully!")
30
class TweetRequest(BaseModel):
    """Request body for the /api/detect endpoint: the raw tweet text to classify."""
    # Raw, unpreprocessed tweet text (URLs/mentions are cleaned server-side).
    text: str
33
def preprocess_tweet(text: str) -> str:
    """Normalize a tweet for the classifier.

    Strips URLs entirely, replaces every @mention with the generic '@user'
    placeholder, and trims surrounding whitespace.
    """
    without_urls = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    anonymized = re.sub(r'\@\w+', '@user', without_urls)
    return anonymized.strip()
38
@app.post("/api/detect")
def detect_toxicity(request: TweetRequest):
    """Classify a tweet as hate speech or not.

    Returns a payload with the model's label, confidence percentage, and
    timing metadata. If the tweet contains no readable text after
    preprocessing, returns an error payload instead (HTTP 200 — kept for
    backward compatibility with existing frontends; a 400 would be more
    RESTful). Raises HTTPException(500) when model inference fails.
    """
    # perf_counter is monotonic — unlike time.time() it cannot jump backwards
    # if the wall clock is adjusted mid-request.
    start_time = time.perf_counter()

    clean_text = preprocess_tweet(request.text)

    # Edge case: If the tweet was only a link and is now empty
    if not clean_text:
        return {"error": "Tweet contains no readable text after preprocessing."}

    try:
        # 2. ADD TRUNCATION: Prevents the API from crashing if the text is over 512 tokens
        result = classifier(clean_text, truncation=True, max_length=512)[0]
    except Exception as e:
        # Catch any unexpected model errors cleanly instead of crashing the server;
        # chain the cause so the original traceback survives in server logs.
        raise HTTPException(status_code=500, detail=f"Model inference failed: {str(e)}") from e

    # Clean up the output for the frontend
    label = result['label'].upper()
    confidence = round(result['score'] * 100, 2)
    process_time = round((time.perf_counter() - start_time) * 1000, 2)

    return {
        "original_tweet": request.text,
        "clean_text": clean_text,
        "prediction": {
            "label": label,
            "confidence_score": f"{confidence}%",
            # Model emits HATE / NOT-HATE style labels; only exact "HATE" is toxic.
            "is_toxic": label == "HATE"
        },
        "metadata": {
            "processing_time_ms": process_time,
            "model_used": "cardiffnlp/twitter-roberta-base-hate-latest"
        }
    }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Web framework and ASGI server
fastapi
uvicorn
# Request/response schema validation (also a fastapi dependency)
pydantic
# Model inference: transformers pipeline backed by torch
transformers
torch