agentsay committed
Commit e66fc58 · verified · 1 parent: 97bea1f

Upload 3 files

Files changed (3)
  1. DockerFIle +20 -0
  2. main.py +70 -0
  3. requirements.txt +5 -0
DockerFIle ADDED
@@ -0,0 +1,20 @@
+ # Use an official Python runtime as the base image
+ FROM python:3.9-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements file
+ COPY requirements.txt .
+
+ # Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the FastAPI application code
+ COPY main.py .
+
+ # Expose port 8000 for the FastAPI app
+ EXPOSE 8000
+
+ # Command to run the application with Uvicorn
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
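Note that because the file is committed as DockerFIle rather than the default Dockerfile, `docker build` needs an explicit `-f DockerFIle` argument. The CMD above serves the app with Uvicorn on port 8000; for local runs without Docker, a rough programmatic equivalent is the sketch below (not part of this commit; assumes the pinned uvicorn from requirements.txt is installed):

```python
# Sketch only (not in this commit): local-development equivalent of the
# Dockerfile's CMD, using uvicorn's programmatic API.
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=8000)
```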
main.py ADDED
@@ -0,0 +1,70 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import pipeline
+ from typing import Dict
+
+ # Initialize FastAPI app
+ app = FastAPI(title="Toxicity Detection API", description="API to detect hate/toxicity in text using unitary/unbiased-toxic-roberta")
+
+ # Load pretrained hate/toxicity detection model
+ classifier = pipeline("text-classification", model="unitary/unbiased-toxic-roberta", top_k=None)
+
+ THRESHOLD = 0.6  # 60%
+
+ # Pydantic model for request body
+ class TextInput(BaseModel):
+     text: str
+
+ def check_hate(text: str) -> Dict:
+     # Get model predictions
+     results = classifier(text)[0]  # List of dicts with label and score
+
+     # Toxic label names as emitted by the model (it reports "toxicity", not "toxic")
+     toxic_labels = {"toxicity", "severe_toxicity", "insult", "obscene", "identity_attack", "threat", "sexual_explicit"}
+
+     # Initialize variables
+     flagged = False
+     prediction = "✅ Clean"
+     max_toxic_score = 0.0
+     max_toxic_label = "non_toxic"
+
+     # Check all labels for toxicity
+     for result in results:
+         label = result['label'].lower()
+         score = result['score']
+         if label in toxic_labels and score >= THRESHOLD:
+             flagged = True
+             prediction = "⚠️ Hate/Toxic"
+             if score > max_toxic_score:
+                 max_toxic_score = score
+                 max_toxic_label = label
+
+     # If no toxic label clears the threshold, report the highest-scoring label
+     if not flagged:
+         best = max(results, key=lambda x: x['score'])
+         max_toxic_label = best['label'].lower()
+         max_toxic_score = best['score']
+
+     return {
+         "text": text,
+         "prediction": prediction,
+         "confidence": round(max_toxic_score, 2),
+         "flagged": flagged,
+         "label": max_toxic_label
+     }
+
+ # API endpoint to check toxicity
+ @app.post("/check-toxicity", response_model=Dict)
+ async def check_toxicity(input: TextInput):
+     # Validate outside the try block so this 400 is not re-raised as a 500
+     if not input.text.strip():
+         raise HTTPException(status_code=400, detail="Text input cannot be empty")
+     try:
+         return check_hate(input.text)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error processing text: {str(e)}")
+
+ # Root endpoint for API welcome message
+ @app.get("/")
+ async def root():
+     return {"message": "Welcome to the Toxicity Detection API. Use POST /check-toxicity with a JSON body containing 'text'."}
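For context on the list check_hate iterates over: with top_k=None, the transformers text-classification pipeline returns a score for every label head, so classifier(text)[0] is a list of {label, score} dicts. A sketch of that shape with invented scores (illustrative only, not real model output):

```python
# Illustrative shape of classifier(text)[0]; scores are invented, and only a
# few of the model's label heads are shown.
example_results = [
    {"label": "toxicity", "score": 0.91},
    {"label": "insult", "score": 0.72},
    {"label": "threat", "score": 0.03},
]
# check_hate would flag this text: two toxic labels meet the 0.6 threshold,
# and the response reports the highest one ("toxicity", confidence 0.91).
```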
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi==0.115.2
+ uvicorn==0.32.0
+ transformers==4.44.2
+ pydantic==2.9.2
+ torch==2.4.1
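Not part of the commit, but a minimal smoke test once the container is running (assumes the image was built from the DockerFIle above and the requests package is available on the host):

```python
# Hypothetical smoke test against the running container; host and port
# match the EXPOSE/CMD settings in the DockerFIle.
import requests

resp = requests.post(
    "http://localhost:8000/check-toxicity",
    json={"text": "Have a great day!"},
)
resp.raise_for_status()
print(resp.json())  # keys: text, prediction, confidence, flagged, label
```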