AI_Detector_2

Sleeping

App Files Files Community

mahmoudsaber0 committed on Oct 17

Commit

a03c764

verified ·

1 Parent(s): 9cf13c3

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -79

app.py CHANGED Viewed

@@ -1,99 +1,78 @@
-import re
-import torch
 from fastapi import FastAPI, Request
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from pydantic import BaseModel
-from typing import List
-import uvicorn
-# ========== CONFIG ==========
-MODEL_PATH = "roberta-base-openai-detector"  # or your preferred detector
-device = "cuda" if torch.cuda.is_available() else "cpu"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-model_1 = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH).to(device)
-model_2 = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH).to(device)
-model_3 = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH).to(device)
-label_mapping = {
-    0: "gpt2", 1: "gpt3", 2: "gpt4", 3: "chatgpt", 4: "dolly", 5: "human", 24: "human"
-}
-app = FastAPI(title="AI Text Classifier API", version="1.0.0")
-# ========== HELPERS ==========
 def clean_text(text: str) -> str:
-    text = re.sub(r'\s+', ' ', text)
     return text.strip()
-# ========== INPUT MODEL ==========
-class TextInput(BaseModel):
-    text: str
-# ========== MAIN LOGIC ==========
-@app.post("/analyze")
-async def analyze_text(data: TextInput):
-    cleaned_text = clean_text(data.text)
-    if not cleaned_text.strip():
-        return {"success": False, "error": "Empty text provided"}
-    paragraphs = [p.strip() for p in re.split(r'\n{2,}', cleaned_text) if p.strip()]
-    if not paragraphs:
-        paragraphs = [cleaned_text]
-    chunk_scores = []
-    all_probs = []
-    for paragraph in paragraphs:
-        inputs = tokenizer(paragraph, return_tensors="pt", truncation=True, padding=True).to(device)
         with torch.no_grad():
-            logits_1 = model_1(**inputs).logits
-            logits_2 = model_2(**inputs).logits
-            logits_3 = model_3(**inputs).logits
-            softmax_1 = torch.softmax(logits_1, dim=1)
-            softmax_2 = torch.softmax(logits_2, dim=1)
-            softmax_3 = torch.softmax(logits_3, dim=1)
-            averaged = (softmax_1 + softmax_2 + softmax_3) / 3
-            probs = averaged[0]
-            all_probs.append(probs.cpu())
-        human_prob = probs[24].item() if 24 in label_mapping else probs[-1].item()
-        ai_probs_clone = probs.clone()
-        ai_probs_clone[24] = 0
-        ai_total = ai_probs_clone.sum().item()
-        total = human_prob + ai_total
-        human_pct = (human_prob / total) * 100
-        ai_pct = (ai_total / total) * 100
-        ai_model = label_mapping[torch.argmax(ai_probs_clone).item()]
-        chunk_scores.append({
-            "human": round(human_pct, 2),
-            "ai": round(ai_pct, 2),
-            "model": ai_model,
-            "text_preview": paragraph[:250].replace('\n', ' ') + ("..." if len(paragraph) > 250 else "")
         })
-    # ---- OVERALL ----
-    avg_human = sum(c["human"] for c in chunk_scores) / len(chunk_scores)
-    avg_ai = sum(c["ai"] for c in chunk_scores) / len(chunk_scores)
-    if avg_ai > avg_human:
-        top_model = max(chunk_scores, key=lambda c: c["ai"])["model"]
-        overall = {"result": f"{avg_ai:.2f}% AI-generated", "model": top_model}
-    else:
-        overall = {"result": f"{avg_human:.2f}% Human-written", "model": "human"}
     return {
-        "success": True,
-        "overall": overall,
-        "paragraphs": chunk_scores,
-        "total_paragraphs": len(chunk_scores)
     }
-# ========== RUN LOCALLY ==========
-if __name__ == "__main__":
-    uvicorn.run("app:app", host="0.0.0.0", port=8000)

 from fastapi import FastAPI, Request
 from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import re
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# A small checkpoint is used so the Hugging Face host does not restart the app.
MODEL_NAME = "roberta-base-openai-detector"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model = model.to(device)
# The model is only used for inference, so switch it to evaluation mode.
model.eval()

# ASGI application object that the route decorators below attach to.
app = FastAPI(title="AI Text Detector API")
+# --- Text Cleaning ---
 def clean_text(text: str) -> str:
+    text = re.sub(r'\s{2,}', ' ', text)
+    text = re.sub(r'\s+([,.;:?!])', r'\1', text)
     return text.strip()
+# --- Paragraph Splitter ---
def split_paragraphs(text: str) -> list:
    """Split *text* into paragraphs separated by one or more blank lines.

    A blank line may contain spaces, tabs or a carriage return, so text with
    Windows line endings (``\\r\\n\\r\\n``) or whitespace-only separator lines
    is split as well. Each paragraph is stripped; empty pieces are dropped.

    Returns:
        A list of non-empty paragraph strings (empty for blank input).
    """
    stripped = (piece.strip() for piece in re.split(r'\n\s*\n', text))
    return [piece for piece in stripped if piece]
+# --- Classification ---
# Lower-cased class names a checkpoint's ``id2label`` may use for each side.
_AI_LABEL_NAMES = frozenset({"fake", "ai", "ai-generated", "machine", "generated"})
_HUMAN_LABEL_NAMES = frozenset({"real", "human", "human-written"})


def _resolve_label_indices():
    """Return ``(ai_index, human_index)`` into the model's output classes.

    The indices are read from ``model.config.id2label`` when it names the
    classes recognisably. Otherwise the previous hard-coded assumption
    (index 1 = AI, index 0 = human) is kept.
    """
    # NOTE(review): OpenAI-detector checkpoints reportedly order their outputs
    # as (fake, real), i.e. the opposite of the fallback below - confirm
    # against the checkpoint's config.json.
    id2label = getattr(model.config, "id2label", None) or {}
    ai_index = human_index = None
    for index, name in id2label.items():
        key = str(name).strip().lower()
        if key in _AI_LABEL_NAMES:
            ai_index = int(index)
        elif key in _HUMAN_LABEL_NAMES:
            human_index = int(index)
    if ai_index is None or human_index is None:
        return 1, 0
    return ai_index, human_index


def analyze_text(text: str):
    """Score *text* paragraph by paragraph as AI-generated vs. human-written.

    The text is split into paragraphs first and each paragraph is cleaned
    afterwards, so whitespace normalisation cannot erase the paragraph
    boundaries. Every paragraph is classified independently and the overall
    percentages are the plain average of the per-paragraph percentages.

    Args:
        text: Raw input text.

    Returns:
        A dict with ``"overall_result"`` (``ai_percentage``,
        ``human_percentage``, ``label``) and ``"paragraphs"`` (one entry per
        paragraph with its number, both probabilities and a 150-character
        snippet). For empty or whitespace-only input both percentages are
        ``0.0``, the label is ``"No text provided"`` and ``"paragraphs"`` is
        empty.
    """
    paragraphs = [
        cleaned
        for cleaned in (clean_text(raw) for raw in split_paragraphs(text))
        if cleaned
    ]
    if not paragraphs:
        # Nothing to classify: answer with the usual shape instead of
        # dividing by zero further down.
        return {
            "overall_result": {
                "ai_percentage": 0.0,
                "human_percentage": 0.0,
                "label": "No text provided"
            },
            "paragraphs": []
        }

    ai_index, human_index = _resolve_label_indices()
    paragraph_results = []
    total_ai = total_human = 0.0
    for number, paragraph in enumerate(paragraphs, 1):
        inputs = tokenizer(paragraph, return_tensors="pt", truncation=True, padding=True).to(device)
        with torch.no_grad():
            logits = model(**inputs).logits
        # One sequence per call, so row 0 holds its class probabilities.
        probs = torch.softmax(logits, dim=1)[0]
        ai_score = float(probs[ai_index].item() * 100)
        human_score = float(probs[human_index].item() * 100)
        total_ai += ai_score
        total_human += human_score
        paragraph_results.append({
            "paragraph_number": number,
            "ai_probability": round(ai_score, 2),
            "human_probability": round(human_score, 2),
            "text_snippet": paragraph[:150] + ("..." if len(paragraph) > 150 else "")
        })

    avg_ai = total_ai / len(paragraphs)
    avg_human = total_human / len(paragraphs)
    overall_label = "AI-generated" if avg_ai > avg_human else "Human-written"
    return {
        "overall_result": {
            "ai_percentage": round(avg_ai, 2),
            "human_percentage": round(avg_human, 2),
            "label": overall_label
        },
        "paragraphs": paragraph_results
    }
+# --- Request Schema ---
class TextInput(BaseModel):
    """Request body accepted by the ``/analyze`` endpoint."""

    # Raw text to be classified.
    text: str
+# --- API Routes ---
@app.get("/")
async def root():
    """Report that the API process is up and serving requests."""
    payload = {"status": "ok", "message": "AI Text Detector API is running."}
    return payload
@app.post("/analyze")
async def analyze(input_data: TextInput):
    """Classify the submitted text and return the result of ``analyze_text``."""
    submitted_text = input_data.text
    return analyze_text(submitted_text)