Update app.py
app.py CHANGED
@@ -1,13 +1,12 @@
 import os
-
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
+import re
 import torch
-from
-import
+from fastapi import FastAPI
+from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForSequenceClassification

 # =====================================================
-# ✅
+# ✅ Safe Hugging Face Cache Configuration
 # =====================================================
 CACHE_DIR = "/tmp/hf_cache"
 os.environ["HF_HOME"] = CACHE_DIR

@@ -17,80 +16,102 @@ os.environ["HF_HUB_CACHE"] = CACHE_DIR
 os.makedirs(CACHE_DIR, exist_ok=True)

 # =====================================================
-# ✅
+# ✅ Load Model and Tokenizer
 # =====================================================
-MODEL_NAME = "
+MODEL_NAME = "roberta-base-openai-detector"

-
-
-model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

-
-    "text-classification",
-    model=model,
-    tokenizer=tokenizer,
-    device=0 if torch.cuda.is_available() else -1
-)
+app = FastAPI(title="AI Text Detector")

 # =====================================================
-# ✅
+# ✅ Input Schema
 # =====================================================
-
-
-# Allow all origins (for testing)
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
+class InputText(BaseModel):
+    text: str

 # =====================================================
-# ✅
+# ✅ Helper Functions
 # =====================================================
-
-
-
-
-
-        return JSONResponse({"error": "Empty text provided"}, status_code=400)
+def split_into_paragraphs(text: str):
+    """Split text into paragraphs by double newlines or long single breaks."""
+    paragraphs = re.split(r'\n\s*\n', text.strip())
+    paragraphs = [p.strip() for p in paragraphs if len(p.strip()) > 0]
+    return paragraphs

-
-
-
-
+def analyze_text_block(text: str):
+    """Analyze a single paragraph and return AI/Human probability."""
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probs = torch.softmax(logits, dim=1)[0].tolist()

-
-
-
-
-
-
-
-
-
-    while True:
-        await asyncio.sleep(idle_timeout)
-        await ws.close(code=1000)
-        break
-
-    asyncio.create_task(close_if_idle())
-
-    try:
-        while True:
-            message = await ws.receive_text()
-            if message.lower() in ["exit", "quit"]:
-                await ws.close(code=1000)
-                break
-            result = classifier(message)
-            await ws.send_json(result)
-    except Exception:
-        await ws.close()
+    return {
+        "label_scores": {
+            model.config.id2label[0]: round(probs[0], 4),
+            model.config.id2label[1]: round(probs[1], 4)
+        },
+        "ai_generated_score": probs[1],
+        "human_written_score": probs[0],
+        "is_ai": probs[1] > probs[0]
+    }

 # =====================================================
-# ✅
+# ✅ Routes
 # =====================================================
 @app.get("/")
-def
-    return {"
+def root():
+    return {"message": "AI Text Detector is running. Use POST /analyze with {'text': 'your text'}"}
+
+@app.post("/analyze")
+async def analyze(data: InputText):
+    text = data.text.strip()
+    if not text:
+        return {"success": False, "code": 400, "message": "Empty input text"}
+
+    paragraphs = split_into_paragraphs(text)
+    results = []
+
+    ai_words, total_words = 0, 0
+
+    for paragraph in paragraphs:
+        res = analyze_text_block(paragraph)
+        results.append({
+            "paragraph": paragraph,
+            "ai_generated_score": res["ai_generated_score"],
+            "human_written_score": res["human_written_score"]
+        })
+
+        word_count = len(paragraph.split())
+        total_words += word_count
+        ai_words += word_count * res["ai_generated_score"]
+
+    fake_percentage = round((ai_words / total_words) * 100, 2) if total_words > 0 else 0
+    feedback = (
+        "Most of Your Text is AI/GPT Generated"
+        if fake_percentage > 50
+        else "Most of Your Text Appears Human-Written"
+    )
+
+    return {
+        "success": True,
+        "code": 200,
+        "message": "detection result passed to proxy",
+        "data": {
+            "sentences": [],
+            "isHuman": round(100 - fake_percentage, 2),
+            "additional_feedback": "",
+            "h": [r["paragraph"] for r in results],
+            "hi": [],
+            "textWords": total_words,
+            "aiWords": int(total_words * (fake_percentage / 100)),
+            "fakePercentage": fake_percentage,
+            "specialIndexes": [],
+            "specialSentences": [],
+            "originalParagraph": text,
+            "feedback": feedback,
+            "input_text": text,
+            "detected_language": "en"
+        }
+    }
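
For a quick check of the new POST /analyze route, a minimal client sketch (the host and port are assumptions; Spaces typically serve on 7860, and any HTTP client works):

import requests

resp = requests.post(
    "http://localhost:7860/analyze",  # assumed local URL; replace with the Space endpoint
    json={"text": "First paragraph.\n\nSecond paragraph."},
)
body = resp.json()
# top-level keys: success, code, message, data
print(body["data"]["fakePercentage"], body["data"]["feedback"])

Note that analyze_text_block hard-codes probs[1] as the AI score; the label_scores field echoes model.config.id2label, which makes that ordering easy to verify for the checked-out model.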
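The word-weighted aggregation in analyze can also be verified by hand. With two hypothetical paragraphs of 80 and 20 words scoring 0.9 and 0.2 on ai_generated_score: ai_words = 80*0.9 + 20*0.2 = 76.0 and total_words = 100, so fakePercentage = 76.0, isHuman = 24.0, aiWords = int(100 * 0.76) = 76, and since 76.0 > 50 the feedback reads "Most of Your Text is AI/GPT Generated".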