Upload 3 files
- README.md +46 -8
- api.py +166 -0
- requirements1.txt +5 -0
README.md
CHANGED

@@ -1,12 +1,50 @@
 ---
-title: Text
-emoji:
-colorFrom:
-colorTo:
-sdk:
-sdk_version: 6.9.0
-app_file: app.py
+title: AI Text Detector API
+emoji: π
+colorFrom: red
+colorTo: green
+sdk: docker
 pinned: false
 ---
 
-
+# AI Text Detector – REST API
+
+FastAPI wrapper around [`openai-community/roberta-base-openai-detector`](https://huggingface.co/openai-community/roberta-base-openai-detector).
+
+## Endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `GET` | `/` | Health check |
+| `POST` | `/detect` | Analyse text |
+
+## POST /detect
+
+**Request body**
+```json
+{
+  "text": "Paste the text you want to analyse here."
+}
+```
+
+**Response**
+```json
+{
+  "label": "AI",
+  "ai_probability": 0.92,
+  "human_probability": 0.08,
+  "confidence": 0.92,
+  "total_chunks": 3,
+  "ai_chunks": 3,
+  "human_chunks": 0,
+  "chunks": [
+    {
+      "text": "In the rapidly evolving landscape...",
+      "ai_probability": 0.94,
+      "human_probability": 0.06,
+      "label": "AI",
+      "confidence": 0.94
+    }
+  ]
+}
+```
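For quick testing, a minimal Python client could look like the sketch below. The base URL is a placeholder for wherever this Space is deployed, and the `requests` package is assumed to be available:

```python
import requests

# Placeholder: substitute the URL of your deployed Space.
API_URL = "https://YOUR-SPACE.hf.space"

resp = requests.post(
    f"{API_URL}/detect",
    json={"text": "Paste the text you want to analyse here."},
    timeout=60,  # the first request may be slow while the model loads
)
resp.raise_for_status()
result = resp.json()
print(result["label"], result["ai_probability"])
```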
api.py
ADDED

@@ -0,0 +1,166 @@
+"""
+AI Text Detector – FastAPI backend
+Model: openai-community/roberta-base-openai-detector
+"""
+
+from __future__ import annotations
+
+import re
+from contextlib import asynccontextmanager
+from typing import Annotated
+
+import torch
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+from transformers import pipeline
+
+# ─── Config ────────────────────────────────────────────────────────────────────
+
+MODEL_ID = "openai-community/roberta-base-openai-detector"
+
+# ─── Lifespan (load model once at startup) ─────────────────────────────────────
+
+classifier = None  # filled in lifespan
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global classifier
+    print(f"Loading model {MODEL_ID} …")
+    classifier = pipeline(
+        "text-classification",
+        model=MODEL_ID,
+        device=0 if torch.cuda.is_available() else -1,
+    )
+    print("Model ready.")
+    yield
+    # Nothing to clean up
+
+
+# ─── App ───────────────────────────────────────────────────────────────────────
+
+app = FastAPI(
+    title="AI Text Detector API",
+    description="Detects whether text is human-written or AI-generated.",
+    version="1.0.0",
+    lifespan=lifespan,
+)
+
+# Allow all origins so your website can call this freely.
+# Restrict `allow_origins` to your domain in production.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["POST", "GET"],
+    allow_headers=["*"],
+)
+
+# ─── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def split_into_chunks(text: str) -> list[str]:
+    """Split text into ~80-word chunks, respecting paragraph / sentence boundaries."""
+    chunks: list[str] = []
+    paragraphs = [p.strip() for p in text.split("\n") if p.strip()] or [text.strip()]
+
+    for para in paragraphs:
+        sentences = re.split(r"(?<=[.!?])\s+", para)
+        current = ""
+        for sent in sentences:
+            if len((current + " " + sent).split()) > 80:
+                if current.strip():
+                    chunks.append(current.strip())
+                current = sent
+            else:
+                current = (current + " " + sent).strip()
+        if current.strip():
+            chunks.append(current.strip())
+
+    return chunks or [text.strip()]
+
+
+# ─── Schemas ───────────────────────────────────────────────────────────────────
+
+
+class DetectRequest(BaseModel):
+    text: Annotated[str, Field(min_length=1, max_length=50_000, description="Text to analyse")]
+
+
+class ChunkResult(BaseModel):
+    text: str
+    ai_probability: float
+    human_probability: float
+    label: str  # "AI" | "Human"
+    confidence: float
+
+
+class DetectResponse(BaseModel):
+    label: str  # "AI" | "Human"
+    ai_probability: float
+    human_probability: float
+    confidence: float
+    chunks: list[ChunkResult]
+    total_chunks: int
+    ai_chunks: int
+    human_chunks: int
+
+
+# ─── Routes ────────────────────────────────────────────────────────────────────
+
+
+@app.get("/", tags=["health"])
+async def health():
+    return {"status": "ok", "model": MODEL_ID}
+
+
+@app.post("/detect", response_model=DetectResponse, tags=["detection"])
+async def detect(body: DetectRequest):
+    if classifier is None:
+        raise HTTPException(status_code=503, detail="Model not loaded yet – try again shortly.")
+
+    chunks = split_into_chunks(body.text)
+
+    raw = classifier(chunks, truncation=True, max_length=512, batch_size=8)
+
+    chunk_results: list[ChunkResult] = []
+    ai_probs: list[float] = []
+    word_counts: list[int] = []
+
+    for chunk, res in zip(chunks, raw):
+        ai_prob = res["score"] if res["label"] == "Fake" else 1.0 - res["score"]  # model labels: "Fake" = AI, "Real" = human
+        human_prob = 1.0 - ai_prob
+        is_ai = ai_prob >= 0.5
+        label = "AI" if is_ai else "Human"
+        conf = ai_prob if is_ai else human_prob
+
+        chunk_results.append(
+            ChunkResult(
+                text=chunk,
+                ai_probability=round(ai_prob, 4),
+                human_probability=round(human_prob, 4),
+                label=label,
+                confidence=round(conf, 4),
+            )
+        )
+        ai_probs.append(ai_prob)
+        word_counts.append(len(chunk.split()))
+
+    total_words = sum(word_counts) or 1  # guard against ZeroDivisionError on whitespace-only input
+    avg_ai = sum(p * w for p, w in zip(ai_probs, word_counts)) / total_words
+    avg_human = 1.0 - avg_ai
+    overall_label = "AI" if avg_ai >= 0.5 else "Human"
+    overall_conf = avg_ai if overall_label == "AI" else avg_human
+
+    ai_chunks = sum(1 for p in ai_probs if p >= 0.5)
+
+    return DetectResponse(
+        label=overall_label,
+        ai_probability=round(avg_ai, 4),
+        human_probability=round(avg_human, 4),
+        confidence=round(overall_conf, 4),
+        chunks=chunk_results,
+        total_chunks=len(chunks),
+        ai_chunks=ai_chunks,
+        human_chunks=len(chunks) - ai_chunks,
+    )
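The overall verdict is a word-count-weighted average of the per-chunk AI probabilities, so longer chunks pull the result harder than short ones. A self-contained illustration, with made-up example values:

```python
# Word-count-weighted aggregation as used in /detect (values are invented).
ai_probs = [0.9, 0.1]   # per-chunk AI probabilities
word_counts = [80, 20]  # words in each chunk

total_words = sum(word_counts) or 1  # same zero-division guard as api.py
avg_ai = sum(p * w for p, w in zip(ai_probs, word_counts)) / total_words
print(round(avg_ai, 4))  # 0.74: the 80-word chunk dominates the verdict
```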
requirements1.txt
ADDED

@@ -0,0 +1,5 @@
+fastapi==0.111.0
+uvicorn[standard]==0.29.0
+transformers==4.41.0
+torch==2.3.0
+pydantic==2.7.1
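The commit ships no Dockerfile or start command even though the README front matter now declares `sdk: docker`. Assuming the module name `api` from above, a minimal entry-point sketch (7860 is the port a Docker Space is expected to listen on by default):

```python
# run.py - hypothetical entry point, not part of this commit
import uvicorn

if __name__ == "__main__":
    uvicorn.run("api:app", host="0.0.0.0", port=7860)
```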