um41r commited on
Commit
fa05caa
·
verified ·
1 Parent(s): 4e4359d

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +46 -8
  2. api.py +166 -0
  3. requirements1.txt +5 -0
README.md CHANGED
@@ -1,12 +1,50 @@
1
  ---
2
- title: Text Api
3
- emoji: 🐒
4
- colorFrom: green
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 6.9.0
8
- app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: AI Text Detector API
3
+ emoji: 🔍
4
+ colorFrom: red
5
+ colorTo: green
6
+ sdk: docker
 
 
7
  pinned: false
8
  ---
9
 
10
+ # AI Text Detector — REST API
11
+
12
+ FastAPI wrapper around [`openai-community/roberta-base-openai-detector`](https://huggingface.co/openai-community/roberta-base-openai-detector).
13
+
14
+ ## Endpoints
15
+
16
+ | Method | Path | Description |
17
+ |--------|------|-------------|
18
+ | `GET` | `/` | Health check |
19
+ | `POST` | `/detect` | Analyse text |
20
+
21
+ ## POST /detect
22
+
23
+ **Request body**
24
+ ```json
25
+ {
26
+ "text": "Paste the text you want to analyse here."
27
+ }
28
+ ```
29
+
30
+ **Response**
31
+ ```json
32
+ {
33
+ "label": "AI",
34
+ "ai_probability": 0.92,
35
+ "human_probability": 0.08,
36
+ "confidence": 0.92,
37
+ "total_chunks": 3,
38
+ "ai_chunks": 3,
39
+ "human_chunks": 0,
40
+ "chunks": [
41
+ {
42
+ "text": "In the rapidly evolving landscape...",
43
+ "ai_probability": 0.94,
44
+ "human_probability": 0.06,
45
+ "label": "AI",
46
+ "confidence": 0.94
47
+ }
48
+ ]
49
+ }
50
+ ```
api.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI Text Detector β€” FastAPI backend
3
+ Model: openai-community/roberta-base-openai-detector
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ from contextlib import asynccontextmanager
10
+ from typing import Annotated
11
+
12
+ import torch
13
+ from fastapi import FastAPI, HTTPException
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from pydantic import BaseModel, Field
16
+ from transformers import pipeline
17
+
18
+ # ─── Config ────────────────────────────────────────────────────────────────────
19
+
20
+ MODEL_ID = "openai-community/roberta-base-openai-detector"
21
+
22
+ # ─── Lifespan (load model once at startup) ─────────────────────────────────────
23
+
24
+ classifier = None # filled in lifespan
25
+
26
+
27
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: load the HF text-classification pipeline once,
    before the first request is served, and keep it in the module-global
    `classifier`."""
    global classifier
    print(f"Loading model {MODEL_ID} …")
    # GPU 0 when CUDA is available, otherwise CPU (-1 per HF convention).
    device_index = 0 if torch.cuda.is_available() else -1
    classifier = pipeline(
        "text-classification",
        model=MODEL_ID,
        device=device_index,
    )
    print("Model ready.")
    yield
    # No shutdown cleanup required.
39
+
40
+
41
# ─── App ───────────────────────────────────────────────────────────────────────

app = FastAPI(
    title="AI Text Detector API",
    description="Detects whether text is human-written or AI-generated.",
    version="1.0.0",
    lifespan=lifespan,  # loads the model before the first request is served
)

# Allow all origins so any front-end can call this API.
# NOTE(review): restrict `allow_origins` to the real domain in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["POST", "GET"],  # only the verbs exposed by the routes below
    allow_headers=["*"],
)
58
+
59
+ # ─── Helpers ───────────────────────────────────────────────────────────────────
60
+
61
+
62
def split_into_chunks(text: str, max_words: int = 80) -> list[str]:
    """Split *text* into chunks of at most ~``max_words`` words.

    Paragraph (newline) boundaries are always respected; within a paragraph,
    sentences are greedily packed until adding the next sentence would push
    the chunk past ``max_words``. A single sentence longer than ``max_words``
    is kept whole rather than cut mid-sentence.

    Args:
        text: Input text to segment.
        max_words: Soft upper bound on words per chunk (default 80, the
            value previously hard-coded here).

    Returns:
        A non-empty list of chunks; for blank input this is ``[text.strip()]``.
    """
    chunks: list[str] = []
    # Each non-empty line counts as a paragraph; fall back to the whole text.
    paragraphs = [p.strip() for p in text.split("\n") if p.strip()] or [text.strip()]

    for para in paragraphs:
        # Naive sentence splitter: break after ., ! or ? followed by whitespace.
        sentences = re.split(r"(?<=[.!?])\s+", para)
        current = ""
        for sent in sentences:
            if len((current + " " + sent).split()) > max_words:
                # Flush the accumulated chunk (if any) and start a new one.
                if current.strip():
                    chunks.append(current.strip())
                current = sent
            else:
                current = (current + " " + sent).strip()
        if current.strip():
            chunks.append(current.strip())

    return chunks or [text.strip()]
81
+
82
+
83
+ # ─── Schemas ───────────────────────────────────────────────────────────────────
84
+
85
+
86
class DetectRequest(BaseModel):
    """Request body for POST /detect."""

    # Raw text to classify; pydantic rejects empty or >50k-char payloads.
    text: Annotated[str, Field(min_length=1, max_length=50_000, description="Text to analyse")]


class ChunkResult(BaseModel):
    """Classification result for one ~80-word chunk of the input."""

    text: str
    ai_probability: float
    human_probability: float
    label: str  # "AI" | "Human"
    confidence: float


class DetectResponse(BaseModel):
    """Aggregate verdict for the whole input plus per-chunk breakdown."""

    label: str  # "AI" | "Human"
    ai_probability: float
    human_probability: float
    confidence: float
    chunks: list[ChunkResult]
    total_chunks: int
    ai_chunks: int
    human_chunks: int
107
+
108
+
109
+ # ─── Routes ────────────────────────────────────────────────────────────────────
110
+
111
+
112
+ @app.get("/", tags=["health"])
113
+ async def health():
114
+ return {"status": "ok", "model": MODEL_ID}
115
+
116
+
117
+ @app.post("/detect", response_model=DetectResponse, tags=["detection"])
118
+ async def detect(body: DetectRequest):
119
+ if classifier is None:
120
+ raise HTTPException(status_code=503, detail="Model not loaded yet β€” try again shortly.")
121
+
122
+ chunks = split_into_chunks(body.text)
123
+
124
+ raw = classifier(chunks, truncation=True, max_length=512, batch_size=8)
125
+
126
+ chunk_results: list[ChunkResult] = []
127
+ ai_probs: list[float] = []
128
+ word_counts: list[int] = []
129
+
130
+ for chunk, res in zip(chunks, raw):
131
+ ai_prob = res["score"] if res["label"] == "Fake" else 1.0 - res["score"]
132
+ human_prob = 1.0 - ai_prob
133
+ is_ai = ai_prob >= 0.5
134
+ label = "AI" if is_ai else "Human"
135
+ conf = ai_prob if is_ai else human_prob
136
+
137
+ chunk_results.append(
138
+ ChunkResult(
139
+ text=chunk,
140
+ ai_probability=round(ai_prob, 4),
141
+ human_probability=round(human_prob, 4),
142
+ label=label,
143
+ confidence=round(conf, 4),
144
+ )
145
+ )
146
+ ai_probs.append(ai_prob)
147
+ word_counts.append(len(chunk.split()))
148
+
149
+ total_words = sum(word_counts)
150
+ avg_ai = sum(p * w for p, w in zip(ai_probs, word_counts)) / total_words
151
+ avg_human = 1.0 - avg_ai
152
+ overall_label = "AI" if avg_ai >= 0.5 else "Human"
153
+ overall_conf = avg_ai if overall_label == "AI" else avg_human
154
+
155
+ ai_chunks = sum(1 for p in ai_probs if p >= 0.5)
156
+
157
+ return DetectResponse(
158
+ label=overall_label,
159
+ ai_probability=round(avg_ai, 4),
160
+ human_probability=round(avg_human, 4),
161
+ confidence=round(overall_conf, 4),
162
+ chunks=chunk_results,
163
+ total_chunks=len(chunks),
164
+ ai_chunks=ai_chunks,
165
+ human_chunks=len(chunks) - ai_chunks,
166
+ )
requirements1.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.29.0
3
+ transformers==4.41.0
4
+ torch==2.3.0
5
+ pydantic==2.7.1