Spaces:
Sleeping
Sleeping
Added the files adding system and formatted the files system
Browse files- Dockerfile +0 -34
- MODEL/app.py +0 -18
- MODEL/readme.md +0 -61
- MODEL/requirements.txt +0 -1
- Procfile +1 -0
- README.md +0 -268
- __init__.py +0 -0
- app.py +8 -90
- features/text_classifier/__init__.py +0 -0
- features/text_classifier/controller.py +54 -0
- features/text_classifier/inferencer.py +27 -0
- features/text_classifier/model_loader.py +50 -0
- features/text_classifier/preprocess.py +30 -0
- features/text_classifier/routes.py +28 -0
- readme.md +316 -1
- requirements.txt +11 -7
Dockerfile
DELETED
|
@@ -1,34 +0,0 @@
|
|
| 1 |
-
# Use the latest slim Python 3.11 image
|
| 2 |
-
FROM python:3.11-slim
|
| 3 |
-
|
| 4 |
-
# Set environment variables
|
| 5 |
-
ENV HOME=/home/user \
|
| 6 |
-
PATH=/home/user/.local/bin:$PATH \
|
| 7 |
-
PYTHONDONTWRITEBYTECODE=1 \
|
| 8 |
-
PYTHONUNBUFFERED=1
|
| 9 |
-
|
| 10 |
-
# Install system dependencies
|
| 11 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 12 |
-
build-essential \
|
| 13 |
-
git \
|
| 14 |
-
curl \
|
| 15 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
-
|
| 17 |
-
# Create a non-root user for safety
|
| 18 |
-
RUN useradd -ms /bin/bash user
|
| 19 |
-
USER user
|
| 20 |
-
WORKDIR $HOME/app
|
| 21 |
-
|
| 22 |
-
# Copy app source code
|
| 23 |
-
COPY --chown=user . .
|
| 24 |
-
|
| 25 |
-
# Install Python dependencies
|
| 26 |
-
RUN pip install --no-cache-dir --upgrade pip \
|
| 27 |
-
&& pip install --no-cache-dir -r requirements.txt
|
| 28 |
-
|
| 29 |
-
# Expose port
|
| 30 |
-
EXPOSE 7860
|
| 31 |
-
|
| 32 |
-
# Start the FastAPI app using uvicorn
|
| 33 |
-
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODEL/app.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
from huggingface_hub import Repository
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def download_repo():
|
| 6 |
-
hf_token = os.getenv("HF_TOKEN")
|
| 7 |
-
if not hf_token:
|
| 8 |
-
raise ValueError("HF_TOKEN not found in environment variables.")
|
| 9 |
-
|
| 10 |
-
repo_id = "can-org/AIModel"
|
| 11 |
-
local_dir = "../Ai-Text-Detector/"
|
| 12 |
-
|
| 13 |
-
repo = Repository(local_dir, clone_from=repo_id, token=hf_token)
|
| 14 |
-
print(f"Repository downloaded to: {local_dir}")
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
if __name__ == "__main__":
|
| 18 |
-
download_repo()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODEL/readme.md
DELETED
|
@@ -1,61 +0,0 @@
|
|
| 1 |
-
### Hugging Face CLI Tool
|
| 2 |
-
|
| 3 |
-
This CLI tool allows you to **upload** and **download** models from Hugging Face repositories. It requires a **Hugging Face Access Token (`HF_TOKEN`)** for authentication, especially for private repositories.
|
| 4 |
-
|
| 5 |
-
### Prerequisites
|
| 6 |
-
|
| 7 |
-
1. **Install Hugging Face Hub**:
|
| 8 |
-
|
| 9 |
-
```bash
|
| 10 |
-
pip install huggingface_hub
|
| 11 |
-
```
|
| 12 |
-
|
| 13 |
-
2. **Get HF_TOKEN**:
|
| 14 |
-
- Log in to [Hugging Face](https://huggingface.co/).
|
| 15 |
-
- Go to **Settings** → **Access Tokens** → **Create a new token** with `read` and `write` permissions.
|
| 16 |
-
- Save the token.
|
| 17 |
-
|
| 18 |
-
### Usage
|
| 19 |
-
|
| 20 |
-
1. **Set the Token**:
|
| 21 |
-
|
| 22 |
-
- **Linux/macOS**:
|
| 23 |
-
```bash
|
| 24 |
-
export HF_TOKEN=your_token_here
|
| 25 |
-
```
|
| 26 |
-
- **Windows (CMD)**:
|
| 27 |
-
```bash
|
| 28 |
-
set HF_TOKEN=your_token_here
|
| 29 |
-
```
|
| 30 |
-
|
| 31 |
-
2. **Download Model**:
|
| 32 |
-
|
| 33 |
-
```bash
|
| 34 |
-
python main.py --download --repo-id <repo_name> --save-dir <local_save_path>
|
| 35 |
-
```
|
| 36 |
-
|
| 37 |
-
3. **Upload Model**:
|
| 38 |
-
```bash
|
| 39 |
-
python main.py --upload --repo-id <repo_name> --model-path <local_model_path>
|
| 40 |
-
```
|
| 41 |
-
|
| 42 |
-
### Example
|
| 43 |
-
|
| 44 |
-
To download a model:
|
| 45 |
-
|
| 46 |
-
```bash
|
| 47 |
-
python main.py
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
### Authentication
|
| 51 |
-
|
| 52 |
-
Ensure you set `HF_TOKEN` to access private repositories. If not set, the script will raise an error.
|
| 53 |
-
Here's a clearer and more polished version of that note:
|
| 54 |
-
|
| 55 |
-
---
|
| 56 |
-
|
| 57 |
-
### ⚠️ Note
|
| 58 |
-
|
| 59 |
-
**Make sure to run this script from the `HuggingFace` directory to ensure correct path resolution and functionality.**
|
| 60 |
-
|
| 61 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODEL/requirements.txt
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
huggingface_hub
|
|
|
|
|
|
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}
|
README.md
DELETED
|
@@ -1,268 +0,0 @@
|
|
| 1 |
-
### **FastAPI AI**
|
| 2 |
-
|
| 3 |
-
This FastAPI app loads a GPT-2 model, tokenizes input text, classifies it, and returns whether the text is AI-generated or human-written.
|
| 4 |
-
|
| 5 |
-
### **install Dependencies**
|
| 6 |
-
|
| 7 |
-
```bash
|
| 8 |
-
pip install -r requirements.txt
|
| 9 |
-
|
| 10 |
-
```
|
| 11 |
-
|
| 12 |
-
This command installs all the dependencies listed in the `requirements.txt` file. It ensures that your environment has the required packages to run the project smoothly.
|
| 13 |
-
|
| 14 |
-
**NOTE: If you add or change any dependencies, don't forget to update `requirements.txt` by running `pip freeze > requirements.txt`.**
|
| 15 |
-
|
| 16 |
-
---
|
| 17 |
-
|
| 18 |
-
### **Functions**
|
| 19 |
-
|
| 20 |
-
1. **`load_model()`**
|
| 21 |
-
Loads the GPT-2 model and tokenizer from specified paths.
|
| 22 |
-
|
| 23 |
-
2. **`lifespan()`**
|
| 24 |
-
Manages the app's lifecycle: loads the model at startup and handles cleanup on shutdown.
|
| 25 |
-
|
| 26 |
-
3. **`classify_text_sync()`**
|
| 27 |
-
Synchronously tokenizes input text and classifies it using the GPT-2 model. Returns the classification and perplexity.
|
| 28 |
-
|
| 29 |
-
4. **`classify_text()`**
|
| 30 |
-
Asynchronously executes `classify_text_sync()` in a thread pool to ensure non-blocking processing.
|
| 31 |
-
|
| 32 |
-
5. **`analyze_text()`**
|
| 33 |
-
**POST** endpoint: accepts text input, classifies it using `classify_text()`, and returns the result with perplexity.
|
| 34 |
-
|
| 35 |
-
6. **`health_check()`**
|
| 36 |
-
**GET** endpoint: simple health check to confirm the API is running.
|
| 37 |
-
|
| 38 |
-
---
|
| 39 |
-
|
| 40 |
-
### **Code Overview**
|
| 41 |
-
|
| 42 |
-
### **Running and Load Balancing:**
|
| 43 |
-
|
| 44 |
-
To run the app in production with load balancing:
|
| 45 |
-
|
| 46 |
-
```bash
|
| 47 |
-
uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 48 |
-
```
|
| 49 |
-
|
| 50 |
-
This command launches the FastAPI app.
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
### **Endpoints**
|
| 54 |
-
|
| 55 |
-
#### 1. **`/analyze`**
|
| 56 |
-
|
| 57 |
-
- **Method:** `POST`
|
| 58 |
-
- **Description:** Classifies whether the text is AI-generated or human-written.
|
| 59 |
-
- **Request:**
|
| 60 |
-
```json
|
| 61 |
-
{ "text": "sample text" }
|
| 62 |
-
```
|
| 63 |
-
- **Response:**
|
| 64 |
-
```json
|
| 65 |
-
{ "result": "AI-generated", "perplexity": 55.67 }
|
| 66 |
-
```
|
| 67 |
-
|
| 68 |
-
#### 2. **`/health`**
|
| 69 |
-
|
| 70 |
-
- **Method:** `GET`
|
| 71 |
-
- **Description:** Returns the status of the API.
|
| 72 |
-
- **Response:**
|
| 73 |
-
```json
|
| 74 |
-
{ "status": "ok" }
|
| 75 |
-
```
|
| 76 |
-
|
| 77 |
-
---
|
| 78 |
-
|
| 79 |
-
### **Running the API**
|
| 80 |
-
|
| 81 |
-
Start the server with:
|
| 82 |
-
|
| 83 |
-
```bash
|
| 84 |
-
uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 85 |
-
```
|
| 86 |
-
|
| 87 |
-
---
|
| 88 |
-
|
| 89 |
-
### **🧪 Testing the API**
|
| 90 |
-
|
| 91 |
-
You can test the FastAPI endpoint using `curl` like this:
|
| 92 |
-
|
| 93 |
-
```bash
|
| 94 |
-
curl -X POST https://can-org-canspace.hf.space/analyze \
|
| 95 |
-
-H "Authorization: Bearer SECRET_CODE" \
|
| 96 |
-
-H "Content-Type: application/json" \
|
| 97 |
-
-d '{"text": "This is a sample sentence for analysis."}'
|
| 98 |
-
```
|
| 99 |
-
|
| 100 |
-
- The `-H "Authorization: Bearer SECRET_CODE"` part is used to simulate the **handshake**.
|
| 101 |
-
- FastAPI checks this token against the one loaded from the `.env` file.
|
| 102 |
-
- If the token matches, the request is accepted and processed.
|
| 103 |
-
- Otherwise, it responds with a `403 Forbidden` error (detail: `"Unauthorized"`).
|
| 104 |
-
|
| 105 |
-
---
|
| 106 |
-
|
| 107 |
-
### **API Documentation**
|
| 108 |
-
|
| 109 |
-
- **Swagger UI:** `https://can-org-canspace.hf.space/docs` -> `/docs`
|
| 110 |
-
- **ReDoc:** `https://can-org-canspace.hf.space/redoc` -> `/redoc`
|
| 111 |
-
|
| 112 |
-
### **Handshake Mechanism**
|
| 113 |
-
|
| 114 |
-
In this part, we're implementing a simple handshake to verify that the request is coming from a trusted source (e.g., our NestJS server). Here's how it works:
|
| 115 |
-
|
| 116 |
-
- We load a secret token from the `.env` file.
|
| 117 |
-
- When a request is made to the FastAPI server, we extract the `Authorization` header and compare it with our expected secret token.
|
| 118 |
-
- If the token does **not** match, we immediately return a **403 Forbidden** response with the message `"Unauthorized"`.
|
| 119 |
-
- If the token **does** match, we allow the request to proceed to the next step.
|
| 120 |
-
|
| 121 |
-
The verification function looks like this:
|
| 122 |
-
|
| 123 |
-
```python
|
| 124 |
-
def verify_token(auth: str):
|
| 125 |
-
if auth != f"Bearer {EXPECTED_TOKEN}":
|
| 126 |
-
raise HTTPException(status_code=403, detail="Unauthorized")
|
| 127 |
-
```
|
| 128 |
-
|
| 129 |
-
This provides a basic but effective layer of security to prevent unauthorized access to the API.
|
| 130 |
-
|
| 131 |
-
### **Implement it with NEST.js**
|
| 132 |
-
|
| 133 |
-
NOTE: Create a microservice in NestJS, implement the bridge there, and call it from `app.controller.ts`.
|
| 134 |
-
|
| 135 |
-
in fastapi.service.ts file what we have done is
|
| 136 |
-
|
| 137 |
-
### Project Structure
|
| 138 |
-
|
| 139 |
-
```files
|
| 140 |
-
nestjs-fastapi-bridge/
|
| 141 |
-
├── src/
|
| 142 |
-
│   ├── app.controller.ts
|
| 143 |
-
│   ├── app.module.ts
|
| 144 |
-
│   └── fastapi.service.ts
|
| 145 |
-
└── .env
|
| 146 |
-
|
| 147 |
-
```
|
| 148 |
-
|
| 149 |
-
---
|
| 150 |
-
|
| 151 |
-
### Step-by-Step Setup
|
| 152 |
-
|
| 153 |
-
#### 1. `.env`
|
| 154 |
-
|
| 155 |
-
Create a `.env` file at the root with the following:
|
| 156 |
-
|
| 157 |
-
```environment
|
| 158 |
-
FASTAPI_BASE_URL=https://can-org-canspace.hf.space
|
| 159 |
-
SECRET_TOKEN="SECRET_CODE_TOKEN"
|
| 160 |
-
```
|
| 161 |
-
|
| 162 |
-
#### 2. `fastapi.service.ts`
|
| 163 |
-
|
| 164 |
-
```javascript
|
| 165 |
-
// src/fastapi.service.ts
|
| 166 |
-
import { Injectable } from "@nestjs/common";
|
| 167 |
-
import { HttpService } from "@nestjs/axios";
|
| 168 |
-
import { ConfigService } from "@nestjs/config";
|
| 169 |
-
import { firstValueFrom } from "rxjs";
|
| 170 |
-
|
| 171 |
-
@Injectable()
|
| 172 |
-
export class FastAPIService {
|
| 173 |
-
constructor(
|
| 174 |
-
private http: HttpService,
|
| 175 |
-
private config: ConfigService,
|
| 176 |
-
) {}
|
| 177 |
-
|
| 178 |
-
async analyzeText(text: string) {
|
| 179 |
-
const url = `${this.config.get("FASTAPI_BASE_URL")}/analyze`;
|
| 180 |
-
const token = this.config.get("SECRET_TOKEN");
|
| 181 |
-
|
| 182 |
-
const response = await firstValueFrom(
|
| 183 |
-
this.http.post(
|
| 184 |
-
url,
|
| 185 |
-
{ text },
|
| 186 |
-
{
|
| 187 |
-
headers: {
|
| 188 |
-
Authorization: `Bearer ${token}`,
|
| 189 |
-
},
|
| 190 |
-
},
|
| 191 |
-
),
|
| 192 |
-
);
|
| 193 |
-
|
| 194 |
-
return response.data;
|
| 195 |
-
}
|
| 196 |
-
}
|
| 197 |
-
```
|
| 198 |
-
|
| 199 |
-
#### 3. `app.module.ts`
|
| 200 |
-
|
| 201 |
-
```javascript
|
| 202 |
-
// src/app.module.ts
|
| 203 |
-
import { Module } from "@nestjs/common";
|
| 204 |
-
import { ConfigModule } from "@nestjs/config";
|
| 205 |
-
import { HttpModule } from "@nestjs/axios";
|
| 206 |
-
import { AppController } from "./app.controller";
|
| 207 |
-
import { FastAPIService } from "./fastapi.service";
|
| 208 |
-
|
| 209 |
-
@Module({
|
| 210 |
-
imports: [ConfigModule.forRoot(), HttpModule],
|
| 211 |
-
controllers: [AppController],
|
| 212 |
-
providers: [FastAPIService],
|
| 213 |
-
})
|
| 214 |
-
export class AppModule {}
|
| 215 |
-
```
|
| 216 |
-
|
| 217 |
-
---
|
| 218 |
-
|
| 219 |
-
#### 4. `app.controller.ts`
|
| 220 |
-
|
| 221 |
-
```javascript
|
| 222 |
-
// src/app.controller.ts
|
| 223 |
-
import { Body, Controller, Post, Get, Query } from '@nestjs/common';
|
| 224 |
-
import { FastAPIService } from './fastapi.service';
|
| 225 |
-
|
| 226 |
-
@Controller()
|
| 227 |
-
export class AppController {
|
| 228 |
-
constructor(private readonly fastapiService: FastAPIService) {}
|
| 229 |
-
|
| 230 |
-
@Post('analyze-text')
|
| 231 |
-
async callFastAPI(@Body('text') text: string) {
|
| 232 |
-
return this.fastapiService.analyzeText(text);
|
| 233 |
-
}
|
| 234 |
-
|
| 235 |
-
@Get()
|
| 236 |
-
getHello(): string {
|
| 237 |
-
return 'NestJS is connected to FastAPI ';
|
| 238 |
-
}
|
| 239 |
-
}
|
| 240 |
-
```
|
| 241 |
-
|
| 242 |
-
### How to Run
|
| 243 |
-
|
| 244 |
-
Run both the FastAPI and NestJS servers:
|
| 245 |
-
|
| 246 |
-
- for nest.js
|
| 247 |
-
```bash
|
| 248 |
-
npm run start
|
| 249 |
-
```
|
| 250 |
-
- for Fastapi
|
| 251 |
-
|
| 252 |
-
```bash
|
| 253 |
-
uvicorn app:app --reload
|
| 254 |
-
```
|
| 255 |
-
|
| 256 |
-
Make sure your FastAPI service is running at `http://localhost:8000`.
|
| 257 |
-
|
| 258 |
-
### Test with CURL
|
| 259 |
-
`http://localhost:3000/` is the NestJS server:
|
| 260 |
-
```bash
|
| 261 |
-
curl -X POST http://localhost:3000/analyze-text \
|
| 262 |
-
-H 'Content-Type: application/json' \
|
| 263 |
-
-d '{"text": "This is a test input"}'
|
| 264 |
-
```
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
### MODEL
|
| 268 |
-
- You can download the model from the `/MODEL/app.py` file.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__init__.py
ADDED
|
File without changes
|
app.py
CHANGED
|
@@ -1,102 +1,20 @@
|
|
| 1 |
-
from fastapi import FastAPI
|
| 2 |
-
from fastapi.security import HTTPBearer
|
| 3 |
-
from pydantic import BaseModel
|
| 4 |
-
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
|
| 5 |
-
import torch
|
| 6 |
-
import asyncio
|
| 7 |
from contextlib import asynccontextmanager
|
|
|
|
|
|
|
| 8 |
|
| 9 |
-
# FastAPI app instance
|
| 10 |
-
app = FastAPI()
|
| 11 |
-
|
| 12 |
-
# Global model and tokenizer variables
|
| 13 |
-
model, tokenizer = None, None
|
| 14 |
-
|
| 15 |
-
# HTTPBearer instance for security
|
| 16 |
-
bearer_scheme = HTTPBearer()
|
| 17 |
-
|
| 18 |
-
# Function to load model and tokenizer
|
| 19 |
-
def load_model():
|
| 20 |
-
model_path = "./Ai-Text-Detector/model"
|
| 21 |
-
weights_path = "./Ai-Text-Detector/model_weights.pth"
|
| 22 |
|
| 23 |
-
try:
|
| 24 |
-
tokenizer = GPT2TokenizerFast.from_pretrained(model_path)
|
| 25 |
-
config = GPT2Config.from_pretrained(model_path)
|
| 26 |
-
model = GPT2LMHeadModel(config)
|
| 27 |
-
model.load_state_dict(torch.load(weights_path, map_location=torch.device("cpu")))
|
| 28 |
-
model.eval()
|
| 29 |
-
except Exception as e:
|
| 30 |
-
raise RuntimeError(f"Error loading model: {str(e)}")
|
| 31 |
-
|
| 32 |
-
return model, tokenizer
|
| 33 |
-
|
| 34 |
-
# Load model on app startup
|
| 35 |
@asynccontextmanager
|
| 36 |
async def lifespan(app: FastAPI):
|
| 37 |
-
|
| 38 |
-
model, tokenizer = load_model()
|
| 39 |
yield
|
|
|
|
| 40 |
|
| 41 |
-
# Attach startup loader
|
| 42 |
-
app = FastAPI(lifespan=lifespan)
|
| 43 |
-
|
| 44 |
-
# Input schema
|
| 45 |
-
class TextInput(BaseModel):
|
| 46 |
-
text: str
|
| 47 |
-
|
| 48 |
-
# Sync text classification
|
| 49 |
-
def classify_text(sentence: str):
|
| 50 |
-
inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
|
| 51 |
-
input_ids = inputs["input_ids"]
|
| 52 |
-
attention_mask = inputs["attention_mask"]
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
loss = outputs.loss
|
| 57 |
-
perplexity = torch.exp(loss).item()
|
| 58 |
-
|
| 59 |
-
if perplexity < 60:
|
| 60 |
-
result = "AI-generated"
|
| 61 |
-
elif perplexity < 80:
|
| 62 |
-
result = "Probably AI-generated"
|
| 63 |
-
else:
|
| 64 |
-
result = "Human-written"
|
| 65 |
-
|
| 66 |
-
return result, perplexity
|
| 67 |
-
|
| 68 |
-
# POST route to analyze text with Bearer token
|
| 69 |
-
@app.post("/analyze")
|
| 70 |
-
async def analyze_text(data: TextInput, token: str = Depends(bearer_scheme)):
|
| 71 |
-
user_input = data.text.strip()
|
| 72 |
-
|
| 73 |
-
if not user_input:
|
| 74 |
-
raise HTTPException(status_code=400, detail="Text cannot be empty")
|
| 75 |
-
|
| 76 |
-
# Check if there are at least two words
|
| 77 |
-
word_count = len(user_input.split())
|
| 78 |
-
if word_count < 2:
|
| 79 |
-
raise HTTPException(status_code=400, detail="Text must contain at least two words")
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
result, perplexity = await asyncio.to_thread(classify_text, user_input)
|
| 83 |
-
|
| 84 |
-
return {
|
| 85 |
-
"result": result,
|
| 86 |
-
"perplexity": round(perplexity, 2),
|
| 87 |
-
}
|
| 88 |
|
| 89 |
-
# Health check route
|
| 90 |
-
@app.get("/health")
|
| 91 |
-
async def health_check():
|
| 92 |
-
return {"status": "ok"}
|
| 93 |
|
| 94 |
-
# Simple index route
|
| 95 |
@app.get("/")
|
| 96 |
def index():
|
| 97 |
-
return {
|
| 98 |
-
"message": "FastAPI API is up.",
|
| 99 |
-
"try": "/docs to test the API.",
|
| 100 |
-
"status": "OK"
|
| 101 |
-
}
|
| 102 |
-
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
+
from features.text_classifier.routes import router as text_classifier_router
|
| 4 |
+
from features.text_classifier.model_loader import warmup
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
@asynccontextmanager
|
| 8 |
async def lifespan(app: FastAPI):
|
| 9 |
+
warmup() # Download and load model at startup
|
|
|
|
| 10 |
yield
|
| 11 |
+
# Cleanup lo
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
app = FastAPI()
|
| 15 |
+
app.include_router(text_classifier_router, prefix="/text", tags=["Text Classification"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
|
|
|
| 18 |
@app.get("/")
|
| 19 |
def index():
|
| 20 |
+
return {"Message": "Fast api is running... ", "Try": "/docs"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
features/text_classifier/__init__.py
ADDED
|
File without changes
|
features/text_classifier/controller.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .inferencer import classify_text
|
| 2 |
+
import asyncio
|
| 3 |
+
from fastapi import HTTPException, UploadFile
|
| 4 |
+
from .preprocess import parse_docx, parse_pdf, parse_txt
|
| 5 |
+
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
async def handle_text_analysis(text: str):
|
| 11 |
+
text = text.strip()
|
| 12 |
+
if not text or len(text.split()) < 2:
|
| 13 |
+
raise HTTPException(
|
| 14 |
+
status_code=400, detail="Text must contain at least two words"
|
| 15 |
+
)
|
| 16 |
+
label, perplexity = await asyncio.to_thread(classify_text, text)
|
| 17 |
+
return {"result": label, "perplexity": round(perplexity, 2)}
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
async def handle_file_upload(file: UploadFile):
|
| 21 |
+
try:
|
| 22 |
+
file_contents = await extract_file_contents(file)
|
| 23 |
+
if len(file_contents) > 10000:
|
| 24 |
+
return {"message": "File contains more than 10,000 characters."}
|
| 25 |
+
cleaned_text = file_contents.replace("\n", "").replace("\t", "")
|
| 26 |
+
label, perplexity = await asyncio.to_thread(classify_text, cleaned_text)
|
| 27 |
+
return {"result": label, "perplexity": round(perplexity, 2)}
|
| 28 |
+
except Exception as e:
|
| 29 |
+
logging.error(f"Error processing file: {str(e)}")
|
| 30 |
+
raise HTTPException(status_code=500, detail="Error processing the file")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
async def extract_file_contents(file: UploadFile):
|
| 34 |
+
content = await file.read()
|
| 35 |
+
file_stream = BytesIO(content)
|
| 36 |
+
|
| 37 |
+
if (
|
| 38 |
+
file.content_type
|
| 39 |
+
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
| 40 |
+
):
|
| 41 |
+
return parse_docx(file_stream)
|
| 42 |
+
elif file.content_type == "application/pdf":
|
| 43 |
+
return parse_pdf(file_stream)
|
| 44 |
+
elif file.content_type == "text/plain":
|
| 45 |
+
return parse_txt(file_stream)
|
| 46 |
+
else:
|
| 47 |
+
raise HTTPException(
|
| 48 |
+
status_code=400,
|
| 49 |
+
detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def classify(text: str):
|
| 54 |
+
return classify_text(text)
|
features/text_classifier/inferencer.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from .model_loader import get_model_tokenizer
|
| 3 |
+
|
| 4 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def classify_text(text: str):
|
| 8 |
+
model, tokenizer = get_model_tokenizer()
|
| 9 |
+
inputs = tokenizer(text, return_tensors="pt",
|
| 10 |
+
truncation=True, padding=True)
|
| 11 |
+
input_ids = inputs["input_ids"].to(device)
|
| 12 |
+
attention_mask = inputs["attention_mask"].to(device)
|
| 13 |
+
|
| 14 |
+
with torch.no_grad():
|
| 15 |
+
outputs = model(
|
| 16 |
+
input_ids, attention_mask=attention_mask, labels=input_ids)
|
| 17 |
+
loss = outputs.loss
|
| 18 |
+
perplexity = torch.exp(loss).item()
|
| 19 |
+
|
| 20 |
+
if perplexity < 60:
|
| 21 |
+
result = "AI-generated"
|
| 22 |
+
elif perplexity < 80:
|
| 23 |
+
result = "Probably AI-generated"
|
| 24 |
+
else:
|
| 25 |
+
result = "Human-written"
|
| 26 |
+
|
| 27 |
+
return result, perplexity
|
features/text_classifier/model_loader.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import logging
|
| 4 |
+
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
|
| 5 |
+
from huggingface_hub import snapshot_download
|
| 6 |
+
import torch
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
REPO_ID = "Pujan-Dev/AI-Text-Detector"
|
| 11 |
+
MODEL_DIR = "./models"
|
| 12 |
+
TOKENIZER_DIR = os.path.join(MODEL_DIR, "model")
|
| 13 |
+
WEIGHTS_PATH = os.path.join(MODEL_DIR, "model_weights.pth")
|
| 14 |
+
|
| 15 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 16 |
+
_model, _tokenizer = None, None
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def warmup():
|
| 20 |
+
global _model, _tokenizer
|
| 21 |
+
download_model_repo()
|
| 22 |
+
_model, _tokenizer = load_model()
|
| 23 |
+
logging.info("Its ready")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def download_model_repo():
|
| 27 |
+
if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
|
| 28 |
+
logging.info("Model already exists, skipping download.")
|
| 29 |
+
return
|
| 30 |
+
snapshot_path = snapshot_download(repo_id=REPO_ID)
|
| 31 |
+
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 32 |
+
shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_model():
|
| 36 |
+
tokenizer = GPT2TokenizerFast.from_pretrained(TOKENIZER_DIR)
|
| 37 |
+
config = GPT2Config.from_pretrained(TOKENIZER_DIR)
|
| 38 |
+
model = GPT2LMHeadModel(config)
|
| 39 |
+
model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
|
| 40 |
+
model.to(device)
|
| 41 |
+
model.eval()
|
| 42 |
+
return model, tokenizer
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def get_model_tokenizer():
|
| 46 |
+
global _model, _tokenizer
|
| 47 |
+
if _model is None or _tokenizer is None:
|
| 48 |
+
download_model_repo()
|
| 49 |
+
_model, _tokenizer = load_model()
|
| 50 |
+
return _model, _tokenizer
|
features/text_classifier/preprocess.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fitz # PyMuPDF
|
| 2 |
+
import docx
|
| 3 |
+
from io import BytesIO
|
| 4 |
+
import logging
|
| 5 |
+
from fastapi import HTTPException
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def parse_docx(file: BytesIO):
|
| 9 |
+
doc = docx.Document(file)
|
| 10 |
+
text = ""
|
| 11 |
+
for para in doc.paragraphs:
|
| 12 |
+
text += para.text + "\n"
|
| 13 |
+
return text
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def parse_pdf(file: BytesIO):
|
| 17 |
+
try:
|
| 18 |
+
doc = fitz.open(stream=file, filetype="pdf")
|
| 19 |
+
text = ""
|
| 20 |
+
for page_num in range(doc.page_count):
|
| 21 |
+
page = doc.load_page(page_num)
|
| 22 |
+
text += page.get_text()
|
| 23 |
+
return text
|
| 24 |
+
except Exception as e:
|
| 25 |
+
logging.error(f"Error while processing PDF: {str(e)}")
|
| 26 |
+
raise HTTPException(status_code=500, detail="Error processing PDF file")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def parse_txt(file: BytesIO):
|
| 30 |
+
return file.read().decode("utf-8")
|
features/text_classifier/routes.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
|
| 2 |
+
from fastapi.security import HTTPBearer
|
| 3 |
+
from pydantic import BaseModel
|
| 4 |
+
from .controller import handle_text_analysis, handle_file_upload
|
| 5 |
+
|
| 6 |
+
router = APIRouter()
|
| 7 |
+
bearer_scheme = HTTPBearer()
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TextInput(BaseModel):
|
| 11 |
+
text: str
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@router.post("/analyze")
|
| 15 |
+
async def analyze(data: TextInput, token: str = Depends(bearer_scheme)):
|
| 16 |
+
return await handle_text_analysis(data.text)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
@router.post("/upload")
|
| 20 |
+
async def upload_file(
|
| 21 |
+
file: UploadFile = File(...), token: str = Depends(bearer_scheme)
|
| 22 |
+
):
|
| 23 |
+
return await handle_file_upload(file)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@router.get("/health")
|
| 27 |
+
def health():
|
| 28 |
+
return {"status": "ok"}
|
readme.md
CHANGED
|
@@ -1 +1,316 @@
|
|
| 1 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### **FastAPI AI**
|
| 2 |
+
|
| 3 |
+
This FastAPI app loads a GPT-2 model, tokenizes input text, classifies it, and returns whether the text is AI-generated or human-written.
|
| 4 |
+
|
| 5 |
+
### **install Dependencies**
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
pip install -r requirements.txt
|
| 9 |
+
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
This command installs all the dependencies listed in the `requirements.txt` file. It ensures that your environment has the required packages to run the project smoothly.
|
| 13 |
+
|
| 14 |
+
**NOTE: If you add or change any dependencies, don't forget to record them in `requirements.txt` using `pip freeze > requirements.txt`.**
|
| 15 |
+
|
| 16 |
+
---
|
| 17 |
+
### File Structure
|
| 18 |
+
|
| 19 |
+
```
|
| 20 |
+
├── app.py
|
| 21 |
+
├── features
|
| 22 |
+
│   └── text_classifier
|
| 23 |
+
│       ├── controller.py
|
| 24 |
+
│       ├── inferencer.py
|
| 25 |
+
│       ├── __init__.py
|
| 26 |
+
│       ├── model_loader.py
|
| 27 |
+
│       ├── preprocess.py
|
| 28 |
+
│       └── routes.py
|
| 29 |
+
├── __init__.py
|
| 30 |
+
├── Procfile
|
| 31 |
+
├── readme.md
|
| 32 |
+
└── requirements.txt
|
| 33 |
+
```
|
| 34 |
+
**`app.py`**: Entry point initializing FastAPI app and routes
|
| 35 |
+
**`Procfile`**: Tells Railway how to run the program
|
| 36 |
+
**`requirements.txt`**: Lists all the packages used in the project
|
| 37 |
+
**`__init__.py`** : Package initializer for the root module
|
| 38 |
+
**FOLDER :features/text_classifier**
|
| 39 |
+
**`controller.py`** :Handles logic between routes and model
|
| 40 |
+
**`inferencer.py`** : Runs inference and returns predictions as well as files system
|
| 41 |
+
**`__init__.py`** :Initializes the module as a package
|
| 42 |
+
**`model_loader.py`** : Loads the ML model and tokenizer
|
| 43 |
+
**`preprocess.py`** :Prepares input text for the model
|
| 44 |
+
**`routes.py`** :Defines API routes for text classification
|
| 45 |
+
|
| 46 |
+
### **Functions**
|
| 47 |
+
|
| 48 |
+
1. **`load_model()`**
|
| 49 |
+
Loads the GPT-2 model and tokenizer from specified paths.
|
| 50 |
+
|
| 51 |
+
2. **`lifespan()`**
|
| 52 |
+
Manages the app's lifecycle: loads the model at startup and handles cleanup on shutdown.
|
| 53 |
+
|
| 54 |
+
3. **`classify_text_sync()`**
|
| 55 |
+
Synchronously tokenizes input text and classifies it using the GPT-2 model. Returns the classification and perplexity.
|
| 56 |
+
|
| 57 |
+
4. **`classify_text()`**
|
| 58 |
+
Asynchronously executes `classify_text_sync()` in a thread pool to ensure non-blocking processing.
|
| 59 |
+
|
| 60 |
+
5. **`analyze_text()`**
|
| 61 |
+
**POST** endpoint: accepts text input, classifies it using `classify_text()`, and returns the result with perplexity.
|
| 62 |
+
|
| 63 |
+
6. **`health()`**
|
| 64 |
+
**GET** endpoint: simple health check to confirm the API is running.
|
| 65 |
+
7. **`parse_docx() ,parse_pdf(),parse_txt()`**
|
| 66 |
+
These functions convert the given DOCX, PDF, or plain-text files into strings so that their contents can be classified.
|
| 67 |
+
8. **`warmup()`**
|
| 68 |
+
This function downloads the model repo and initializes `_model` and `_tokenizer` via the `load_model()` function.
|
| 69 |
+
9. **`download_model_repo()`**
|
| 70 |
+
This function is used to download the model from the MODEL folder.
|
| 71 |
+
10. **`get_model_tokenizer()`**
|
| 72 |
+
This function is similar to `warmup()`, but it first checks whether the model has already been loaded: if not, it downloads it; otherwise it reuses the previously downloaded model.
|
| 73 |
+
|
| 74 |
+
11. **`handle_file_upload()`**
|
| 75 |
+
This function handles the file upload for the upload route, classifies the file's text, and returns the result.
|
| 76 |
+
12. **`Extract_file_contents()`**
|
| 77 |
+
This function extracts the contents of an uploaded file and returns them as text.
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
+
|
| 81 |
+
### **Code Overview**
|
| 82 |
+
|
| 83 |
+
### **Running and Load Balancing:**
|
| 84 |
+
|
| 85 |
+
To run the app in production with load balancing:
|
| 86 |
+
|
| 87 |
+
```bash
|
| 88 |
+
uvicorn app:app --host 0.0.0.0 --port 8000
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
This command launches the FastAPI app.
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
### **Endpoints**
|
| 95 |
+
|
| 96 |
+
#### 1. **`/text/analyze`**
|
| 97 |
+
|
| 98 |
+
- **Method:** `POST`
|
| 99 |
+
- **Description:** Classifies whether the text is AI-generated or human-written.
|
| 100 |
+
- **Request:**
|
| 101 |
+
```json
|
| 102 |
+
{ "text": "sample text" }
|
| 103 |
+
```
|
| 104 |
+
- **Response:**
|
| 105 |
+
```json
|
| 106 |
+
{ "result": "AI-generated", "perplexity": 55.67 }
|
| 107 |
+
```
|
| 108 |
+
|
| 109 |
+
#### 2. **`/health`**
|
| 110 |
+
|
| 111 |
+
- **Method:** `GET`
|
| 112 |
+
- **Description:** Returns the status of the API.
|
| 113 |
+
- **Response:**
|
| 114 |
+
```json
|
| 115 |
+
{ "status": "ok" }
|
| 116 |
+
```
|
| 117 |
+
#### 3. **`/text/upload`**
|
| 118 |
+
- **Method:** `POST`
|
| 119 |
+
- **Description:** Accepts an uploaded file, extracts its contents, classifies the text, and returns the result.
|
| 120 |
+
- **Request:** Files
|
| 121 |
+
|
| 122 |
+
- **Response:**
|
| 123 |
+
```json
|
| 124 |
+
{ "result": "AI-generated", "perplexity": 55.67 }
|
| 125 |
+
```
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
### **Running the API**
|
| 129 |
+
|
| 130 |
+
Start the server with:
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
uvicorn app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 134 |
+
```
|
| 135 |
+
|
| 136 |
+
---
|
| 137 |
+
|
| 138 |
+
### **🧪 Testing the API**
|
| 139 |
+
|
| 140 |
+
You can test the FastAPI endpoint using `curl` like this:
|
| 141 |
+
|
| 142 |
+
```bash
|
| 143 |
+
curl -X POST https://can-org-canspace.hf.space/text/analyze \
|
| 144 |
+
-H "Authorization: Bearer SECRET_CODE" \
|
| 145 |
+
-H "Content-Type: application/json" \
|
| 146 |
+
-d '{"text": "This is a sample sentence for analysis."}'
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
- The `-H "Authorization: Bearer SECRET_CODE"` part is used to simulate the **handshake**.
|
| 150 |
+
- FastAPI checks this token against the one loaded from the `.env` file.
|
| 151 |
+
- If the token matches, the request is accepted and processed.
|
| 152 |
+
- Otherwise, it responds with a `403 Forbidden` error (with the detail message `"Unauthorized"`).
|
| 153 |
+
|
| 154 |
+
---
|
| 155 |
+
|
| 156 |
+
### **API Documentation**
|
| 157 |
+
|
| 158 |
+
- **Swagger UI:** `https://can-org-canspace.hf.space/docs` -> `/docs`
|
| 159 |
+
- **ReDoc:** `https://can-org-canspace.hf.space/redoc` -> `/redoc`
|
| 160 |
+
|
| 161 |
+
### **🔐 Handshake Mechanism**
|
| 162 |
+
|
| 163 |
+
In this part, we're implementing a simple handshake to verify that the request is coming from a trusted source (e.g., our NestJS server). Here's how it works:
|
| 164 |
+
|
| 165 |
+
- We load a secret token from the `.env` file.
|
| 166 |
+
- When a request is made to the FastAPI server, we extract the `Authorization` header and compare it with our expected secret token.
|
| 167 |
+
- If the token does **not** match, we immediately return a **403 Forbidden** response with the message `"Unauthorized"`.
|
| 168 |
+
- If the token **does** match, we allow the request to proceed to the next step.
|
| 169 |
+
|
| 170 |
+
The verification function looks like this:
|
| 171 |
+
|
| 172 |
+
```python
|
| 173 |
+
def verify_token(auth: str):
|
| 174 |
+
if auth != f"Bearer {EXPECTED_TOKEN}":
|
| 175 |
+
raise HTTPException(status_code=403, detail="Unauthorized")
|
| 176 |
+
```
|
| 177 |
+
|
| 178 |
+
This provides a basic but effective layer of security to prevent unauthorized access to the API.
|
| 179 |
+
|
| 180 |
+
### **Implement it with NEST.js**
|
| 181 |
+
|
| 182 |
+
NOTE: Create a microservice in NestJS, implement the FastAPI call there, and invoke it from `app.controller.ts`.
|
| 183 |
+
|
| 184 |
+
The `fastapi.service.ts` file implements the call to the FastAPI server; the project structure and setup steps are described below.
|
| 185 |
+
|
| 186 |
+
### Project Structure
|
| 187 |
+
|
| 188 |
+
```files
|
| 189 |
+
nestjs-fastapi-bridge/
|
| 190 |
+
├── src/
|
| 191 |
+
│   ├── app.controller.ts
|
| 192 |
+
│   ├── app.module.ts
|
| 193 |
+
│   └── fastapi.service.ts
|
| 194 |
+
└── .env
|
| 195 |
+
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
---
|
| 199 |
+
|
| 200 |
+
### Step-by-Step Setup
|
| 201 |
+
|
| 202 |
+
#### 1. `.env`
|
| 203 |
+
|
| 204 |
+
Create a `.env` file at the root with the following:
|
| 205 |
+
|
| 206 |
+
```environment
|
| 207 |
+
FASTAPI_BASE_URL=https://can-org-canspace.hf.space/
|
| 208 |
+
SECRET_TOKEN="SECRET_CODE_TOKEN"
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
#### 2. `fastapi.service.ts`
|
| 212 |
+
|
| 213 |
+
```javascript
|
| 214 |
+
// src/fastapi.service.ts
|
| 215 |
+
import { Injectable } from "@nestjs/common";
|
| 216 |
+
import { HttpService } from "@nestjs/axios";
|
| 217 |
+
import { ConfigService } from "@nestjs/config";
|
| 218 |
+
import { firstValueFrom } from "rxjs";
|
| 219 |
+
|
| 220 |
+
@Injectable()
|
| 221 |
+
export class FastAPIService {
|
| 222 |
+
constructor(
|
| 223 |
+
private http: HttpService,
|
| 224 |
+
private config: ConfigService,
|
| 225 |
+
) {}
|
| 226 |
+
|
| 227 |
+
async analyzeText(text: string) {
|
| 228 |
+
const url = `${this.config.get("FASTAPI_BASE_URL")}/analyze`;
|
| 229 |
+
const token = this.config.get("SECRET_TOKEN");
|
| 230 |
+
|
| 231 |
+
const response = await firstValueFrom(
|
| 232 |
+
this.http.post(
|
| 233 |
+
url,
|
| 234 |
+
{ text },
|
| 235 |
+
{
|
| 236 |
+
headers: {
|
| 237 |
+
Authorization: `Bearer ${token}`,
|
| 238 |
+
},
|
| 239 |
+
},
|
| 240 |
+
),
|
| 241 |
+
);
|
| 242 |
+
|
| 243 |
+
return response.data;
|
| 244 |
+
}
|
| 245 |
+
}
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
#### 3. `app.module.ts`
|
| 249 |
+
|
| 250 |
+
```javascript
|
| 251 |
+
// src/app.module.ts
|
| 252 |
+
import { Module } from "@nestjs/common";
|
| 253 |
+
import { ConfigModule } from "@nestjs/config";
|
| 254 |
+
import { HttpModule } from "@nestjs/axios";
|
| 255 |
+
import { AppController } from "./app.controller";
|
| 256 |
+
import { FastAPIService } from "./fastapi.service";
|
| 257 |
+
|
| 258 |
+
@Module({
|
| 259 |
+
imports: [ConfigModule.forRoot(), HttpModule],
|
| 260 |
+
controllers: [AppController],
|
| 261 |
+
providers: [FastAPIService],
|
| 262 |
+
})
|
| 263 |
+
export class AppModule {}
|
| 264 |
+
```
|
| 265 |
+
|
| 266 |
+
---
|
| 267 |
+
|
| 268 |
+
#### 4. `app.controller.ts`
|
| 269 |
+
|
| 270 |
+
```javascript
|
| 271 |
+
// src/app.controller.ts
|
| 272 |
+
import { Body, Controller, Post, Get, Query } from '@nestjs/common';
|
| 273 |
+
import { FastAPIService } from './fastapi.service';
|
| 274 |
+
|
| 275 |
+
@Controller()
|
| 276 |
+
export class AppController {
|
| 277 |
+
constructor(private readonly fastapiService: FastAPIService) {}
|
| 278 |
+
|
| 279 |
+
@Post('analyze-text')
|
| 280 |
+
async callFastAPI(@Body('text') text: string) {
|
| 281 |
+
return this.fastapiService.analyzeText(text);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
@Get()
|
| 285 |
+
getHello(): string {
|
| 286 |
+
return 'NestJS is connected to FastAPI ';
|
| 287 |
+
}
|
| 288 |
+
}
|
| 289 |
+
```
|
| 290 |
+
|
| 291 |
+
### 🚀 How to Run
|
| 292 |
+
|
| 293 |
+
Run both the FastAPI and NestJS servers:
|
| 294 |
+
|
| 295 |
+
- for nest.js
|
| 296 |
+
```bash
|
| 297 |
+
npm run start
|
| 298 |
+
```
|
| 299 |
+
- for Fastapi
|
| 300 |
+
|
| 301 |
+
```bash
|
| 302 |
+
uvicorn app:app --reload
|
| 303 |
+
```
|
| 304 |
+
|
| 305 |
+
Make sure your FastAPI service is running at `http://localhost:8000`.
|
| 306 |
+
|
| 307 |
+
### Test with CURL
|
| 308 |
+
`http://localhost:3000/` -> NestJS server
|
| 309 |
+
```bash
|
| 310 |
+
curl -X POST http://localhost:3000/analyze-text \
|
| 311 |
+
-H 'Content-Type: application/json' \
|
| 312 |
+
-d '{"text": "This is a test input"}'
|
| 313 |
+
```
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
|
requirements.txt
CHANGED
|
@@ -1,7 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
torch
|
| 4 |
+
transformers
|
| 5 |
+
huggingface_hub
|
| 6 |
+
python-dotenv
|
| 7 |
+
python-docx
|
| 8 |
+
PyMuPDF
|
| 9 |
+
pydantic
|
| 10 |
+
fitz
|
| 11 |
+
python-multipart
|