Update app.py
app.py (updated):
from fastapi import FastAPI, HTTPException, Depends, UploadFile, File
from fastapi.security import HTTPBearer
from pydantic import BaseModel
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
from dotenv import load_dotenv
from contextlib import asynccontextmanager

import torch
import asyncio
import math
import os
import docx
import fitz  # PyMuPDF
import logging
from io import BytesIO

# Setup logging
logging.basicConfig(level=logging.DEBUG)

# Load environment variables
load_dotenv()
SECRET_TOKEN = os.getenv("SECRET_TOKEN")
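
# SECRET_TOKEN is read from a .env file next to app.py via load_dotenv();
# a minimal sketch of that file (hypothetical value) would be:
#   SECRET_TOKEN=replace-with-a-long-random-string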

# File Paths
MODEL_PATH = "./AI-MODEL/model"
WEIGHTS_PATH = "./AI-MODEL/model_weights.pth"

# Global model and tokenizer
model = None
tokenizer = None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Security
bearer_scheme = HTTPBearer()

# Text input schema
class TextInput(BaseModel):
    text: str

# Load model and tokenizer
def load_model():
    global model, tokenizer
    try:
        tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_PATH)
        config = GPT2Config.from_pretrained(MODEL_PATH)
        model_instance = GPT2LMHeadModel(config)
        model_instance.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
        model_instance.to(device)
        model_instance.eval()
        model = model_instance
    except Exception as e:
        raise RuntimeError(f"Error loading model: {str(e)}")

# Lifespan event to load model on startup
@asynccontextmanager
async def lifespan(app: FastAPI):
    load_model()
    yield

# FastAPI app instance
app = FastAPI(lifespan=lifespan)

# Classification logic
def classify_text(text: str):
    if not model or not tokenizer:
        raise RuntimeError("Model or tokenizer not loaded.")

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512
    )

    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)
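
    # Passing labels=input_ids makes GPT-2 return its language-modelling
    # (cross-entropy) loss for the input; exp(loss) is then used as the text's
    # perplexity. Lower perplexity means the text is more predictable to the
    # model, which the thresholds below treat as a sign of AI-generated text.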
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
    loss = outputs.loss
    perplexity = torch.exp(loss).item()

    if perplexity < 60:
        return "AI-generated", perplexity
    elif perplexity < 80:
        return "Probably AI-generated", perplexity
    else:
        return "Human-written", perplexity

# Score converter (optional utility)
def Perplexity_Converter(perplexity):
    return max(0, min(100, 100 - math.log2(perplexity) * 10))
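# Quick arithmetic check (illustration only, not from the original file):
# a perplexity of 64 maps to 100 - log2(64) * 10 = 100 - 60 = 40, and any
# perplexity of 1024 or more clamps to 0.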

# Analyze text directly
@app.post("/analyze")
async def analyze_text(data: TextInput, token: str = Depends(bearer_scheme)):
    if token.credentials != SECRET_TOKEN:
        raise HTTPException(status_code=401, detail="Invalid token")

    text = data.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="Text cannot be empty")

    if len(text.split()) < 2:
        raise HTTPException(status_code=400, detail="Text must contain at least two words")
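
    # classify_text is synchronous and CPU/GPU-bound, so it is run in a worker
    # thread via asyncio.to_thread to avoid blocking the event loop.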
    try:
        label, perplexity = await asyncio.to_thread(classify_text, text)
        return {"result": label, "perplexity": round(perplexity, 2)}
    except Exception as e:
        logging.error(f"Text analysis failed: {str(e)}")
        raise HTTPException(status_code=500, detail="Model processing error")
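
# The upload endpoint below extracts plain text from .docx, .pdf, or .txt uploads
# (dispatching on the request's Content-Type), short-circuits with a message for
# files over 10,000 characters, and otherwise reuses the same classify_text path
# as /analyze.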
# -------- File Upload and Parsing -------- #
def parse_docx(file: BytesIO):
    doc = docx.Document(file)
    return "\n".join(para.text for para in doc.paragraphs)

def parse_pdf(file: BytesIO):
    try:
        doc = fitz.open(stream=file, filetype="pdf")
        return "".join([doc.load_page(i).get_text() for i in range(doc.page_count)])
    except Exception as e:
        logging.error(f"PDF error: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing PDF")

def parse_txt(file: BytesIO):
    return file.read().decode("utf-8")

@app.post("/upload/")
async def upload_file(file: UploadFile = File(...), token: str = Depends(bearer_scheme)):
    if token.credentials != SECRET_TOKEN:
        raise HTTPException(status_code=401, detail="Invalid token")

    try:
        content_type = file.content_type
        content = await file.read()
        if content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
            text = parse_docx(BytesIO(content))
        elif content_type == 'application/pdf':
            text = parse_pdf(BytesIO(content))
        elif content_type == 'text/plain':
            text = parse_txt(BytesIO(content))
        else:
            raise HTTPException(status_code=400, detail="Unsupported file type")

        if len(text) > 10000:
            return {"message": "File contains more than 10,000 characters."}

        cleaned_text = text.replace("\n", "").replace("\t", "")
        label, perplexity = await asyncio.to_thread(classify_text, cleaned_text)
        return {"result": label, "perplexity": round(perplexity, 2)}

    except HTTPException:
        # Re-raise HTTP errors (e.g. the 400 for unsupported file types) instead of
        # letting the generic handler below turn them into 500s.
        raise
    except Exception as e:
        logging.error(f"File processing error: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing file")

# Health Check and Index
@app.get("/health")
def health_check():
    return {"status": "ok"}

@app.get("/")
def index():
    return {
        "message": "FastAPI AI Text Detector is running.",
        "usage": "Use /docs or /analyze or /upload to test the API."
    }
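
For reference, a minimal client sketch of how the two endpoints can be exercised. This is not part of the commit; it assumes the app is served locally with uvicorn on port 8000, that the requests library is installed, and that the placeholder bearer token is replaced with the real SECRET_TOKEN value. The local PDF file name is only an example.

import requests

BASE_URL = "http://127.0.0.1:8000"  # assumption: default local uvicorn host/port
HEADERS = {"Authorization": "Bearer <value of SECRET_TOKEN>"}  # placeholder token

# Score raw text via /analyze
resp = requests.post(
    f"{BASE_URL}/analyze",
    json={"text": "Sample passage to score for AI-likeness."},
    headers=HEADERS,
)
print(resp.json())  # {"result": "...", "perplexity": ...} on success

# Score an uploaded PDF via /upload/
with open("example.pdf", "rb") as f:
    resp = requests.post(
        f"{BASE_URL}/upload/",
        files={"file": ("example.pdf", f, "application/pdf")},
        headers=HEADERS,
    )
print(resp.json())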