Pujan-Dev committed on
Commit
4ecc57f
·
verified ·
1 Parent(s): 34bf657

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -66
app.py CHANGED
@@ -2,42 +2,36 @@ from fastapi import FastAPI, HTTPException, Depends, UploadFile, File
2
  from fastapi.security import HTTPBearer
3
  from pydantic import BaseModel
4
  from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
5
- from dotenv import load_dotenv
6
- from contextlib import asynccontextmanager
7
-
8
  import torch
9
- import asyncio
10
- import math
11
  import os
12
- import docx
13
- import fitz # PyMuPDF
14
  import logging
15
  from io import BytesIO
16
-
17
- # Setup logging
18
- logging.basicConfig(level=logging.DEBUG)
19
 
20
  # Load environment variables
 
21
  load_dotenv()
 
22
  SECRET_TOKEN = os.getenv("SECRET_TOKEN")
 
23
 
24
- # File Paths
25
- MODEL_PATH = "./Ai-Text-Detector/model"
26
- WEIGHTS_PATH = "./Ai-Text-Detector/model_weights.pth"
27
 
28
- # Global model and tokenizer
29
- model = None
30
- tokenizer = None
31
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32
 
33
- # Security
34
- bearer_scheme = HTTPBearer()
 
35
 
36
- # Text input schema
37
- class TextInput(BaseModel):
38
- text: str
39
 
40
- # Load model and tokenizer
41
  def load_model():
42
  global model, tokenizer
43
  try:
@@ -47,32 +41,31 @@ def load_model():
47
  model_instance.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
48
  model_instance.to(device)
49
  model_instance.eval()
50
- model = model_instance
 
51
  except Exception as e:
 
52
  raise RuntimeError(f"Error loading model: {str(e)}")
53
 
54
- # Lifespan event to load model on startup
55
  @asynccontextmanager
56
  async def lifespan(app: FastAPI):
57
- load_model()
58
  yield
59
 
60
- # FastAPI app instance
61
  app = FastAPI(lifespan=lifespan)
62
 
63
- # Classification logic
 
 
 
 
64
  def classify_text(text: str):
65
  if not model or not tokenizer:
66
  raise RuntimeError("Model or tokenizer not loaded.")
67
 
68
- inputs = tokenizer(
69
- text,
70
- return_tensors="pt",
71
- truncation=True,
72
- padding="max_length",
73
- max_length=512
74
- )
75
-
76
  input_ids = inputs["input_ids"].to(device)
77
  attention_mask = inputs["attention_mask"].to(device)
78
 
@@ -88,17 +81,16 @@ def classify_text(text: str):
88
  else:
89
  return "Human-written", perplexity
90
 
91
- # Score converter (optional utility)
92
- def Perplexity_Converter(perplexity):
93
- return max(0, min(100, 100 - math.log2(perplexity) * 10))
94
-
95
- # Analyze text directly
96
  @app.post("/analyze")
97
  async def analyze_text(data: TextInput, token: str = Depends(bearer_scheme)):
 
98
  if token.credentials != SECRET_TOKEN:
99
  raise HTTPException(status_code=401, detail="Invalid token")
100
 
101
  text = data.text.strip()
 
 
102
  if not text:
103
  raise HTTPException(status_code=400, detail="Text cannot be empty")
104
 
@@ -106,64 +98,78 @@ async def analyze_text(data: TextInput, token: str = Depends(bearer_scheme)):
106
  raise HTTPException(status_code=400, detail="Text must contain at least two words")
107
 
108
  try:
 
109
  label, perplexity = await asyncio.to_thread(classify_text, text)
110
  return {"result": label, "perplexity": round(perplexity, 2)}
111
  except Exception as e:
112
- logging.error(f"Text analysis failed: {str(e)}")
113
  raise HTTPException(status_code=500, detail="Model processing error")
114
 
115
- # -------- File Upload and Parsing -------- #
116
  def parse_docx(file: BytesIO):
117
  doc = docx.Document(file)
118
- return "\n".join(para.text for para in doc.paragraphs)
 
 
 
119
 
 
120
  def parse_pdf(file: BytesIO):
121
  try:
122
  doc = fitz.open(stream=file, filetype="pdf")
123
- return "".join([doc.load_page(i).get_text() for i in range(doc.page_count)])
 
 
 
 
124
  except Exception as e:
125
- logging.error(f"PDF error: {str(e)}")
126
- raise HTTPException(status_code=500, detail="Error processing PDF")
127
 
 
128
  def parse_txt(file: BytesIO):
129
  return file.read().decode("utf-8")
130
 
 
131
  @app.post("/upload/")
132
  async def upload_file(file: UploadFile = File(...), token: str = Depends(bearer_scheme)):
133
- if token.credentials != SECRET_TOKEN:
134
- raise HTTPException(status_code=401, detail="Invalid token")
135
-
136
  try:
137
- content_type = file.content_type
138
- content = await file.read()
139
- if content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
140
- text = parse_docx(BytesIO(content))
141
- elif content_type == 'application/pdf':
142
- text = parse_pdf(BytesIO(content))
143
- elif content_type == 'text/plain':
144
- text = parse_txt(BytesIO(content))
145
  else:
146
- raise HTTPException(status_code=400, detail="Unsupported file type")
 
 
147
 
148
- if len(text) > 10000:
 
149
  return {"message": "File contains more than 10,000 characters."}
150
 
151
- cleaned_text = text.replace("\n", "").replace("\t", "")
 
 
 
152
  label, perplexity = await asyncio.to_thread(classify_text, cleaned_text)
153
  return {"result": label, "perplexity": round(perplexity, 2)}
154
 
155
  except Exception as e:
156
- logging.error(f"File processing error: {str(e)}")
157
- raise HTTPException(status_code=500, detail="Error processing file")
158
 
159
- # Health Check and Index
160
  @app.get("/health")
161
- def health_check():
162
  return {"status": "ok"}
163
 
 
164
  @app.get("/")
165
  def index():
166
  return {
167
  "message": "FastAPI AI Text Detector is running.",
168
- "usage": "Use /docs or /analyze or /upload to test the API."
169
  }
 
2
  from fastapi.security import HTTPBearer
3
  from pydantic import BaseModel
4
  from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
 
 
 
5
  import torch
 
 
6
  import os
7
+ import asyncio
8
+ from contextlib import asynccontextmanager
9
  import logging
10
  from io import BytesIO
11
+ import docx
12
+ import fitz # PyMuPDF
 
13
 
14
# Load environment variables (.env supplies SECRET_TOKEN)
from dotenv import load_dotenv
load_dotenv()

SECRET_TOKEN = os.getenv("SECRET_TOKEN")
bearer_scheme = HTTPBearer()  # Bearer-token auth dependency shared by all routes

# Model artifact locations. Uppercase constants: load_model() reads these
# as MODEL_PATH / WEIGHTS_PATH — the previous lowercase names were never
# referenced and left WEIGHTS_PATH undefined (NameError at startup).
MODEL_PATH = "./Ai-Text-Detector/model"
WEIGHTS_PATH = "./Ai-Text-Detector/model_weights.pth"

# NOTE: the app instance is created once, below, with the lifespan handler;
# an extra bare `app = FastAPI()` here was dead — it was rebound before any
# route registration.

# Global model and tokenizer variables, populated by load_model() at startup.
model, tokenizer = None, None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Logging setup
logging.basicConfig(level=logging.DEBUG)
 
33
 
34
+ # Load model and tokenizer function
35
  def load_model():
36
  global model, tokenizer
37
  try:
 
41
  model_instance.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
42
  model_instance.to(device)
43
  model_instance.eval()
44
+ model, tokenizer = model_instance, tokenizer
45
+ logging.info("Model loaded successfully.")
46
  except Exception as e:
47
+ logging.error(f"Error loading model: {str(e)}")
48
  raise RuntimeError(f"Error loading model: {str(e)}")
49
 
50
# Load model on app startup: FastAPI runs this context manager around the
# application's lifetime.
@asynccontextmanager
async def lifespan(app: FastAPI):
    load_model()  # populate the global model/tokenizer before serving requests
    yield  # the app serves requests while suspended here; no shutdown cleanup needed

# Attach the lifespan to the app instance.
# NOTE(review): this rebinds `app`, discarding any FastAPI() instance created
# earlier in the module — all routes must be registered on THIS instance
# (they are, since every @app decorator appears below this line).
app = FastAPI(lifespan=lifespan)
58
 
59
# Input schema for text analysis
class TextInput(BaseModel):
    """Request body for POST /analyze."""
    # Raw text to classify; the handler rejects empty or one-word input.
    text: str
62
+
63
+ # Function to classify text using the model
64
  def classify_text(text: str):
65
  if not model or not tokenizer:
66
  raise RuntimeError("Model or tokenizer not loaded.")
67
 
68
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
 
 
 
 
 
 
 
69
  input_ids = inputs["input_ids"].to(device)
70
  attention_mask = inputs["attention_mask"].to(device)
71
 
 
81
  else:
82
  return "Human-written", perplexity
83
 
84
+ # POST route to analyze text with Bearer token
 
 
 
 
85
  @app.post("/analyze")
86
  async def analyze_text(data: TextInput, token: str = Depends(bearer_scheme)):
87
+ # Verify token
88
  if token.credentials != SECRET_TOKEN:
89
  raise HTTPException(status_code=401, detail="Invalid token")
90
 
91
  text = data.text.strip()
92
+
93
+ # Input validation
94
  if not text:
95
  raise HTTPException(status_code=400, detail="Text cannot be empty")
96
 
 
98
  raise HTTPException(status_code=400, detail="Text must contain at least two words")
99
 
100
  try:
101
+ # Classify text
102
  label, perplexity = await asyncio.to_thread(classify_text, text)
103
  return {"result": label, "perplexity": round(perplexity, 2)}
104
  except Exception as e:
105
+ logging.error(f"Error processing text: {str(e)}")
106
  raise HTTPException(status_code=500, detail="Model processing error")
107
 
108
# Function to parse .docx files
def parse_docx(file: BytesIO):
    """Extract plain text from a .docx byte stream.

    Returns one line per paragraph (trailing newline included, matching the
    previous per-paragraph append); empty document yields "".
    """
    doc = docx.Document(file)
    # Join at C speed instead of quadratic `text +=` in a loop.
    return "".join(para.text + "\n" for para in doc.paragraphs)
115
 
116
# Function to parse .pdf files
def parse_pdf(file: BytesIO):
    """Extract the concatenated text of every page of a PDF byte stream.

    Raises HTTPException(500) if PyMuPDF cannot open or read the document.
    """
    try:
        doc = fitz.open(stream=file, filetype="pdf")
        try:
            # Single join instead of quadratic `text +=` per page.
            return "".join(doc.load_page(i).get_text() for i in range(doc.page_count))
        finally:
            doc.close()  # release PyMuPDF resources even if extraction fails
    except Exception as e:
        logging.error(f"Error while processing PDF: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing PDF file")
128
 
129
# Function to parse .txt files
def parse_txt(file: BytesIO):
    """Decode an uploaded plain-text byte stream as UTF-8."""
    raw_bytes = file.read()
    return raw_bytes.decode("utf-8")
132
 
133
# POST route to upload files and analyze content
@app.post("/upload/")
async def upload_file(file: UploadFile = File(...), token: str = Depends(bearer_scheme)):
    """Accept a .docx/.pdf/.txt upload, extract its text, and classify it.

    Returns the classification label and rounded perplexity, or a message when
    the file exceeds 10,000 characters. 401 on bad token, 400 on unsupported
    type, 500 on processing failure.
    """
    # Verify token — parity with /analyze; without this check the route was
    # effectively unauthenticated despite requiring a Bearer header.
    if token.credentials != SECRET_TOKEN:
        raise HTTPException(status_code=401, detail="Invalid token")

    try:
        content = await file.read()  # read once; each parser gets its own BytesIO
        if file.content_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
            file_contents = parse_docx(BytesIO(content))
        elif file.content_type == 'application/pdf':
            file_contents = parse_pdf(BytesIO(content))
        elif file.content_type == 'text/plain':
            file_contents = parse_txt(BytesIO(content))
        else:
            raise HTTPException(status_code=400, detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.")

        # Log metadata only — dumping the whole document leaks user content
        # into DEBUG logs.
        logging.debug(f"Extracted {len(file_contents)} characters from {file.filename}")

        # Check if the text length exceeds 10,000 characters
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}

        # Clean the text by removing newline and tab characters
        cleaned_text = file_contents.replace("\n", "").replace("\t", "")

        # Analyze the cleaned text off the event loop
        label, perplexity = await asyncio.to_thread(classify_text, cleaned_text)
        return {"result": label, "perplexity": round(perplexity, 2)}

    except HTTPException:
        # Re-raise as-is: the broad handler below was remapping the intended
        # 400 (invalid file type) into a generic 500.
        raise
    except Exception as e:
        logging.error(f"Error processing file: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing the file")
163
 
164
# Health check route
@app.get("/health")
async def health_check():
    """Liveness probe: report that the service is up."""
    status_payload = {"status": "ok"}
    return status_payload
168
 
169
# Simple index route
@app.get("/")
def index():
    """Landing route: describe the service and where to exercise it."""
    return {
        "message": "FastAPI AI Text Detector is running.",
        # Keep the hint in sync with the routes that actually exist:
        # /upload/ is still registered, so it belongs in the usage string.
        "usage": "Use /docs or /analyze or /upload to test the API.",
    }