# Source: AI_API/features/text_classifier/controller.py
# Commit 805e1e5 (Pujan-Dev): "feat: Added the rate limiting per route"
# File size: 4.46 kB
from .inferencer import classify_text
import asyncio
from fastapi import HTTPException, UploadFile, status, Depends,requests
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from .preprocess import parse_docx, parse_pdf, parse_txt
from nltk.tokenize import sent_tokenize
import os
from io import BytesIO
import logging
import requests
# HTTP bearer-auth scheme instance; verify_token receives its credentials
# through Depends(security).
security = HTTPBearer()
# Token verification
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token sent with the request.

    The presented token is compared against the ``MY_SECRET_TOKEN``
    environment variable.

    Args:
        credentials: Bearer credentials resolved through the ``security``
            dependency.

    Returns:
        The presented token string when it matches the configured secret.

    Raises:
        HTTPException: 403 when the token does not match, or when
            ``MY_SECRET_TOKEN`` is not set.
    """
    import hmac  # function-scope import keeps this block self-contained

    token = credentials.credentials
    expected = os.getenv("MY_SECRET_TOKEN")

    # Compare in constant time so response timing does not reveal how much of
    # the secret a guess got right. Bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    token_ok = expected is not None and hmac.compare_digest(
        token.encode("utf-8"), expected.encode("utf-8")
    )
    if not token_ok:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token",
        )
    return token
# Text classification
async def handle_text_analysis(text: str):
    """Classify a piece of text as a whole.

    The text is stripped, validated, and then passed to ``classify_text`` on
    a worker thread so the event loop is not blocked.

    Args:
        text: Raw text to classify.

    Returns:
        A dict with keys ``result`` (the label), ``perplexity`` (rounded to
        two decimals) and ``ai_likelihood``.

    Raises:
        HTTPException: 400 when the text has fewer than 10
            whitespace-separated words; 413 when it is longer than 10,000
            characters.
    """
    text = text.strip()
    # An empty string splits into zero words, so this also rejects empty input.
    if len(text.split()) < 10:
        # The detail now states the limit that is actually enforced
        # (it previously claimed "two words").
        raise HTTPException(status_code=400, detail="Text must contain at least ten words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    return {"result": label, "perplexity": round(perplexity, 2), "ai_likelihood": ai_likelihood}
# File sentence-level analysis
async def handle_file_sentance(file: UploadFile):
    """Run sentence-level analysis on the text of an uploaded file.

    Args:
        file: The uploaded file; its text is obtained through
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted text exceeds 10,000
        characters; otherwise a dict holding the extracted ``content`` merged
        with the result of ``handle_sentence_level_analysis``.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by the helpers is passed through unchanged;
            500 for any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            # Oversized input is reported in a normal response body, not as an
            # HTTP error.
            return {"message": "File contains more than 10,000 characters."}

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        # Deliberate client errors must reach the caller with their own status
        # code instead of being rewritten to 500 by the handler below.
        raise
    except Exception as e:
        logging.exception(f"Error processing file: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing the file") from e
# File-level classification
async def handle_file_upload(file: UploadFile):
    """Classify the text of an uploaded file as a whole.

    Args:
        file: The uploaded file; its text is obtained through
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted text exceeds 10,000
        characters; otherwise a dict with keys ``content``, ``result``,
        ``perplexity`` (rounded to two decimals) and ``ai_likelihood``.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by ``extract_file_contents`` is passed
            through unchanged; 500 for any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            # Oversized input is reported in a normal response body, not as an
            # HTTP error.
            return {"message": "File contains more than 10,000 characters."}

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        # classify_text is run on a worker thread so the event loop stays free.
        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
        return {
            "content": file_contents,
            "result": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood,
        }
    except HTTPException:
        # Deliberate client errors must reach the caller with their own status
        # code instead of being rewritten to 500 by the handler below.
        raise
    except Exception as e:
        logging.exception(f"Error processing file: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing the file") from e
# File extraction
async def extract_file_contents(file: UploadFile):
    """Read an uploaded file and return what the matching parser produces.

    The whole upload is read into an in-memory stream, then handed to
    ``parse_docx``, ``parse_pdf`` or ``parse_txt`` according to the upload's
    declared content type.

    Args:
        file: The uploaded file.

    Returns:
        Whatever the selected parser returns for the stream.

    Raises:
        HTTPException: 404 when the content type is not one of the three
            supported types.
    """
    raw_bytes = await file.read()
    stream = BytesIO(raw_bytes)

    # Declared MIME type -> parser that handles it.
    parsers_by_mime = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    parser = parsers_by_mime.get(file.content_type)
    if parser is None:
        raise HTTPException(
            status_code=404,
            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
        )
    return parser(stream)
# Sentence-level analysis
async def handle_sentence_level_analysis(text: str):
    """Classify a text sentence by sentence.

    The text is stripped, validated, split with NLTK's ``sent_tokenize``
    (English), and each non-blank sentence is passed to ``classify_text`` on
    a worker thread, one sentence at a time.

    Args:
        text: Raw text to analyse.

    Returns:
        ``{"analysis": [...]}`` where each entry has keys ``sentence``,
        ``label``, ``perplexity`` (rounded to two decimals) and
        ``ai_likelihood``.

    Raises:
        HTTPException: 400 when the text has fewer than two
            whitespace-separated words; 413 when it is longer than 10,000
            characters.
    """
    text = text.strip()
    # An empty string splits into zero words, so this also rejects empty input.
    if len(text.split()) < 2:
        # Too-short input is a bad request, not an oversized payload; 400
        # matches what handle_text_analysis returns for the same condition.
        raise HTTPException(status_code=400, detail="Text must contain at least two words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

    sentences = sent_tokenize(text, language="english")
    results = []
    for sentence in sentences:
        if not sentence.strip():
            continue
        label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
        results.append({
            "sentence": sentence,
            "label": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": likelihood,
        })
    return {"analysis": results}
# Synchronous call
def classify(text: str):
    """Call ``classify_text`` directly on *text* and return its result.

    Unlike the async handlers in this module, this runs in the calling thread
    and performs no validation.
    """
    outcome = classify_text(text)
    return outcome