Spaces:

Pujan-Dev
/

AI_API

Sleeping

App Files Files Community

AI_API / features /text_classifier /controller.py

Pujan-Dev

feat: Added the rate limiting per route

805e1e5 about 1 year ago

raw

history blame

4.46 kB

	from .inferencer import classify_text
	import asyncio
	from fastapi import HTTPException, UploadFile, status, Depends,requests
	from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
	from .preprocess import parse_docx, parse_pdf, parse_txt
	from nltk.tokenize import sent_tokenize
	import os
	from io import BytesIO
	import logging
	import requests
	# Shared HTTP Bearer scheme instance; verify_token receives its output via Depends(security).
	security = HTTPBearer()

	# Token verification
	async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
	    """Validate the bearer token presented with the current request.

	    The presented token is compared with the ``MY_SECRET_TOKEN`` environment
	    variable using a constant-time comparison, so the check does not reveal
	    how much of the token matched through response timing.

	    Args:
	        credentials: Bearer credentials resolved by the ``security`` dependency.

	    Returns:
	        The presented token when it matches the configured secret.

	    Raises:
	        HTTPException: 403 when the token does not match, or when
	            ``MY_SECRET_TOKEN`` is unset or empty (fail closed).
	    """
	    import hmac  # function-scope import keeps this block self-contained

	    token = credentials.credentials
	    expected = os.getenv("MY_SECRET_TOKEN")
	    # Compare as bytes: compare_digest raises TypeError for non-ASCII str input.
	    token_ok = bool(expected) and hmac.compare_digest(
	        token.encode("utf-8"), expected.encode("utf-8")
	    )
	    if not token_ok:
	        raise HTTPException(
	            status_code=status.HTTP_403_FORBIDDEN,
	            detail="Invalid or expired token",
	        )
	    return token

	# Text classification
	async def handle_text_analysis(text: str):
	    """Classify a complete piece of text.

	    The text is stripped, validated, and passed to ``classify_text`` in a
	    worker thread via ``asyncio.to_thread``.

	    Args:
	        text: Raw text to classify.

	    Returns:
	        A dict with keys ``result`` (the label), ``perplexity`` (rounded to
	        two decimals) and ``ai_likelihood``.

	    Raises:
	        HTTPException: 400 when the text has fewer than 10 words (including
	            empty or whitespace-only input); 413 when it is longer than
	            10,000 characters.
	    """
	    min_words = 10
	    max_chars = 10000

	    text = text.strip()
	    # An empty string splits into zero words, so this also rejects blank input.
	    if len(text.split()) < min_words:
	        # The message states the threshold that is actually enforced.
	        raise HTTPException(
	            status_code=400,
	            detail=f"Text must contain at least {min_words} words",
	        )
	    if len(text) > max_chars:
	        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

	    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
	    return {"result": label, "perplexity": round(perplexity, 2), "ai_likelihood": ai_likelihood}

	# File sentence-level analysis
	async def handle_file_sentance(file: UploadFile):
	    """Run sentence-level analysis on the contents of an uploaded file.

	    The extracted contents have newlines and tabs replaced by spaces before
	    being passed to ``handle_sentence_level_analysis``; the unmodified
	    contents are returned under ``content``.

	    Args:
	        file: The uploaded file to analyse.

	    Returns:
	        ``{"message": ...}`` when the extracted contents exceed 10,000
	        characters; otherwise a dict with ``content`` plus the keys returned
	        by ``handle_sentence_level_analysis``.

	    Raises:
	        HTTPException: 404 when the file is empty or whitespace-only; any
	            HTTPException raised by ``extract_file_contents`` or
	            ``handle_sentence_level_analysis`` is passed through unchanged;
	            500 for any other failure.
	    """
	    try:
	        file_contents = await extract_file_contents(file)
	        if len(file_contents) > 10000:
	            return {"message": "File contains more than 10,000 characters."}

	        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
	        if not cleaned_text:
	            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

	        result = await handle_sentence_level_analysis(cleaned_text)
	        return {"content": file_contents, **result}
	    except HTTPException:
	        # Deliberate client-facing errors must keep their status code rather
	        # than being rewritten as a generic 500 by the handler below.
	        raise
	    except Exception as e:
	        logging.exception("Error processing file: %s", e)
	        raise HTTPException(status_code=500, detail="Error processing the file") from e

	# File-level classification
	async def handle_file_upload(file: UploadFile):
	    """Classify the contents of an uploaded file as a single text.

	    The extracted contents have newlines and tabs replaced by spaces before
	    being passed to ``classify_text`` in a worker thread; the unmodified
	    contents are returned under ``content``.

	    Args:
	        file: The uploaded file to classify.

	    Returns:
	        ``{"message": ...}`` when the extracted contents exceed 10,000
	        characters; otherwise a dict with keys ``content``, ``result``,
	        ``perplexity`` (rounded to two decimals) and ``ai_likelihood``.

	    Raises:
	        HTTPException: 404 when the file is empty or whitespace-only; any
	            HTTPException raised by ``extract_file_contents`` is passed
	            through unchanged; 500 for any other failure.
	    """
	    try:
	        file_contents = await extract_file_contents(file)
	        if len(file_contents) > 10000:
	            return {"message": "File contains more than 10,000 characters."}

	        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
	        if not cleaned_text:
	            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

	        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
	        return {
	            "content": file_contents,
	            "result": label,
	            "perplexity": round(perplexity, 2),
	            "ai_likelihood": ai_likelihood,
	        }
	    except HTTPException:
	        # Deliberate client-facing errors must keep their status code rather
	        # than being rewritten as a generic 500 by the handler below.
	        raise
	    except Exception as e:
	        logging.exception("Error processing file: %s", e)
	        raise HTTPException(status_code=500, detail="Error processing the file") from e

	# File extraction
	async def extract_file_contents(file: UploadFile):
	    """Read an uploaded file and parse it according to its declared media type.

	    Supported types are .docx, .pdf and plain text, dispatched to
	    ``parse_docx``, ``parse_pdf`` and ``parse_txt`` respectively.

	    Args:
	        file: The uploaded file; its ``content_type`` selects the parser.

	    Returns:
	        Whatever the selected parser returns for the file's bytes
	        (callers in this module treat it as a string).

	    Raises:
	        HTTPException: 404 when the media type is missing or unsupported.
	    """
	    parsers = {
	        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
	        "application/pdf": parse_pdf,
	        "text/plain": parse_txt,
	    }

	    # Media types are case-insensitive and may carry parameters such as
	    # "; charset=utf-8"; only the bare type/subtype decides the parser.
	    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()
	    parser = parsers.get(media_type)
	    if parser is None:
	        raise HTTPException(
	            status_code=404,
	            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
	        )

	    content = await file.read()
	    return parser(BytesIO(content))

	# Sentence-level analysis
	async def handle_sentence_level_analysis(text: str):
	    """Classify a text sentence by sentence.

	    The text is stripped, validated, split with NLTK's ``sent_tokenize``
	    (English), and each non-blank sentence is passed to ``classify_text``
	    in a worker thread, one at a time and in order.

	    Args:
	        text: Raw text to analyse.

	    Returns:
	        ``{"analysis": [...]}`` where each entry has keys ``sentence``,
	        ``label``, ``perplexity`` (rounded to two decimals) and
	        ``ai_likelihood``.

	    Raises:
	        HTTPException: 400 when the text has fewer than two words (including
	            empty or whitespace-only input); 413 when it is longer than
	            10,000 characters.
	    """
	    text = text.strip()
	    # An empty string splits into zero words, so this also rejects blank input.
	    if len(text.split()) < 2:
	        # Too-short input is a bad request, not an oversized payload (413).
	        raise HTTPException(status_code=400, detail="Text must contain at least two words")

	    if len(text) > 10000:
	        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

	    results = []
	    for sentence in sent_tokenize(text, language="english"):
	        if not sentence.strip():
	            continue
	        label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
	        results.append({
	            "sentence": sentence,
	            "label": label,
	            "perplexity": round(perplexity, 2),
	            "ai_likelihood": likelihood,
	        })
	    return {"analysis": results}

	# Synchronous call
	def classify(text: str):
	    """Run ``classify_text`` on *text* in the calling thread and return its result unchanged."""
	    outcome = classify_text(text)
	    return outcome