"""Async request handlers that run ``classify_text`` on raw text or uploads."""

import asyncio
import logging
from io import BytesIO

from fastapi import HTTPException, UploadFile
from nltk.tokenize import sent_tokenize

from .inferencer import classify_text
from .preprocess import parse_docx, parse_pdf, parse_txt

# Uploads whose extracted text is longer than this are not analysed.
_MAX_FILE_CHARS = 10000


def _require_two_words(text: str) -> str:
    """Return ``text`` stripped, or raise HTTP 400 if it has fewer than two words."""
    stripped = text.strip()
    # An empty string splits into [], so this single check also covers "".
    if len(stripped.split()) < 2:
        raise HTTPException(
            status_code=400, detail="Text must contain at least two words"
        )
    return stripped


def _flatten_whitespace(text: str) -> str:
    """Replace newlines and tabs with spaces.

    A space (rather than the empty string) keeps the last word of one line
    from being glued to the first word of the next.
    """
    return text.replace("\n", " ").replace("\t", " ")


async def _classify(text: str):
    """Run ``classify_text`` in a worker thread.

    Returns:
        ``(label, perplexity, ai_likelihood)`` with ``perplexity`` rounded to
        two decimal places.
    """
    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    # Round the real value; truncating to int first would discard the
    # decimals that the rounding is meant to keep.
    return label, round(float(perplexity), 2), ai_likelihood


async def handle_text_analysis(text: str):
    """Classify a piece of text as a whole.

    Args:
        text: Raw input text; surrounding whitespace is ignored.

    Returns:
        A dict with the keys ``result``, ``perplexity`` and ``ai_likelihood``.

    Raises:
        HTTPException: 400 if the text contains fewer than two words.
    """
    text = _require_two_words(text)
    label, perplexity, ai_likelihood = await _classify(text)
    return {
        "result": label,
        "perplexity": perplexity,
        "ai_likelihood": ai_likelihood,
    }


async def handle_file_sentance(file: UploadFile):
    """Run sentence-level analysis on the text of an uploaded file.

    Args:
        file: The uploaded ``.docx``, ``.pdf`` or ``.txt`` file.

    Returns:
        ``{"content": <extracted text>, "analysis": [...]}``, or a dict with a
        single ``message`` key when the extracted text exceeds 10,000
        characters.

    Raises:
        HTTPException: 400 for an unsupported file type or too-short text;
            500 for any other failure while processing the file.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > _MAX_FILE_CHARS:
            return {"message": "File contains more than 10,000 characters."}
        cleaned_text = _flatten_whitespace(file_contents)
        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        # Deliberate client errors (400) must not be rewritten as a 500.
        raise
    except Exception as e:
        logging.exception("Error processing file: %s", e)
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from e


async def handle_file_upload(file: UploadFile):
    """Classify the full text of an uploaded file.

    Args:
        file: The uploaded ``.docx``, ``.pdf`` or ``.txt`` file.

    Returns:
        A dict with the keys ``content``, ``result``, ``perplexity`` and
        ``ai_likelihood``, or a dict with a single ``message`` key when the
        extracted text exceeds 10,000 characters.

    Raises:
        HTTPException: 400 for an unsupported file type; 500 for any other
            failure while processing the file.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > _MAX_FILE_CHARS:
            return {"message": "File contains more than 10,000 characters."}
        cleaned_text = _flatten_whitespace(file_contents)
        label, perplexity, ai_likelihood = await _classify(cleaned_text)
        return {
            "content": file_contents,
            "result": label,
            "perplexity": perplexity,
            "ai_likelihood": ai_likelihood,
        }
    except HTTPException:
        # Deliberate client errors (400) must not be rewritten as a 500.
        raise
    except Exception as e:
        logging.exception("Error processing file: %s", e)
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from e


async def extract_file_contents(file: UploadFile):
    """Parse an uploaded file with the parser matching its content type.

    Args:
        file: The uploaded file; its ``content_type`` selects the parser.

    Returns:
        Whatever the selected parser (``parse_docx``, ``parse_pdf`` or
        ``parse_txt``) returns for the file's bytes.

    Raises:
        HTTPException: 400 if the content type is not one of the three
            supported types.
    """
    parsers = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    # Drop parameters such as "; charset=utf-8" and tolerate a missing type,
    # so "text/plain; charset=utf-8" is still recognised.
    media_type = (file.content_type or "").split(";", 1)[0].strip().lower()
    parser = parsers.get(media_type)
    if parser is None:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
        )
    content = await file.read()
    return parser(BytesIO(content))


async def handle_sentence_level_analysis(text: str):
    """Classify each sentence of ``text`` separately.

    Args:
        text: Raw input text; surrounding whitespace is ignored.

    Returns:
        ``{"analysis": [...]}`` with one dict per sentence holding the keys
        ``sentence``, ``label``, ``perplexity`` and ``ai_likelihood``.

    Raises:
        HTTPException: 400 if the text contains fewer than two words.
    """
    text = _require_two_words(text)
    results = []
    # Sentences are classified one after another, as in the original code.
    for sentence in sent_tokenize(text, language="english"):
        label, perplexity, likelihood = await _classify(sentence)
        results.append(
            {
                "sentence": sentence,
                "label": label,
                "perplexity": perplexity,
                "ai_likelihood": likelihood,
            }
        )
    return {"analysis": results}


def classify(text: str):
    """Synchronous pass-through to ``classify_text``."""
    return classify_text(text)