File size: 4,460 Bytes
0117df3 9afba1d 753c2d1 0117df3 88da32f 753c2d1 0117df3 9afba1d 753c2d1 9afba1d 753c2d1 9afba1d 753c2d1 9afba1d 0117df3 805e1e5 9afba1d 805e1e5 9afba1d 88da32f 9afba1d 88da32f 9afba1d f64e40d 88da32f 9afba1d 0117df3 9afba1d f64e40d 9afba1d 0117df3 9afba1d 0117df3 9afba1d 0117df3 f64e40d 9afba1d 0117df3 9afba1d 88da32f f64e40d 88da32f 9afba1d f64e40d 88da32f 9afba1d 88da32f 9afba1d 88da32f 0117df3 9afba1d 0117df3 9afba1d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | from .inferencer import classify_text
import asyncio
from fastapi import HTTPException, UploadFile, status, Depends,requests
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from .preprocess import parse_docx, parse_pdf, parse_txt
from nltk.tokenize import sent_tokenize
import os
from io import BytesIO
import logging
import requests
# Shared HTTP bearer scheme instance; verify_token receives its credentials
# through Depends(security).
security = HTTPBearer()
# Token verification
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token supplied with the request.

    The presented token is compared with the value of the
    ``MY_SECRET_TOKEN`` environment variable.

    Args:
        credentials: Bearer credentials resolved through the module-level
            ``security`` dependency.

    Returns:
        The presented token when it matches the configured secret.

    Raises:
        HTTPException: 403 when the token does not match, or when
            ``MY_SECRET_TOKEN`` is unset or empty.
    """
    # Imported at function scope so this block needs no new module-level import.
    from hmac import compare_digest

    token = credentials.credentials
    expected = os.getenv("MY_SECRET_TOKEN")

    # An unset or empty secret must never authenticate anyone. The comparison
    # is done on bytes in constant time so response timing does not reveal how
    # much of the token matched (compare_digest on str only accepts ASCII).
    is_valid = bool(expected) and compare_digest(
        token.encode("utf-8"), expected.encode("utf-8")
    )
    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token
# Text classification
async def handle_text_analysis(text: str):
    """Classify a whole piece of text.

    The text is stripped, validated, and passed to ``classify_text`` in a
    worker thread via ``asyncio.to_thread``.

    Args:
        text: Raw text to analyse.

    Returns:
        A dict with the keys ``result`` (label), ``perplexity`` (rounded to
        two decimals) and ``ai_likelihood``.

    Raises:
        HTTPException: 400 when the text has fewer than 10 whitespace-separated
            words; 413 when it is longer than 10,000 characters.
    """
    min_words = 10
    max_chars = 10000

    text = text.strip()
    if not text or len(text.split()) < min_words:
        # The message is derived from the threshold so the two cannot drift
        # apart (it previously claimed "two words" while enforcing ten).
        raise HTTPException(
            status_code=400,
            detail=f"Text must contain at least {min_words} words",
        )
    if len(text) > max_chars:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    return {"result": label, "perplexity": round(perplexity, 2), "ai_likelihood": ai_likelihood}
# File sentence-level analysis
async def handle_file_sentance(file: UploadFile):
    """Run sentence-level analysis on the contents of an uploaded file.

    Args:
        file: Uploaded file whose contents are extracted with
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted contents exceed 10,000
        characters; otherwise a dict holding the extracted ``content`` plus
        the keys returned by ``handle_sentence_level_analysis``.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by ``extract_file_contents`` or
            ``handle_sentence_level_analysis`` is propagated unchanged; 500
            for any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        # Deliberate client-facing errors (empty file, unsupported type,
        # validation failures) must keep their own status code instead of
        # being rewritten into a generic 500 by the handler below.
        raise
    except Exception as e:
        logging.exception("Error processing file: %s", e)
        raise HTTPException(status_code=500, detail="Error processing the file") from e
# File-level classification
async def handle_file_upload(file: UploadFile):
    """Classify the full contents of an uploaded file.

    Args:
        file: Uploaded file whose contents are extracted with
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted contents exceed 10,000
        characters; otherwise a dict with the extracted ``content``, the
        ``result`` label, the ``perplexity`` (rounded to two decimals) and
        the ``ai_likelihood``.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by ``extract_file_contents`` is propagated
            unchanged; 500 for any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}

        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")

        # classify_text is synchronous, so it runs in a worker thread.
        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
        return {
            "content": file_contents,
            "result": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood
        }
    except HTTPException:
        # Deliberate client-facing errors (empty file, unsupported type) must
        # keep their own status code instead of being rewritten into a
        # generic 500 by the handler below.
        raise
    except Exception as e:
        logging.exception("Error processing file: %s", e)
        raise HTTPException(status_code=500, detail="Error processing the file") from e
# File extraction
async def extract_file_contents(file: UploadFile):
    """Read an uploaded file and parse it according to its content type.

    Args:
        file: Uploaded file; its ``content_type`` selects the parser.

    Returns:
        Whatever the selected parser (``parse_docx``, ``parse_pdf`` or
        ``parse_txt``) returns for the file's bytes. Callers in this file
        treat it as text -- TODO confirm against the parsers.

    Raises:
        HTTPException: 404 when the content type is not docx, pdf or
            plain text.
    """
    raw_bytes = await file.read()
    stream = BytesIO(raw_bytes)

    # Content type -> parser lookup.
    parsers = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    parser = parsers.get(file.content_type)
    if parser is None:
        raise HTTPException(
            status_code=404,
            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed."
        )
    return parser(stream)
# Sentence-level analysis
async def handle_sentence_level_analysis(text: str):
    """Classify each sentence of the given text individually.

    The text is stripped, validated, split with ``sent_tokenize`` (English),
    and each non-blank sentence is passed to ``classify_text`` in a worker
    thread, one sentence at a time.

    Args:
        text: Raw text to analyse.

    Returns:
        ``{"analysis": [...]}`` where each entry has the keys ``sentence``,
        ``label``, ``perplexity`` (rounded to two decimals) and
        ``ai_likelihood``, in sentence order.

    Raises:
        HTTPException: 400 when the text has fewer than two
            whitespace-separated words; 413 when it is longer than 10,000
            characters.
    """
    text = text.strip()
    if not text or len(text.split()) < 2:
        # Too-short input is a bad request, not an oversized payload; 400
        # matches the equivalent check in handle_text_analysis.
        raise HTTPException(status_code=400, detail="Text must contain at least two words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

    sentences = sent_tokenize(text, language="english")
    results = []
    for sentence in sentences:
        if not sentence.strip():
            continue
        label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
        results.append({
            "sentence": sentence,
            "label": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": likelihood
        })
    return {"analysis": results}
# Synchronous call
def classify(text: str):
    """Synchronously run ``classify_text`` on *text* and return its result."""
    outcome = classify_text(text)
    return outcome
|