File size: 3,254 Bytes
0117df3
 
 
 
88da32f
0117df3
 
 
 
 
88da32f
0117df3
 
 
 
 
 
88da32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0117df3
 
 
 
 
 
 
 
88da32f
 
0117df3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88da32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0117df3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from .inferencer import classify_text
import asyncio
from fastapi import HTTPException, UploadFile
from .preprocess import parse_docx, parse_pdf, parse_txt
from nltk.tokenize import sent_tokenize

from io import BytesIO
import logging



async def handle_text_analysis(text: str):
    """Classify a raw text snippet and report its perplexity and AI likelihood.

    Args:
        text: The text to analyse. Leading and trailing whitespace is ignored.

    Returns:
        A dict with the keys ``result`` (the label returned by
        ``classify_text``), ``perplexity`` (rounded to two decimal places)
        and ``ai_likelihood``.

    Raises:
        HTTPException: 400 when the stripped text contains fewer than two
            whitespace-separated words.
    """
    text = text.strip()
    # An empty string splits into zero words, so this also rejects blank input.
    if len(text.split()) < 2:
        raise HTTPException(
            status_code=400, detail="Text must contain at least two words"
        )
    # classify_text is a synchronous call; run it in a worker thread so the
    # event loop stays free while it executes.
    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    return {
        "result": label,
        # float() instead of int(): truncating to an integer first made the
        # two-decimal rounding a no-op and disagreed with the sentence-level
        # handler, which reports fractional perplexity.
        "perplexity": round(float(perplexity), 2),
        "ai_likelihood": ai_likelihood,
    }


async def handle_file_sentance(file: UploadFile):
    """Run sentence-level analysis on the text of an uploaded file.

    Args:
        file: The uploaded document; its text is obtained through
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted text is longer than 10,000
        characters; otherwise a dict holding the original text under
        ``content`` merged with the result of
        ``handle_sentence_level_analysis``.

    Raises:
        HTTPException: Any HTTPException raised while extracting or analysing
            the text (for example a 400 for an unsupported file type) is
            propagated unchanged; every other failure is logged and reported
            as a 500.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}
        # Turn line breaks and tabs into spaces rather than deleting them:
        # deleting would glue "end.\nNext" into "end.Next", merging words and
        # hiding sentence boundaries from the tokenizer.
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ")
        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        # Deliberate client-facing errors must not be rewritten as a 500 by
        # the generic handler below.
        raise
    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception("Error processing file: %s", e)
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from e



async def handle_file_upload(file: UploadFile):
    """Classify the whole text of an uploaded file in a single pass.

    Args:
        file: The uploaded document; its text is obtained through
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted text is longer than 10,000
        characters; otherwise a dict with the keys ``content`` (the original
        text), ``result`` (the label returned by ``classify_text``),
        ``perplexity`` (rounded to two decimal places) and ``ai_likelihood``.

    Raises:
        HTTPException: Any HTTPException raised while extracting the text
            (for example a 400 for an unsupported file type) is propagated
            unchanged; every other failure is logged and reported as a 500.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}
        # Turn line breaks and tabs into spaces rather than deleting them, so
        # the last word of one line is not fused with the first word of the
        # next before classification.
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ")
        # classify_text is a synchronous call; run it in a worker thread so
        # the event loop stays free while it executes.
        label, perplexity, ai_likelihood = await asyncio.to_thread(
            classify_text, cleaned_text
        )
        return {
            "content": file_contents,
            "result": label,
            # float() instead of int(): truncating first made the two-decimal
            # rounding a no-op.
            "perplexity": round(float(perplexity), 2),
            "ai_likelihood": ai_likelihood,
        }
    except HTTPException:
        # Deliberate client-facing errors must not be rewritten as a 500 by
        # the generic handler below.
        raise
    except Exception as e:
        # logging.exception records the traceback alongside the message.
        logging.exception("Error processing file: %s", e)
        raise HTTPException(
            status_code=500, detail="Error processing the file"
        ) from e


async def extract_file_contents(file: UploadFile):
    """Read an uploaded file and hand its bytes to the matching parser.

    The upload is read in full and wrapped in an in-memory ``BytesIO``
    stream, then routed by its declared content type to ``parse_docx``,
    ``parse_pdf`` or ``parse_txt``.

    Args:
        file: The uploaded file to read.

    Returns:
        Whatever the selected parser returns (callers in this module treat
        it as a string).

    Raises:
        HTTPException: 400 when the content type is not one of the three
            supported types.
    """
    raw_bytes = await file.read()
    buffer = BytesIO(raw_bytes)

    # Content-type -> parser lookup; anything not listed is rejected below.
    parsers_by_type = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    parser = parsers_by_type.get(file.content_type)
    if parser is None:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
        )
    return parser(buffer)

async def handle_sentence_level_analysis(text: str):
    """Classify a text one sentence at a time.

    The stripped text is split into sentences with NLTK's ``sent_tokenize``
    and each sentence is passed to ``classify_text`` in turn, in order.

    Args:
        text: The text to analyse. Leading and trailing whitespace is ignored.

    Returns:
        ``{"analysis": [...]}`` where each entry has the keys ``sentence``,
        ``label``, ``perplexity`` (rounded to two decimal places) and
        ``ai_likelihood``.

    Raises:
        HTTPException: 400 when the stripped text contains fewer than two
            whitespace-separated words.
    """
    stripped = text.strip()
    # An empty string yields zero words, so blank input is rejected here too.
    word_count = len(stripped.split())
    if word_count < 2:
        raise HTTPException(
            status_code=400, detail="Text must contain at least two words"
        )

    analysis = []
    for piece in sent_tokenize(stripped, language="english"):
        # Each classification runs in a worker thread and is awaited before
        # the next sentence starts.
        label, perplexity, likelihood = await asyncio.to_thread(classify_text, piece)
        entry = {
            "sentence": piece,
            "label": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": likelihood,
        }
        analysis.append(entry)

    return {"analysis": analysis}

def classify(text: str):
    """Synchronously classify ``text`` by delegating to ``classify_text``.

    Args:
        text: The text to classify.

    Returns:
        The value returned by ``classify_text`` for this text, unchanged.
    """
    outcome = classify_text(text)
    return outcome