File size: 4,460 Bytes
0117df3
 
9afba1d
753c2d1
0117df3
88da32f
753c2d1
0117df3
 
9afba1d
753c2d1
 
9afba1d
753c2d1
 
9afba1d
753c2d1
 
 
 
 
 
9afba1d
0117df3
 
805e1e5
9afba1d
 
805e1e5
9afba1d
 
88da32f
9afba1d
88da32f
 
 
 
 
9afba1d
 
f64e40d
88da32f
 
 
 
 
 
9afba1d
0117df3
 
 
 
 
9afba1d
 
f64e40d
9afba1d
 
 
 
 
 
 
0117df3
 
 
 
9afba1d
0117df3
 
 
9afba1d
0117df3
 
 
 
 
 
 
f64e40d
9afba1d
0117df3
 
9afba1d
88da32f
 
 
f64e40d
88da32f
9afba1d
f64e40d
88da32f
9afba1d
 
88da32f
9afba1d
 
88da32f
 
 
 
 
 
 
 
0117df3
9afba1d
0117df3
 
9afba1d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from .inferencer import classify_text
import asyncio
from fastapi import HTTPException, UploadFile, status, Depends,requests
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from .preprocess import parse_docx, parse_pdf, parse_txt
from nltk.tokenize import sent_tokenize
import os
from io import BytesIO
import logging
import requests
security = HTTPBearer()

# Token verification
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token supplied with the request.

    The presented token is compared against the value of the
    ``MY_SECRET_TOKEN`` environment variable.

    Args:
        credentials: Bearer credentials resolved by the ``security`` dependency.

    Returns:
        The presented token string when it matches the configured secret.

    Raises:
        HTTPException: 403 when the secret is not configured or the
            presented token does not match it.
    """
    import hmac  # function-scope import keeps this block self-contained

    token = credentials.credentials
    expected = os.getenv("MY_SECRET_TOKEN")
    # An unset secret rejects every token, exactly as the previous
    # `token != None` comparison did. compare_digest is used instead of
    # `!=` so the comparison time does not reveal how many leading
    # characters of the secret were guessed correctly. Both sides are
    # encoded because compare_digest only accepts ASCII-only str values.
    if expected is None or not hmac.compare_digest(
        token.encode("utf-8"), expected.encode("utf-8")
    ):
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token

# Text classification
async def handle_text_analysis(text: str):
    """Classify a raw text snippet as a whole.

    Args:
        text: The text to analyse; surrounding whitespace is stripped first.

    Returns:
        A dict with ``result`` (the label), ``perplexity`` (rounded to two
        decimals) and ``ai_likelihood``, taken from the three values
        returned by ``classify_text``.

    Raises:
        HTTPException: 400 when the text has fewer than 10
            whitespace-separated words; 413 when it is longer than
            10,000 characters.
    """
    text = text.strip()
    # The guard requires 10 words; the error detail previously claimed
    # "two words", which misled clients about the real minimum.
    if not text or len(text.split()) < 10:
        raise HTTPException(status_code=400, detail="Text must contain at least 10 words")
    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")
    # classify_text is a synchronous call; run it in a worker thread so the
    # event loop is not blocked while it executes.
    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
    return {"result": label, "perplexity": round(perplexity, 2), "ai_likelihood": ai_likelihood}

# File sentence-level analysis
async def handle_file_sentance(file: UploadFile):
    """Run sentence-level analysis on the text of an uploaded file.

    Args:
        file: The uploaded file; its text is obtained through
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted text exceeds 10,000
        characters; otherwise a dict with the extracted ``content`` merged
        with the result of ``handle_sentence_level_analysis``.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by the helpers is propagated unchanged;
            500 for any other unexpected failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}
        # Flatten newlines and tabs so the text is analysed as one line.
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
        result = await handle_sentence_level_analysis(cleaned_text)
        return {"content": file_contents, **result}
    except HTTPException:
        # Deliberate HTTP errors (empty file, unsupported type, validation)
        # must reach the client with their own status code instead of being
        # rewritten into a generic 500 by the handler below.
        raise
    except Exception as e:
        logging.exception(f"Error processing file: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing the file") from e

# File-level classification
async def handle_file_upload(file: UploadFile):
    """Classify the whole text of an uploaded file.

    Args:
        file: The uploaded file; its text is obtained through
            ``extract_file_contents``.

    Returns:
        ``{"message": ...}`` when the extracted text exceeds 10,000
        characters; otherwise a dict with the extracted ``content``, the
        ``result`` label, ``perplexity`` (rounded to two decimals) and
        ``ai_likelihood``.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by ``extract_file_contents`` is propagated
            unchanged; 500 for any other unexpected failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            return {"message": "File contains more than 10,000 characters."}
        # Flatten newlines and tabs so the text is classified as one line.
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
        # classify_text is synchronous; run it off the event loop.
        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
        return {
            "content": file_contents,
            "result": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": ai_likelihood
        }
    except HTTPException:
        # Deliberate HTTP errors (empty file, unsupported type) must reach
        # the client with their own status code instead of being rewritten
        # into a generic 500 by the handler below.
        raise
    except Exception as e:
        logging.exception(f"Error processing file: {str(e)}")
        raise HTTPException(status_code=500, detail="Error processing the file") from e

# File extraction
async def extract_file_contents(file: UploadFile):
    """Read an uploaded file and hand its bytes to the matching parser.

    The parser is selected from the upload's ``content_type``: docx, PDF
    and plain text are supported.

    Args:
        file: The uploaded file to read.

    Returns:
        Whatever the selected parser (``parse_docx``, ``parse_pdf`` or
        ``parse_txt``) returns for the in-memory stream.

    Raises:
        HTTPException: 404 when the content type is not one of the
            supported types.
    """
    raw_bytes = await file.read()
    buffer = BytesIO(raw_bytes)

    # Content type -> parser lookup; an unknown or missing type yields None.
    parsers = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    parser = parsers.get(file.content_type)
    if parser is None:
        raise HTTPException(
            status_code=404,
            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed."
        )
    return parser(buffer)

# Sentence-level analysis
async def handle_sentence_level_analysis(text: str):
    """Classify each sentence of ``text`` individually.

    Args:
        text: The text to analyse; surrounding whitespace is stripped first.

    Returns:
        ``{"analysis": [...]}`` where each entry holds the ``sentence``,
        its ``label``, ``perplexity`` (rounded to two decimals) and
        ``ai_likelihood``, in the order the sentences were tokenized.

    Raises:
        HTTPException: 400 when the text has fewer than two
            whitespace-separated words; 413 when it is longer than
            10,000 characters.
    """
    text = text.strip()
    # Too-short input is a malformed request (400), not an oversized
    # payload; 413 is kept only for the length limit below.
    if not text or len(text.split()) < 2:
        raise HTTPException(status_code=400, detail="Text must contain at least two words")

    if len(text) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")

    sentences = sent_tokenize(text, language="english")
    results = []
    for sentence in sentences:
        # Skip whitespace-only fragments.
        if not sentence.strip():
            continue
        # classify_text is synchronous; run each call in a worker thread so
        # the event loop stays responsive. Sentences are processed one at a
        # time, in order.
        label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
        results.append({
            "sentence": sentence,
            "label": label,
            "perplexity": round(perplexity, 2),
            "ai_likelihood": likelihood
        })
    return {"analysis": results}

# Synchronous call
def classify(text: str):
    """Classify ``text`` synchronously by delegating to ``classify_text``.

    Args:
        text: The text to classify.

    Returns:
        The value returned by ``classify_text``, unchanged.
    """
    outcome = classify_text(text)
    return outcome