feat: Implemented sentence-level analysis tools and added file support for analysis
Browse files- .gitignore +4 -0
- app.py +16 -6
- features/text_classifier/__init__.py +1 -0
- features/text_classifier/controller.py +40 -4
- features/text_classifier/inferencer.py +15 -2
- features/text_classifier/routes.py +11 -1
- readme.md +110 -25
- requirements.txt +1 -0
.gitignore
CHANGED
|
@@ -52,6 +52,10 @@ Thumbs.db
|
|
| 52 |
# ---- Project-specific ----
|
| 53 |
Ai-Text-Detector/
|
| 54 |
HuggingFace/model/
|
|
|
|
| 55 |
# ---- Node Projects (if applicable) ----
|
| 56 |
node_modules/
|
|
|
|
|
|
|
| 57 |
|
|
|
|
|
|
| 52 |
# ---- Project-specific ----
|
| 53 |
Ai-Text-Detector/
|
| 54 |
HuggingFace/model/
|
| 55 |
+
|
| 56 |
# ---- Node Projects (if applicable) ----
|
| 57 |
node_modules/
|
| 58 |
+
model/
|
| 59 |
+
models/.gitattributes #<-- This line can stay if you only want to ignore that file, not the whole folder
|
| 60 |
|
| 61 |
+
#
|
app.py
CHANGED
|
@@ -2,19 +2,29 @@ from fastapi import FastAPI
|
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
from features.text_classifier.routes import router as text_classifier_router
|
| 4 |
from features.text_classifier.model_loader import warmup
|
| 5 |
-
|
| 6 |
|
| 7 |
@asynccontextmanager
|
| 8 |
async def lifespan(app: FastAPI):
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
yield
|
| 11 |
-
#
|
| 12 |
|
| 13 |
|
| 14 |
-
|
| 15 |
-
app
|
| 16 |
|
|
|
|
| 17 |
|
| 18 |
@app.get("/")
|
| 19 |
def index():
|
| 20 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
from features.text_classifier.routes import router as text_classifier_router
|
| 4 |
from features.text_classifier.model_loader import warmup
|
| 5 |
+
import nltk
|
| 6 |
|
| 7 |
@asynccontextmanager
|
| 8 |
async def lifespan(app: FastAPI):
|
| 9 |
+
# Ensure punkt is available
|
| 10 |
+
nltk.download("punkt")
|
| 11 |
+
|
| 12 |
+
nltk.download('punkt_tab')
|
| 13 |
+
|
| 14 |
+
# Your model warmup
|
| 15 |
+
warmup()
|
| 16 |
yield
|
| 17 |
+
# Optionally add cleanup here
|
| 18 |
|
| 19 |
|
| 20 |
+
# Pass lifespan handler to FastAPI constructor
|
| 21 |
+
app = FastAPI(lifespan=lifespan)
|
| 22 |
|
| 23 |
+
app.include_router(text_classifier_router, prefix="/text", tags=["Text Classification"])
|
| 24 |
|
| 25 |
@app.get("/")
|
| 26 |
def index():
|
| 27 |
+
return {
|
| 28 |
+
"Message": "FastAPI is running...",
|
| 29 |
+
"Try": "/text/analyze or /text/analyze-sentences"
|
| 30 |
+
}
|
features/text_classifier/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
features/text_classifier/controller.py
CHANGED
|
@@ -2,19 +2,35 @@ from .inferencer import classify_text
|
|
| 2 |
import asyncio
|
| 3 |
from fastapi import HTTPException, UploadFile
|
| 4 |
from .preprocess import parse_docx, parse_pdf, parse_txt
|
|
|
|
| 5 |
|
| 6 |
from io import BytesIO
|
| 7 |
import logging
|
| 8 |
|
| 9 |
|
|
|
|
| 10 |
async def handle_text_analysis(text: str):
|
| 11 |
text = text.strip()
|
| 12 |
if not text or len(text.split()) < 2:
|
| 13 |
raise HTTPException(
|
| 14 |
status_code=400, detail="Text must contain at least two words"
|
| 15 |
)
|
| 16 |
-
label, perplexity = await asyncio.to_thread(classify_text, text)
|
| 17 |
-
return {"result": label, "perplexity": round(perplexity, 2)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
async def handle_file_upload(file: UploadFile):
|
|
@@ -23,8 +39,8 @@ async def handle_file_upload(file: UploadFile):
|
|
| 23 |
if len(file_contents) > 10000:
|
| 24 |
return {"message": "File contains more than 10,000 characters."}
|
| 25 |
cleaned_text = file_contents.replace("\n", "").replace("\t", "")
|
| 26 |
-
label, perplexity = await asyncio.to_thread(classify_text, cleaned_text)
|
| 27 |
-
return {"result": label, "perplexity": round(perplexity, 2)}
|
| 28 |
except Exception as e:
|
| 29 |
logging.error(f"Error processing file: {str(e)}")
|
| 30 |
raise HTTPException(status_code=500, detail="Error processing the file")
|
|
@@ -49,6 +65,26 @@ async def extract_file_contents(file: UploadFile):
|
|
| 49 |
detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
|
| 50 |
)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
def classify(text: str):
|
| 54 |
return classify_text(text)
|
|
|
|
| 2 |
import asyncio
|
| 3 |
from fastapi import HTTPException, UploadFile
|
| 4 |
from .preprocess import parse_docx, parse_pdf, parse_txt
|
| 5 |
+
from nltk.tokenize import sent_tokenize
|
| 6 |
|
| 7 |
from io import BytesIO
|
| 8 |
import logging
|
| 9 |
|
| 10 |
|
| 11 |
+
|
| 12 |
async def handle_text_analysis(text: str):
|
| 13 |
text = text.strip()
|
| 14 |
if not text or len(text.split()) < 2:
|
| 15 |
raise HTTPException(
|
| 16 |
status_code=400, detail="Text must contain at least two words"
|
| 17 |
)
|
| 18 |
+
label, perplexity,ai_likelihood = await asyncio.to_thread(classify_text, text)
|
| 19 |
+
return {"result": label, "perplexity": round(int(perplexity), 2),"ai_likelihood":ai_likelihood}
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
async def handle_file_sentance(file: UploadFile):
|
| 23 |
+
try:
|
| 24 |
+
file_contents = await extract_file_contents(file)
|
| 25 |
+
if len(file_contents) > 10000:
|
| 26 |
+
return {"message": "File contains more than 10,000 characters."}
|
| 27 |
+
cleaned_text = file_contents.replace("\n", "").replace("\t", "")
|
| 28 |
+
result = await handle_sentence_level_analysis(cleaned_text)
|
| 29 |
+
return {"content": file_contents, **result}
|
| 30 |
+
except Exception as e:
|
| 31 |
+
logging.error(f"Error processing file: {str(e)}")
|
| 32 |
+
raise HTTPException(status_code=500, detail="Error processing the file")
|
| 33 |
+
|
| 34 |
|
| 35 |
|
| 36 |
async def handle_file_upload(file: UploadFile):
|
|
|
|
| 39 |
if len(file_contents) > 10000:
|
| 40 |
return {"message": "File contains more than 10,000 characters."}
|
| 41 |
cleaned_text = file_contents.replace("\n", "").replace("\t", "")
|
| 42 |
+
label, perplexity,ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
|
| 43 |
+
return {"content":file_contents,"result": label, "perplexity": round(int(perplexity), 2),"ai_likelihood":ai_likelihood}
|
| 44 |
except Exception as e:
|
| 45 |
logging.error(f"Error processing file: {str(e)}")
|
| 46 |
raise HTTPException(status_code=500, detail="Error processing the file")
|
|
|
|
| 65 |
detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
|
| 66 |
)
|
| 67 |
|
| 68 |
+
async def handle_sentence_level_analysis(text: str):
|
| 69 |
+
text = text.strip()
|
| 70 |
+
if not text or len(text.split()) < 2:
|
| 71 |
+
raise HTTPException(
|
| 72 |
+
status_code=400, detail="Text must contain at least two words"
|
| 73 |
+
)
|
| 74 |
+
|
| 75 |
+
sentences = sent_tokenize(text,language="english")
|
| 76 |
+
results = []
|
| 77 |
+
|
| 78 |
+
for sentence in sentences:
|
| 79 |
+
label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
|
| 80 |
+
results.append({
|
| 81 |
+
"sentence": sentence,
|
| 82 |
+
"label": label,
|
| 83 |
+
"perplexity": round(perplexity, 2),
|
| 84 |
+
"ai_likelihood": likelihood
|
| 85 |
+
})
|
| 86 |
+
|
| 87 |
+
return {"analysis": results}
|
| 88 |
|
| 89 |
def classify(text: str):
|
| 90 |
return classify_text(text)
|
features/text_classifier/inferencer.py
CHANGED
|
@@ -3,6 +3,19 @@ from .model_loader import get_model_tokenizer
|
|
| 3 |
|
| 4 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def classify_text(text: str):
|
| 8 |
model, tokenizer = get_model_tokenizer()
|
|
@@ -23,5 +36,5 @@ def classify_text(text: str):
|
|
| 23 |
result = "Probably AI-generated"
|
| 24 |
else:
|
| 25 |
result = "Human-written"
|
| 26 |
-
|
| 27 |
-
return result, perplexity
|
|
|
|
| 3 |
|
| 4 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 5 |
|
| 6 |
+
def perplexity_to_ai_likelihood(ppl: float) -> float:
|
| 7 |
+
# You can tune these parameters
|
| 8 |
+
min_ppl = 10 # very confident it's AI
|
| 9 |
+
max_ppl = 100 # very confident it's human
|
| 10 |
+
|
| 11 |
+
# Clamp to bounds
|
| 12 |
+
ppl = max(min_ppl, min(ppl, max_ppl))
|
| 13 |
+
|
| 14 |
+
# Invert and scale: lower perplexity -> higher AI-likelihood
|
| 15 |
+
likelihood = 1 - ((ppl - min_ppl) / (max_ppl - min_ppl))
|
| 16 |
+
|
| 17 |
+
return round(likelihood * 100, 2)
|
| 18 |
+
|
| 19 |
|
| 20 |
def classify_text(text: str):
|
| 21 |
model, tokenizer = get_model_tokenizer()
|
|
|
|
| 36 |
result = "Probably AI-generated"
|
| 37 |
else:
|
| 38 |
result = "Human-written"
|
| 39 |
+
likelihood_result=perplexity_to_ai_likelihood(perplexity)
|
| 40 |
+
return result, perplexity,likelihood_result
|
features/text_classifier/routes.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
|
| 2 |
from fastapi.security import HTTPBearer
|
| 3 |
from pydantic import BaseModel
|
| 4 |
-
from .controller import handle_text_analysis, handle_file_upload
|
| 5 |
|
| 6 |
router = APIRouter()
|
| 7 |
bearer_scheme = HTTPBearer()
|
|
@@ -26,3 +26,13 @@ async def upload_file(
|
|
| 26 |
@router.get("/health")
|
| 27 |
def health():
|
| 28 |
return {"status": "ok"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
|
| 2 |
from fastapi.security import HTTPBearer
|
| 3 |
from pydantic import BaseModel
|
| 4 |
+
from .controller import handle_text_analysis, handle_file_upload, handle_sentence_level_analysis,handle_file_sentance
|
| 5 |
|
| 6 |
router = APIRouter()
|
| 7 |
bearer_scheme = HTTPBearer()
|
|
|
|
| 26 |
@router.get("/health")
|
| 27 |
def health():
|
| 28 |
return {"status": "ok"}
|
| 29 |
+
|
| 30 |
+
@router.post("/analyze-sentences")
|
| 31 |
+
async def analyze_sentences(data: TextInput, token: str = Depends(bearer_scheme)):
|
| 32 |
+
if not data.text:
|
| 33 |
+
raise HTTPException(status_code=400, detail="Missing 'text' in request body")
|
| 34 |
+
return await handle_sentence_level_analysis(data.text)
|
| 35 |
+
|
| 36 |
+
@router.post("/analyze-sentance-file")
|
| 37 |
+
async def AnalyzeSentanceFile(file:UploadFile=File(...),token:str=Depends(bearer_scheme)):
|
| 38 |
+
return await handle_file_sentance(file)
|
readme.md
CHANGED
|
@@ -45,36 +45,54 @@ This command installs all the dependencies listed in the `requirements.txt` file
|
|
| 45 |
|
| 46 |
### **Functions**
|
| 47 |
|
| 48 |
-
1. **`load_model()`**
|
| 49 |
-
Loads the GPT-2 model and tokenizer from specified paths.
|
| 50 |
|
| 51 |
-
2. **`lifespan()`**
|
| 52 |
-
Manages the
|
| 53 |
|
| 54 |
-
3. **`classify_text_sync()`**
|
| 55 |
-
Synchronously tokenizes input text and
|
| 56 |
|
| 57 |
-
4. **`classify_text()`**
|
| 58 |
-
Asynchronously
|
| 59 |
|
| 60 |
-
5. **`analyze_text()`**
|
| 61 |
-
**POST** endpoint:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
6. **`health()`**
|
| 64 |
-
**GET** endpoint: simple health check to confirm the API is running.
|
| 65 |
-
7. **`parse_docx() ,parse_pdf(),parse_txt()`**
|
| 66 |
-
THis are the function that are used to convert the given docs, pdf or text files into the strings format so that we can classify them.
|
| 67 |
8. **`warmup()`**
|
| 68 |
-
|
|
|
|
| 69 |
9. **`download_model_repo()`**
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
|
| 74 |
11. **`handle_file_upload()`**
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
---
|
| 80 |
|
|
@@ -103,8 +121,8 @@ This command launches the FastAPI app.
|
|
| 103 |
```
|
| 104 |
- **Response:**
|
| 105 |
```json
|
| 106 |
-
{ "result": "AI-generated", "perplexity": 55.67
|
| 107 |
-
|
| 108 |
|
| 109 |
#### 2. **`/health`**
|
| 110 |
|
|
@@ -121,8 +139,75 @@ This command launches the FastAPI app.
|
|
| 121 |
|
| 122 |
- **Response:**
|
| 123 |
```json
|
| 124 |
-
{ "result": "AI-generated", "perplexity": 55.67
|
| 125 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
---
|
| 127 |
|
| 128 |
### **Running the API**
|
|
@@ -130,7 +215,7 @@ This command launches the FastAPI app.
|
|
| 130 |
Start the server with:
|
| 131 |
|
| 132 |
```bash
|
| 133 |
-
uvicorn app:app --host 0.0.0.0 --port 8000
|
| 134 |
```
|
| 135 |
|
| 136 |
---
|
|
|
|
| 45 |
|
| 46 |
### **Functions**
|
| 47 |
|
| 48 |
+
1. **`load_model()`**
|
| 49 |
+
Loads the GPT-2 model and tokenizer from the specified directory paths.
|
| 50 |
|
| 51 |
+
2. **`lifespan()`**
|
| 52 |
+
Manages the application lifecycle. It initializes the model at startup and performs cleanup during shutdown.
|
| 53 |
|
| 54 |
+
3. **`classify_text_sync()`**
|
| 55 |
+
Synchronously tokenizes the input text and performs classification using the GPT-2 model. Returns both the classification result and perplexity score.
|
| 56 |
|
| 57 |
+
4. **`classify_text()`**
|
| 58 |
+
Asynchronously runs `classify_text_sync()` in a thread pool for non-blocking text classification.
|
| 59 |
|
| 60 |
+
5. **`analyze_text()`**
|
| 61 |
+
**POST** endpoint: Accepts text input, classifies it using `classify_text()`, and returns the result along with perplexity.
|
| 62 |
+
|
| 63 |
+
6. **`health()`**
|
| 64 |
+
**GET** endpoint: Performs a simple health check to confirm the API is operational.
|
| 65 |
+
|
| 66 |
+
7. **`parse_docx()`, `parse_pdf()`, `parse_txt()`**
|
| 67 |
+
Utility functions to extract and convert the contents of `.docx`, `.pdf`, and `.txt` files into plain text for classification.
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
8. **`warmup()`**
|
| 70 |
+
Downloads the model repository and initializes the model and tokenizer using the `load_model()` function.
|
| 71 |
+
|
| 72 |
9. **`download_model_repo()`**
|
| 73 |
+
Handles downloading the model files from the designated `MODEL` folder.
|
| 74 |
+
|
| 75 |
+
10. **`get_model_tokenizer()`**
|
| 76 |
+
Similar to `warmup()`, but includes a check to see if the model already exists. If not, it downloads the model; otherwise, it uses the previously downloaded one.
|
| 77 |
|
| 78 |
11. **`handle_file_upload()`**
|
| 79 |
+
Manages file uploads from the `/upload` route. Extracts text from the uploaded file, classifies it, and returns the results.
|
| 80 |
+
|
| 81 |
+
12. **`extract_file_contents()`**
|
| 82 |
+
Extracts and returns plain text content from uploaded files (e.g., PDF, DOCX, TXT).
|
| 83 |
+
|
| 84 |
+
13. **`handle_file_sentence()`**
|
| 85 |
+
Processes uploaded files by analyzing each sentence. Ensures the total file text is under 10,000 characters before classification.
|
| 86 |
+
|
| 87 |
+
14. **`handle_sentence_level_analysis()`**
|
| 88 |
+
Strips and checks each sentence’s length, then evaluates the likelihood of AI vs. human generation for each sentence.
|
| 89 |
+
|
| 90 |
+
15. **`analyze_sentences()`**
|
| 91 |
+
Divides long paragraphs into individual sentences, classifies each one, and returns a list of their classification results.
|
| 92 |
+
|
| 93 |
+
16. **`analyze_sentence_file()`**
|
| 94 |
+
A route function that analyzes sentences in uploaded files, similar to `handle_file_sentence()`.
|
| 95 |
+
|
| 96 |
|
| 97 |
---
|
| 98 |
|
|
|
|
| 121 |
```
|
| 122 |
- **Response:**
|
| 123 |
```json
|
| 124 |
+
{ "result": "AI-generated", "perplexity": 55.67, "ai_likelihood": 66.6 }
|
| 125 |
+
```
|
| 126 |
|
| 127 |
#### 2. **`/health`**
|
| 128 |
|
|
|
|
| 139 |
|
| 140 |
- **Response:**
|
| 141 |
```json
|
| 142 |
+
{ "result": "AI-generated", "perplexity": 55.67, "ai_likelihood": 66.6 }
|
| 143 |
```
|
| 144 |
+
#### 4. **`/text/analyze-sentance-file`**
|
| 145 |
+
- **Method:** `POST`
|
| 146 |
+
- **Description:** Accepts an uploaded file, analyzes its contents sentence by sentence, and returns the results
|
| 147 |
+
- **Request:** Files
|
| 148 |
+
|
| 149 |
+
- **Response:**
|
| 150 |
+
```json
|
| 151 |
+
{
|
| 152 |
+
"content": "Artificial Intelligence (AI) and Machine Learning (ML) are rapidly transforming the way we \ninteract with technology. AI refers to the broader concept of machines being able to carry out \ntasks in a way that we would consider \"smart,\" while ML is a subset of AI that focuses on the \ndevelopment of algorithms that allow computers to learn from and make decisions based on \ndata. These technologies are behind innovations such as voice assistants, recommendation \nsystems, self-driving cars, and medical diagnosis tools. By analyzing large amounts of data, \nAI and ML can identify patterns, make predictions, and continuously improve their \nperformance over time, making them essential tools in modern industries ranging from \nhealthcare and finance to education and entertainment. \n \n",
|
| 153 |
+
"analysis": [
|
| 154 |
+
{
|
| 155 |
+
"sentence": "Artificial Intelligence (AI) and Machine Learning (ML) are rapidly transforming the way we interact with technology.",
|
| 156 |
+
"label": "AI-generated",
|
| 157 |
+
"perplexity": 8.17,
|
| 158 |
+
"ai_likelihood": 100
|
| 159 |
+
},
|
| 160 |
+
{
|
| 161 |
+
"sentence": "AI refers to the broader concept of machines being able to carry out tasks in a way that we would consider \"smart,\" while ML is a subset of AI that focuses on the development of algorithms that allow computers to learn from and make decisions based on data.",
|
| 162 |
+
"label": "AI-generated",
|
| 163 |
+
"perplexity": 19.34,
|
| 164 |
+
"ai_likelihood": 89.62
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"sentence": "These technologies are behind innovations such as voice assistants, recommendation systems, self-driving cars, and medical diagnosis tools.",
|
| 168 |
+
"label": "AI-generated",
|
| 169 |
+
"perplexity": 40.31,
|
| 170 |
+
"ai_likelihood": 66.32
|
| 171 |
+
},
|
| 172 |
+
{
|
| 173 |
+
"sentence": "By analyzing large amounts of data, AI and ML can identify patterns, make predictions, and continuously improve their performance over time, making them essential tools in modern industries ranging from healthcare and finance to education and entertainment.",
|
| 174 |
+
"label": "AI-generated",
|
| 175 |
+
"perplexity": 26.15,
|
| 176 |
+
"ai_likelihood": 82.05
|
| 177 |
+
}
|
| 178 |
+
]
|
| 179 |
+
}
```
|
| 180 |
+
|
| 181 |
+
#### 5. **`/text/analyze-sentences`**
|
| 182 |
+
- **Method:** `POST`
|
| 183 |
+
- **Description:** Accepts raw text, analyzes it sentence by sentence, and returns the results
|
| 184 |
+
- **Request:**
|
| 185 |
+
```json
|
| 186 |
+
{
|
| 187 |
+
"text": "This is an test text. This is an another Text "
|
| 188 |
+
}
|
| 189 |
+
```
|
| 190 |
+
|
| 191 |
+
- **Response:**
|
| 192 |
+
```json
|
| 193 |
+
{
|
| 194 |
+
"analysis": [
|
| 195 |
+
{
|
| 196 |
+
"sentence": "This is an test text.",
|
| 197 |
+
"label": "Human-written",
|
| 198 |
+
"perplexity": 510.28,
|
| 199 |
+
"ai_likelihood": 0
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"sentence": "This is an another Text",
|
| 203 |
+
"label": "Human-written",
|
| 204 |
+
"perplexity": 3926.05,
|
| 205 |
+
"ai_likelihood": 0
|
| 206 |
+
}
|
| 207 |
+
]
|
| 208 |
+
}
```
|
| 209 |
+
|
| 210 |
+
|
| 211 |
---
|
| 212 |
|
| 213 |
### **Running the API**
|
|
|
|
| 215 |
Start the server with:
|
| 216 |
|
| 217 |
```bash
|
| 218 |
+
uvicorn app:app --host 0.0.0.0 --port 8000
|
| 219 |
```
|
| 220 |
|
| 221 |
---
|
requirements.txt
CHANGED
|
@@ -8,4 +8,5 @@ python-docx
|
|
| 8 |
PyMuPDF
|
| 9 |
pydantic
|
| 10 |
fitz
|
|
|
|
| 11 |
python-multipart
|
|
|
|
| 8 |
PyMuPDF
|
| 9 |
pydantic
|
| 10 |
fitz
|
| 11 |
+
nltk
|
| 12 |
python-multipart
|