Spaces:

Pujan-Dev
/

AI_API

Sleeping

App Files Files Community

Pujan-Dev committed on May 21, 2025

Commit

9afba1d

1 Parent(s): 753c2d1

refactor: added the base 10,000-character limit

Browse files

Files changed (7) hide show

.gitignore +1 -1
app.py +18 -8
config.py +1 -0
features/text_classifier/controller.py +38 -32
features/text_classifier/preprocess.py +3 -0
features/text_classifier/routes.py +3 -0
requirements.txt +1 -0

.gitignore CHANGED Viewed

@@ -58,4 +58,4 @@ node_modules/
 model/
 models/.gitattributes  #<-- This line can stay if you only want to ignore that file, not the whole folder
-#

 model/
 models/.gitattributes  #<-- This line can stay if you only want to ignore that file, not the whole folder
+todo.md

app.py CHANGED Viewed

@@ -1,26 +1,36 @@
-from fastapi import FastAPI
 from contextlib import asynccontextmanager
 from features.text_classifier.routes import router as text_classifier_router
 from features.text_classifier.model_loader import warmup
 import nltk
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    # Your model warmup
     warmup()
     yield
-    # Optionally add cleanup here
-# Pass lifespan handler to FastAPI constructor
 app = FastAPI(lifespan=lifespan)
-app.include_router(text_classifier_router, prefix="/text", tags=["Text Classification"])
 @app.get("/")
 def index():
     return {
         "Message": "FastAPI is running...",
-        "Try": "/text/analyze or /text/analyze-sentences"
     }

+# app.py
+from fastapi import FastAPI, Request
 from contextlib import asynccontextmanager
 from features.text_classifier.routes import router as text_classifier_router
 from features.text_classifier.model_loader import warmup
 import nltk
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.util import get_remote_address
+from slowapi.errors import RateLimitExceeded
+import requests
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     warmup()
     yield
+limiter = Limiter(key_func=get_remote_address)
 app = FastAPI(lifespan=lifespan)
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+app.include_router(text_classifier_router, prefix="/text")
 @app.get("/")
 def index():
     return {
         "Message": "FastAPI is running...",
+        "Try": "/text/analyse or /text/analyse-sentences"
     }
+@app.get("/home")
+@limiter.limit("5/minute")
+async def homepage(request: Request):
+    return {"msg": "This is a good message"}

config.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ ACCESS_RATE=10

features/text_classifier/controller.py CHANGED Viewed

@@ -1,73 +1,75 @@
 from .inferencer import classify_text
 import asyncio
-from fastapi import HTTPException, UploadFile,status,Depends
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from .preprocess import parse_docx, parse_pdf, parse_txt
 from nltk.tokenize import sent_tokenize
 import os
 from io import BytesIO
 import logging
 security = HTTPBearer()
 async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
     token = credentials.credentials
-    if token != os.getenv("MY_SECRET_TOKEN"):  # Replace with your actual secret
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
             detail="Invalid or expired token"
         )
     return token
 async def handle_text_analysis(text: str):
     text = text.strip()
     if not text or len(text.split()) < 2:
-        raise HTTPException(
-            status_code=400, detail="Text must contain at least two words"
-        )
-    label, perplexity,ai_likelihood = await asyncio.to_thread(classify_text, text)
-    return {"result": label, "perplexity": round(int(perplexity), 2),"ai_likelihood":ai_likelihood}
 async def handle_file_sentance(file: UploadFile):
     try:
         file_contents = await extract_file_contents(file)
         if len(file_contents) > 10000:
             return {"message": "File contains more than 10,000 characters."}
-        cleaned_text = file_contents.replace("\n", "").replace("\t", "")
         result = await handle_sentence_level_analysis(cleaned_text)
         return {"content": file_contents, **result}
     except Exception as e:
         logging.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Error processing the file")
 async def handle_file_upload(file: UploadFile):
     try:
         file_contents = await extract_file_contents(file)
         if len(file_contents) > 10000:
             return {"message": "File contains more than 10,000 characters."}
-        cleaned_text = file_contents.replace("\n", "").replace("\t", "")
-        label, perplexity,ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
-        return {"content":file_contents,"result": label, "perplexity": round(int(perplexity), 2),"ai_likelihood":ai_likelihood}
     except Exception as e:
         logging.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Error processing the file")
 async def extract_file_contents(file: UploadFile):
     content = await file.read()
     file_stream = BytesIO(content)
-    if (
-        file.content_type
-        == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
-    ):
         return parse_docx(file_stream)
     elif file.content_type == "application/pdf":
         return parse_pdf(file_stream)
@@ -76,20 +78,23 @@ async def extract_file_contents(file: UploadFile):
     else:
         raise HTTPException(
             status_code=400,
-            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed.",
         )
 async def handle_sentence_level_analysis(text: str):
     text = text.strip()
     if not text or len(text.split()) < 2:
-        raise HTTPException(
-            status_code=400, detail="Text must contain at least two words"
-        )
-    sentences = sent_tokenize(text,language="english")
-    results = []
     for sentence in sentences:
         label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
         results.append({
             "sentence": sentence,
@@ -97,8 +102,9 @@ async def handle_sentence_level_analysis(text: str):
             "perplexity": round(perplexity, 2),
             "ai_likelihood": likelihood
         })
     return {"analysis": results}
 def classify(text: str):
     return classify_text(text)

 from .inferencer import classify_text
 import asyncio
+from fastapi import HTTPException, UploadFile, status, Depends,requests
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from .preprocess import parse_docx, parse_pdf, parse_txt
 from nltk.tokenize import sent_tokenize
 import os
 from io import BytesIO
 import logging
+import requests
 security = HTTPBearer()
+# Token verification
 async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
     token = credentials.credentials
+    if token != os.getenv("MY_SECRET_TOKEN"):
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
             detail="Invalid or expired token"
         )
     return token
+# Text classification
 async def handle_text_analysis(text: str):
     text = text.strip()
     if not text or len(text.split()) < 2:
+        raise HTTPException(status_code=400, detail="Text must contain at least two words")
+    if len(text) > 10000:
+        raise HTTPException(status_code=400, detail="The text should be less than 10,000 characters.")
+    label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
+    return {"result": label, "perplexity": round(perplexity, 2), "ai_likelihood": ai_likelihood}
+# File sentence-level analysis
 async def handle_file_sentance(file: UploadFile):
     try:
         file_contents = await extract_file_contents(file)
         if len(file_contents) > 10000:
             return {"message": "File contains more than 10,000 characters."}
+        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
+        if not cleaned_text:
+            raise HTTPException(status_code=400, detail="The file is empty or only contains whitespace.")
         result = await handle_sentence_level_analysis(cleaned_text)
         return {"content": file_contents, **result}
     except Exception as e:
         logging.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Error processing the file")
+# File-level classification
 async def handle_file_upload(file: UploadFile):
     try:
         file_contents = await extract_file_contents(file)
         if len(file_contents) > 10000:
             return {"message": "File contains more than 10,000 characters."}
+        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
+        if not cleaned_text:
+            raise HTTPException(status_code=400, detail="The file is empty or only contains whitespace.")
+        label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
+        return {
+            "content": file_contents,
+            "result": label,
+            "perplexity": round(perplexity, 2),
+            "ai_likelihood": ai_likelihood
+        }
     except Exception as e:
         logging.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Error processing the file")
+# File extraction
 async def extract_file_contents(file: UploadFile):
     content = await file.read()
     file_stream = BytesIO(content)
+    if file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
         return parse_docx(file_stream)
     elif file.content_type == "application/pdf":
         return parse_pdf(file_stream)
     else:
         raise HTTPException(
             status_code=400,
+            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed."
         )
+# Sentence-level analysis
 async def handle_sentence_level_analysis(text: str):
     text = text.strip()
     if not text or len(text.split()) < 2:
+        raise HTTPException(status_code=400, detail="Text must contain at least two words")
+    if len(text) > 10000:
+        raise HTTPException(status_code=400, detail="Text must be less than 10,000 characters.")
+    sentences = sent_tokenize(text, language="english")
+    results = []
     for sentence in sentences:
+        if not sentence.strip():
+            continue
         label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
         results.append({
             "sentence": sentence,
             "perplexity": round(perplexity, 2),
             "ai_likelihood": likelihood
         })
     return {"analysis": results}
+# Synchronous call
 def classify(text: str):
     return classify_text(text)

features/text_classifier/preprocess.py CHANGED Viewed

@@ -3,6 +3,8 @@ import docx
 from io import BytesIO
 import logging
 from fastapi import HTTPException
 def parse_docx(file: BytesIO):
     doc = docx.Document(file)
     text = ""
@@ -27,3 +29,4 @@ def parse_pdf(file: BytesIO):
 def parse_txt(file: BytesIO):
     return file.read().decode("utf-8")

 from io import BytesIO
 import logging
 from fastapi import HTTPException
 def parse_docx(file: BytesIO):
     doc = docx.Document(file)
     text = ""
 def parse_txt(file: BytesIO):
     return file.read().decode("utf-8")

features/text_classifier/routes.py CHANGED Viewed

@@ -3,6 +3,9 @@ from fastapi.security import HTTPBearer
 from pydantic import BaseModel
 from .controller import handle_text_analysis, handle_file_upload, handle_sentence_level_analysis, handle_file_sentance
 from .controller import verify_token
 router = APIRouter()
 security = HTTPBearer()

 from pydantic import BaseModel
 from .controller import handle_text_analysis, handle_file_upload, handle_sentence_level_analysis, handle_file_sentance
 from .controller import verify_token
 router = APIRouter()
 security = HTTPBearer()

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ python-docx
 pydantic
 PyMuPDF
 nltk
 python-multipart

 pydantic
 PyMuPDF
 nltk
+fastapi-limiter
 python-multipart