Spaces:

can-org
/

Testing-AI-Contain

Sleeping

App Files Community

Pujan-Dev committed on May 22, 2025

Commit

eb5aac2

1 Parent(s): 7b30c7c

refact: refactor some codes and typos are fixed

Browse files

Files changed (2) hide show

features/text_classifier/controller.py +67 -53
features/text_classifier/routes.py +2 -2

features/text_classifier/controller.py CHANGED Viewed

@@ -1,59 +1,71 @@
-from .inferencer import classify_text
 import asyncio
-from fastapi import HTTPException, UploadFile, status, Depends,requests
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-from .preprocess import parse_docx, parse_pdf, parse_txt
 from nltk.tokenize import sent_tokenize
-import os
-from io import BytesIO
-import logging
-import requests
 security = HTTPBearer()
-# Token verification
 async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
     token = credentials.credentials
-    if token != os.getenv("MY_SECRET_TOKEN"):
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
             detail="Invalid or expired token"
         )
     return token
-# Text classification
 async def handle_text_analysis(text: str):
     text = text.strip()
     if not text or len(text.split()) < 10:
-        raise HTTPException(status_code=400, detail="Text must contain at least two words")
     if len(text) > 10000:
-        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")
     label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, text)
-    return {"result": label, "perplexity": round(perplexity, 2), "ai_likelihood": ai_likelihood}
-# File sentence-level analysis
-async def handle_file_sentance(file: UploadFile):
-    try:
-        file_contents = await extract_file_contents(file)
-        if len(file_contents) > 10000:
-            return {"message": "File contains more than 10,000 characters."}
-        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
-        if not cleaned_text:
-            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
-        result = await handle_sentence_level_analysis(cleaned_text)
-        return {"content": file_contents, **result}
-    except Exception as e:
-        logging.error(f"Error processing file: {str(e)}")
-        raise HTTPException(status_code=500, detail="Error processing the file")
-# File-level classification
 async def handle_file_upload(file: UploadFile):
     try:
         file_contents = await extract_file_contents(file)
         if len(file_contents) > 10000:
             return {"message": "File contains more than 10,000 characters."}
         cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
         if not cleaned_text:
             raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
         label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
         return {
             "content": file_contents,
@@ -62,49 +74,51 @@ async def handle_file_upload(file: UploadFile):
             "ai_likelihood": ai_likelihood
         }
     except Exception as e:
-        logging.error(f"Error processing file: {str(e)}")
         raise HTTPException(status_code=500, detail="Error processing the file")
-# File extraction
-async def extract_file_contents(file: UploadFile):
-    content = await file.read()
-    file_stream = BytesIO(content)
-    if file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
-        return parse_docx(file_stream)
-    elif file.content_type == "application/pdf":
-        return parse_pdf(file_stream)
-    elif file.content_type == "text/plain":
-        return parse_txt(file_stream)
-    else:
-        raise HTTPException(
-            status_code=404,
-            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed."
-        )
-# Sentence-level analysis
 async def handle_sentence_level_analysis(text: str):
     text = text.strip()
-    if not text or len(text.split()) < 2:
-        raise HTTPException(status_code=413, detail="Text must contain at least two words")
     if len(text) > 10000:
-        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters.")
     sentences = sent_tokenize(text, language="english")
     results = []
     for sentence in sentences:
         if not sentence.strip():
             continue
-        label, perplexity, likelihood = await asyncio.to_thread(classify_text, sentence)
         results.append({
             "sentence": sentence,
             "label": label,
             "perplexity": round(perplexity, 2),
-            "ai_likelihood": likelihood
         })
     return {"analysis": results}
-# Synchronous call
 def classify(text: str):
     return classify_text(text)

+import os
 import asyncio
+import logging
+from io import BytesIO
+from fastapi import HTTPException, UploadFile, status, Depends
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 from nltk.tokenize import sent_tokenize
+from .inferencer import classify_text
+from .preprocess import parse_docx, parse_pdf, parse_txt
 security = HTTPBearer()
# Verify Bearer token from Authorization header
async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token supplied with the request.

    The presented token is checked against the ``MY_SECRET_TOKEN``
    environment variable.

    Args:
        credentials: Bearer credentials resolved by the ``security`` dependency.

    Returns:
        The presented token string when it matches the configured secret.

    Raises:
        HTTPException: 403 when no secret is configured or the token does
            not match it.
    """
    # Function-scope import: only this check needs the module.
    import secrets

    token = credentials.credentials
    expected_token = os.getenv("MY_SECRET_TOKEN")
    # Fail closed when the secret is missing or empty, and use a
    # constant-time comparison so timing does not reveal how much of the
    # token matched. Encoding to bytes lets compare_digest accept tokens
    # containing non-ASCII characters.
    token_ok = bool(expected_token) and secrets.compare_digest(
        token.encode("utf-8"), expected_token.encode("utf-8")
    )
    if not token_ok:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Invalid or expired token"
        )
    return token
# Classify plain text input
async def handle_text_analysis(text: str):
    """Classify a block of plain text.

    Args:
        text: Raw input text; surrounding whitespace is ignored.

    Returns:
        A dict with the predicted label under ``"result"``, the perplexity
        rounded to two decimals, and the AI-likelihood value.

    Raises:
        HTTPException: 400 when the text has fewer than 10
            whitespace-separated words, 413 when it exceeds 10,000
            characters.
    """
    stripped = text.strip()
    # An empty string splits into zero words, so this also rejects blank input.
    word_count = len(stripped.split())
    if word_count < 10:
        raise HTTPException(status_code=400, detail="Text must contain at least 10 words")
    if len(stripped) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
    # classify_text is called in a worker thread via asyncio.to_thread.
    outcome = await asyncio.to_thread(classify_text, stripped)
    label, perplexity, ai_likelihood = outcome
    response = {
        "result": label,
        "perplexity": round(perplexity, 2),
        "ai_likelihood": ai_likelihood,
    }
    return response
# Extract text from uploaded files (.docx, .pdf, .txt)
async def extract_file_contents(file: UploadFile) -> str:
    """Read an uploaded file and hand its bytes to the matching parser.

    Args:
        file: The uploaded file; its ``content_type`` selects the parser.

    Returns:
        Whatever the selected parser returns for the file's contents.

    Raises:
        HTTPException: 415 when the content type is not one of the three
            supported MIME types.
    """
    parsers = {
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": parse_docx,
        "application/pdf": parse_pdf,
        "text/plain": parse_txt,
    }
    # The upload is read before the type check, as in the original flow.
    raw_bytes = await file.read()
    parser = parsers.get(file.content_type)
    if parser is None:
        raise HTTPException(
            status_code=415,
            detail="Invalid file type. Only .docx, .pdf, and .txt are allowed."
        )
    return parser(BytesIO(raw_bytes))
+# Classify text from uploaded file
 async def handle_file_upload(file: UploadFile):
     try:
         file_contents = await extract_file_contents(file)
         if len(file_contents) > 10000:
             return {"message": "File contains more than 10,000 characters."}
         cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
         if not cleaned_text:
             raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
         label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, cleaned_text)
         return {
             "content": file_contents,
             "ai_likelihood": ai_likelihood
         }
     except Exception as e:
+        logging.error(f"Error processing file: {e}")
         raise HTTPException(status_code=500, detail="Error processing the file")
# Analyze each sentence in plain text input
async def handle_sentence_level_analysis(text: str):
    """Classify every sentence of the given text individually.

    Args:
        text: Raw input text; surrounding whitespace is ignored.

    Returns:
        A dict whose ``"analysis"`` key holds one entry per non-blank
        sentence, each with the sentence, its label, its perplexity rounded
        to two decimals, and its AI-likelihood value.

    Raises:
        HTTPException: 413 when the text exceeds 10,000 characters.
    """
    stripped = text.strip()
    if len(stripped) > 10000:
        raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
    analysis = []
    # Sentences are classified one after another, in document order.
    for sentence in sent_tokenize(stripped, language="english"):
        if sentence.strip():
            label, perplexity, ai_likelihood = await asyncio.to_thread(classify_text, sentence)
            entry = {
                "sentence": sentence,
                "label": label,
                "perplexity": round(perplexity, 2),
                "ai_likelihood": ai_likelihood,
            }
            analysis.append(entry)
    return {"analysis": analysis}
# Analyze each sentence from uploaded file
async def handle_file_sentence(file: UploadFile):
    """Run sentence-level analysis on the text of an uploaded file.

    Args:
        file: The uploaded file to extract text from.

    Returns:
        ``{"message": ...}`` when the extracted text exceeds 10,000
        characters; otherwise a dict with the extracted text under
        ``"content"`` merged with the sentence-level analysis result.

    Raises:
        HTTPException: 404 when the file is empty or whitespace-only; any
            HTTPException raised by the extraction or analysis helpers is
            passed through unchanged; 500 for any other failure.
    """
    try:
        file_contents = await extract_file_contents(file)
        if len(file_contents) > 10000:
            # NOTE(review): oversized files are reported in a normal response
            # body rather than as an HTTP error; kept so existing clients
            # see the same payload.
            return {"message": "File contains more than 10,000 characters."}
        cleaned_text = file_contents.replace("\n", " ").replace("\t", " ").strip()
        if not cleaned_text:
            raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
        result = await handle_sentence_level_analysis(cleaned_text)
        return {
            "content": file_contents,
            **result
        }
    except HTTPException:
        # Deliberate client-facing errors keep their own status code instead
        # of being rewritten into a generic 500 by the handler below.
        raise
    except Exception as e:
        # logging.exception records the traceback along with the message.
        logging.exception("Error processing file: %s", e)
        raise HTTPException(status_code=500, detail="Error processing the file") from e
# Optional synchronous helper function
def classify(text: str):
    """Classify ``text`` synchronously by delegating to ``classify_text``.

    Args:
        text: The text to classify.

    Returns:
        The value returned by ``classify_text``, unchanged.
    """
    outcome = classify_text(text)
    return outcome

features/text_classifier/routes.py CHANGED Viewed

@@ -9,7 +9,7 @@ from .controller import (
     handle_text_analysis,
     handle_file_upload,
     handle_sentence_level_analysis,
-    handle_file_sentance,
     verify_token
 )
@@ -40,7 +40,7 @@ async def analyze_sentences(request: Request, data: TextInput, token: str = Depe
 @router.post("/analyse-sentance-file")
 @limiter.limit(ACCESS_RATE)
 async def analyze_sentance_file(request: Request, file: UploadFile = File(...), token: str = Depends(verify_token)):
-    return await handle_file_sentance(file)
 @router.get("/health")
 @limiter.limit(ACCESS_RATE)

     handle_text_analysis,
     handle_file_upload,
     handle_sentence_level_analysis,
+    handle_file_sentence,
     verify_token
 )
 @router.post("/analyse-sentance-file")
 @limiter.limit(ACCESS_RATE)
 async def analyze_sentance_file(request: Request, file: UploadFile = File(...), token: str = Depends(verify_token)):
+    return await handle_file_sentence(file)
 @router.get("/health")
 @limiter.limit(ACCESS_RATE)