axyut committed on
Commit
517c60f
·
unverified ·
2 Parent(s): 3d9166e 90debd3

Merge pull request #10 from cyberalertnepal/PujanDev

Browse files

feat: reduce load while model is first processing the data

app.py CHANGED
@@ -6,11 +6,7 @@ import nltk
6
 
7
  @asynccontextmanager
8
  async def lifespan(app: FastAPI):
9
- # Ensure punkt is available
10
- nltk.download("punkt")
11
 
12
- nltk.download('punkt_tab')
13
-
14
  # Your model warmup
15
  warmup()
16
  yield
 
6
 
7
  @asynccontextmanager
8
  async def lifespan(app: FastAPI):
 
 
9
 
 
 
10
  # Your model warmup
11
  warmup()
12
  yield
features/text_classifier/model_loader.py CHANGED
@@ -5,7 +5,7 @@ from transformers import GPT2LMHeadModel, GPT2TokenizerFast, GPT2Config
5
  from huggingface_hub import snapshot_download
6
  import torch
7
  from dotenv import load_dotenv
8
-
9
  load_dotenv()
10
  REPO_ID = "Pujan-Dev/AI-Text-Detector"
11
  MODEL_DIR = "./models"
@@ -18,6 +18,11 @@ _model, _tokenizer = None, None
18
 
19
  def warmup():
20
  global _model, _tokenizer
 
 
 
 
 
21
  download_model_repo()
22
  _model, _tokenizer = load_model()
23
  logging.info("Its ready")
 
5
  from huggingface_hub import snapshot_download
6
  import torch
7
  from dotenv import load_dotenv
8
+ import nltk
9
  load_dotenv()
10
  REPO_ID = "Pujan-Dev/AI-Text-Detector"
11
  MODEL_DIR = "./models"
 
18
 
19
  def warmup():
20
  global _model, _tokenizer
21
+ # Ensure punkt is available
22
+ nltk.download("punkt")
23
+
24
+ nltk.download('punkt_tab')
25
+
26
  download_model_repo()
27
  _model, _tokenizer = load_model()
28
  logging.info("Its ready")
features/text_classifier/preprocess.py CHANGED
@@ -3,8 +3,6 @@ import docx
3
  from io import BytesIO
4
  import logging
5
  from fastapi import HTTPException
6
-
7
-
8
  def parse_docx(file: BytesIO):
9
  doc = docx.Document(file)
10
  text = ""
@@ -23,7 +21,8 @@ def parse_pdf(file: BytesIO):
23
  return text
24
  except Exception as e:
25
  logging.error(f"Error while processing PDF: {str(e)}")
26
- raise HTTPException(status_code=500, detail="Error processing PDF file")
 
27
 
28
 
29
  def parse_txt(file: BytesIO):
 
3
  from io import BytesIO
4
  import logging
5
  from fastapi import HTTPException
 
 
6
  def parse_docx(file: BytesIO):
7
  doc = docx.Document(file)
8
  text = ""
 
21
  return text
22
  except Exception as e:
23
  logging.error(f"Error while processing PDF: {str(e)}")
24
+ raise HTTPException(
25
+ status_code=500, detail="Error processing PDF file")
26
 
27
 
28
  def parse_txt(file: BytesIO):
features/text_classifier/routes.py CHANGED
@@ -1,38 +1,43 @@
1
- from fastapi import APIRouter, Depends, HTTPException, UploadFile, File
2
- from fastapi.security import HTTPBearer
3
  from pydantic import BaseModel
4
- from .controller import handle_text_analysis, handle_file_upload, handle_sentence_level_analysis,handle_file_sentance
 
5
 
6
  router = APIRouter()
7
- bearer_scheme = HTTPBearer()
8
-
9
 
10
  class TextInput(BaseModel):
11
  text: str
12
 
 
 
 
 
 
 
 
 
13
 
14
  @router.post("/analyse")
15
- async def analyze(data: TextInput, token: str = Depends(bearer_scheme)):
16
  return await handle_text_analysis(data.text)
17
 
18
-
19
  @router.post("/upload")
20
- async def upload_file(
21
- file: UploadFile = File(...), token: str = Depends(bearer_scheme)
22
- ):
23
  return await handle_file_upload(file)
24
 
25
-
26
- @router.get("/health")
27
- def health():
28
- return {"status": "ok"}
29
-
30
  @router.post("/analyse-sentences")
31
- async def analyze_sentences(data: TextInput, token: str = Depends(bearer_scheme)):
32
  if not data.text:
33
  raise HTTPException(status_code=400, detail="Missing 'text' in request body")
34
  return await handle_sentence_level_analysis(data.text)
35
 
36
  @router.post("/analyse-sentance-file")
37
- async def AnalyzeSentanceFile(file:UploadFile=File(...),token:str=Depends(bearer_scheme)):
38
  return await handle_file_sentance(file)
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, status
2
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
3
  from pydantic import BaseModel
4
+ from .controller import handle_text_analysis, handle_file_upload, handle_sentence_level_analysis, handle_file_sentance
5
+ import os
6
 
7
  router = APIRouter()
8
+ security = HTTPBearer()
 
9
 
10
  class TextInput(BaseModel):
11
  text: str
12
 
13
+ async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
14
+ token = credentials.credentials
15
+ if token != os.getenv("MY_SECRET_TOKEN"): # Replace with your actual secret
16
+ raise HTTPException(
17
+ status_code=status.HTTP_403_FORBIDDEN,
18
+ detail="Invalid or expired token"
19
+ )
20
+ return token
21
 
22
  @router.post("/analyse")
23
+ async def analyze(data: TextInput, token: str = Depends(verify_token)):
24
  return await handle_text_analysis(data.text)
25
 
 
26
  @router.post("/upload")
27
+ async def upload_file(file: UploadFile = File(...), token: str = Depends(verify_token)):
 
 
28
  return await handle_file_upload(file)
29
 
 
 
 
 
 
30
  @router.post("/analyse-sentences")
31
+ async def analyze_sentences(data: TextInput, token: str = Depends(verify_token)):
32
  if not data.text:
33
  raise HTTPException(status_code=400, detail="Missing 'text' in request body")
34
  return await handle_sentence_level_analysis(data.text)
35
 
36
  @router.post("/analyse-sentance-file")
37
+ async def AnalyzeSentanceFile(file: UploadFile = File(...), token: str = Depends(verify_token)):
38
  return await handle_file_sentance(file)
39
+
40
+ @router.get("/health")
41
+ def health():
42
+ return {"status": "ok"}
43
+