Pujan-Dev committed on
Commit
582b4bf
·
1 Parent(s): 31fda96

Changed: Nepali text classifier with new models, multi-model support, and improved endpoints

Browse files
app.py CHANGED
@@ -20,7 +20,15 @@ from features.text_classifier.routes import router as text_classifier_router
20
  warnings.filterwarnings("ignore")
21
  limiter = Limiter(key_func=get_remote_address, default_limits=[ACCESS_RATE])
22
 
23
- app = FastAPI()
 
 
 
 
 
 
 
 
24
  # added the robots.txt
25
  # Set up SlowAPI
26
  app.state.limiter = limiter
@@ -38,13 +46,13 @@ app.add_exception_handler(
38
  app.add_middleware(SlowAPIMiddleware)
39
 
40
  # Include your routes
41
- app.include_router(text_classifier_router, prefix="/text")
42
- app.include_router(nepali_text_classifier_router, prefix="/NP")
43
- app.include_router(image_classifier_router, prefix="/AI-image")
44
- app.include_router(image_edit_detector_router, prefix="/detect")
45
 
46
 
47
- @app.get("/")
48
  @limiter.limit(ACCESS_RATE)
49
  async def root(request: Request):
50
  return {
 
20
  warnings.filterwarnings("ignore")
21
  limiter = Limiter(key_func=get_remote_address, default_limits=[ACCESS_RATE])
22
 
23
+ openapi_tags = [
24
+ {"name": "English Text Classifier", "description": "Endpoints for English AI-vs-human text analysis."},
25
+ {"name": "Nepali Text Classifier", "description": "Endpoints for Nepali AI-vs-human text analysis."},
26
+ {"name": "AI Image Classifier", "description": "Endpoints for AI-vs-human image classification."},
27
+ {"name": "Image Edit Detection", "description": "Endpoints for edited/forged image detection."},
28
+ {"name": "System", "description": "Health and root endpoints."},
29
+ ]
30
+
31
+ app = FastAPI(openapi_tags=openapi_tags)
32
  # added the robots.txt
33
  # Set up SlowAPI
34
  app.state.limiter = limiter
 
46
  app.add_middleware(SlowAPIMiddleware)
47
 
48
  # Include your routes
49
+ app.include_router(text_classifier_router, prefix="/text", tags=["English Text Classifier"])
50
+ app.include_router(nepali_text_classifier_router, prefix="/NP", tags=["Nepali Text Classifier"])
51
+ app.include_router(image_classifier_router, prefix="/AI-image", tags=["AI Image Classifier"])
52
+ app.include_router(image_edit_detector_router, prefix="/detect", tags=["Image Edit Detection"])
53
 
54
 
55
+ @app.get("/", tags=["System"])
56
  @limiter.limit(ACCESS_RATE)
57
  async def root(request: Request):
58
  return {
features/nepali_text_classifier/controller.py CHANGED
@@ -1,4 +1,5 @@
1
  import asyncio
 
2
  from io import BytesIO
3
  from fastapi import HTTPException, UploadFile, status, Depends
4
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
@@ -9,6 +10,13 @@ import re
9
 
10
  security = HTTPBearer()
11
 
 
 
 
 
 
 
 
12
  def contains_english(text: str) -> bool:
13
  # Remove escape characters
14
  cleaned = text.replace("\n", "").replace("\t", "")
@@ -25,7 +33,7 @@ async def verify_token(credentials: HTTPAuthorizationCredentials = Depends(secur
25
  )
26
  return token
27
 
28
- async def nepali_text_analysis(text: str):
29
  end_symbol_for_NP_text(text)
30
  words = text.split()
31
  if len(words) < 10:
@@ -33,7 +41,8 @@ async def nepali_text_analysis(text: str):
33
  if len(text) > 10000:
34
  raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
35
 
36
- result = await asyncio.to_thread(classify_text, text)
 
37
 
38
  return result
39
 
@@ -51,7 +60,7 @@ async def extract_file_contents(file:UploadFile)-> str:
51
  else:
52
  raise HTTPException(status_code=415,detail="Invalid file type. Only .docx,.pdf and .txt are allowed")
53
 
54
- async def handle_file_upload(file: UploadFile):
55
  try:
56
  file_contents = await extract_file_contents(file)
57
  end_symbol_for_NP_text(file_contents)
@@ -62,7 +71,8 @@ async def handle_file_upload(file: UploadFile):
62
  if not cleaned_text:
63
  raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
64
 
65
- result = await asyncio.to_thread(classify_text, cleaned_text)
 
66
  return result
67
  except Exception as e:
68
  logging.error(f"Error processing file: {e}")
@@ -70,7 +80,7 @@ async def handle_file_upload(file: UploadFile):
70
 
71
 
72
 
73
- async def handle_sentence_level_analysis(text: str):
74
  text = text.strip()
75
  if len(text) > 10000:
76
  raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
@@ -79,11 +89,12 @@ async def handle_sentence_level_analysis(text: str):
79
 
80
  # Split text into sentences
81
  sentences = [s.strip() + "।" for s in text.split("।") if s.strip()]
 
82
 
83
  results = []
84
  for sentence in sentences:
85
  end_symbol_for_NP_text(sentence)
86
- result = await asyncio.to_thread(classify_text, sentence)
87
  results.append({
88
  "text": sentence,
89
  "result": result["label"],
@@ -93,7 +104,7 @@ async def handle_sentence_level_analysis(text: str):
93
  return {"analysis": results}
94
 
95
 
96
- async def handle_file_sentence(file:UploadFile):
97
  try:
98
  file_contents = await extract_file_contents(file)
99
  if len(file_contents) > 10000:
@@ -106,12 +117,13 @@ async def handle_file_sentence(file:UploadFile):
106
 
107
  # Split text into sentences
108
  sentences = [s.strip() + "।" for s in cleaned_text.split("।") if s.strip()]
 
109
 
110
  results = []
111
  for sentence in sentences:
112
  end_symbol_for_NP_text(sentence)
113
 
114
- result = await asyncio.to_thread(classify_text, sentence)
115
  results.append({
116
  "text": sentence,
117
  "result": result["label"],
@@ -125,6 +137,7 @@ async def handle_file_sentence(file:UploadFile):
125
  raise HTTPException(status_code=500, detail="Error processing the file")
126
 
127
 
128
- def classify(text: str):
129
- return classify_text(text)
 
130
 
 
1
  import asyncio
2
+ import logging
3
  from io import BytesIO
4
  from fastapi import HTTPException, UploadFile, status, Depends
5
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 
10
 
11
  security = HTTPBearer()
12
 
13
+
14
+ def parse_selected_models(models: str | None) -> list[str] | None:
15
+ if not models:
16
+ return None
17
+ parsed = [m.strip() for m in models.split(",") if m.strip()]
18
+ return parsed[:2] if parsed else None
19
+
20
  def contains_english(text: str) -> bool:
21
  # Remove escape characters
22
  cleaned = text.replace("\n", "").replace("\t", "")
 
33
  )
34
  return token
35
 
36
+ async def nepali_text_analysis(text: str, models: str | None = None):
37
  end_symbol_for_NP_text(text)
38
  words = text.split()
39
  if len(words) < 10:
 
41
  if len(text) > 10000:
42
  raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
43
 
44
+ selected_models = parse_selected_models(models)
45
+ result = await asyncio.to_thread(classify_text, text, selected_models, 2)
46
 
47
  return result
48
 
 
60
  else:
61
  raise HTTPException(status_code=415,detail="Invalid file type. Only .docx,.pdf and .txt are allowed")
62
 
63
+ async def handle_file_upload(file: UploadFile, models: str | None = None):
64
  try:
65
  file_contents = await extract_file_contents(file)
66
  end_symbol_for_NP_text(file_contents)
 
71
  if not cleaned_text:
72
  raise HTTPException(status_code=404, detail="The file is empty or only contains whitespace.")
73
 
74
+ selected_models = parse_selected_models(models)
75
+ result = await asyncio.to_thread(classify_text, cleaned_text, selected_models, 2)
76
  return result
77
  except Exception as e:
78
  logging.error(f"Error processing file: {e}")
 
80
 
81
 
82
 
83
+ async def handle_sentence_level_analysis(text: str, models: str | None = None):
84
  text = text.strip()
85
  if len(text) > 10000:
86
  raise HTTPException(status_code=413, detail="Text must be less than 10,000 characters")
 
89
 
90
  # Split text into sentences
91
  sentences = [s.strip() + "।" for s in text.split("।") if s.strip()]
92
+ selected_models = parse_selected_models(models)
93
 
94
  results = []
95
  for sentence in sentences:
96
  end_symbol_for_NP_text(sentence)
97
+ result = await asyncio.to_thread(classify_text, sentence, selected_models, 2)
98
  results.append({
99
  "text": sentence,
100
  "result": result["label"],
 
104
  return {"analysis": results}
105
 
106
 
107
+ async def handle_file_sentence(file:UploadFile, models: str | None = None):
108
  try:
109
  file_contents = await extract_file_contents(file)
110
  if len(file_contents) > 10000:
 
117
 
118
  # Split text into sentences
119
  sentences = [s.strip() + "।" for s in cleaned_text.split("।") if s.strip()]
120
+ selected_models = parse_selected_models(models)
121
 
122
  results = []
123
  for sentence in sentences:
124
  end_symbol_for_NP_text(sentence)
125
 
126
+ result = await asyncio.to_thread(classify_text, sentence, selected_models, 2)
127
  results.append({
128
  "text": sentence,
129
  "result": result["label"],
 
137
  raise HTTPException(status_code=500, detail="Error processing the file")
138
 
139
 
140
+ def classify(text: str, models: str | None = None):
141
+ selected_models = parse_selected_models(models)
142
+ return classify_text(text, selected_models, 2)
143
 
features/nepali_text_classifier/inferencer.py CHANGED
@@ -1,23 +1,89 @@
1
- import torch
2
- from .model_loader import get_model_tokenizer
3
- import torch.nn.functional as F
4
 
5
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
6
 
 
7
 
8
- def classify_text(text: str):
9
- model, tokenizer = get_model_tokenizer()
10
- inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
11
- inputs = {k: v.to(device) for k, v in inputs.items()}
12
 
13
- with torch.no_grad():
14
- outputs = model(**inputs)
15
- logits = outputs if isinstance(outputs, torch.Tensor) else outputs.logits
16
- probs = F.softmax(logits, dim=1)
17
- pred = torch.argmax(probs, dim=1).item()
18
- prob_percent = probs[0][pred].item() * 100
19
 
20
- return {"label": "Human" if pred == 0 else "AI", "confidence": round(prob_percent, 2)}
21
 
 
 
 
 
 
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
 
 
2
 
3
+ from scipy.sparse import csr_matrix, hstack
4
 
5
+ from .model_loader import get_default_top_models, load_artifacts
6
 
 
 
 
 
7
 
8
+ TOP_K_MODELS = 2
 
 
 
 
 
9
 
 
10
 
11
+ def normalize_nepali_text(text: str) -> str:
12
+ text = str(text)
13
+ text = re.sub(r"https?://\S+|www\.\S+", " ", text)
14
+ text = re.sub(r"[^\u0900-\u097F\s।!?,]", " ", text)
15
+ return re.sub(r"\s+", " ", text).strip()
16
 
17
 
18
+ def _select_models(models, model_names=None, top_k=2):
19
+ _ = model_names
20
+ ranked = [name for name in get_default_top_models(top_k=top_k) if name in models]
21
+ if ranked:
22
+ return ranked[:top_k]
23
+ return list(models.keys())[:top_k]
24
+
25
+
26
+ def classify_text(text: str, model_names=None, top_k: int = 2):
27
+ artifacts = load_artifacts()
28
+ models = artifacts["models"]
29
+ if not models:
30
+ return {"error": "No models available for inference"}
31
+
32
+ cleaned_text = normalize_nepali_text(text)
33
+ word_features = artifacts["word_vectorizer"].transform([cleaned_text])
34
+ char_features = artifacts["char_vectorizer"].transform([cleaned_text])
35
+ rich_features = artifacts["rich_transformer"].transform([cleaned_text])
36
+ features = hstack([word_features, char_features, csr_matrix(rich_features)])
37
+
38
+ selected_names = _select_models(models, model_names=model_names, top_k=TOP_K_MODELS)
39
+ dense_models = {"Linear SVC"}
40
+
41
+ per_model = []
42
+ ai_votes = 0
43
+ human_votes = 0
44
+ confidence_sum = 0.0
45
+
46
+ for name in selected_names:
47
+ model = models[name]
48
+ model_input = features.toarray() if name in dense_models else features
49
+ pred = int(model.predict(model_input)[0])
50
+ confidence = None
51
+ if hasattr(model, "predict_proba"):
52
+ probs = model.predict_proba(model_input)
53
+ confidence = float(probs[0][pred])
54
+ elif hasattr(model, "decision_function"):
55
+ score = float(model.decision_function(model_input)[0])
56
+ confidence = abs(score) / (1.0 + abs(score))
57
+ else:
58
+ confidence = 0.5
59
+
60
+ if pred == 1:
61
+ ai_votes += 1
62
+ label = "AI"
63
+ else:
64
+ human_votes += 1
65
+ label = "Human"
66
+
67
+ confidence_sum += confidence
68
+ per_model.append(
69
+ {
70
+ "model": name,
71
+ "label": label,
72
+ "confidence": round(confidence * 100, 2),
73
+ }
74
+ )
75
+
76
+ final_label = "AI" if ai_votes > human_votes else "Human"
77
+ if ai_votes == human_votes:
78
+ final_label = per_model[0]["label"]
79
+
80
+ avg_conf = confidence_sum / max(len(per_model), 1)
81
+ return {
82
+ "label": final_label,
83
+ "confidence": round(avg_conf * 100, 2),
84
+ "selected_models": selected_names,
85
+ "model_predictions": per_model,
86
+ "votes": {"AI": ai_votes, "Human": human_votes},
87
+ "available_models": list(models.keys()),
88
+ "unavailable_models": artifacts["unavailable_models"],
89
+ }
features/nepali_text_classifier/model_loader.py CHANGED
@@ -1,54 +1,165 @@
1
- import os
2
- import shutil
3
- import torch
4
- import torch.nn as nn
5
- import torch.nn.functional as F
6
  import logging
7
- from huggingface_hub import snapshot_download
8
- from transformers import AutoTokenizer, AutoModel
9
-
10
- # Configs
11
- REPO_ID = "can-org/Nepali-AI-VS-HUMAN"
12
- BASE_DIR = "./np_text_model"
13
- TOKENIZER_DIR = os.path.join(BASE_DIR, "classifier") # <- update this to match your uploaded folder
14
- WEIGHTS_PATH = os.path.join(BASE_DIR, "model_95_acc.pth") # <- change to match actual uploaded weight
15
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
16
-
17
- # Define model class
18
- class XLMRClassifier(nn.Module):
19
- def __init__(self):
20
- super(XLMRClassifier, self).__init__()
21
- self.bert = AutoModel.from_pretrained("xlm-roberta-base")
22
- self.classifier = nn.Linear(self.bert.config.hidden_size, 2)
23
-
24
- def forward(self, input_ids, attention_mask):
25
- outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
26
- cls_output = outputs.last_hidden_state[:, 0, :]
27
- return self.classifier(cls_output)
28
-
29
- # Globals for caching
30
- _model = None
31
- _tokenizer = None
32
-
33
- def download_model_repo():
34
- if os.path.exists(BASE_DIR) and os.path.isdir(BASE_DIR):
35
- logging.info("Model already downloaded.")
36
- return
37
- snapshot_path = snapshot_download(repo_id=REPO_ID)
38
- os.makedirs(BASE_DIR, exist_ok=True)
39
- shutil.copytree(snapshot_path, BASE_DIR, dirs_exist_ok=True)
40
-
41
- def load_model():
42
- download_model_repo()
43
- tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
44
- model = XLMRClassifier().to(device)
45
- model.load_state_dict(torch.load(WEIGHTS_PATH, map_location=device))
46
- model.eval()
47
- return model, tokenizer
48
-
49
- def get_model_tokenizer():
50
- global _model, _tokenizer
51
- if _model is None or _tokenizer is None:
52
- _model, _tokenizer = load_model()
53
- return _model, _tokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import logging
2
+ import os
3
+ import pickle
4
+ import re
5
+ from functools import lru_cache
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from config import Config
12
+
13
+
14
+ LOGGER = logging.getLogger(__name__)
15
+
16
+
17
+ MODEL_FILES = {
18
+ "Logistic Regression": "Logistic_Regression.pkl",
19
+ "Random Forest": "Random_Forest.pkl",
20
+ "Gradient Boosting": "Gradient_Boosting.pkl",
21
+ "Linear SVC": "Linear_SVC.pkl",
22
+ "Ridge Classifier": "Ridge_Classifier.pkl",
23
+ "Multinomial NB": "Multinomial_NB.pkl",
24
+ "Bernoulli NB": "Bernoulli_NB.pkl",
25
+ "K-Nearest Neighbors": "KNearest_Neighbors.pkl",
26
+ }
27
+
28
+ # KNN artifact in this repo is very large; keep API responsive by skipping it.
29
+ SKIP_MODELS = {"K-Nearest Neighbors"}
30
+
31
+ # Ranked by validation accuracy from final_model/final_results.csv
32
+ DEFAULT_MODEL_RANKING = [
33
+ "Gradient Boosting",
34
+ "Logistic Regression",
35
+ "Linear SVC",
36
+ "Ridge Classifier",
37
+ "Bernoulli NB",
38
+ "Random Forest",
39
+ "Multinomial NB",
40
+ ]
41
+
42
+
43
+ class NepaliRichFeatures:
44
+ """Burstiness + stylometry feature extractor used during model training."""
45
+
46
+ @staticmethod
47
+ def extract_burstiness(text: str) -> dict:
48
+ sentences = [s.strip() for s in re.split(r"[।!?]", str(text)) if s.strip()]
49
+ if not sentences:
50
+ return {
51
+ "burst_mean": 0.0,
52
+ "burst_std": 0.0,
53
+ "burst_max": 0.0,
54
+ "burst_min": 0.0,
55
+ "burst_range": 0.0,
56
+ }
57
+ lengths = [len(s.split()) for s in sentences]
58
+ return {
59
+ "burst_mean": float(np.mean(lengths)),
60
+ "burst_std": float(np.std(lengths)),
61
+ "burst_max": float(np.max(lengths)),
62
+ "burst_min": float(np.min(lengths)),
63
+ "burst_range": float(np.max(lengths) - np.min(lengths)),
64
+ }
65
+
66
+ @staticmethod
67
+ def extract_stylometry(text: str) -> dict:
68
+ words = str(text).split()
69
+ num_words = max(len(words), 1)
70
+ num_chars = max(len(str(text)), 1)
71
+ num_sentences = max(len([s for s in re.split(r"[।!?]", str(text)) if s.strip()]), 1)
72
+ avg_word_len = float(np.mean([len(w) for w in words])) if words else 0.0
73
+ avg_sent_len = num_words / num_sentences
74
+ lexical_diversity = len(set(words)) / num_words
75
+ punct_count = str(text).count("।") + str(text).count("?") + str(text).count("!") + str(text).count(",")
76
+ punct_ratio = punct_count / num_chars
77
+ bigrams = [" ".join(words[i : i + 2]) for i in range(len(words) - 1)]
78
+ rep_bigram_ratio = (1.0 - len(set(bigrams)) / max(len(bigrams), 1)) if bigrams else 0.0
79
+ diacritic_count = sum(1 for c in str(text) if "\u093e" <= c <= "\u094d")
80
+ diacritic_ratio = diacritic_count / num_chars
81
+ return {
82
+ "num_words": num_words,
83
+ "num_chars": num_chars,
84
+ "num_sentences": num_sentences,
85
+ "avg_word_len": avg_word_len,
86
+ "avg_sent_len": avg_sent_len,
87
+ "lexical_diversity": lexical_diversity,
88
+ "punct_ratio": punct_ratio,
89
+ "rep_bigram_ratio": rep_bigram_ratio,
90
+ "diacritic_ratio": diacritic_ratio,
91
+ }
92
+
93
+ def transform(self, texts):
94
+ if isinstance(texts, str):
95
+ texts = [texts]
96
+ rows = []
97
+ for text in texts:
98
+ row = {**self.extract_burstiness(text), **self.extract_stylometry(text)}
99
+ rows.append(row)
100
+ return pd.DataFrame(rows).values.astype(np.float32)
101
+
102
+
103
+ def _repo_root() -> Path:
104
+ return Path(__file__).resolve().parents[2]
105
+
106
+
107
+ def resolve_model_dir() -> Path:
108
+ candidates = []
109
+ if Config.Nepali_model_folder:
110
+ candidates.append(Path(Config.Nepali_model_folder))
111
+ repo = _repo_root()
112
+ candidates.append(repo / "features" / "Model" / "Nepali_model")
113
+ candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
114
+
115
+ for path in candidates:
116
+ if path.exists() and path.is_dir() and (path / "word_vectorizer.pkl").exists():
117
+ return path
118
+ raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
119
+
120
+
121
+ @lru_cache(maxsize=1)
122
+ def load_artifacts():
123
+ model_dir = resolve_model_dir()
124
+ LOGGER.info("Loading Nepali artifacts from %s", model_dir)
125
+
126
+ models = {}
127
+ unavailable = {}
128
+ for model_name, file_name in MODEL_FILES.items():
129
+ if model_name in SKIP_MODELS:
130
+ unavailable[model_name] = "Skipped due to large artifact size"
131
+ continue
132
+ file_path = model_dir / file_name
133
+ if not file_path.exists():
134
+ unavailable[model_name] = "Missing model file"
135
+ continue
136
+ with open(file_path, "rb") as fp:
137
+ models[model_name] = pickle.load(fp)
138
+
139
+ with open(model_dir / "word_vectorizer.pkl", "rb") as fp:
140
+ word_vectorizer = pickle.load(fp)
141
+ with open(model_dir / "char_vectorizer.pkl", "rb") as fp:
142
+ char_vectorizer = pickle.load(fp)
143
+
144
+ rich_transformer = NepaliRichFeatures()
145
+ return {
146
+ "model_dir": str(model_dir),
147
+ "models": models,
148
+ "unavailable_models": unavailable,
149
+ "word_vectorizer": word_vectorizer,
150
+ "char_vectorizer": char_vectorizer,
151
+ "rich_transformer": rich_transformer,
152
+ }
153
+
154
+
155
+ def get_available_models():
156
+ artifacts = load_artifacts()
157
+ return list(artifacts["models"].keys())
158
+
159
 
160
+ def get_default_top_models(top_k: int = 2):
161
+ available = set(get_available_models())
162
+ ranked = [name for name in DEFAULT_MODEL_RANKING if name in available]
163
+ if not ranked:
164
+ return list(available)[:top_k]
165
+ return ranked[: max(1, top_k)]
features/nepali_text_classifier/routes.py CHANGED
@@ -15,27 +15,42 @@ security = HTTPBearer()
15
  # Input schema
16
  class TextInput(BaseModel):
17
  text: str
 
18
 
19
  @router.post("/analyse")
20
  @limiter.limit(ACCESS_RATE)
21
  async def analyse(request: Request, data: TextInput, token: str = Depends(security)):
22
- result = classify_text(data.text)
 
23
  return result
24
 
25
  @router.post("/upload")
26
  @limiter.limit(ACCESS_RATE)
27
- async def upload_file(request:Request,file:UploadFile=File(...),token:str=Depends(security)):
28
- return await handle_file_upload(file)
29
 
30
  @router.post("/analyse-sentences")
31
  @limiter.limit(ACCESS_RATE)
32
  async def upload_file(request:Request,data:TextInput,token:str=Depends(security)):
33
- return await handle_sentence_level_analysis(data.text)
 
34
 
35
  @router.post("/file-sentences-analyse")
36
  @limiter.limit(ACCESS_RATE)
37
- async def analyze_sentance_file(request: Request, file: UploadFile = File(...), token: str = Depends(security)):
38
- return await handle_file_sentence(file)
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
 
41
  @router.get("/health")
 
15
  # Input schema
16
  class TextInput(BaseModel):
17
  text: str
18
+ models: list[str] | None = None
19
 
20
  @router.post("/analyse")
21
  @limiter.limit(ACCESS_RATE)
22
  async def analyse(request: Request, data: TextInput, token: str = Depends(security)):
23
+ selected = ",".join(data.models[:2]) if data.models else None
24
+ result = await nepali_text_analysis(data.text, selected)
25
  return result
26
 
27
  @router.post("/upload")
28
  @limiter.limit(ACCESS_RATE)
29
+ async def upload_file(request:Request,file:UploadFile=File(...), models: str | None = None, token:str=Depends(security)):
30
+ return await handle_file_upload(file, models)
31
 
32
  @router.post("/analyse-sentences")
33
  @limiter.limit(ACCESS_RATE)
34
  async def upload_file(request:Request,data:TextInput,token:str=Depends(security)):
35
+ selected = ",".join(data.models[:2]) if data.models else None
36
+ return await handle_sentence_level_analysis(data.text, selected)
37
 
38
  @router.post("/file-sentences-analyse")
39
  @limiter.limit(ACCESS_RATE)
40
+ async def analyze_sentance_file(request: Request, file: UploadFile = File(...), models: str | None = None, token: str = Depends(security)):
41
+ return await handle_file_sentence(file, models)
42
+
43
+
44
+ @router.get("/models")
45
+ @limiter.limit(ACCESS_RATE)
46
+ def get_models(request: Request):
47
+ from .model_loader import get_available_models, get_default_top_models
48
+
49
+ available = get_available_models()
50
+ return {
51
+ "available_models": available,
52
+ "default_top_2": get_default_top_models(2),
53
+ }
54
 
55
 
56
  @router.get("/health")