Pujan-Dev committed on
Commit
8d28be7
·
1 Parent(s): 582b4bf

Added: new code changes

Browse files
features/ai_human_image_classifier/model_loader.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import joblib
4
  from pathlib import Path
5
  from huggingface_hub import hf_hub_download
 
6
 
7
  class ModelLoader:
8
  """
@@ -56,7 +57,7 @@ class ModelLoader:
56
  print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
57
  try:
58
  # Download the model file from the Hub. It returns the cached path.
59
- model_path = hf_hub_download(repo_id=repo_id, filename=filename)
60
  print(f"SVM model downloaded to: {model_path}")
61
 
62
  # Load the model from the downloaded path
 
3
  import joblib
4
  from pathlib import Path
5
  from huggingface_hub import hf_hub_download
6
+ from config import Config
7
 
8
  class ModelLoader:
9
  """
 
57
  print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
58
  try:
59
  # Download the model file from the Hub. It returns the cached path.
60
+ model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
61
  print(f"SVM model downloaded to: {model_path}")
62
 
63
  # Load the model from the downloaded path
features/image_classifier/model_loader.py CHANGED
@@ -9,6 +9,7 @@ from huggingface_hub import snapshot_download
9
  REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
10
  MODEL_DIR = "./IMG_Models"
11
  WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")
 
12
 
13
  # Device info (for logging)
14
  gpus = tf.config.list_physical_devices("GPU")
@@ -32,7 +33,7 @@ def download_model_repo():
32
  if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
33
  logging.info("Image model already exists, skipping download.")
34
  return
35
- snapshot_path = snapshot_download(repo_id=REPO_ID)
36
  os.makedirs(MODEL_DIR, exist_ok=True)
37
  shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
38
 
 
9
  REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
10
  MODEL_DIR = "./IMG_Models"
11
  WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")
12
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
13
 
14
  # Device info (for logging)
15
  gpus = tf.config.list_physical_devices("GPU")
 
33
  if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
34
  logging.info("Image model already exists, skipping download.")
35
  return
36
+ snapshot_path = snapshot_download(repo_id=REPO_ID, token=HF_TOKEN)
37
  os.makedirs(MODEL_DIR, exist_ok=True)
38
  shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
39
 
features/nepali_text_classifier/model_loader.py CHANGED
@@ -1,12 +1,13 @@
1
  import logging
2
- import os
3
  import pickle
4
  import re
 
5
  from functools import lru_cache
6
  from pathlib import Path
7
 
8
  import numpy as np
9
  import pandas as pd
 
10
 
11
  from config import Config
12
 
@@ -22,11 +23,15 @@ MODEL_FILES = {
22
  "Ridge Classifier": "Ridge_Classifier.pkl",
23
  "Multinomial NB": "Multinomial_NB.pkl",
24
  "Bernoulli NB": "Bernoulli_NB.pkl",
25
- "K-Nearest Neighbors": "KNearest_Neighbors.pkl",
26
  }
27
 
28
- # KNN artifact in this repo is very large; keep API responsive by skipping it.
29
- SKIP_MODELS = {"K-Nearest Neighbors"}
 
 
 
 
 
30
 
31
  # Ranked by validation accuracy from final_model/final_results.csv
32
  DEFAULT_MODEL_RANKING = [
@@ -104,17 +109,54 @@ def _repo_root() -> Path:
104
  return Path(__file__).resolve().parents[2]
105
 
106
 
107
- def resolve_model_dir() -> Path:
 
 
 
 
 
 
 
 
108
  candidates = []
109
- if Config.Nepali_model_folder:
110
- candidates.append(Path(Config.Nepali_model_folder))
111
  repo = _repo_root()
112
- candidates.append(repo / "features" / "Model" / "Nepali_model")
 
 
 
 
 
 
113
  candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
 
 
 
 
 
 
114
 
115
- for path in candidates:
116
- if path.exists() and path.is_dir() and (path / "word_vectorizer.pkl").exists():
 
 
 
 
 
 
 
 
 
 
 
117
  return path
 
 
 
 
 
 
 
 
118
  raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
119
 
120
 
 
1
  import logging
 
2
  import pickle
3
  import re
4
+ import shutil
5
  from functools import lru_cache
6
  from pathlib import Path
7
 
8
  import numpy as np
9
  import pandas as pd
10
+ from huggingface_hub import snapshot_download
11
 
12
  from config import Config
13
 
 
23
  "Ridge Classifier": "Ridge_Classifier.pkl",
24
  "Multinomial NB": "Multinomial_NB.pkl",
25
  "Bernoulli NB": "Bernoulli_NB.pkl",
 
26
  }
27
 
28
+ SKIP_MODELS = set()
29
+
30
+ REPO_ID = Config.REPO_ID_LANG
31
+ HF_TOKEN = Config.HF_TOKEN
32
+ NEPALI_SUBDIR = "Nepali_model"
33
+ REQUIRED_BASE_FILES = ("word_vectorizer.pkl", "char_vectorizer.pkl")
34
+
35
 
36
  # Ranked by validation accuracy from final_model/final_results.csv
37
  DEFAULT_MODEL_RANKING = [
 
109
  return Path(__file__).resolve().parents[2]
110
 
111
 
112
+ def _has_required_artifacts(path: Path) -> bool:
113
+ if not path.exists() or not path.is_dir():
114
+ return False
115
+ has_base = all((path / filename).exists() for filename in REQUIRED_BASE_FILES)
116
+ has_any_model = any((path / filename).exists() for filename in MODEL_FILES.values())
117
+ return has_base and has_any_model
118
+
119
+
120
+ def _candidate_model_dirs() -> list[Path]:
121
  candidates = []
 
 
122
  repo = _repo_root()
123
+
124
+ if Config.Nepali_model_folder:
125
+ custom = Path(Config.Nepali_model_folder)
126
+ candidates.extend([custom, custom / NEPALI_SUBDIR])
127
+
128
+ default_dir = repo / "features" / "Model" / "Nepali_model"
129
+ candidates.extend([default_dir, default_dir / NEPALI_SUBDIR])
130
  candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
131
+ return candidates
132
+
133
+
134
+ def _download_nepali_artifacts() -> None:
135
+ if not REPO_ID:
136
+ raise ValueError("English_model repo id is not configured")
137
 
138
+ repo = _repo_root()
139
+ target_dir = Path(Config.Nepali_model_folder) if Config.Nepali_model_folder else repo / "features" / "Model" / "Nepali_model"
140
+
141
+ snapshot_path = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
142
+ source_dir = snapshot_path / NEPALI_SUBDIR if (snapshot_path / NEPALI_SUBDIR).is_dir() else snapshot_path
143
+
144
+ target_dir.mkdir(parents=True, exist_ok=True)
145
+ shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
146
+
147
+
148
+ def resolve_model_dir() -> Path:
149
+ for path in _candidate_model_dirs():
150
+ if _has_required_artifacts(path):
151
  return path
152
+
153
+ LOGGER.info("Nepali artifacts not found locally; downloading from %s", REPO_ID)
154
+ _download_nepali_artifacts()
155
+
156
+ for path in _candidate_model_dirs():
157
+ if _has_required_artifacts(path):
158
+ return path
159
+
160
  raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
161
 
162
 
features/real_forged_classifier/model_loader.py CHANGED
@@ -2,6 +2,7 @@ import torch
2
  from pathlib import Path
3
  from huggingface_hub import hf_hub_download
4
  from model import FFTCNN # Import the model architecture
 
5
 
6
  class ModelLoader:
7
  """
@@ -35,7 +36,7 @@ class ModelLoader:
35
  print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
36
  try:
37
  # Download the model file from the Hub. It returns the cached path.
38
- model_path = hf_hub_download(repo_id=repo_id, filename=filename)
39
  print(f"Model downloaded to: {model_path}")
40
 
41
  # Initialize the model architecture
 
2
  from pathlib import Path
3
  from huggingface_hub import hf_hub_download
4
  from model import FFTCNN # Import the model architecture
5
+ from config import Config
6
 
7
  class ModelLoader:
8
  """
 
36
  print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
37
  try:
38
  # Download the model file from the Hub. It returns the cached path.
39
+ model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
40
  print(f"Model downloaded to: {model_path}")
41
 
42
  # Initialize the model architecture
features/text_classifier/model_loader.py CHANGED
@@ -1,6 +1,5 @@
1
  import json
2
  import logging
3
- import os
4
  import pickle
5
  import shutil
6
  from pathlib import Path
@@ -12,17 +11,41 @@ from config import Config
12
 
13
  REPO_ID = Config.REPO_ID_LANG
14
  MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
 
 
15
 
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
- _model, _tokenizer = None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def warmup():
21
  logging.info("Warming up model...")
22
  if MODEL_DIR is None:
23
  raise ValueError("LANG_MODEL is not configured")
24
- if MODEL_DIR.exists() and MODEL_DIR.is_dir():
25
- logging.info("Model already exists, skipping download.")
26
  return
27
  download_model_repo()
28
 
@@ -30,34 +53,46 @@ def warmup():
30
  def download_model_repo():
31
  if MODEL_DIR is None:
32
  raise ValueError("LANG_MODEL is not configured")
33
- if MODEL_DIR.exists() and MODEL_DIR.is_dir():
34
- logging.info("Model already exists, skipping download.")
 
 
35
  return
36
- snapshot_path = snapshot_download(repo_id=REPO_ID)
37
- os.makedirs(MODEL_DIR, exist_ok=True)
38
- shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
 
39
 
40
 
41
  def load_model():
42
  if MODEL_DIR is None:
43
  raise ValueError("LANG_MODEL is not configured")
44
-
45
- with open(MODEL_DIR / "classifier.pkl", "rb") as f:
 
 
 
 
 
 
 
 
 
46
  loaded_classifier = pickle.load(f)
47
 
48
- with open(MODEL_DIR / "scaler.pkl", "rb") as f:
49
  loaded_scaler = pickle.load(f)
50
 
51
- with open(MODEL_DIR / "word_vectorizer.pkl", "rb") as f:
52
  loaded_word_vectorizer = pickle.load(f)
53
 
54
- with open(MODEL_DIR / "char_vectorizer.pkl", "rb") as f:
55
  loaded_char_vectorizer = pickle.load(f)
56
 
57
- with open(MODEL_DIR / "feature_names.json", "r") as f:
58
  loaded_features = json.load(f)
59
 
60
- with open(MODEL_DIR / "metadata.json", "r") as f:
61
  loaded_metadata = json.load(f)
62
  return (
63
  loaded_classifier,
 
1
  import json
2
  import logging
 
3
  import pickle
4
  import shutil
5
  from pathlib import Path
 
11
 
12
  REPO_ID = Config.REPO_ID_LANG
13
  MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
14
+ HF_TOKEN = Config.HF_TOKEN
15
+ ENGLISH_SUBDIR = "English_model"
16
 
17
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+
19
+ REQUIRED_FILES = (
20
+ "classifier.pkl",
21
+ "scaler.pkl",
22
+ "word_vectorizer.pkl",
23
+ "char_vectorizer.pkl",
24
+ "feature_names.json",
25
+ "metadata.json",
26
+ )
27
+
28
+
29
+ def _has_required_artifacts(model_dir: Path) -> bool:
30
+ if not model_dir.exists() or not model_dir.is_dir():
31
+ return False
32
+ return all((model_dir / filename).exists() for filename in REQUIRED_FILES)
33
+
34
+
35
+ def _resolve_artifact_dir(base_dir: Path) -> Path | None:
36
+ candidates = [base_dir, base_dir / ENGLISH_SUBDIR]
37
+ for candidate in candidates:
38
+ if _has_required_artifacts(candidate):
39
+ return candidate
40
+ return None
41
 
42
 
43
  def warmup():
44
  logging.info("Warming up model...")
45
  if MODEL_DIR is None:
46
  raise ValueError("LANG_MODEL is not configured")
47
+ if _resolve_artifact_dir(MODEL_DIR):
48
+ logging.info("Model artifacts already exist, skipping download.")
49
  return
50
  download_model_repo()
51
 
 
53
  def download_model_repo():
54
  if MODEL_DIR is None:
55
  raise ValueError("LANG_MODEL is not configured")
56
+ if not REPO_ID:
57
+ raise ValueError("English_model repo id is not configured")
58
+ if _resolve_artifact_dir(MODEL_DIR):
59
+ logging.info("Model artifacts already exist, skipping download.")
60
  return
61
+ snapshot_path = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
62
+ source_dir = snapshot_path / ENGLISH_SUBDIR if (snapshot_path / ENGLISH_SUBDIR).is_dir() else snapshot_path
63
+ MODEL_DIR.mkdir(parents=True, exist_ok=True)
64
+ shutil.copytree(source_dir, MODEL_DIR, dirs_exist_ok=True)
65
 
66
 
67
  def load_model():
68
  if MODEL_DIR is None:
69
  raise ValueError("LANG_MODEL is not configured")
70
+ artifact_dir = _resolve_artifact_dir(MODEL_DIR)
71
+ if artifact_dir is None:
72
+ logging.info("Model artifacts missing in %s, downloading now.", MODEL_DIR)
73
+ download_model_repo()
74
+ artifact_dir = _resolve_artifact_dir(MODEL_DIR)
75
+ if artifact_dir is None:
76
+ raise FileNotFoundError(
77
+ f"Required model artifacts not found in {MODEL_DIR}. Expected files: {', '.join(REQUIRED_FILES)}"
78
+ )
79
+
80
+ with open(artifact_dir / "classifier.pkl", "rb") as f:
81
  loaded_classifier = pickle.load(f)
82
 
83
+ with open(artifact_dir / "scaler.pkl", "rb") as f:
84
  loaded_scaler = pickle.load(f)
85
 
86
+ with open(artifact_dir / "word_vectorizer.pkl", "rb") as f:
87
  loaded_word_vectorizer = pickle.load(f)
88
 
89
+ with open(artifact_dir / "char_vectorizer.pkl", "rb") as f:
90
  loaded_char_vectorizer = pickle.load(f)
91
 
92
+ with open(artifact_dir / "feature_names.json", "r") as f:
93
  loaded_features = json.load(f)
94
 
95
+ with open(artifact_dir / "metadata.json", "r") as f:
96
  loaded_metadata = json.load(f)
97
  return (
98
  loaded_classifier,