Added: new code changes
Browse files
features/ai_human_image_classifier/model_loader.py
CHANGED
|
@@ -3,6 +3,7 @@ import torch
|
|
| 3 |
import joblib
|
| 4 |
from pathlib import Path
|
| 5 |
from huggingface_hub import hf_hub_download
|
|
|
|
| 6 |
|
| 7 |
class ModelLoader:
|
| 8 |
"""
|
|
@@ -56,7 +57,7 @@ class ModelLoader:
|
|
| 56 |
print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
|
| 57 |
try:
|
| 58 |
# Download the model file from the Hub. It returns the cached path.
|
| 59 |
-
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 60 |
print(f"SVM model downloaded to: {model_path}")
|
| 61 |
|
| 62 |
# Load the model from the downloaded path
|
|
|
|
| 3 |
import joblib
|
| 4 |
from pathlib import Path
|
| 5 |
from huggingface_hub import hf_hub_download
|
| 6 |
+
from config import Config
|
| 7 |
|
| 8 |
class ModelLoader:
|
| 9 |
"""
|
|
|
|
| 57 |
print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
|
| 58 |
try:
|
| 59 |
# Download the model file from the Hub. It returns the cached path.
|
| 60 |
+
model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
|
| 61 |
print(f"SVM model downloaded to: {model_path}")
|
| 62 |
|
| 63 |
# Load the model from the downloaded path
|
features/image_classifier/model_loader.py
CHANGED
|
@@ -9,6 +9,7 @@ from huggingface_hub import snapshot_download
|
|
| 9 |
REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
|
| 10 |
MODEL_DIR = "./IMG_Models"
|
| 11 |
WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")
|
|
|
|
| 12 |
|
| 13 |
# Device info (for logging)
|
| 14 |
gpus = tf.config.list_physical_devices("GPU")
|
|
@@ -32,7 +33,7 @@ def download_model_repo():
|
|
| 32 |
if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
|
| 33 |
logging.info("Image model already exists, skipping download.")
|
| 34 |
return
|
| 35 |
-
snapshot_path = snapshot_download(repo_id=REPO_ID)
|
| 36 |
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 37 |
shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
|
| 38 |
|
|
|
|
| 9 |
REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
|
| 10 |
MODEL_DIR = "./IMG_Models"
|
| 11 |
WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")
|
| 12 |
+
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
|
| 13 |
|
| 14 |
# Device info (for logging)
|
| 15 |
gpus = tf.config.list_physical_devices("GPU")
|
|
|
|
| 33 |
if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
|
| 34 |
logging.info("Image model already exists, skipping download.")
|
| 35 |
return
|
| 36 |
+
snapshot_path = snapshot_download(repo_id=REPO_ID, token=HF_TOKEN)
|
| 37 |
os.makedirs(MODEL_DIR, exist_ok=True)
|
| 38 |
shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
|
| 39 |
|
features/nepali_text_classifier/model_loader.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
import logging
|
| 2 |
-
import os
|
| 3 |
import pickle
|
| 4 |
import re
|
|
|
|
| 5 |
from functools import lru_cache
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
import numpy as np
|
| 9 |
import pandas as pd
|
|
|
|
| 10 |
|
| 11 |
from config import Config
|
| 12 |
|
|
@@ -22,11 +23,15 @@ MODEL_FILES = {
|
|
| 22 |
"Ridge Classifier": "Ridge_Classifier.pkl",
|
| 23 |
"Multinomial NB": "Multinomial_NB.pkl",
|
| 24 |
"Bernoulli NB": "Bernoulli_NB.pkl",
|
| 25 |
-
"K-Nearest Neighbors": "KNearest_Neighbors.pkl",
|
| 26 |
}
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# Ranked by validation accuracy from final_model/final_results.csv
|
| 32 |
DEFAULT_MODEL_RANKING = [
|
|
@@ -104,17 +109,54 @@ def _repo_root() -> Path:
|
|
| 104 |
return Path(__file__).resolve().parents[2]
|
| 105 |
|
| 106 |
|
| 107 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
candidates = []
|
| 109 |
-
if Config.Nepali_model_folder:
|
| 110 |
-
candidates.append(Path(Config.Nepali_model_folder))
|
| 111 |
repo = _repo_root()
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
return path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
|
| 119 |
|
| 120 |
|
|
|
|
| 1 |
import logging
|
|
|
|
| 2 |
import pickle
|
| 3 |
import re
|
| 4 |
+
import shutil
|
| 5 |
from functools import lru_cache
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
import numpy as np
|
| 9 |
import pandas as pd
|
| 10 |
+
from huggingface_hub import snapshot_download
|
| 11 |
|
| 12 |
from config import Config
|
| 13 |
|
|
|
|
| 23 |
"Ridge Classifier": "Ridge_Classifier.pkl",
|
| 24 |
"Multinomial NB": "Multinomial_NB.pkl",
|
| 25 |
"Bernoulli NB": "Bernoulli_NB.pkl",
|
|
|
|
| 26 |
}
|
| 27 |
|
| 28 |
+
SKIP_MODELS = set()
|
| 29 |
+
|
| 30 |
+
REPO_ID = Config.REPO_ID_LANG
|
| 31 |
+
HF_TOKEN = Config.HF_TOKEN
|
| 32 |
+
NEPALI_SUBDIR = "Nepali_model"
|
| 33 |
+
REQUIRED_BASE_FILES = ("word_vectorizer.pkl", "char_vectorizer.pkl")
|
| 34 |
+
|
| 35 |
|
| 36 |
# Ranked by validation accuracy from final_model/final_results.csv
|
| 37 |
DEFAULT_MODEL_RANKING = [
|
|
|
|
| 109 |
return Path(__file__).resolve().parents[2]
|
| 110 |
|
| 111 |
|
| 112 |
+
def _has_required_artifacts(path: Path) -> bool:
|
| 113 |
+
if not path.exists() or not path.is_dir():
|
| 114 |
+
return False
|
| 115 |
+
has_base = all((path / filename).exists() for filename in REQUIRED_BASE_FILES)
|
| 116 |
+
has_any_model = any((path / filename).exists() for filename in MODEL_FILES.values())
|
| 117 |
+
return has_base and has_any_model
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
def _candidate_model_dirs() -> list[Path]:
    """Return the ordered list of directories that may contain the Nepali
    model artifacts: user-configured folder first, then the in-repo default,
    then the notebook training output."""
    repo = _repo_root()
    candidates: list[Path] = []

    # A user-supplied folder (and its Nepali_model subfolder) takes priority.
    if Config.Nepali_model_folder:
        custom = Path(Config.Nepali_model_folder)
        candidates += [custom, custom / NEPALI_SUBDIR]

    default_dir = repo / "features" / "Model" / "Nepali_model"
    candidates += [default_dir, default_dir / NEPALI_SUBDIR]

    # Fallback: artifacts left in place by the training notebook.
    candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
    return candidates
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def _download_nepali_artifacts() -> None:
    """Download the Nepali model snapshot from the Hugging Face Hub and copy
    it into the local model folder.

    Raises:
        ValueError: if no Hub repo id is configured.
    """
    if not REPO_ID:
        # Bug fix: the message previously said "English_model" — copy-paste
        # from the English text-classifier loader. This is the Nepali loader.
        raise ValueError("Nepali_model repo id is not configured")

    repo = _repo_root()
    # Prefer the user-configured folder; otherwise use the in-repo default.
    target_dir = (
        Path(Config.Nepali_model_folder)
        if Config.Nepali_model_folder
        else repo / "features" / "Model" / "Nepali_model"
    )

    snapshot_path = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
    # Some repos nest the artifacts under a "Nepali_model" subfolder; use it when present.
    source_dir = snapshot_path / NEPALI_SUBDIR if (snapshot_path / NEPALI_SUBDIR).is_dir() else snapshot_path

    target_dir.mkdir(parents=True, exist_ok=True)
    shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def resolve_model_dir() -> Path:
    """Locate a directory containing the required Nepali artifacts, downloading
    them from the Hub when no local candidate qualifies.

    Raises:
        FileNotFoundError: if artifacts are still missing after the download.
    """
    def _first_valid():
        # First candidate directory that passes the artifact check, or None.
        return next((p for p in _candidate_model_dirs() if _has_required_artifacts(p)), None)

    found = _first_valid()
    if found is not None:
        return found

    LOGGER.info("Nepali artifacts not found locally; downloading from %s", REPO_ID)
    _download_nepali_artifacts()

    found = _first_valid()
    if found is not None:
        return found

    raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
|
| 161 |
|
| 162 |
|
features/real_forged_classifier/model_loader.py
CHANGED
|
@@ -2,6 +2,7 @@ import torch
|
|
| 2 |
from pathlib import Path
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
from model import FFTCNN # Import the model architecture
|
|
|
|
| 5 |
|
| 6 |
class ModelLoader:
|
| 7 |
"""
|
|
@@ -35,7 +36,7 @@ class ModelLoader:
|
|
| 35 |
print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
|
| 36 |
try:
|
| 37 |
# Download the model file from the Hub. It returns the cached path.
|
| 38 |
-
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
| 39 |
print(f"Model downloaded to: {model_path}")
|
| 40 |
|
| 41 |
# Initialize the model architecture
|
|
|
|
| 2 |
from pathlib import Path
|
| 3 |
from huggingface_hub import hf_hub_download
|
| 4 |
from model import FFTCNN # Import the model architecture
|
| 5 |
+
from config import Config
|
| 6 |
|
| 7 |
class ModelLoader:
|
| 8 |
"""
|
|
|
|
| 36 |
print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
|
| 37 |
try:
|
| 38 |
# Download the model file from the Hub. It returns the cached path.
|
| 39 |
+
model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
|
| 40 |
print(f"Model downloaded to: {model_path}")
|
| 41 |
|
| 42 |
# Initialize the model architecture
|
features/text_classifier/model_loader.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import json
|
| 2 |
import logging
|
| 3 |
-
import os
|
| 4 |
import pickle
|
| 5 |
import shutil
|
| 6 |
from pathlib import Path
|
|
@@ -12,17 +11,41 @@ from config import Config
|
|
| 12 |
|
| 13 |
REPO_ID = Config.REPO_ID_LANG
|
| 14 |
MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
|
|
|
|
|
|
|
| 15 |
|
| 16 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def warmup():
|
| 21 |
logging.info("Warming up model...")
|
| 22 |
if MODEL_DIR is None:
|
| 23 |
raise ValueError("LANG_MODEL is not configured")
|
| 24 |
-
if
|
| 25 |
-
logging.info("Model already
|
| 26 |
return
|
| 27 |
download_model_repo()
|
| 28 |
|
|
@@ -30,34 +53,46 @@ def warmup():
|
|
| 30 |
def download_model_repo():
|
| 31 |
if MODEL_DIR is None:
|
| 32 |
raise ValueError("LANG_MODEL is not configured")
|
| 33 |
-
if
|
| 34 |
-
|
|
|
|
|
|
|
| 35 |
return
|
| 36 |
-
snapshot_path = snapshot_download(repo_id=REPO_ID)
|
| 37 |
-
|
| 38 |
-
|
|
|
|
| 39 |
|
| 40 |
|
| 41 |
def load_model():
|
| 42 |
if MODEL_DIR is None:
|
| 43 |
raise ValueError("LANG_MODEL is not configured")
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
loaded_classifier = pickle.load(f)
|
| 47 |
|
| 48 |
-
with open(
|
| 49 |
loaded_scaler = pickle.load(f)
|
| 50 |
|
| 51 |
-
with open(
|
| 52 |
loaded_word_vectorizer = pickle.load(f)
|
| 53 |
|
| 54 |
-
with open(
|
| 55 |
loaded_char_vectorizer = pickle.load(f)
|
| 56 |
|
| 57 |
-
with open(
|
| 58 |
loaded_features = json.load(f)
|
| 59 |
|
| 60 |
-
with open(
|
| 61 |
loaded_metadata = json.load(f)
|
| 62 |
return (
|
| 63 |
loaded_classifier,
|
|
|
|
| 1 |
import json
|
| 2 |
import logging
|
|
|
|
| 3 |
import pickle
|
| 4 |
import shutil
|
| 5 |
from pathlib import Path
|
|
|
|
| 11 |
|
| 12 |
REPO_ID = Config.REPO_ID_LANG
|
| 13 |
MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
|
| 14 |
+
HF_TOKEN = Config.HF_TOKEN
|
| 15 |
+
ENGLISH_SUBDIR = "English_model"
|
| 16 |
|
| 17 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 18 |
+
|
| 19 |
+
REQUIRED_FILES = (
|
| 20 |
+
"classifier.pkl",
|
| 21 |
+
"scaler.pkl",
|
| 22 |
+
"word_vectorizer.pkl",
|
| 23 |
+
"char_vectorizer.pkl",
|
| 24 |
+
"feature_names.json",
|
| 25 |
+
"metadata.json",
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _has_required_artifacts(model_dir: Path) -> bool:
|
| 30 |
+
if not model_dir.exists() or not model_dir.is_dir():
|
| 31 |
+
return False
|
| 32 |
+
return all((model_dir / filename).exists() for filename in REQUIRED_FILES)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _resolve_artifact_dir(base_dir: Path) -> Path | None:
    """Return *base_dir* itself or its English_model subfolder — whichever
    first holds the full artifact set — or None if neither does."""
    return next(
        (c for c in (base_dir, base_dir / ENGLISH_SUBDIR) if _has_required_artifacts(c)),
        None,
    )
|
| 41 |
|
| 42 |
|
| 43 |
def warmup():
    """Ensure the English text-classifier artifacts are present locally,
    triggering a download when they are absent.

    Raises:
        ValueError: if LANG_MODEL is not configured.
    """
    logging.info("Warming up model...")
    if MODEL_DIR is None:
        raise ValueError("LANG_MODEL is not configured")
    if _resolve_artifact_dir(MODEL_DIR) is None:
        download_model_repo()
    else:
        logging.info("Model artifacts already exist, skipping download.")
|
| 51 |
|
|
|
|
| 53 |
def download_model_repo():
    """Fetch the English model snapshot from the Hugging Face Hub and copy it
    into MODEL_DIR. No-op when the artifacts are already present.

    Raises:
        ValueError: if LANG_MODEL or the Hub repo id is unconfigured.
    """
    if MODEL_DIR is None:
        raise ValueError("LANG_MODEL is not configured")
    if not REPO_ID:
        raise ValueError("English_model repo id is not configured")
    if _resolve_artifact_dir(MODEL_DIR):
        logging.info("Model artifacts already exist, skipping download.")
        return

    snapshot_root = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
    # Repos may nest the files under an "English_model" subfolder.
    nested = snapshot_root / ENGLISH_SUBDIR
    source_dir = nested if nested.is_dir() else snapshot_root

    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    shutil.copytree(source_dir, MODEL_DIR, dirs_exist_ok=True)
|
| 65 |
|
| 66 |
|
| 67 |
def load_model():
|
| 68 |
if MODEL_DIR is None:
|
| 69 |
raise ValueError("LANG_MODEL is not configured")
|
| 70 |
+
artifact_dir = _resolve_artifact_dir(MODEL_DIR)
|
| 71 |
+
if artifact_dir is None:
|
| 72 |
+
logging.info("Model artifacts missing in %s, downloading now.", MODEL_DIR)
|
| 73 |
+
download_model_repo()
|
| 74 |
+
artifact_dir = _resolve_artifact_dir(MODEL_DIR)
|
| 75 |
+
if artifact_dir is None:
|
| 76 |
+
raise FileNotFoundError(
|
| 77 |
+
f"Required model artifacts not found in {MODEL_DIR}. Expected files: {', '.join(REQUIRED_FILES)}"
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
with open(artifact_dir / "classifier.pkl", "rb") as f:
|
| 81 |
loaded_classifier = pickle.load(f)
|
| 82 |
|
| 83 |
+
with open(artifact_dir / "scaler.pkl", "rb") as f:
|
| 84 |
loaded_scaler = pickle.load(f)
|
| 85 |
|
| 86 |
+
with open(artifact_dir / "word_vectorizer.pkl", "rb") as f:
|
| 87 |
loaded_word_vectorizer = pickle.load(f)
|
| 88 |
|
| 89 |
+
with open(artifact_dir / "char_vectorizer.pkl", "rb") as f:
|
| 90 |
loaded_char_vectorizer = pickle.load(f)
|
| 91 |
|
| 92 |
+
with open(artifact_dir / "feature_names.json", "r") as f:
|
| 93 |
loaded_features = json.load(f)
|
| 94 |
|
| 95 |
+
with open(artifact_dir / "metadata.json", "r") as f:
|
| 96 |
loaded_metadata = json.load(f)
|
| 97 |
return (
|
| 98 |
loaded_classifier,
|