Pujan-Dev committed on
Commit
8d28be7
·
1 Parent(s): 582b4bf

Added: new code changes

Browse files
features/ai_human_image_classifier/model_loader.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import joblib
4
  from pathlib import Path
5
  from huggingface_hub import hf_hub_download
 
6
 
7
  class ModelLoader:
8
  """
@@ -56,7 +57,7 @@ class ModelLoader:
56
  print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
57
  try:
58
  # Download the model file from the Hub. It returns the cached path.
59
- model_path = hf_hub_download(repo_id=repo_id, filename=filename)
60
  print(f"SVM model downloaded to: {model_path}")
61
 
62
  # Load the model from the downloaded path
 
3
  import joblib
4
  from pathlib import Path
5
  from huggingface_hub import hf_hub_download
6
+ from config import Config
7
 
8
  class ModelLoader:
9
  """
 
57
  print(f"Downloading SVM model from Hugging Face repo: {repo_id}")
58
  try:
59
  # Download the model file from the Hub. It returns the cached path.
60
+ model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
61
  print(f"SVM model downloaded to: {model_path}")
62
 
63
  # Load the model from the downloaded path
features/image_classifier/model_loader.py CHANGED
@@ -9,6 +9,7 @@ from huggingface_hub import snapshot_download
9
  REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
10
  MODEL_DIR = "./IMG_Models"
11
  WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")
 
12
 
13
  # Device info (for logging)
14
  gpus = tf.config.list_physical_devices("GPU")
@@ -32,7 +33,7 @@ def download_model_repo():
32
  if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
33
  logging.info("Image model already exists, skipping download.")
34
  return
35
- snapshot_path = snapshot_download(repo_id=REPO_ID)
36
  os.makedirs(MODEL_DIR, exist_ok=True)
37
  shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
38
 
 
9
  REPO_ID = "can-org/AI-VS-HUMAN-IMAGE-classifier"
10
  MODEL_DIR = "./IMG_Models"
11
  WEIGHTS_PATH = os.path.join(MODEL_DIR, "latest-my_cnn_model.h5")
12
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
13
 
14
  # Device info (for logging)
15
  gpus = tf.config.list_physical_devices("GPU")
 
33
  if os.path.exists(MODEL_DIR) and os.path.isdir(MODEL_DIR):
34
  logging.info("Image model already exists, skipping download.")
35
  return
36
+ snapshot_path = snapshot_download(repo_id=REPO_ID, token=HF_TOKEN)
37
  os.makedirs(MODEL_DIR, exist_ok=True)
38
  shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
39
 
features/nepali_text_classifier/model_loader.py CHANGED
@@ -1,12 +1,13 @@
1
  import logging
2
- import os
3
  import pickle
4
  import re
 
5
  from functools import lru_cache
6
  from pathlib import Path
7
 
8
  import numpy as np
9
  import pandas as pd
 
10
 
11
  from config import Config
12
 
@@ -22,11 +23,15 @@ MODEL_FILES = {
22
  "Ridge Classifier": "Ridge_Classifier.pkl",
23
  "Multinomial NB": "Multinomial_NB.pkl",
24
  "Bernoulli NB": "Bernoulli_NB.pkl",
25
- "K-Nearest Neighbors": "KNearest_Neighbors.pkl",
26
  }
27
 
28
- # KNN artifact in this repo is very large; keep API responsive by skipping it.
29
- SKIP_MODELS = {"K-Nearest Neighbors"}
 
 
 
 
 
30
 
31
  # Ranked by validation accuracy from final_model/final_results.csv
32
  DEFAULT_MODEL_RANKING = [
@@ -104,17 +109,54 @@ def _repo_root() -> Path:
104
  return Path(__file__).resolve().parents[2]
105
 
106
 
107
- def resolve_model_dir() -> Path:
 
 
 
 
 
 
 
 
108
  candidates = []
109
- if Config.Nepali_model_folder:
110
- candidates.append(Path(Config.Nepali_model_folder))
111
  repo = _repo_root()
112
- candidates.append(repo / "features" / "Model" / "Nepali_model")
 
 
 
 
 
 
113
  candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
 
 
 
 
 
 
114
 
115
- for path in candidates:
116
- if path.exists() and path.is_dir() and (path / "word_vectorizer.pkl").exists():
 
 
 
 
 
 
 
 
 
 
 
117
  return path
 
 
 
 
 
 
 
 
118
  raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
119
 
120
 
 
1
  import logging
 
2
  import pickle
3
  import re
4
+ import shutil
5
  from functools import lru_cache
6
  from pathlib import Path
7
 
8
  import numpy as np
9
  import pandas as pd
10
+ from huggingface_hub import snapshot_download
11
 
12
  from config import Config
13
 
 
23
  "Ridge Classifier": "Ridge_Classifier.pkl",
24
  "Multinomial NB": "Multinomial_NB.pkl",
25
  "Bernoulli NB": "Bernoulli_NB.pkl",
 
26
  }
27
 
28
+ SKIP_MODELS = set()
29
+
30
+ REPO_ID = Config.REPO_ID_LANG
31
+ HF_TOKEN = Config.HF_TOKEN
32
+ NEPALI_SUBDIR = "Nepali_model"
33
+ REQUIRED_BASE_FILES = ("word_vectorizer.pkl", "char_vectorizer.pkl")
34
+
35
 
36
  # Ranked by validation accuracy from final_model/final_results.csv
37
  DEFAULT_MODEL_RANKING = [
 
109
  return Path(__file__).resolve().parents[2]
110
 
111
 
112
+ def _has_required_artifacts(path: Path) -> bool:
113
+ if not path.exists() or not path.is_dir():
114
+ return False
115
+ has_base = all((path / filename).exists() for filename in REQUIRED_BASE_FILES)
116
+ has_any_model = any((path / filename).exists() for filename in MODEL_FILES.values())
117
+ return has_base and has_any_model
118
+
119
+
120
+ def _candidate_model_dirs() -> list[Path]:
121
  candidates = []
 
 
122
  repo = _repo_root()
123
+
124
+ if Config.Nepali_model_folder:
125
+ custom = Path(Config.Nepali_model_folder)
126
+ candidates.extend([custom, custom / NEPALI_SUBDIR])
127
+
128
+ default_dir = repo / "features" / "Model" / "Nepali_model"
129
+ candidates.extend([default_dir, default_dir / NEPALI_SUBDIR])
130
  candidates.append(repo / "notebook" / "ai_vs_human_nepali" / "final_model" / "saved_models")
131
+ return candidates
132
+
133
+
134
+ def _download_nepali_artifacts() -> None:
135
+ if not REPO_ID:
136
+ raise ValueError("English_model repo id is not configured")
137
 
138
+ repo = _repo_root()
139
+ target_dir = Path(Config.Nepali_model_folder) if Config.Nepali_model_folder else repo / "features" / "Model" / "Nepali_model"
140
+
141
+ snapshot_path = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
142
+ source_dir = snapshot_path / NEPALI_SUBDIR if (snapshot_path / NEPALI_SUBDIR).is_dir() else snapshot_path
143
+
144
+ target_dir.mkdir(parents=True, exist_ok=True)
145
+ shutil.copytree(source_dir, target_dir, dirs_exist_ok=True)
146
+
147
+
148
+ def resolve_model_dir() -> Path:
149
+ for path in _candidate_model_dirs():
150
+ if _has_required_artifacts(path):
151
  return path
152
+
153
+ LOGGER.info("Nepali artifacts not found locally; downloading from %s", REPO_ID)
154
+ _download_nepali_artifacts()
155
+
156
+ for path in _candidate_model_dirs():
157
+ if _has_required_artifacts(path):
158
+ return path
159
+
160
  raise FileNotFoundError("Nepali model directory not found. Set Nepali_model env or add expected artifacts.")
161
 
162
 
features/real_forged_classifier/model_loader.py CHANGED
@@ -2,6 +2,7 @@ import torch
2
  from pathlib import Path
3
  from huggingface_hub import hf_hub_download
4
  from model import FFTCNN # Import the model architecture
 
5
 
6
  class ModelLoader:
7
  """
@@ -35,7 +36,7 @@ class ModelLoader:
35
  print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
36
  try:
37
  # Download the model file from the Hub. It returns the cached path.
38
- model_path = hf_hub_download(repo_id=repo_id, filename=filename)
39
  print(f"Model downloaded to: {model_path}")
40
 
41
  # Initialize the model architecture
 
2
  from pathlib import Path
3
  from huggingface_hub import hf_hub_download
4
  from model import FFTCNN # Import the model architecture
5
+ from config import Config
6
 
7
  class ModelLoader:
8
  """
 
36
  print(f"Downloading FFT CNN model from Hugging Face repo: {repo_id}")
37
  try:
38
  # Download the model file from the Hub. It returns the cached path.
39
+ model_path = hf_hub_download(repo_id=repo_id, filename=filename, token=Config.HF_TOKEN)
40
  print(f"Model downloaded to: {model_path}")
41
 
42
  # Initialize the model architecture
features/text_classifier/model_loader.py CHANGED
@@ -1,6 +1,5 @@
1
  import json
2
  import logging
3
- import os
4
  import pickle
5
  import shutil
6
  from pathlib import Path
@@ -12,17 +11,41 @@ from config import Config
12
 
13
  REPO_ID = Config.REPO_ID_LANG
14
  MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
 
 
15
 
16
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
- _model, _tokenizer = None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def warmup():
21
  logging.info("Warming up model...")
22
  if MODEL_DIR is None:
23
  raise ValueError("LANG_MODEL is not configured")
24
- if MODEL_DIR.exists() and MODEL_DIR.is_dir():
25
- logging.info("Model already exists, skipping download.")
26
  return
27
  download_model_repo()
28
 
@@ -30,34 +53,46 @@ def warmup():
30
  def download_model_repo():
31
  if MODEL_DIR is None:
32
  raise ValueError("LANG_MODEL is not configured")
33
- if MODEL_DIR.exists() and MODEL_DIR.is_dir():
34
- logging.info("Model already exists, skipping download.")
 
 
35
  return
36
- snapshot_path = snapshot_download(repo_id=REPO_ID)
37
- os.makedirs(MODEL_DIR, exist_ok=True)
38
- shutil.copytree(snapshot_path, MODEL_DIR, dirs_exist_ok=True)
 
39
 
40
 
41
  def load_model():
42
  if MODEL_DIR is None:
43
  raise ValueError("LANG_MODEL is not configured")
44
-
45
- with open(MODEL_DIR / "classifier.pkl", "rb") as f:
 
 
 
 
 
 
 
 
 
46
  loaded_classifier = pickle.load(f)
47
 
48
- with open(MODEL_DIR / "scaler.pkl", "rb") as f:
49
  loaded_scaler = pickle.load(f)
50
 
51
- with open(MODEL_DIR / "word_vectorizer.pkl", "rb") as f:
52
  loaded_word_vectorizer = pickle.load(f)
53
 
54
- with open(MODEL_DIR / "char_vectorizer.pkl", "rb") as f:
55
  loaded_char_vectorizer = pickle.load(f)
56
 
57
- with open(MODEL_DIR / "feature_names.json", "r") as f:
58
  loaded_features = json.load(f)
59
 
60
- with open(MODEL_DIR / "metadata.json", "r") as f:
61
  loaded_metadata = json.load(f)
62
  return (
63
  loaded_classifier,
 
1
  import json
2
  import logging
 
3
  import pickle
4
  import shutil
5
  from pathlib import Path
 
11
 
12
  REPO_ID = Config.REPO_ID_LANG
13
  MODEL_DIR = Path(Config.LANG_MODEL) if Config.LANG_MODEL else None
14
+ HF_TOKEN = Config.HF_TOKEN
15
+ ENGLISH_SUBDIR = "English_model"
16
 
17
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
+
19
+ REQUIRED_FILES = (
20
+ "classifier.pkl",
21
+ "scaler.pkl",
22
+ "word_vectorizer.pkl",
23
+ "char_vectorizer.pkl",
24
+ "feature_names.json",
25
+ "metadata.json",
26
+ )
27
+
28
+
29
+ def _has_required_artifacts(model_dir: Path) -> bool:
30
+ if not model_dir.exists() or not model_dir.is_dir():
31
+ return False
32
+ return all((model_dir / filename).exists() for filename in REQUIRED_FILES)
33
+
34
+
35
+ def _resolve_artifact_dir(base_dir: Path) -> Path | None:
36
+ candidates = [base_dir, base_dir / ENGLISH_SUBDIR]
37
+ for candidate in candidates:
38
+ if _has_required_artifacts(candidate):
39
+ return candidate
40
+ return None
41
 
42
 
43
  def warmup():
44
  logging.info("Warming up model...")
45
  if MODEL_DIR is None:
46
  raise ValueError("LANG_MODEL is not configured")
47
+ if _resolve_artifact_dir(MODEL_DIR):
48
+ logging.info("Model artifacts already exist, skipping download.")
49
  return
50
  download_model_repo()
51
 
 
53
  def download_model_repo():
54
  if MODEL_DIR is None:
55
  raise ValueError("LANG_MODEL is not configured")
56
+ if not REPO_ID:
57
+ raise ValueError("English_model repo id is not configured")
58
+ if _resolve_artifact_dir(MODEL_DIR):
59
+ logging.info("Model artifacts already exist, skipping download.")
60
  return
61
+ snapshot_path = Path(snapshot_download(repo_id=REPO_ID, token=HF_TOKEN))
62
+ source_dir = snapshot_path / ENGLISH_SUBDIR if (snapshot_path / ENGLISH_SUBDIR).is_dir() else snapshot_path
63
+ MODEL_DIR.mkdir(parents=True, exist_ok=True)
64
+ shutil.copytree(source_dir, MODEL_DIR, dirs_exist_ok=True)
65
 
66
 
67
  def load_model():
68
  if MODEL_DIR is None:
69
  raise ValueError("LANG_MODEL is not configured")
70
+ artifact_dir = _resolve_artifact_dir(MODEL_DIR)
71
+ if artifact_dir is None:
72
+ logging.info("Model artifacts missing in %s, downloading now.", MODEL_DIR)
73
+ download_model_repo()
74
+ artifact_dir = _resolve_artifact_dir(MODEL_DIR)
75
+ if artifact_dir is None:
76
+ raise FileNotFoundError(
77
+ f"Required model artifacts not found in {MODEL_DIR}. Expected files: {', '.join(REQUIRED_FILES)}"
78
+ )
79
+
80
+ with open(artifact_dir / "classifier.pkl", "rb") as f:
81
  loaded_classifier = pickle.load(f)
82
 
83
+ with open(artifact_dir / "scaler.pkl", "rb") as f:
84
  loaded_scaler = pickle.load(f)
85
 
86
+ with open(artifact_dir / "word_vectorizer.pkl", "rb") as f:
87
  loaded_word_vectorizer = pickle.load(f)
88
 
89
+ with open(artifact_dir / "char_vectorizer.pkl", "rb") as f:
90
  loaded_char_vectorizer = pickle.load(f)
91
 
92
+ with open(artifact_dir / "feature_names.json", "r") as f:
93
  loaded_features = json.load(f)
94
 
95
+ with open(artifact_dir / "metadata.json", "r") as f:
96
  loaded_metadata = json.load(f)
97
  return (
98
  loaded_classifier,