Pujan-Dev committed on
Commit
ec8d360
·
1 Parent(s): 128b0a8

Remove model artifacts and fetch FAISS assets from HF repo

Browse files
Files changed (4) hide show
  1. .gitignore +5 -1
  2. Dockerfile +13 -12
  3. config.py +14 -0
  4. rag_service.py +40 -2
.gitignore CHANGED
@@ -1 +1,5 @@
1
- __pycache__
 
 
 
 
 
1
+ __pycache__/
2
+ Models/
3
+ *.index
4
+ *.pkl
5
+ .cache/
Dockerfile CHANGED
@@ -13,21 +13,22 @@ WORKDIR /app
13
 
14
  # System libs often needed by ML wheels/runtime.
15
  RUN apt-get update && apt-get install -y --no-install-recommends \
16
- git \
17
- build-essential \
18
- && rm -rf /var/lib/apt/lists/*
19
 
20
  # Install Python dependencies used by Fastapi/main.py.
21
  RUN pip install --upgrade pip && pip install \
22
- fastapi \
23
- "uvicorn[standard]" \
24
- numpy \
25
- faiss-cpu \
26
- torch \
27
- transformers \
28
- sentencepiece \
29
- InstructorEmbedding \
30
- langchain-core
 
31
 
32
  # Copy the whole repo so Fastapi app can resolve vector_db.index/chunks.pkl
33
  # from /app, /app/Fastapi, or /app/RAG_pipeline.
 
13
 
14
  # System libs often needed by ML wheels/runtime.
15
  RUN apt-get update && apt-get install -y --no-install-recommends \
16
+ git \
17
+ build-essential \
18
+ && rm -rf /var/lib/apt/lists/*
19
 
20
  # Install Python dependencies used by Fastapi/main.py.
21
  RUN pip install --upgrade pip && pip install \
22
+ fastapi \
23
+ "uvicorn[standard]" \
24
+ numpy \
25
+ faiss-cpu \
26
+ torch \
27
+ transformers \
28
+ huggingface_hub \
29
+ sentencepiece \
30
+ InstructorEmbedding \
31
+ langchain-core
32
 
33
  # Copy the whole repo so Fastapi app can resolve vector_db.index/chunks.pkl
34
  # from /app, /app/Fastapi, or /app/RAG_pipeline.
config.py CHANGED
@@ -40,6 +40,17 @@ def _to_float(value: str, default: float) -> float:
40
  return default
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
43
  _BASE_DIR = Path(__file__).resolve().parent
44
  _load_dotenv(_BASE_DIR / ".env")
45
 
@@ -57,6 +68,9 @@ class Settings:
57
  models_dir: str = _get_env("MODELS_DIR", "Models")
58
  vector_db_file: str = _get_env("VECTOR_DB_FILE", "vector_db.index", aliases=("VECTOR_STORE_PATH",))
59
  chunks_file: str = _get_env("CHUNKS_FILE", "chunks.pkl")
 
 
 
60
 
61
  retrieval_instruction: str = _get_env(
62
  "RETRIEVAL_INSTRUCTION",
 
40
  return default
41
 
42
 
43
+ def _to_bool(value: str, default: bool) -> bool:
44
+ if value is None:
45
+ return default
46
+ normalized = value.strip().lower()
47
+ if normalized in {"1", "true", "yes", "on"}:
48
+ return True
49
+ if normalized in {"0", "false", "no", "off"}:
50
+ return False
51
+ return default
52
+
53
+
54
  _BASE_DIR = Path(__file__).resolve().parent
55
  _load_dotenv(_BASE_DIR / ".env")
56
 
 
68
  models_dir: str = _get_env("MODELS_DIR", "Models")
69
  vector_db_file: str = _get_env("VECTOR_DB_FILE", "vector_db.index", aliases=("VECTOR_STORE_PATH",))
70
  chunks_file: str = _get_env("CHUNKS_FILE", "chunks.pkl")
71
+ hf_assets_repo_id: str = _get_env("HF_ASSETS_REPO_ID", "Pujan-Dev/faiss_emb")
72
+ hf_assets_subdir: str = _get_env("HF_ASSETS_SUBDIR", "")
73
+ allow_hf_assets_download: bool = _to_bool(_get_env("ALLOW_HF_ASSETS_DOWNLOAD", "true"), True)
74
 
75
  retrieval_instruction: str = _get_env(
76
  "RETRIEVAL_INSTRUCTION",
rag_service.py CHANGED
@@ -5,6 +5,7 @@ import time
5
  import faiss
6
  import numpy as np
7
  import torch
 
8
  from InstructorEmbedding import INSTRUCTOR
9
  from transformers import AutoModelForCausalLM, AutoTokenizer
10
 
@@ -74,6 +75,43 @@ def find_data_file(filename: str) -> Path:
74
  raise FileNotFoundError(f"Could not find {filename} in expected locations")
75
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  class AppState:
78
  def __init__(self):
79
  self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -194,14 +232,14 @@ def preload() -> dict:
194
 
195
  print("Loading vector DB...")
196
  t_index = time.perf_counter()
197
- index_path = find_data_file(settings.vector_db_file)
198
  state.index = faiss.read_index(str(index_path))
199
  index_time = time.perf_counter() - t_index
200
  print(f"Index loaded : {state.index.ntotal} vectors")
201
 
202
  print("Loading chunks...")
203
  t_chunks = time.perf_counter()
204
- chunks_path = find_data_file(settings.chunks_file)
205
  state.chunks = _load_chunks(chunks_path)
206
  chunks_time = time.perf_counter() - t_chunks
207
  print(f"Chunks loaded : {len(state.chunks)}")
 
5
  import faiss
6
  import numpy as np
7
  import torch
8
+ from huggingface_hub import hf_hub_download
9
  from InstructorEmbedding import INSTRUCTOR
10
  from transformers import AutoModelForCausalLM, AutoTokenizer
11
 
 
75
  raise FileNotFoundError(f"Could not find {filename} in expected locations")
76
 
77
 
78
def resolve_data_file(filename: str) -> Path:
    """Return a path to ``filename``, fetching it from Hugging Face if needed.

    The local lookup via ``find_data_file`` is tried first. If that fails and
    ``settings.allow_hf_assets_download`` is enabled, the file is requested
    from ``settings.hf_assets_repo_id`` — first under
    ``settings.hf_assets_subdir`` (when set), then at the repo root.

    Args:
        filename: Name of the asset to locate (e.g. the index or chunks file).

    Returns:
        Path to the local file, or to the file returned by
        ``hf_hub_download``.

    Raises:
        FileNotFoundError: If the file is not available locally and either
            downloads are disabled, no repo id is configured, or every
            download attempt failed (the last download error is chained as
            the cause).
    """
    try:
        return find_data_file(filename)
    except FileNotFoundError:
        # Downloads disabled: surface the original local-lookup failure as is.
        if not settings.allow_hf_assets_download:
            raise

    repo_id = settings.hf_assets_repo_id
    if not repo_id:
        raise FileNotFoundError(
            f"Could not find {filename} locally and HF_ASSETS_REPO_ID is not configured"
        )

    # Tolerate stray whitespace and leading/trailing slashes in the setting.
    subdir = settings.hf_assets_subdir.strip().strip("/")
    candidates = [f"{subdir}/{filename}"] if subdir else []
    # The repo root is always tried, after the subdir path when one is set.
    candidates.append(filename)

    last_error = None
    for candidate in candidates:
        try:
            downloaded = hf_hub_download(
                repo_id=repo_id,
                filename=candidate,
                repo_type="model",
            )
        except Exception as exc:
            # Best-effort boundary: any failure moves on to the next candidate,
            # but is reported so a network/auth problem is not hidden behind
            # the generic "not found" error raised below.
            print(f"Could not download {candidate} from {repo_id}: {exc}")
            last_error = exc
            continue

        print(f"Downloaded {candidate} from {repo_id}")
        return Path(downloaded)

    raise FileNotFoundError(
        f"Could not find {filename} locally or in Hugging Face repo {repo_id}"
    ) from last_error
113
+
114
+
115
  class AppState:
116
  def __init__(self):
117
  self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
232
 
233
  print("Loading vector DB...")
234
  t_index = time.perf_counter()
235
+ index_path = resolve_data_file(settings.vector_db_file)
236
  state.index = faiss.read_index(str(index_path))
237
  index_time = time.perf_counter() - t_index
238
  print(f"Index loaded : {state.index.ntotal} vectors")
239
 
240
  print("Loading chunks...")
241
  t_chunks = time.perf_counter()
242
+ chunks_path = resolve_data_file(settings.chunks_file)
243
  state.chunks = _load_chunks(chunks_path)
244
  chunks_time = time.perf_counter() - t_chunks
245
  print(f"Chunks loaded : {len(state.chunks)}")