Spaces:

Pujan-Dev
/

per_new

Sleeping

App Files Files Community

Pujan-Dev committed on Apr 1

Commit

ec8d360

1 Parent(s): 128b0a8

Remove model artifacts and fetch FAISS assets from HF repo

Browse files

Files changed (4) hide show

.gitignore +5 -1
Dockerfile +13 -12
config.py +14 -0
rag_service.py +40 -2

.gitignore CHANGED Viewed

	@@ -1 +1,5 @@
1	- __pycache__

+__pycache__/
+Models/
+*.index
+*.pkl
+.cache/

Dockerfile CHANGED Viewed

@@ -13,21 +13,22 @@ WORKDIR /app
 # System libs often needed by ML wheels/runtime.
 RUN apt-get update && apt-get install -y --no-install-recommends \
-  git \
-  build-essential \
-  && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies used by Fastapi/main.py.
 RUN pip install --upgrade pip && pip install \
-  fastapi \
-  "uvicorn[standard]" \
-  numpy \
-  faiss-cpu \
-  torch \
-  transformers \
-  sentencepiece \
-  InstructorEmbedding \
-  langchain-core
 # Copy the whole repo so Fastapi app can resolve vector_db.index/chunks.pkl
 # from /app, /app/Fastapi, or /app/RAG_pipeline.

 # System libs often needed by ML wheels/runtime.
 RUN apt-get update && apt-get install -y --no-install-recommends \
+	git \
+	build-essential \
+	&& rm -rf /var/lib/apt/lists/*
 # Install Python dependencies used by Fastapi/main.py.
 RUN pip install --upgrade pip && pip install \
+	fastapi \
+	"uvicorn[standard]" \
+	numpy \
+	faiss-cpu \
+	torch \
+	transformers \
+	huggingface_hub \
+	sentencepiece \
+	InstructorEmbedding \
+	langchain-core
 # Copy the whole repo so Fastapi app can resolve vector_db.index/chunks.pkl
 # from /app, /app/Fastapi, or /app/RAG_pipeline.

config.py CHANGED Viewed

@@ -40,6 +40,17 @@ def _to_float(value: str, default: float) -> float:
         return default
 _BASE_DIR = Path(__file__).resolve().parent
 _load_dotenv(_BASE_DIR / ".env")
@@ -57,6 +68,9 @@ class Settings:
     models_dir: str = _get_env("MODELS_DIR", "Models")
     vector_db_file: str = _get_env("VECTOR_DB_FILE", "vector_db.index", aliases=("VECTOR_STORE_PATH",))
     chunks_file: str = _get_env("CHUNKS_FILE", "chunks.pkl")
     retrieval_instruction: str = _get_env(
         "RETRIEVAL_INSTRUCTION",

         return default
+def _to_bool(value: str, default: bool) -> bool:
+    if value is None:
+        return default
+    normalized = value.strip().lower()
+    if normalized in {"1", "true", "yes", "on"}:
+        return True
+    if normalized in {"0", "false", "no", "off"}:
+        return False
+    return default
 _BASE_DIR = Path(__file__).resolve().parent
 _load_dotenv(_BASE_DIR / ".env")
     models_dir: str = _get_env("MODELS_DIR", "Models")
     vector_db_file: str = _get_env("VECTOR_DB_FILE", "vector_db.index", aliases=("VECTOR_STORE_PATH",))
     chunks_file: str = _get_env("CHUNKS_FILE", "chunks.pkl")
+    hf_assets_repo_id: str = _get_env("HF_ASSETS_REPO_ID", "Pujan-Dev/faiss_emb")
+    hf_assets_subdir: str = _get_env("HF_ASSETS_SUBDIR", "")
+    allow_hf_assets_download: bool = _to_bool(_get_env("ALLOW_HF_ASSETS_DOWNLOAD", "true"), True)
     retrieval_instruction: str = _get_env(
         "RETRIEVAL_INSTRUCTION",

rag_service.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 import faiss
 import numpy as np
 import torch
 from InstructorEmbedding import INSTRUCTOR
 from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -74,6 +75,43 @@ def find_data_file(filename: str) -> Path:
     raise FileNotFoundError(f"Could not find {filename} in expected locations")
 class AppState:
     def __init__(self):
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -194,14 +232,14 @@ def preload() -> dict:
     print("Loading vector DB...")
     t_index = time.perf_counter()
-    index_path = find_data_file(settings.vector_db_file)
     state.index = faiss.read_index(str(index_path))
     index_time = time.perf_counter() - t_index
     print(f"Index loaded : {state.index.ntotal} vectors")
     print("Loading chunks...")
     t_chunks = time.perf_counter()
-    chunks_path = find_data_file(settings.chunks_file)
     state.chunks = _load_chunks(chunks_path)
     chunks_time = time.perf_counter() - t_chunks
     print(f"Chunks loaded : {len(state.chunks)}")

 import faiss
 import numpy as np
 import torch
+from huggingface_hub import hf_hub_download
 from InstructorEmbedding import INSTRUCTOR
 from transformers import AutoModelForCausalLM, AutoTokenizer
     raise FileNotFoundError(f"Could not find {filename} in expected locations")
+def resolve_data_file(filename: str) -> Path:
+    try:
+        return find_data_file(filename)
+    except FileNotFoundError:
+        if not settings.allow_hf_assets_download:
+            raise
+    if not settings.hf_assets_repo_id:
+        raise FileNotFoundError(
+            f"Could not find {filename} locally and HF_ASSETS_REPO_ID is not configured"
+        )
+    subdir = settings.hf_assets_subdir.strip("/")
+    preferred_filename = f"{subdir}/{filename}" if subdir else filename
+    fallback_filename = filename
+    attempts = [preferred_filename]
+    if fallback_filename != preferred_filename:
+        attempts.append(fallback_filename)
+    last_error = None
+    for candidate in attempts:
+        try:
+            downloaded = hf_hub_download(
+                repo_id=settings.hf_assets_repo_id,
+                filename=candidate,
+                repo_type="model",
+            )
+            print(f"Downloaded {candidate} from {settings.hf_assets_repo_id}")
+            return Path(downloaded)
+        except Exception as exc:
+            last_error = exc
+    raise FileNotFoundError(
+        f"Could not find {filename} locally or in Hugging Face repo {settings.hf_assets_repo_id}"
+    ) from last_error
 class AppState:
     def __init__(self):
         self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
     print("Loading vector DB...")
     t_index = time.perf_counter()
+    index_path = resolve_data_file(settings.vector_db_file)
     state.index = faiss.read_index(str(index_path))
     index_time = time.perf_counter() - t_index
     print(f"Index loaded : {state.index.ntotal} vectors")
     print("Loading chunks...")
     t_chunks = time.perf_counter()
+    chunks_path = resolve_data_file(settings.chunks_file)
     state.chunks = _load_chunks(chunks_path)
     chunks_time = time.perf_counter() - t_chunks
     print(f"Chunks loaded : {len(state.chunks)}")