AsyncRAG

Sleeping

Zubaish committed 16 days ago

Commit

4ed3f0a

1 Parent(s): 11f1809

update

Files changed (3) hide show

config.py CHANGED Viewed

@@ -1,40 +1,36 @@
 # config.py
 # Central configuration for HubRAG (HF Space safe)
 import os
 # -----------------------------
-# Hugging Face Dataset
 # -----------------------------
-# MUST exactly match the dataset URL:
-# https://huggingface.co/datasets/Zubaish/hubrag-kb
-HF_DATASET_REPO = "Zubaish/hubrag-kb"
-# Optional HF token (needed only if dataset is private)
 HF_TOKEN = os.getenv("HF_TOKEN")
-# -----------------------------
-# Embeddings
-# -----------------------------
-# Small, fast, CPU-safe
-EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-# -----------------------------
-# Vector store
-# -----------------------------
-# Stored locally inside the Space container
-CHROMA_DIR = "./chroma_db"
 # -----------------------------
-# LLM (CPU SAFE)
 # -----------------------------
-# Do NOT use Phi, Mistral, LLaMA on HF free CPU
 LLM_MODEL = "google/flan-t5-small"
 # -----------------------------
 # Text splitting
 # -----------------------------
-CHUNK_SIZE = 500
-CHUNK_OVERLAP = 50
-KB_DIR = "./kb"

 # config.py
 # Central configuration for HubRAG (HF Space safe)
 import os
 # -----------------------------
+# Path Configuration
 # -----------------------------
+# Using absolute paths ensures the app finds the DB built by the Dockerfile
+BASE_DIR = "/app"
+# Hugging Face Dataset
+HF_DATASET_REPO = "Zubaish/hubrag-kb"
 HF_TOKEN = os.getenv("HF_TOKEN")
+# Vector Store Path
+CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
+# Knowledge Base (Temp PDF storage)
+KB_DIR = os.path.join(BASE_DIR, "kb")
 # -----------------------------
+# Model Configuration
 # -----------------------------
+# Small, fast, CPU-safe for free-tier Spaces
+EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
 LLM_MODEL = "google/flan-t5-small"
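+# LLM task type: 'text-generation' is used because 'text2text-generation'
+# is not available in some transformers versions.
+# NOTE(review): flan-t5 is an encoder-decoder (seq2seq) model; confirm that
+# the 'text-generation' pipeline can actually load it in the pinned version.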
+LLM_TASK = "text-generation"
 # -----------------------------
 # Text splitting
 # -----------------------------
+CHUNK_SIZE = 1000
+CHUNK_OVERLAP = 100

download_models.py CHANGED Viewed

@@ -7,5 +7,5 @@ print("⏳ Pre-downloading models...")
 # Download Embedding Model
 HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 # Download LLM
-pipeline("text-generation", model=LLM_MODEL)
 print("✅ Models downloaded successfully")

 # Download Embedding Model
 HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
 # Download LLM
+pipeline("text-generation", model=LLM_MODEL, trust_remote_code=True)
 print("✅ Models downloaded successfully")

rag.py CHANGED Viewed

@@ -21,9 +21,10 @@ else:
 # 3. LLM Pipeline
 qa_pipeline = pipeline(
-    task="text2text-generation", # Fixed task type for T5 models
     model=LLM_MODEL,
-    max_new_tokens=256
 )
 def ask_rag_with_status(question: str):

 # 3. LLM Pipeline
 qa_pipeline = pipeline(
+    task="text-generation", # Changed back from text2text-generation
     model=LLM_MODEL,
+    max_new_tokens=256,
+    trust_remote_code=True # Added for better compatibility
 )
 def ask_rag_with_status(question: str):