Zubaish committed on
Commit
4ed3f0a
·
1 Parent(s): 11f1809
Files changed (3) hide show
  1. config.py +18 -22
  2. download_models.py +1 -1
  3. rag.py +3 -2
config.py CHANGED
@@ -1,40 +1,36 @@
1
  # config.py
2
  # Central configuration for HubRAG (HF Space safe)
3
-
4
  import os
5
 
6
  # -----------------------------
7
- # Hugging Face Dataset
8
  # -----------------------------
9
- # MUST exactly match the dataset URL:
10
- # https://huggingface.co/datasets/Zubaish/hubrag-kb
11
- HF_DATASET_REPO = "Zubaish/hubrag-kb"
12
 
13
- # Optional HF token (needed only if dataset is private)
 
14
  HF_TOKEN = os.getenv("HF_TOKEN")
15
 
16
- # -----------------------------
17
- # Embeddings
18
- # -----------------------------
19
- # Small, fast, CPU-safe
20
- EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
21
 
22
- # -----------------------------
23
- # Vector store
24
- # -----------------------------
25
- # Stored locally inside the Space container
26
- CHROMA_DIR = "./chroma_db"
27
 
28
  # -----------------------------
29
- # LLM (CPU SAFE)
30
  # -----------------------------
31
- # Do NOT use Phi, Mistral, LLaMA on HF free CPU
 
32
  LLM_MODEL = "google/flan-t5-small"
33
 
 
 
 
 
34
  # -----------------------------
35
  # Text splitting
36
  # -----------------------------
37
- CHUNK_SIZE = 500
38
- CHUNK_OVERLAP = 50
39
-
40
- KB_DIR = "./kb"
 
1
  # config.py
2
  # Central configuration for HubRAG (HF Space safe)
 
3
  import os
4
 
5
  # -----------------------------
6
+ # Path Configuration
7
  # -----------------------------
8
+ # Using absolute paths ensures the app finds the DB built in Dockerfile
9
+ BASE_DIR = "/app"
 
10
 
11
+ # Hugging Face Dataset
12
+ HF_DATASET_REPO = "Zubaish/hubrag-kb"
13
  HF_TOKEN = os.getenv("HF_TOKEN")
14
 
15
+ # Vector Store Path
16
+ CHROMA_DIR = os.path.join(BASE_DIR, "chroma_db")
 
 
 
17
 
18
+ # Knowledge Base (Temp PDF storage)
19
+ KB_DIR = os.path.join(BASE_DIR, "kb")
 
 
 
20
 
21
  # -----------------------------
22
+ # Model Configuration
23
  # -----------------------------
24
+ # Small, fast, CPU-safe for free-tier Spaces
25
+ EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
26
  LLM_MODEL = "google/flan-t5-small"
27
 
28
+ # LLM Task type: 'text-generation' is more universally supported
29
+ # than 'text2text-generation' in some transformers versions.
30
+ LLM_TASK = "text-generation"
31
+
32
  # -----------------------------
33
  # Text splitting
34
  # -----------------------------
35
+ CHUNK_SIZE = 1000
36
+ CHUNK_OVERLAP = 100
 
 
download_models.py CHANGED
@@ -7,5 +7,5 @@ print("⏳ Pre-downloading models...")
7
  # Download Embedding Model
8
  HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
9
  # Download LLM
10
- pipeline("text-generation", model=LLM_MODEL)
11
  print("✅ Models downloaded successfully")
 
7
  # Download Embedding Model
8
  HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
9
  # Download LLM
10
+ pipeline("text-generation", model=LLM_MODEL, trust_remote_code=True)
11
  print("✅ Models downloaded successfully")
rag.py CHANGED
@@ -21,9 +21,10 @@ else:
21
 
22
  # 3. LLM Pipeline
23
  qa_pipeline = pipeline(
24
- task="text2text-generation", # Fixed task type for T5 models
25
  model=LLM_MODEL,
26
- max_new_tokens=256
 
27
  )
28
 
29
  def ask_rag_with_status(question: str):
 
21
 
22
  # 3. LLM Pipeline
23
  qa_pipeline = pipeline(
24
+ task="text-generation", # Changed back from text2text-generation
25
  model=LLM_MODEL,
26
+ max_new_tokens=256,
27
+ trust_remote_code=True # Added for better compatibility
28
  )
29
 
30
  def ask_rag_with_status(question: str):