Spaces:

Subha95
/

Harry_potter_wiki

Sleeping

Subha95 committed on Sep 5, 2025

Commit

3f3e3b8

verified ·

1 Parent(s): c23257f

Update chatbot_rag.py

Files changed (1) hide show

chatbot_rag.py CHANGED Viewed

@@ -39,30 +39,36 @@ def build_qa():
     print("📂 Docs in DB:", vectorstore._collection.count())
     # 3. Load LLM (Phi-3 mini)
-    print("🔹 Loading LLM...")
-    model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        device_map="auto",
-        torch_dtype="auto",
-        trust_remote_code=True
     )
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
-        max_new_tokens=256,
-        temperature=0.2,       # deterministic
-        do_sample=False,       # no randomness
-        repetition_penalty=1.1,
         eos_token_id=tokenizer.eos_token_id,
         return_full_text=False
     )
     llm = HuggingFacePipeline(pipeline=pipe)

     print("📂 Docs in DB:", vectorstore._collection.count())
     # 3. Load LLM (microsoft/phi-1_5)
+    print("🔹 Loading LLM...")
+    model_id = "microsoft/phi-1_5"
+    # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # Load model
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="auto",       # put on GPU if available, else CPU
+        torch_dtype="auto",      # auto precision
+        trust_remote_code=True   # allow custom model code
     )
+    # Create pipeline
     pipe = pipeline(
         "text-generation",
         model=model,
         tokenizer=tokenizer,
+        max_new_tokens=256,       # control length of response
+        temperature=0.2,          # more deterministic
+        do_sample=False,          # no randomness (deterministic answers)
+        top_p=0.9,                # nucleus sampling
+        repetition_penalty=1.2,   # 🚀 reduce loops/repeats
         eos_token_id=tokenizer.eos_token_id,
         return_full_text=False
     )
+    # Wrap into LangChain LLM
     llm = HuggingFacePipeline(pipeline=pipe)