Update chatbot_rag.py
chatbot_rag.py CHANGED (+15 -13)
@@ -39,28 +39,30 @@ def build_qa():
 
 # 3. Load LLM (Phi-3 mini)
 print("🔹 Loading LLM...")
-model_id = "meta-llama/Llama-3.2-1B-Instruct"  # or "meta-llama/Llama-3.1-1B-Instruct"
+#model_id = "meta-llama/Llama-3.2-1B-Instruct"  # or "meta-llama/Llama-3.1-1B-Instruct"
+model_id = "PleIAs/Pleias-RAG-1B"
 
+# Load tokenizer & model
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
-    trust_remote_code=True
+    trust_remote_code=True  # important for some HF models
 )
 
+# Build pipeline
 pipe = pipeline(
-
-
-
-
-
-
-
-
-
-
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=256,
+    temperature=0.2,        # more deterministic
+    do_sample=False,        # no random sampling
+    top_p=0.9,              # nucleus sampling (safety if do_sample=True)
+    repetition_penalty=1.2, # avoid loops
+    eos_token_id=tokenizer.eos_token_id,
+    return_full_text=False
 )
-
 
 llm = HuggingFacePipeline(pipeline=pipe)
 
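A note on the new generation settings: because do_sample=False, the pipeline decodes greedily, so temperature and top_p have no effect unless sampling is switched on (at call time or in the pipeline config), while repetition_penalty, max_new_tokens, and return_full_text=False still apply. A minimal sketch of calling the pipeline directly, with a purely illustrative prompt that is not part of the commit:

# Illustrative usage of the pipeline defined above (not part of the commit).
# With do_sample=False decoding is greedy; temperature/top_p only matter if
# do_sample=True is passed here or set on the pipeline.
prompt = "Question: what is retrieval-augmented generation?"  # hypothetical prompt
result = pipe(prompt)
print(result[0]["generated_text"])  # return_full_text=False -> only the completion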
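For context, a rough sketch of how the wrapped llm could plug into the rest of build_qa(); the vector store and retriever below are assumptions about code outside this hunk, not something shown in the diff:

# Hypothetical continuation of build_qa() (not part of the commit): assumes a
# `vectorstore` was built in the earlier steps of chatbot_rag.py and that
# `llm` is the HuggingFacePipeline wrapper created above.
from langchain.chains import RetrievalQA

retriever = vectorstore.as_retriever(search_kwargs={"k": 3})  # assumed retriever
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # stuff retrieved chunks directly into the prompt
    retriever=retriever,
)
# build_qa() would presumably return `qa` for the chat loop to call.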