Spaces:

Subha95
/

Harry_potter_wiki

Sleeping

Subha95 committed on Sep 4, 2025

Commit

e5b3fe2

verified ·

1 Parent(s): 3464f1c

Update chatbot_rag.py

Files changed (1) hide show

chatbot_rag.py CHANGED Viewed

@@ -30,8 +30,12 @@ def build_qa():
     # 3. Load LLM (Phi-3 mini)
     print("🔹 Loading LLM...")
     model_id = "meta-llama/Llama-3.2-1B-Instruct"   # or "meta-llama/Llama-3.1-1B-Instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
     pipe = pipeline(
         "text-generation",
@@ -39,7 +43,8 @@ def build_qa():
         tokenizer=tokenizer,
         max_new_tokens=256,
         temperature=0.7,
-        do_sample=True
     )
     llm = HuggingFacePipeline(pipeline=pipe)

     # 3. Load LLM (Phi-3 mini)
     print("🔹 Loading LLM...")
     model_id = "meta-llama/Llama-3.2-1B-Instruct"   # or "meta-llama/Llama-3.1-1B-Instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",     # let Accelerate place weights on available devices (GPU if present, otherwise CPU)
+    )
     pipe = pipeline(
         "text-generation",
         tokenizer=tokenizer,
         max_new_tokens=256,
         temperature=0.7,
+        do_sample=True,
+        return_full_text=False   # 🚀 only return new text, avoids messy context echoes
     )
     llm = HuggingFacePipeline(pipeline=pipe)