Subha95 commited on
Commit
e5b3fe2
·
verified ·
1 Parent(s): 3464f1c

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +7 -2
chatbot_rag.py CHANGED
@@ -30,8 +30,12 @@ def build_qa():
30
  # 3. Load LLM (Llama-3.2-1B-Instruct)
31
  print("🔹 Loading LLM...")
32
  model_id = "meta-llama/Llama-3.2-1B-Instruct" # or "meta-llama/Llama-3.2-3B-Instruct"
 
33
  tokenizer = AutoTokenizer.from_pretrained(model_id)
34
- model = AutoModelForCausalLM.from_pretrained(model_id)
 
 
 
35
 
36
  pipe = pipeline(
37
  "text-generation",
@@ -39,7 +43,8 @@ def build_qa():
39
  tokenizer=tokenizer,
40
  max_new_tokens=256,
41
  temperature=0.7,
42
- do_sample=True
 
43
  )
44
 
45
  llm = HuggingFacePipeline(pipeline=pipe)
 
30
  # 3. Load LLM (Llama-3.2-1B-Instruct)
31
  print("🔹 Loading LLM...")
32
  model_id = "meta-llama/Llama-3.2-1B-Instruct" # or "meta-llama/Llama-3.2-3B-Instruct"
33
+
34
  tokenizer = AutoTokenizer.from_pretrained(model_id)
35
+ model = AutoModelForCausalLM.from_pretrained(
36
+ model_id,
37
+ device_map="auto", # auto-place model on available device(s): GPU if present, else CPU
38
+ )
39
 
40
  pipe = pipeline(
41
  "text-generation",
 
43
  tokenizer=tokenizer,
44
  max_new_tokens=256,
45
  temperature=0.7,
46
+ do_sample=True,
47
+ return_full_text=False # 🚀 only return new text, avoids messy context echoes
48
  )
49
 
50
  llm = HuggingFacePipeline(pipeline=pipe)