Subha95 committed on
Commit
c23257f
·
verified ·
1 Parent(s): 0a510f7

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +23 -20
chatbot_rag.py CHANGED
@@ -39,27 +39,30 @@ def build_qa():
39
  print("📂 Docs in DB:", vectorstore._collection.count())
40
 
41
  # 3. Load LLM (Llama-3.2-1B-Instruct)
42
- print("🔹 Loading LLM...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- model_id = "meta-llama/Llama-3.2-1B-Instruct"
45
- tokenizer = AutoTokenizer.from_pretrained(model_id)
46
- model = AutoModelForCausalLM.from_pretrained(
47
- model_id,
48
- device_map="auto",
49
- trust_remote_code=True # ensures it runs on available CPU
50
- )
51
- pipe = pipeline(
52
- "text-generation",
53
- model=model,
54
- tokenizer=tokenizer,
55
- max_new_tokens=256,
56
- temperature=0.2, # keeps answers deterministic but less rigid than 0
57
- do_sample=True, # allow some randomness
58
- top_p=0.9, # nucleus sampling to avoid loops
59
- repetition_penalty=1.2, # 🚀 penalize repeats
60
- eos_token_id=tokenizer.eos_token_id, # stop at EOS
61
- return_full_text=False
62
- )
63
  llm = HuggingFacePipeline(pipeline=pipe)
64
 
65
 
 
39
  print("📂 Docs in DB:", vectorstore._collection.count())
40
 
41
  # 3. Load LLM (DeepSeek-R1-Distill-Qwen-1.5B)
42
+ print("🔹 Loading LLM...")
43
+
44
+ model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
45
+
46
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
47
+ model = AutoModelForCausalLM.from_pretrained(
48
+ model_id,
49
+ device_map="auto",
50
+ torch_dtype="auto",
51
+ trust_remote_code=True
52
+ )
53
+
54
+ pipe = pipeline(
55
+ "text-generation",
56
+ model=model,
57
+ tokenizer=tokenizer,
58
+ max_new_tokens=256,
59
+ temperature=0.2, # NOTE: ignored when do_sample=False (greedy decoding)
60
+ do_sample=False, # no randomness
61
+ repetition_penalty=1.1,
62
+ eos_token_id=tokenizer.eos_token_id,
63
+ return_full_text=False
64
+ )
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  llm = HuggingFacePipeline(pipeline=pipe)
67
 
68