Subha95 commited on
Commit
3f3e3b8
·
verified ·
1 Parent(s): c23257f

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +15 -9
chatbot_rag.py CHANGED
@@ -39,30 +39,36 @@ def build_qa():
39
  print("📂 Docs in DB:", vectorstore._collection.count())
40
 
41
  # 3. Load LLM (DeepSeek-R1-Distill-Qwen-1.5B)
42
- print("🔹 Loading LLM...")
43
 
44
- model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
45
 
 
46
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 
47
  model = AutoModelForCausalLM.from_pretrained(
48
  model_id,
49
- device_map="auto",
50
- torch_dtype="auto",
51
- trust_remote_code=True
52
  )
53
 
 
54
  pipe = pipeline(
55
  "text-generation",
56
  model=model,
57
  tokenizer=tokenizer,
58
- max_new_tokens=256,
59
- temperature=0.2, # deterministic
60
- do_sample=False, # no randomness
61
- repetition_penalty=1.1,
 
62
  eos_token_id=tokenizer.eos_token_id,
63
  return_full_text=False
64
  )
65
 
 
66
  llm = HuggingFacePipeline(pipeline=pipe)
67
 
68
 
 
39
  print("📂 Docs in DB:", vectorstore._collection.count())
40
 
41
  # 3. Load LLM (Phi-1.5)
42
+ print("🔹 Loading LLM...")
43
 
44
+ model_id = "microsoft/phi-1_5"
45
 
46
+ # Load tokenizer
47
  tokenizer = AutoTokenizer.from_pretrained(model_id)
48
+
49
+ # Load model
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_id,
52
+ device_map="auto", # put on GPU if available, else CPU
53
+ torch_dtype="auto", # auto precision
54
+ trust_remote_code=True # allow custom model code
55
  )
56
 
57
+ # Create pipeline
58
  pipe = pipeline(
59
  "text-generation",
60
  model=model,
61
  tokenizer=tokenizer,
62
+ max_new_tokens=256, # control length of response
63
+ temperature=0.2, # more deterministic
64
+ do_sample=False, # no randomness (deterministic answers)
65
+ top_p=0.9, # nucleus sampling
66
+ repetition_penalty=1.2, # 🚀 reduce loops/repeats
67
  eos_token_id=tokenizer.eos_token_id,
68
  return_full_text=False
69
  )
70
 
71
+ # Wrap into LangChain LLM
72
  llm = HuggingFacePipeline(pipeline=pipe)
73
 
74