Subha95 committed on
Commit
98aeb15
·
verified ·
1 Parent(s): 7c4e1db

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +15 -13
chatbot_rag.py CHANGED
@@ -39,28 +39,30 @@ def build_qa():
39
 
40
  # 3. Load LLM (Phi-3 mini)
41
  print("🔹 Loading LLM...")
42
- model_id = "meta-llama/Llama-3.2-1B-Instruct" # or "meta-llama/Llama-3.1-1B-Instruct"
 
43
 
 
44
  tokenizer = AutoTokenizer.from_pretrained(model_id)
45
  model = AutoModelForCausalLM.from_pretrained(
46
  model_id,
47
  device_map="auto",
48
- trust_remote_code=True # ensures it runs on available CPU
49
  )
50
 
 
51
  pipe = pipeline(
52
- "text-generation",
53
- model=model,
54
- tokenizer=tokenizer,
55
- max_new_tokens=256,
56
- temperature=0.2, # keeps answers deterministic but less rigid than 0
57
- do_sample=False, # allow some randomness
58
- top_p=0.9, # nucleus sampling to avoid loops
59
- repetition_penalty=1.2, # 🚀 penalize repeats
60
- eos_token_id=tokenizer.eos_token_id, # stop at EOS
61
- return_full_text=False
62
  )
63
-
64
 
65
  llm = HuggingFacePipeline(pipeline=pipe)
66
 
 
39
 
40
  # 3. Load LLM (Phi-3 mini)
41
  print("🔹 Loading LLM...")
42
+ #model_id = "meta-llama/Llama-3.2-1B-Instruct" # or "meta-llama/Llama-3.1-1B-Instruct"
43
+ model_id = "PleIAs/Pleias-RAG-1B"
44
 
45
+ # Load tokenizer & model
46
  tokenizer = AutoTokenizer.from_pretrained(model_id)
47
  model = AutoModelForCausalLM.from_pretrained(
48
  model_id,
49
  device_map="auto",
50
+ trust_remote_code=True # important for some HF models
51
  )
52
 
53
+ # Build pipeline
54
  pipe = pipeline(
55
+ "text-generation",
56
+ model=model,
57
+ tokenizer=tokenizer,
58
+ max_new_tokens=256,
59
+ temperature=0.2, # more deterministic
60
+ do_sample=False, # no random sampling
61
+ top_p=0.9, # nucleus sampling (safety if do_sample=True)
62
+ repetition_penalty=1.2, # avoid loops
63
+ eos_token_id=tokenizer.eos_token_id,
64
+ return_full_text=False
65
  )
 
66
 
67
  llm = HuggingFacePipeline(pipeline=pipe)
68