Subha95 committed on
Commit
7defe17
·
verified ·
1 Parent(s): 98aeb15

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +16 -26
chatbot_rag.py CHANGED
@@ -38,34 +38,24 @@ def build_qa():
38
  print("📂 Docs in DB:", vectorstore._collection.count())
39
 
40
  # 3. Load LLM (Phi-3 mini)
41
- print("🔹 Loading LLM...")
42
- #model_id = "meta-llama/Llama-3.2-1B-Instruct" # or "meta-llama/Llama-3.1-1B-Instruct"
43
- model_id = "PleIAs/Pleias-RAG-1B"
44
-
45
- # Load tokenizer & model
46
- tokenizer = AutoTokenizer.from_pretrained(model_id)
47
- model = AutoModelForCausalLM.from_pretrained(
48
- model_id,
49
- device_map="auto",
50
- trust_remote_code=True # important for some HF models
51
- )
52
-
53
- # Build pipeline
54
- pipe = pipeline(
55
- "text-generation",
56
- model=model,
57
- tokenizer=tokenizer,
58
- max_new_tokens=256,
59
- temperature=0.2, # more deterministic
60
- do_sample=False, # no random sampling
61
- top_p=0.9, # nucleus sampling (safety if do_sample=True)
62
- repetition_penalty=1.2, # avoid loops
63
- eos_token_id=tokenizer.eos_token_id,
64
- return_full_text=False
65
- )
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  llm = HuggingFacePipeline(pipeline=pipe)
68
-
69
  # 4. Retriever
70
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
71
 
 
38
  print("📂 Docs in DB:", vectorstore._collection.count())
39
 
40
  # 3. Load LLM (Llama-3.2-3B-Instruct)
41
+ print("🔹 Loading LLM...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ model_id = "meta-llama/Llama-3.2-3B-Instruct"
44
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
45
+ model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)  # device_map="auto" places the model on whatever hardware is available
46
+ pipe = pipeline(
47
+ "text-generation",
48
+ model=model,
49
+ tokenizer=tokenizer,
50
+ max_new_tokens=256,
51
+ temperature=0.2, # keeps answers deterministic but less rigid than 0
52
+ do_sample=True, # allow some randomness
53
+ top_p=0.9, # nucleus sampling to avoid loops
54
+ repetition_penalty=1.2, # 🚀 penalize repeats
55
+ eos_token_id=tokenizer.eos_token_id, # stop at EOS
56
+ return_full_text=False
57
+ )
58
  llm = HuggingFacePipeline(pipeline=pipe)
 
59
  # 4. Retriever
60
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
61