Subha95 committed on
Commit
52fa7cc
·
verified ·
1 Parent(s): b6660a5

Update chatbot_rag.py

Browse files
Files changed (1) hide show
  1. chatbot_rag.py +20 -20
chatbot_rag.py CHANGED
@@ -38,28 +38,28 @@ def build_qa():
38
  print("📂 Docs in DB:", vectorstore._collection.count())
39
 
40
  # 3. Load LLM
41
- print("🔹 Loading LLM...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- model_id = "meta-llama/Llama-3.2-1B"
44
- tokenizer = AutoTokenizer.from_pretrained(model_id)
45
- model = AutoModelForCausalLM.from_pretrained(
46
- model_id,
47
- device_map="auto",
48
- trust_remote_code=True # ensures it runs on available CPU
49
- )
50
- pipe = pipeline(
51
- "text-generation",
52
- model=model,
53
- tokenizer=tokenizer,
54
- max_new_tokens=256,
55
- temperature=0.2, # keeps answers deterministic but less rigid than 0
56
- do_sample=True, # allow some randomness
57
- top_p=0.9, # nucleus sampling to avoid loops
58
- repetition_penalty=1.2, # 🚀 penalize repeats
59
- eos_token_id=tokenizer.eos_token_id, # stop at EOS
60
- return_full_text=False
61
- )
62
  llm = HuggingFacePipeline(pipeline=pipe)
 
63
  # 4. Retriever
64
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
65
 
 
38
  print("📂 Docs in DB:", vectorstore._collection.count())
39
 
40
  # 3. Load LLM
41
+ print("🔹 Loading LLM...")
42
+ model_id = "openai-community/gpt2-xl" # ~1.5B GPT-2
43
+
44
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
45
+ model = AutoModelForCausalLM.from_pretrained(
46
+ model_id,
47
+ device_map="auto"
48
+ )
49
+
50
+ pipe = pipeline(
51
+ "text-generation",
52
+ model=model,
53
+ tokenizer=tokenizer,
54
+ max_new_tokens=256,
55
+ temperature=0.2,
56
+ do_sample=False,
57
+ repetition_penalty=1.2,
58
+ return_full_text=False
59
+ )
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  llm = HuggingFacePipeline(pipeline=pipe)
62
+
63
  # 4. Retriever
64
  retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
65