Subha95 committed
Commit 86d9f08 · verified · 1 Parent(s): 29c649d

Update chatbot_rag.py

Files changed (1)
  1. chatbot_rag.py +29 -21
chatbot_rag.py CHANGED
@@ -40,29 +40,37 @@ def build_qa():
 
     # 3. Load LLM (Phi-3 mini)
     print("🔹 Loading LLM...")
-
-    model_id = "meta-llama/Llama-3.2-3B-Instruct"
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        device_map="auto",
-        trust_remote_code=True  # ensures it runs on available CPU
-    )
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=256,
-        temperature=0.2,  # keeps answers deterministic but less rigid than 0
-        do_sample=True,  # allow some randomness
-        top_p=0.9,  # nucleus sampling to avoid loops
-        repetition_penalty=1.2,  # 🚀 penalize repeats
-        eos_token_id=tokenizer.eos_token_id,  # stop at EOS
-        return_full_text=False
-    )
+
+    model_id = "microsoft/phi-2"
+
+    # Load tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    # Load model
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        device_map="auto",      # put on GPU if available, else CPU
+        torch_dtype="auto",     # auto precision
+        trust_remote_code=True  # allow custom model code
+    )
+
+    # Create pipeline
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=256,      # control length of response
+        temperature=0.2,         # more deterministic
+        do_sample=False,         # no randomness (deterministic answers)
+        top_p=0.9,               # nucleus sampling
+        repetition_penalty=1.2,  # 🚀 reduce loops/repeats
+        eos_token_id=tokenizer.eos_token_id,
+        return_full_text=False
+    )
+
+    # Wrap into LangChain LLM
     llm = HuggingFacePipeline(pipeline=pipe)
 
-
     # 4. Retriever
     retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
 
@@ -79,7 +87,7 @@ def build_qa():
     {context}
 
     Question: {question}
-    Answer (one short sentence):
+    Answer :
     """,
     )
 
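
For anyone reproducing this change, the sketch below shows one way the updated block could sit inside a complete build_qa(). It is an illustration, not the file's actual contents: the import paths, the vectorstore parameter (built in earlier steps of the real file), the prompt's leading instruction line, and the RetrievalQA wiring are all assumptions; the model id, pipeline arguments, and retriever line are taken from the hunks above.

# Rough sketch of the post-commit build_qa(); imports and chain wiring are assumed.
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline  # import path assumed; newer setups use langchain_huggingface
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

def build_qa(vectorstore):
    # In the real file the vectorstore is created in steps 1-2; here it is a parameter for brevity.

    # 3. Load LLM
    print("🔹 Loading LLM...")
    model_id = "microsoft/phi-2"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",       # GPU if available, else CPU
        torch_dtype="auto",      # pick precision automatically
        trust_remote_code=True,
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        temperature=0.2,         # inert while do_sample=False; transformers only uses it when sampling
        do_sample=False,         # greedy decoding for deterministic answers
        top_p=0.9,               # likewise inert without sampling
        repetition_penalty=1.2,  # discourage loops/repeats
        eos_token_id=tokenizer.eos_token_id,
        return_full_text=False,  # return only the generated answer, not the prompt
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    # 4. Retriever over the existing vectorstore
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # 5. Prompt + chain (instruction line is a placeholder; only the template's tail appears in the diff)
    prompt = PromptTemplate(
        input_variables=["context", "question"],
        template="""Answer the question using only the context below.

    {context}

    Question: {question}
    Answer :
    """,
    )
    return RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",                    # stuff the retrieved chunks into {context}
        chain_type_kwargs={"prompt": prompt},
    )

A quick smoke test, given any LangChain-compatible vectorstore (e.g. FAISS):

qa = build_qa(vectorstore)
print(qa.invoke({"query": "What is this document about?"})["result"])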