Hemanth-05 committed on
Commit
9eee9c8
·
1 Parent(s): 79eb9e2

fix(rag): switch HF generation from text_generation to chat_completion for Mistral

Browse files
Files changed (1) hide show
  1. services/rag_engine.py +14 -6
services/rag_engine.py CHANGED
@@ -176,15 +176,23 @@ def _generate_answer(question: str, context_chunks: list[dict]) -> str:
176
  client = InferenceClient(token=token, timeout=TIMEOUT_SEC)
177
  prompt = _build_prompt(question, context_chunks)
178
 
179
- output = client.text_generation(
180
- prompt=prompt,
181
  model=GEN_MODEL,
182
- max_new_tokens=MAX_NEW_TOKENS,
 
 
 
 
 
 
 
 
 
 
183
  temperature=TEMPERATURE,
184
- do_sample=True,
185
- return_full_text=False,
186
  )
187
- return (output or "").strip()
 
188
 
189
 
190
  def rag_answer(question: str, notebook_id: str) -> dict:
 
176
  client = InferenceClient(token=token, timeout=TIMEOUT_SEC)
177
  prompt = _build_prompt(question, context_chunks)
178
 
179
+ response = client.chat_completion(
 
180
  model=GEN_MODEL,
181
+ messages=[
182
+ {
183
+ "role": "system",
184
+ "content": (
185
+ "You are a grounded assistant. Use only the provided context, "
186
+ "and explicitly say when the answer is not present."
187
+ ),
188
+ },
189
+ {"role": "user", "content": prompt},
190
+ ],
191
+ max_tokens=MAX_NEW_TOKENS,
192
  temperature=TEMPERATURE,
 
 
193
  )
194
+ content = response.choices[0].message.content if response and response.choices else ""
195
+ return (content or "").strip()
196
 
197
 
198
  def rag_answer(question: str, notebook_id: str) -> dict: