Hemanth-05 committed on
Commit
9eee9c8
·
1 Parent(s): 79eb9e2

fix(rag): switch HF generation from text_generation to chat_completion for Mistral

Browse files
Files changed (1) hide show
  1. services/rag_engine.py +14 -6
services/rag_engine.py CHANGED
@@ -176,15 +176,23 @@ def _generate_answer(question: str, context_chunks: list[dict]) -> str:
176
  client = InferenceClient(token=token, timeout=TIMEOUT_SEC)
177
  prompt = _build_prompt(question, context_chunks)
178
 
179
- output = client.text_generation(
180
- prompt=prompt,
181
  model=GEN_MODEL,
182
- max_new_tokens=MAX_NEW_TOKENS,
 
 
 
 
 
 
 
 
 
 
183
  temperature=TEMPERATURE,
184
- do_sample=True,
185
- return_full_text=False,
186
  )
187
- return (output or "").strip()
 
188
 
189
 
190
  def rag_answer(question: str, notebook_id: str) -> dict:
 
176
  client = InferenceClient(token=token, timeout=TIMEOUT_SEC)
177
  prompt = _build_prompt(question, context_chunks)
178
 
179
+ response = client.chat_completion(
 
180
  model=GEN_MODEL,
181
+ messages=[
182
+ {
183
+ "role": "system",
184
+ "content": (
185
+ "You are a grounded assistant. Use only the provided context, "
186
+ "and explicitly say when the answer is not present."
187
+ ),
188
+ },
189
+ {"role": "user", "content": prompt},
190
+ ],
191
+ max_tokens=MAX_NEW_TOKENS,
192
  temperature=TEMPERATURE,
 
 
193
  )
194
+ content = response.choices[0].message.content if response and response.choices else ""
195
+ return (content or "").strip()
196
 
197
 
198
  def rag_answer(question: str, notebook_id: str) -> dict: