Spaces:
Sleeping
Sleeping
Hemanth-05 committed on
Commit ·
9eee9c8
1
Parent(s): 79eb9e2
fix(rag): switch HF generation from text_generation to chat_completion for Mistral
Browse files- services/rag_engine.py +14 -6
services/rag_engine.py
CHANGED
|
@@ -176,15 +176,23 @@ def _generate_answer(question: str, context_chunks: list[dict]) -> str:
|
|
| 176 |
client = InferenceClient(token=token, timeout=TIMEOUT_SEC)
|
| 177 |
prompt = _build_prompt(question, context_chunks)
|
| 178 |
|
| 179 |
-
|
| 180 |
-
prompt=prompt,
|
| 181 |
model=GEN_MODEL,
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
temperature=TEMPERATURE,
|
| 184 |
-
do_sample=True,
|
| 185 |
-
return_full_text=False,
|
| 186 |
)
|
| 187 |
-
|
|
|
|
| 188 |
|
| 189 |
|
| 190 |
def rag_answer(question: str, notebook_id: str) -> dict:
|
|
|
|
| 176 |
client = InferenceClient(token=token, timeout=TIMEOUT_SEC)
|
| 177 |
prompt = _build_prompt(question, context_chunks)
|
| 178 |
|
| 179 |
+
response = client.chat_completion(
|
|
|
|
| 180 |
model=GEN_MODEL,
|
| 181 |
+
messages=[
|
| 182 |
+
{
|
| 183 |
+
"role": "system",
|
| 184 |
+
"content": (
|
| 185 |
+
"You are a grounded assistant. Use only the provided context, "
|
| 186 |
+
"and explicitly say when the answer is not present."
|
| 187 |
+
),
|
| 188 |
+
},
|
| 189 |
+
{"role": "user", "content": prompt},
|
| 190 |
+
],
|
| 191 |
+
max_tokens=MAX_NEW_TOKENS,
|
| 192 |
temperature=TEMPERATURE,
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
+
content = response.choices[0].message.content if response and response.choices else ""
|
| 195 |
+
return (content or "").strip()
|
| 196 |
|
| 197 |
|
| 198 |
def rag_answer(question: str, notebook_id: str) -> dict:
|