Commit
·
cb7f3d3
1
Parent(s):
4a04968
Strengthen prompts with examples for tool calls and JSON format
Browse files
- Add detailed examples for tool call format
- Add multiple examples for JSON format
- Explicitly forbid reasoning tags in both cases
- Improve instructions clarity
app/providers/transformers_provider.py
CHANGED
|
@@ -270,13 +270,18 @@ class TransformersProvider:
|
|
| 270 |
# ✅ Add JSON output requirement to system prompt if response_format requires it
|
| 271 |
if json_output_required:
|
| 272 |
json_instruction = (
|
| 273 |
-
"\n\nCRITICAL:
|
| 274 |
-
"NO
|
| 275 |
"Start your response directly with { and end with }. "
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
)
|
| 281 |
system_messages = [msg for msg in messages if msg.get("role") == "system"]
|
| 282 |
if system_messages:
|
|
@@ -319,19 +324,40 @@ class TransformersProvider:
|
|
| 319 |
) -> Dict[str, Any]:
|
| 320 |
"""Generate non-streaming response."""
|
| 321 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
with torch.no_grad():
|
| 323 |
outputs = model.generate(
|
| 324 |
**inputs,
|
| 325 |
-
|
| 326 |
-
temperature=temperature,
|
| 327 |
-
top_p=top_p,
|
| 328 |
-
top_k=DEFAULT_TOP_K,
|
| 329 |
-
do_sample=temperature > 0,
|
| 330 |
-
pad_token_id=PAD_TOKEN_ID,
|
| 331 |
-
eos_token_id=EOS_TOKENS,
|
| 332 |
-
repetition_penalty=REPETITION_PENALTY,
|
| 333 |
-
early_stopping=False,
|
| 334 |
-
use_cache=True,
|
| 335 |
)
|
| 336 |
|
| 337 |
# Extract token counts using tokenizer for accuracy
|
|
@@ -513,24 +539,41 @@ class TransformersProvider:
|
|
| 513 |
|
| 514 |
def _format_tools_for_prompt(self, tools: List[Dict[str, Any]]) -> str:
|
| 515 |
"""Format tools for inclusion in system prompt."""
|
| 516 |
-
tools_text =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 517 |
for i, tool in enumerate(tools, 1):
|
| 518 |
func = tool.get("function", {})
|
| 519 |
name = func.get("name", "")
|
| 520 |
description = func.get("description", "")
|
| 521 |
parameters = func.get("parameters", {})
|
| 522 |
|
| 523 |
-
tools_text += f"
|
| 524 |
if description:
|
| 525 |
tools_text += f"Description: {description}\n"
|
| 526 |
if parameters:
|
| 527 |
-
tools_text += f"
|
| 528 |
tools_text += "\n"
|
| 529 |
|
| 530 |
-
tools_text +=
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
|
| 535 |
return tools_text
|
| 536 |
|
|
|
|
| 270 |
# ✅ Add JSON output requirement to system prompt if response_format requires it
|
| 271 |
if json_output_required:
|
| 272 |
json_instruction = (
|
| 273 |
+
"\n\nCRITICAL: response_format is set to json_object. You MUST respond with ONLY valid JSON. "
|
| 274 |
+
"NO <think> tags, NO reasoning, NO explanations, NO text before or after the JSON. "
|
| 275 |
"Start your response directly with { and end with }. "
|
| 276 |
+
"\n\nEXAMPLES:\n"
|
| 277 |
+
"If asked for a random number 1-10:\n"
|
| 278 |
+
"CORRECT: {\"nombre\": 7}\n"
|
| 279 |
+
"WRONG: <think>I need to generate...</think>{\"nombre\": 7}\n"
|
| 280 |
+
"WRONG: Here is the JSON: {\"nombre\": 7}\n"
|
| 281 |
+
"\nIf asked for portfolio data:\n"
|
| 282 |
+
"CORRECT: {\"positions\": [{\"symbole\": \"AIR.PA\", \"quantite\": 50}]}\n"
|
| 283 |
+
"WRONG: <think>Let me extract...</think>{\"positions\": [...]}\n"
|
| 284 |
+
"\nREMEMBER: Your response must be ONLY the JSON object, nothing else. Do not use <think> tags."
|
| 285 |
)
|
| 286 |
system_messages = [msg for msg in messages if msg.get("role") == "system"]
|
| 287 |
if system_messages:
|
|
|
|
| 324 |
) -> Dict[str, Any]:
|
| 325 |
"""Generate non-streaming response."""
|
| 326 |
try:
|
| 327 |
+
# Prepare generation kwargs
|
| 328 |
+
generation_kwargs = {
|
| 329 |
+
"max_new_tokens": max_tokens,
|
| 330 |
+
"temperature": temperature,
|
| 331 |
+
"top_p": top_p,
|
| 332 |
+
"top_k": DEFAULT_TOP_K,
|
| 333 |
+
"do_sample": temperature > 0,
|
| 334 |
+
"pad_token_id": PAD_TOKEN_ID,
|
| 335 |
+
"eos_token_id": EOS_TOKENS,
|
| 336 |
+
"repetition_penalty": REPETITION_PENALTY,
|
| 337 |
+
"early_stopping": False,
|
| 338 |
+
"use_cache": True,
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
# If JSON output is required, try to prevent reasoning tags by adding stop sequences
|
| 342 |
+
# Note: Qwen reasoning models may still generate reasoning, but we'll extract JSON after
|
| 343 |
+
if json_output_required or tools:
|
| 344 |
+
# Try to add stop sequences to prevent reasoning tags
|
| 345 |
+
# Convert stop strings to token IDs if possible
|
| 346 |
+
try:
|
| 347 |
+
# Try to encode reasoning tag opening as stop sequence
|
| 348 |
+
reasoning_token = tokenizer.encode("<think>", add_special_tokens=False)
|
| 349 |
+
if reasoning_token:
|
| 350 |
+
# Add as stop sequence (if model supports it)
|
| 351 |
+
# Note: Not all models support stop_sequences parameter directly
|
| 352 |
+
# We'll handle this in post-processing instead
|
| 353 |
+
pass
|
| 354 |
+
except:
|
| 355 |
+
pass
|
| 356 |
+
|
| 357 |
with torch.no_grad():
|
| 358 |
outputs = model.generate(
|
| 359 |
**inputs,
|
| 360 |
+
**generation_kwargs,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
)
|
| 362 |
|
| 363 |
# Extract token counts using tokenizer for accuracy
|
|
|
|
| 539 |
|
| 540 |
def _format_tools_for_prompt(self, tools: List[Dict[str, Any]]) -> str:
|
| 541 |
"""Format tools for inclusion in system prompt."""
|
| 542 |
+
tools_text = (
|
| 543 |
+
"CRITICAL: You have access to the following tools. When you need to use a tool, "
|
| 544 |
+
"you MUST respond ONLY with the tool call format below. NO reasoning tags, NO explanations, "
|
| 545 |
+
"ONLY the tool call format.\n\n"
|
| 546 |
+
)
|
| 547 |
+
|
| 548 |
for i, tool in enumerate(tools, 1):
|
| 549 |
func = tool.get("function", {})
|
| 550 |
name = func.get("name", "")
|
| 551 |
description = func.get("description", "")
|
| 552 |
parameters = func.get("parameters", {})
|
| 553 |
|
| 554 |
+
tools_text += f"Tool {i}: {name}\n"
|
| 555 |
if description:
|
| 556 |
tools_text += f"Description: {description}\n"
|
| 557 |
if parameters:
|
| 558 |
+
tools_text += f"Parameters: {json.dumps(parameters, ensure_ascii=False, indent=2)}\n"
|
| 559 |
tools_text += "\n"
|
| 560 |
|
| 561 |
+
tools_text += (
|
| 562 |
+
"TO USE A TOOL, respond EXACTLY in this format (NO reasoning, NO text before or after):\n"
|
| 563 |
+
"<tool_call>\n"
|
| 564 |
+
'{"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}\n'
|
| 565 |
+
"</tool_call>\n\n"
|
| 566 |
+
"EXAMPLE 1 - If asked to calculate future value:\n"
|
| 567 |
+
"<tool_call>\n"
|
| 568 |
+
'{"name": "calculer_valeur_future", "arguments": {"capital_initial": 10000, "taux": 0.05, "duree": 10}}\n'
|
| 569 |
+
"</tool_call>\n\n"
|
| 570 |
+
"EXAMPLE 2 - If asked to get stock price:\n"
|
| 571 |
+
"<tool_call>\n"
|
| 572 |
+
'{"name": "obtenir_prix_action", "arguments": {"symbole": "AIR.PA"}}\n'
|
| 573 |
+
"</tool_call>\n\n"
|
| 574 |
+
"IMPORTANT: Start your response directly with <tool_call>. Do NOT include <think> tags or any reasoning. "
|
| 575 |
+
"The tool call format is the ONLY thing you should output when using a tool."
|
| 576 |
+
)
|
| 577 |
|
| 578 |
return tools_text
|
| 579 |
|