jeanbaptdzd committed on
Commit
cb7f3d3
·
1 Parent(s): 4a04968

Strengthen prompts with examples for tool calls and JSON format

Browse files

- Add detailed examples for tool call format
- Add multiple examples for JSON format
- Explicitly forbid reasoning tags in both cases
- Improve instructions clarity

app/providers/transformers_provider.py CHANGED
@@ -270,13 +270,18 @@ class TransformersProvider:
270
  # ✅ Add JSON output requirement to system prompt if response_format requires it
271
  if json_output_required:
272
  json_instruction = (
273
- "\n\nCRITICAL: When response_format is json_object, you MUST respond with ONLY valid JSON. "
274
- "NO reasoning tags (<think>), NO explanations, NO text before or after. "
275
  "Start your response directly with { and end with }. "
276
- "Example: If asked for a number, respond with: {\"nombre\": 5} "
277
- "NOT: <think>...</think>{\"nombre\": 5} "
278
- "NOT: Here is the JSON: {\"nombre\": 5} "
279
- "ONLY: {\"nombre\": 5}"
 
 
 
 
 
280
  )
281
  system_messages = [msg for msg in messages if msg.get("role") == "system"]
282
  if system_messages:
@@ -319,19 +324,40 @@ class TransformersProvider:
319
  ) -> Dict[str, Any]:
320
  """Generate non-streaming response."""
321
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  with torch.no_grad():
323
  outputs = model.generate(
324
  **inputs,
325
- max_new_tokens=max_tokens,
326
- temperature=temperature,
327
- top_p=top_p,
328
- top_k=DEFAULT_TOP_K,
329
- do_sample=temperature > 0,
330
- pad_token_id=PAD_TOKEN_ID,
331
- eos_token_id=EOS_TOKENS,
332
- repetition_penalty=REPETITION_PENALTY,
333
- early_stopping=False,
334
- use_cache=True,
335
  )
336
 
337
  # Extract token counts using tokenizer for accuracy
@@ -513,24 +539,41 @@ class TransformersProvider:
513
 
514
  def _format_tools_for_prompt(self, tools: List[Dict[str, Any]]) -> str:
515
  """Format tools for inclusion in system prompt."""
516
- tools_text = "Vous avez accès aux outils suivants. Utilisez-les quand nécessaire.\n\n"
 
 
 
 
 
517
  for i, tool in enumerate(tools, 1):
518
  func = tool.get("function", {})
519
  name = func.get("name", "")
520
  description = func.get("description", "")
521
  parameters = func.get("parameters", {})
522
 
523
- tools_text += f"Outil {i}: {name}\n"
524
  if description:
525
  tools_text += f"Description: {description}\n"
526
  if parameters:
527
- tools_text += f"Paramètres: {json.dumps(parameters, ensure_ascii=False, indent=2)}\n"
528
  tools_text += "\n"
529
 
530
- tools_text += "Pour utiliser un outil, répondez au format suivant:\n"
531
- tools_text += "<tool_call>\n"
532
- tools_text += '{"name": "nom_de_l_outil", "arguments": {"param1": "valeur1", "param2": "valeur2"}}\n'
533
- tools_text += "</tool_call>\n"
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
  return tools_text
536
 
 
270
  # ✅ Add JSON output requirement to system prompt if response_format requires it
271
  if json_output_required:
272
  json_instruction = (
273
+ "\n\nCRITICAL: response_format is set to json_object. You MUST respond with ONLY valid JSON. "
274
+ "NO <think> tags, NO reasoning, NO explanations, NO text before or after the JSON. "
275
  "Start your response directly with { and end with }. "
276
+ "\n\nEXAMPLES:\n"
277
+ "If asked for a random number 1-10:\n"
278
+ "CORRECT: {\"nombre\": 7}\n"
279
+ "WRONG: <think>I need to generate...</think>{\"nombre\": 7}\n"
280
+ "WRONG: Here is the JSON: {\"nombre\": 7}\n"
281
+ "\nIf asked for portfolio data:\n"
282
+ "CORRECT: {\"positions\": [{\"symbole\": \"AIR.PA\", \"quantite\": 50}]}\n"
283
+ "WRONG: <think>Let me extract...</think>{\"positions\": [...]}\n"
284
+ "\nREMEMBER: Your response must be ONLY the JSON object, nothing else. Do not use <think> tags."
285
  )
286
  system_messages = [msg for msg in messages if msg.get("role") == "system"]
287
  if system_messages:
 
324
  ) -> Dict[str, Any]:
325
  """Generate non-streaming response."""
326
  try:
327
+ # Prepare generation kwargs
328
+ generation_kwargs = {
329
+ "max_new_tokens": max_tokens,
330
+ "temperature": temperature,
331
+ "top_p": top_p,
332
+ "top_k": DEFAULT_TOP_K,
333
+ "do_sample": temperature > 0,
334
+ "pad_token_id": PAD_TOKEN_ID,
335
+ "eos_token_id": EOS_TOKENS,
336
+ "repetition_penalty": REPETITION_PENALTY,
337
+ "early_stopping": False,
338
+ "use_cache": True,
339
+ }
340
+
341
+ # If JSON output is required, try to prevent reasoning tags by adding stop sequences
342
+ # Note: Qwen reasoning models may still generate reasoning, but we'll extract JSON after
343
+ if json_output_required or tools:
344
+ # Try to add stop sequences to prevent reasoning tags
345
+ # Convert stop strings to token IDs if possible
346
+ try:
347
+ # Try to encode reasoning tag opening as stop sequence
348
+ reasoning_token = tokenizer.encode("<think>", add_special_tokens=False)
349
+ if reasoning_token:
350
+ # Add as stop sequence (if model supports it)
351
+ # Note: Not all models support stop_sequences parameter directly
352
+ # We'll handle this in post-processing instead
353
+ pass
354
+ except:
355
+ pass
356
+
357
  with torch.no_grad():
358
  outputs = model.generate(
359
  **inputs,
360
+ **generation_kwargs,
 
 
 
 
 
 
 
 
 
361
  )
362
 
363
  # Extract token counts using tokenizer for accuracy
 
539
 
540
  def _format_tools_for_prompt(self, tools: List[Dict[str, Any]]) -> str:
541
  """Format tools for inclusion in system prompt."""
542
+ tools_text = (
543
+ "CRITICAL: You have access to the following tools. When you need to use a tool, "
544
+ "you MUST respond ONLY with the tool call format below. NO reasoning tags, NO explanations, "
545
+ "ONLY the tool call format.\n\n"
546
+ )
547
+
548
  for i, tool in enumerate(tools, 1):
549
  func = tool.get("function", {})
550
  name = func.get("name", "")
551
  description = func.get("description", "")
552
  parameters = func.get("parameters", {})
553
 
554
+ tools_text += f"Tool {i}: {name}\n"
555
  if description:
556
  tools_text += f"Description: {description}\n"
557
  if parameters:
558
+ tools_text += f"Parameters: {json.dumps(parameters, ensure_ascii=False, indent=2)}\n"
559
  tools_text += "\n"
560
 
561
+ tools_text += (
562
+ "TO USE A TOOL, respond EXACTLY in this format (NO reasoning, NO text before or after):\n"
563
+ "<tool_call>\n"
564
+ '{"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}\n'
565
+ "</tool_call>\n\n"
566
+ "EXAMPLE 1 - If asked to calculate future value:\n"
567
+ "<tool_call>\n"
568
+ '{"name": "calculer_valeur_future", "arguments": {"capital_initial": 10000, "taux": 0.05, "duree": 10}}\n'
569
+ "</tool_call>\n\n"
570
+ "EXAMPLE 2 - If asked to get stock price:\n"
571
+ "<tool_call>\n"
572
+ '{"name": "obtenir_prix_action", "arguments": {"symbole": "AIR.PA"}}\n'
573
+ "</tool_call>\n\n"
574
+ "IMPORTANT: Start your response directly with <tool_call>. Do NOT include <think> tags or any reasoning. "
575
+ "The tool call format is the ONLY thing you should output when using a tool."
576
+ )
577
 
578
  return tools_text
579