mnoorchenar committed on
Commit
3544fa1
·
1 Parent(s): 945cec1

Update 2026-03-22 07:57:50

Browse files
Files changed (2) hide show
  1. agent/llm.py +60 -15
  2. code.txt +60 -15
agent/llm.py CHANGED
@@ -26,17 +26,39 @@ Rules:
26
  - After receiving tool results, always write a Final Answer
27
  - Maximum 4 tool calls per turn"""
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def build_messages(user_msg: str, history: list, tool_obs: list) -> list:
30
- msgs = [{"role":"system","content":SYSTEM_PROMPT}]
31
  for m in history[-12:]:
32
- if m.get("role") in ("user","assistant"):
33
- msgs.append({"role":m["role"],"content":m["content"]})
34
- msgs.append({"role":"user","content":user_msg})
35
  if tool_obs:
36
  obs = "\n\n".join(f"[{o['tool']} result]\n{o['result']}" for o in tool_obs)
37
- msgs.append({"role":"user","content":f"Tool results:\n{obs}\n\nNow write your Final Answer."})
38
  return msgs
39
 
 
40
  def parse_tool_call(text: str) -> Optional[tuple]:
41
  action = re.search(r"Action:\s*(\w+)", text, re.IGNORECASE)
42
  if not action:
@@ -57,24 +79,47 @@ def parse_tool_call(text: str) -> Optional[tuple]:
57
  return name, {"query": r}
58
  return name, {}
59
 
 
60
  def parse_final_answer(text: str) -> Optional[str]:
61
  m = re.search(r"Final Answer:\s*(.+)", text, re.DOTALL | re.IGNORECASE)
62
  if m:
63
  return re.sub(r"\s*---\s*$", "", m.group(1)).strip()
64
  return None
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def call_llm_streaming(client: InferenceClient, model: str, messages: list,
67
  emit_token: Callable[[str], None], max_tokens: int = 900) -> str:
68
- full = ""
69
  try:
70
- for chunk in client.chat_completion(messages=messages, model=model,
71
- max_tokens=max_tokens, temperature=0.25, stream=True):
72
- delta = chunk.choices[0].delta.content
73
- if delta:
74
- full += delta
75
- emit_token(delta)
76
  except Exception as e:
77
- msg = f"\n[LLM error: {str(e)[:120]}]"
78
- full += msg
 
 
 
 
 
 
 
 
 
 
 
 
79
  emit_token(msg)
80
- return full
 
26
  - After receiving tool results, always write a Final Answer
27
  - Maximum 4 tool calls per turn"""
28
 
29
+
30
def _merge_system_into_user(messages: list) -> list:
    """Fold system-role content into the first user message.

    Fallback for models that reject the ``system`` role (e.g. Mistral v0.3
    on the free HF tier).

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A new list with every system message removed. Their content is
        prepended, under an ``[Instructions]`` header, to the first user
        message; if there is no user message, a leading user message holding
        only the instructions is inserted instead. The input list and its
        dicts are not modified.
    """
    system_parts = []
    out = []
    for m in messages:
        if m["role"] == "system":
            # Keep every system message rather than only the last one seen.
            if m["content"]:
                system_parts.append(m["content"])
        else:
            out.append(m)

    sys_content = "\n\n".join(system_parts)
    if not sys_content:
        return out

    first_user_idx = next((i for i, m in enumerate(out) if m["role"] == "user"), None)
    if first_user_idx is None:
        # No user turn to attach to: lead with one so the instructions are
        # not silently dropped from the conversation.
        out.insert(0, {"role": "user", "content": f"[Instructions]\n{sys_content}"})
    else:
        # Build a fresh dict so the caller's message object stays untouched.
        out[first_user_idx] = {
            "role": "user",
            "content": f"[Instructions]\n{sys_content}\n\n[Customer message]\n{out[first_user_idx]['content']}",
        }
    return out
48
+
49
+
50
def build_messages(user_msg: str, history: list, tool_obs: list) -> list:
    """Assemble the chat message list for one LLM call.

    Args:
        user_msg: The current user message.
        history: Earlier turns as dicts; only the last 12 entries are
            considered, and only those whose role is "user" or "assistant".
        tool_obs: Tool observations, each a dict with "tool" and "result"
            keys. When non-empty, an extra user message carrying the results
            and a request for the Final Answer is appended.

    Returns:
        The system prompt, the retained history, the current user message,
        and (optionally) the tool-results message, in that order.
    """
    recent_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history[-12:]
        if turn.get("role") in ("user", "assistant")
    ]
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        *recent_turns,
        {"role": "user", "content": user_msg},
    ]
    if not tool_obs:
        return conversation

    sections = [f"[{entry['tool']} result]\n{entry['result']}" for entry in tool_obs]
    joined = "\n\n".join(sections)
    conversation.append(
        {"role": "user", "content": f"Tool results:\n{joined}\n\nNow write your Final Answer."}
    )
    return conversation
60
 
61
+
62
  def parse_tool_call(text: str) -> Optional[tuple]:
63
  action = re.search(r"Action:\s*(\w+)", text, re.IGNORECASE)
64
  if not action:
 
79
  return name, {"query": r}
80
  return name, {}
81
 
82
+
83
def parse_final_answer(text: str) -> Optional[str]:
    """Return the text following the first "Final Answer:" marker.

    The marker is matched case-insensitively and the answer runs to the end
    of ``text``. A trailing ``---`` separator and surrounding whitespace are
    removed. Returns ``None`` when no marker with following text is found.
    """
    found = re.search(r"Final Answer:\s*(.+)", text, re.DOTALL | re.IGNORECASE)
    if found is None:
        return None
    answer = found.group(1)
    # Drop a closing "---" rule if the model ended its answer with one.
    answer = re.sub(r"\s*---\s*$", "", answer)
    return answer.strip()
88
 
89
+
90
def _try_stream(client: InferenceClient, model: str, messages: list,
                emit_token: Callable[[str], None], max_tokens: int) -> str:
    """Stream one chat completion, forwarding each text delta to ``emit_token``.

    Returns the concatenation of all non-empty deltas. Exceptions raised by
    the client propagate to the caller.
    """
    parts = []
    for chunk in client.chat_completion(
        messages=messages, model=model,
        max_tokens=max_tokens, temperature=0.25, stream=True,
    ):
        # Guard against chunks that carry no choices; indexing [0] on them
        # would raise IndexError. NOTE(review): seen with OpenAI-compatible
        # backends that append a usage-only chunk - confirm for this provider.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            parts.append(delta)
            emit_token(delta)
    return "".join(parts)


def call_llm_streaming(client: InferenceClient, model: str, messages: list,
                       emit_token: Callable[[str], None], max_tokens: int = 900) -> str:
    """Stream a chat completion, retrying once with the system prompt merged.

    The first attempt sends ``messages`` unchanged. If it fails with what
    looks like a bad-request / role error before any text was streamed, a
    second attempt is made with the system prompt folded into the first user
    message. Errors never propagate: they are reported as a bracketed note.

    Args:
        client: Object exposing ``chat_completion(..., stream=True)``.
        model: Model identifier passed through to the client.
        messages: Chat messages to send.
        emit_token: Callback invoked with every streamed delta, the retry
            notice, and any error note.
        max_tokens: Generation limit passed through to the client.

    Returns:
        The streamed model text; on failure, the text streamed so far
        followed by the error note. The retry notice is emitted but not
        included in the return value.
    """
    streamed = []

    def _emit_and_record(token: str) -> None:
        # Track model text so a mid-stream failure does not lose it.
        streamed.append(token)
        emit_token(token)

    def _fail(label: str, exc: Exception) -> str:
        note = f"\n[{label}: {str(exc)[:180]}]"
        emit_token(note)
        # Return what the consumer actually saw: partial text plus the note.
        return "".join(streamed) + note

    # Attempt 1: standard messages with system role
    try:
        return _try_stream(client, model, messages, _emit_and_record, max_tokens)
    except Exception as e:
        err_str = str(e)
        # Only retry on bad-request / role errors; surface all others immediately
        retryable = "Bad request" in err_str or "400" in err_str or "role" in err_str.lower()
        # Once text has been emitted, a retry would stream a second answer on
        # top of the first, so report the failure instead.
        if streamed or not retryable:
            return _fail("LLM error", e)

    # Attempt 2: merge system prompt into first user message as fallback
    emit_token("\n[Retrying with merged prompt…]\n")
    merged = _merge_system_into_user(messages)
    try:
        return _try_stream(client, model, merged, _emit_and_record, max_tokens)
    except Exception as e2:
        return _fail("LLM error after retry", e2)
code.txt CHANGED
@@ -247,17 +247,39 @@ Rules:
247
  - After receiving tool results, always write a Final Answer
248
  - Maximum 4 tool calls per turn"""
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def build_messages(user_msg: str, history: list, tool_obs: list) -> list:
251
- msgs = [{"role":"system","content":SYSTEM_PROMPT}]
252
  for m in history[-12:]:
253
- if m.get("role") in ("user","assistant"):
254
- msgs.append({"role":m["role"],"content":m["content"]})
255
- msgs.append({"role":"user","content":user_msg})
256
  if tool_obs:
257
  obs = "\n\n".join(f"[{o['tool']} result]\n{o['result']}" for o in tool_obs)
258
- msgs.append({"role":"user","content":f"Tool results:\n{obs}\n\nNow write your Final Answer."})
259
  return msgs
260
 
 
261
  def parse_tool_call(text: str) -> Optional[tuple]:
262
  action = re.search(r"Action:\s*(\w+)", text, re.IGNORECASE)
263
  if not action:
@@ -278,27 +300,50 @@ def parse_tool_call(text: str) -> Optional[tuple]:
278
  return name, {"query": r}
279
  return name, {}
280
 
 
281
  def parse_final_answer(text: str) -> Optional[str]:
282
  m = re.search(r"Final Answer:\s*(.+)", text, re.DOTALL | re.IGNORECASE)
283
  if m:
284
  return re.sub(r"\s*---\s*$", "", m.group(1)).strip()
285
  return None
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  def call_llm_streaming(client: InferenceClient, model: str, messages: list,
288
  emit_token: Callable[[str], None], max_tokens: int = 900) -> str:
289
- full = ""
290
  try:
291
- for chunk in client.chat_completion(messages=messages, model=model,
292
- max_tokens=max_tokens, temperature=0.25, stream=True):
293
- delta = chunk.choices[0].delta.content
294
- if delta:
295
- full += delta
296
- emit_token(delta)
297
  except Exception as e:
298
- msg = f"\n[LLM error: {str(e)[:120]}]"
299
- full += msg
 
 
 
 
 
 
 
 
 
 
 
 
300
  emit_token(msg)
301
- return full
302
 
303
 
304
  # ══════════════════════════════════════════════════════════════════════════════
 
247
  - After receiving tool results, always write a Final Answer
248
  - Maximum 4 tool calls per turn"""
249
 
250
+
251
def _merge_system_into_user(messages: list) -> list:
    """Fold system-role content into the first user message.

    Fallback for models that reject the ``system`` role (e.g. Mistral v0.3
    on the free HF tier).

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.

    Returns:
        A new list with every system message removed. Their content is
        prepended, under an ``[Instructions]`` header, to the first user
        message; if there is no user message, a leading user message holding
        only the instructions is inserted instead. The input list and its
        dicts are not modified.
    """
    system_parts = []
    out = []
    for m in messages:
        if m["role"] == "system":
            # Keep every system message rather than only the last one seen.
            if m["content"]:
                system_parts.append(m["content"])
        else:
            out.append(m)

    sys_content = "\n\n".join(system_parts)
    if not sys_content:
        return out

    first_user_idx = next((i for i, m in enumerate(out) if m["role"] == "user"), None)
    if first_user_idx is None:
        # No user turn to attach to: lead with one so the instructions are
        # not silently dropped from the conversation.
        out.insert(0, {"role": "user", "content": f"[Instructions]\n{sys_content}"})
    else:
        # Build a fresh dict so the caller's message object stays untouched.
        out[first_user_idx] = {
            "role": "user",
            "content": f"[Instructions]\n{sys_content}\n\n[Customer message]\n{out[first_user_idx]['content']}",
        }
    return out
269
+
270
+
271
def build_messages(user_msg: str, history: list, tool_obs: list) -> list:
    """Assemble the chat message list for one LLM call.

    Args:
        user_msg: The current user message.
        history: Earlier turns as dicts; only the last 12 entries are
            considered, and only those whose role is "user" or "assistant".
        tool_obs: Tool observations, each a dict with "tool" and "result"
            keys. When non-empty, an extra user message carrying the results
            and a request for the Final Answer is appended.

    Returns:
        The system prompt, the retained history, the current user message,
        and (optionally) the tool-results message, in that order.
    """
    recent_turns = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in history[-12:]
        if turn.get("role") in ("user", "assistant")
    ]
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        *recent_turns,
        {"role": "user", "content": user_msg},
    ]
    if not tool_obs:
        return conversation

    sections = [f"[{entry['tool']} result]\n{entry['result']}" for entry in tool_obs]
    joined = "\n\n".join(sections)
    conversation.append(
        {"role": "user", "content": f"Tool results:\n{joined}\n\nNow write your Final Answer."}
    )
    return conversation
281
 
282
+
283
  def parse_tool_call(text: str) -> Optional[tuple]:
284
  action = re.search(r"Action:\s*(\w+)", text, re.IGNORECASE)
285
  if not action:
 
300
  return name, {"query": r}
301
  return name, {}
302
 
303
+
304
def parse_final_answer(text: str) -> Optional[str]:
    """Return the text following the first "Final Answer:" marker.

    The marker is matched case-insensitively and the answer runs to the end
    of ``text``. A trailing ``---`` separator and surrounding whitespace are
    removed. Returns ``None`` when no marker with following text is found.
    """
    found = re.search(r"Final Answer:\s*(.+)", text, re.DOTALL | re.IGNORECASE)
    if found is None:
        return None
    answer = found.group(1)
    # Drop a closing "---" rule if the model ended its answer with one.
    answer = re.sub(r"\s*---\s*$", "", answer)
    return answer.strip()
309
 
310
+
311
def _try_stream(client: InferenceClient, model: str, messages: list,
                emit_token: Callable[[str], None], max_tokens: int) -> str:
    """Stream one chat completion, forwarding each text delta to ``emit_token``.

    Returns the concatenation of all non-empty deltas. Exceptions raised by
    the client propagate to the caller.
    """
    parts = []
    for chunk in client.chat_completion(
        messages=messages, model=model,
        max_tokens=max_tokens, temperature=0.25, stream=True,
    ):
        # Guard against chunks that carry no choices; indexing [0] on them
        # would raise IndexError. NOTE(review): seen with OpenAI-compatible
        # backends that append a usage-only chunk - confirm for this provider.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            parts.append(delta)
            emit_token(delta)
    return "".join(parts)


def call_llm_streaming(client: InferenceClient, model: str, messages: list,
                       emit_token: Callable[[str], None], max_tokens: int = 900) -> str:
    """Stream a chat completion, retrying once with the system prompt merged.

    The first attempt sends ``messages`` unchanged. If it fails with what
    looks like a bad-request / role error before any text was streamed, a
    second attempt is made with the system prompt folded into the first user
    message. Errors never propagate: they are reported as a bracketed note.

    Args:
        client: Object exposing ``chat_completion(..., stream=True)``.
        model: Model identifier passed through to the client.
        messages: Chat messages to send.
        emit_token: Callback invoked with every streamed delta, the retry
            notice, and any error note.
        max_tokens: Generation limit passed through to the client.

    Returns:
        The streamed model text; on failure, the text streamed so far
        followed by the error note. The retry notice is emitted but not
        included in the return value.
    """
    streamed = []

    def _emit_and_record(token: str) -> None:
        # Track model text so a mid-stream failure does not lose it.
        streamed.append(token)
        emit_token(token)

    def _fail(label: str, exc: Exception) -> str:
        note = f"\n[{label}: {str(exc)[:180]}]"
        emit_token(note)
        # Return what the consumer actually saw: partial text plus the note.
        return "".join(streamed) + note

    # Attempt 1: standard messages with system role
    try:
        return _try_stream(client, model, messages, _emit_and_record, max_tokens)
    except Exception as e:
        err_str = str(e)
        # Only retry on bad-request / role errors; surface all others immediately
        retryable = "Bad request" in err_str or "400" in err_str or "role" in err_str.lower()
        # Once text has been emitted, a retry would stream a second answer on
        # top of the first, so report the failure instead.
        if streamed or not retryable:
            return _fail("LLM error", e)

    # Attempt 2: merge system prompt into first user message as fallback
    emit_token("\n[Retrying with merged prompt…]\n")
    merged = _merge_system_into_user(messages)
    try:
        return _try_stream(client, model, merged, _emit_and_record, max_tokens)
    except Exception as e2:
        return _fail("LLM error after retry", e2)
347
 
348
 
349
  # ══════════════════════════════════════════════════════════════════════════════