Ghisalbertifederico committed on
Commit
a0b70c8
·
verified ·
1 Parent(s): 3853928

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -150
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import io
2
  import os
3
  import re
4
  import sys
@@ -9,11 +8,8 @@ if sys.platform == "win32":
9
  sys.stdout.reconfigure(encoding="utf-8", errors="replace")
10
  sys.stderr.reconfigure(encoding="utf-8", errors="replace")
11
  import gradio as gr
12
- import pypdf
13
  import requests
14
- import inspect
15
  import pandas as pd
16
- import markdownify
17
  from typing import Literal, TypedDict, get_args
18
  from langchain_core.messages import HumanMessage, SystemMessage
19
  from langchain_openai import ChatOpenAI
@@ -22,22 +18,27 @@ from config import DEFAULT_API_URL, HF_TOKEN, GROQ_API_KEY, OPENROUTER_API_KEY,
22
  from tools import (
23
  web_search,
24
  wikipedia_search,
 
25
  get_youtube_transcript,
26
  describe_image,
27
  transcribe_audio,
28
  run_python_file,
29
- read_task_file
30
  )
31
 
32
  # ---------------------------------------------------------------------------
33
  # Model fallback chain (primary → backup → last-resort)
34
  # ---------------------------------------------------------------------------
35
- MODEL_CONFIGS = [
36
- {"model_id": "llama-3.3-70b-versatile"},
37
- {"model_id": "meta-llama/llama-4-scout-17b-16e-instruct"},
38
- {"model_id": "moonshotai/kimi-k2-instruct"},
39
- {"model_id": "openai/gpt-oss-120b"},
40
- {"model_id": "llama-3.1-8b-instant"},
 
 
 
 
41
  ]
42
 
43
  _LABELS = Literal[
@@ -53,23 +54,18 @@ _LABELS = Literal[
53
  def _download_task_file(task_id: str, api_url: str = DEFAULT_API_URL) -> tuple[bytes, str]:
54
  """Download a file attached to a GAIA task."""
55
  url = f"{api_url}/files/{task_id}"
56
- # local_path = os.path.join(_DOWNLOAD_DIR, f"task_{task_id}_{file_name}")
57
-
58
- # Try with auth first, then without (some endpoints don't require it)
59
- # for headers in [
60
- # {"Authorization": f"Bearer {HF_TOKEN}"},
61
- # {},
62
- # ]:
63
  try:
64
  headers = {"Authorization": f"Bearer {HF_TOKEN}"}
65
  resp = requests.get(url, headers=headers, timeout=30)
66
- except requests.exceptions.HTTPError as e:
67
- status = e.response.status_code if e.response is not None else "?"
68
- print(f"Download attempt for {task_id} returned {status}")
69
  if resp.status_code != 200:
70
  print(f"[DEBUG] GET {url} → {resp.status_code}")
71
  return b"", ""
72
- return resp.content, resp.headers.get("content-type", "").lower()
 
 
73
 
74
  class AgentState(TypedDict):
75
  question: str
@@ -80,72 +76,28 @@ class AgentState(TypedDict):
80
  file_name: str | None
81
 
82
 
83
-
84
- # class WebSearchAgent:
85
-
86
- # def __init__(self, model_id: str = None):
87
- # model_id = model_id or MODEL_CONFIGS[0]["model_id"]
88
- # print(f"Initializing WebSearchAgent with {model_id}...")
89
-
90
- # self.agent = CodeAgent(
91
- # model=OpenAIServerModel(
92
- # model_id=model_id,
93
- # api_base="https://api.groq.com/openai/v1",
94
- # api_key=GROQ_API_KEY,
95
- # timeout=60,
96
- # ),
97
- # tools=[
98
- # web_search,
99
- # visit_webpage,
100
- # wikipedia_search,
101
- # get_youtube_transcript,
102
- # describe_image,
103
- # read_task_file,
104
- # transcribe_audio,
105
- # run_python_file,
106
- # ],
107
- # name="fast_agent",
108
- # description="Answers questions using web search, Wikipedia, or attached files as appropriate.",
109
- # additional_authorized_imports=[
110
- # "re", "math", "datetime", "collections", "itertools",
111
- # "statistics", "random", "unicodedata", "json", "string",
112
- # "pandas", "csv", "os", "subprocess",
113
- # ],
114
- # verbosity_level=1,
115
- # max_steps=10,
116
- # )
117
- # # Prepend guidance so the LLM knows which tools exist
118
- # self.agent.prompt_templates["system_prompt"] = self.agent.prompt_templates["system_prompt"] + "\n\n" + SYSTEM_PROMPT_ADDITION
119
- # print("WebSearchAgent initialized.")
120
-
121
- # def __call__(self, question: str) -> str:
122
- # print(f"\nAgent received question: {question[:50]}...")
123
- # try:
124
- # result = self.agent.run(question)
125
- # print("Agent final answer:", result)
126
- # return result
127
- # except Exception as e:
128
- # print("Agent error:", e)
129
- # msg = str(e)
130
- # # Re-raise rate-limit errors so _answer_question can fall back to the next model
131
- # if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg or "model_decommissioned" in msg or "decommissioned" in msg:
132
- # raise
133
- # return f"AGENT ERROR: {e}"
134
-
135
- MAX_WORKERS = 1 # sequential to stay within Groq's 12K tokens/min limit
136
  QUESTION_TIMEOUT = 300 # seconds before a single question is abandoned
137
- _exhausted_models: set[str] = set() # models that hit daily rate limits are skipped for remaining questions
138
 
139
  # --------------------------------------------------------------------------- #
140
  # NODES (LangGraph functions) #
141
  # --------------------------------------------------------------------------- #
 
142
  _llm_router = ChatOpenAI(
143
- model=MODEL_CONFIGS[0]["model_id"],
144
  base_url="https://api.groq.com/openai/v1",
145
  api_key=GROQ_API_KEY,
146
  timeout=60,
147
  )
148
- _llm_answer = _llm_router
 
 
 
 
 
 
 
149
 
150
  def route_question(state: AgentState) -> AgentState:
151
  """Label the task so we know which toolchain to invoke."""
@@ -168,65 +120,120 @@ def call_tools(state: AgentState) -> AgentState:
168
  matched_obj = re.search(r"https?://\S+", question)
169
 
170
  # ---- attachment (only when a file is actually attached to this task) -----
171
- file_fetched = False
172
  if task_id and file_name:
173
  blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
174
- if any([blob, ctype]):
175
- file_fetched = True
176
- print(f"[DEBUG] attachment type={ctype}")
177
- if "python" in ctype:
178
  print("[DEBUG] Working with a Python attachment file")
179
- state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8")})
180
  state["label"] = "python_script"
181
  return state
182
- if "audio" in ctype:
183
  print("[DEBUG] Working with an audio attachment file")
184
  state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
185
  state["label"] = "audio"
186
  return state
187
- if "image" in ctype:
188
  print("[DEBUG] Working with an image attachment file")
189
  state["answer"] = describe_image.invoke({"img_bytes": blob, "question": question})
190
  state["label"] = "image"
191
  return state
192
- # Excel / CSV / other binary
193
- print("[DEBUG] Working with an Excel/CSV attachment file")
194
- state["answer"] = read_task_file.invoke({"xls_bytes": blob})
195
  state["label"] = "other_ext"
196
  return state
197
 
198
- # ---- label-based routing (always runs when no file was fetched) ----------
199
  if label == "youtube":
200
  print("[TOOL] youtube_transcript")
201
  if matched_obj:
202
  url = re.sub(r'[.,;:!?")\]]+$', '', matched_obj.group(0))
203
  print(f"[TOOL] fetching transcript for: {url}")
204
- state["context"] = get_youtube_transcript.invoke({"video_url": url})
 
 
 
 
 
 
 
205
  else:
206
  print("[TOOL] youtube label but no URL found — falling back to web search")
207
  state["context"] = web_search.invoke({"query": question})
 
208
  elif label == "research":
209
- print("[TOOL] web search")
 
210
  search_query_prompt = (
211
- "Write a short Google search query (max 10 words) to answer this question. "
 
212
  "Output ONLY the query, nothing else.\n\nQuestion: " + question
213
  )
214
- focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"')
215
  print(f"[TOOL] search query: {focused_query}")
 
 
216
  search_json = web_search.invoke({"query": focused_query})
217
  wiki_text = wikipedia_search.invoke({"query": focused_query})
218
- state["context"] = f"{search_json}\n\n{wiki_text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  else:
 
220
  print("[TOOL] reasoning only (no search)")
221
  state["context"] = ""
222
  return state
223
 
224
  def synthesize_response(state: AgentState) -> AgentState:
225
- # Skip if a tool already produced a direct answer (image / python / excel paths)
226
- if state.get("answer"):
227
- print(f"[SYNTHESIZE] skipped — answer already set by tool")
228
  return state
229
 
 
 
 
 
 
 
 
 
 
 
230
  # Pass 1: chain-of-thought reasoning
231
  reasoning_prompt = [
232
  SystemMessage(content=get_prompt("reasoning_system")),
@@ -295,16 +302,24 @@ def build_graph() -> StateGraph:
295
  class LGAgent:
296
  """Callable wrapper used by run_and_submit_all."""
297
 
298
- def __init__(self, model_id: str | None = None) -> None:
299
  global _llm_router, _llm_answer
300
- mid = model_id or MODEL_CONFIGS[0]["model_id"]
 
301
  _llm_router = ChatOpenAI(
302
- model=mid,
303
  base_url="https://api.groq.com/openai/v1",
304
  api_key=GROQ_API_KEY,
305
  timeout=60,
306
  )
307
- _llm_answer = _llm_router
 
 
 
 
 
 
 
308
  self.graph = build_graph()
309
 
310
  def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
@@ -348,70 +363,47 @@ def _to_str(val) -> str:
348
 
349
 
350
  def _answer_question(item: dict) -> str:
351
- """Instantiate a fresh agent and answer one question, retrying on 429."""
352
  question_text = item["question"]
353
  task_id = item.get("task_id", "")
354
  file_name = item.get("file_name") or ""
355
 
356
- # Download attached file (if any) and inject its path into the question
357
  augmented_question = question_text
358
- # if file_name:
359
- # local_path = _download_task_file(task_id, file_name)
360
- # if local_path:
361
- # ext = os.path.splitext(file_name)[1].lower()
362
- # if ext in ('.png', '.jpg', '.jpeg', '.gif', '.webp'):
363
- # augmented_question = (
364
- # f"{question_text}\n\n"
365
- # f"[Task context: an image file is available at local path '{local_path}'. "
366
- # f"Use the describe_image tool with this path and a focused question to analyze it.]"
367
- # )
368
- # elif ext == '.py':
369
- # augmented_question = (
370
- # f"{question_text}\n\n"
371
- # f"[Task context: a Python file is available at local path '{local_path}'. "
372
- # f"Use run_python_file to execute it and/or read_task_file to read its source.]"
373
- # )
374
- # else:
375
- # augmented_question = (
376
- # f"{question_text}\n\n"
377
- # f"[Task context: an attached file is available at local path '{local_path}'. "
378
- # f"Use the read_task_file tool with this path to read its contents.]"
379
- # )
380
-
381
- for cfg in MODEL_CONFIGS:
382
- model_id = cfg["model_id"]
383
- if model_id in _exhausted_models:
384
- print(f"[{model_id}] Skipped (previously rate-limited)")
385
  continue
386
  for attempt in range(2):
387
  try:
388
- result = LGAgent(model_id=model_id)(augmented_question, task_id=task_id, file_name=file_name)
389
- # Pause between questions to respect Groq's tokens/min limit
390
- time.sleep(5)
 
 
 
391
  return result
392
  except Exception as e:
393
  msg = str(e)
394
- # Model permanently removed by provider — skip forever
395
  if "model_decommissioned" in msg or "decommissioned" in msg:
396
- _exhausted_models.add(model_id)
397
- print(f"[{model_id}] Model decommissioned — skipping permanently")
398
  break
399
  if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
400
- # Check if it's a daily (TPD) limit — skip model for all remaining questions
401
  if "on tokens per day" in msg or "TPD" in msg:
402
- _exhausted_models.add(model_id)
403
- print(f"[{model_id}] Daily token limit hit — skipping for remaining questions")
404
- break # move to next model immediately
405
- # TPM or 413 — skip to next model for THIS question only
406
- # (don't add to _exhausted_models so it's retried on the next question)
407
- print(f"[{model_id}] TPM rate limit or request too large — trying next model for this question")
408
- break # move to next model immediately
409
  else:
410
- return f"AGENT ERROR: {e}"
411
- else:
412
- # Only reached if inner loop didn't break (both retries used on non-TPD limits)
413
- print(f"[{model_id}] Exhausted retries, falling back to next model...")
414
- return "AGENT ERROR: all models rate-limited"
415
 
416
  def run_and_submit_all( profile: gr.OAuthProfile | None):
417
  """
 
 
1
  import os
2
  import re
3
  import sys
 
8
  sys.stdout.reconfigure(encoding="utf-8", errors="replace")
9
  sys.stderr.reconfigure(encoding="utf-8", errors="replace")
10
  import gradio as gr
 
11
  import requests
 
12
  import pandas as pd
 
13
  from typing import Literal, TypedDict, get_args
14
  from langchain_core.messages import HumanMessage, SystemMessage
15
  from langchain_openai import ChatOpenAI
 
18
  from tools import (
19
  web_search,
20
  wikipedia_search,
21
+ visit_webpage,
22
  get_youtube_transcript,
23
  describe_image,
24
  transcribe_audio,
25
  run_python_file,
26
+ read_task_file,
27
  )
28
 
29
  # ---------------------------------------------------------------------------
30
  # Model fallback chain (primary → backup → last-resort)
31
  # ---------------------------------------------------------------------------
32
+ # Use OpenRouter for the main reasoning model (better quality) and Groq for routing (fast)
33
+ GROQ_MODELS = [
34
+ {"model_id": "llama-3.3-70b-versatile"},
35
+ {"model_id": "llama-3.1-8b-instant"},
36
+ ]
37
+
38
+ OPENROUTER_MODELS = [
39
+ {"model_id": "google/gemini-2.0-flash-001"},
40
+ {"model_id": "qwen/qwen-2.5-72b-instruct"},
41
+ {"model_id": "meta-llama/llama-3.3-70b-instruct"},
42
  ]
43
 
44
  _LABELS = Literal[
 
54
  def _download_task_file(task_id: str, api_url: str = DEFAULT_API_URL) -> tuple[bytes, str]:
55
  """Download a file attached to a GAIA task."""
56
  url = f"{api_url}/files/{task_id}"
 
 
 
 
 
 
 
57
  try:
58
  headers = {"Authorization": f"Bearer {HF_TOKEN}"}
59
  resp = requests.get(url, headers=headers, timeout=30)
60
+ except requests.exceptions.RequestException as e:
61
+ print(f"[DEBUG] Download error for {task_id}: {e}")
62
+ return b"", ""
63
  if resp.status_code != 200:
64
  print(f"[DEBUG] GET {url} → {resp.status_code}")
65
  return b"", ""
66
+ ctype = resp.headers.get("content-type", "").lower()
67
+ print(f"[DEBUG] Downloaded file for {task_id}: {len(resp.content)} bytes, type={ctype}")
68
+ return resp.content, ctype
69
 
70
  class AgentState(TypedDict):
71
  question: str
 
76
  file_name: str | None
77
 
78
 
79
+ MAX_WORKERS = 1 # sequential to stay within rate limits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  QUESTION_TIMEOUT = 300 # seconds before a single question is abandoned
81
+ _exhausted_models: set[str] = set()
82
 
83
  # --------------------------------------------------------------------------- #
84
  # NODES (LangGraph functions) #
85
  # --------------------------------------------------------------------------- #
86
+ # Router uses Groq (fast, cheap)
87
  _llm_router = ChatOpenAI(
88
+ model=GROQ_MODELS[0]["model_id"],
89
  base_url="https://api.groq.com/openai/v1",
90
  api_key=GROQ_API_KEY,
91
  timeout=60,
92
  )
93
+
94
+ # Reasoning uses OpenRouter (higher quality)
95
+ _llm_answer = ChatOpenAI(
96
+ model=OPENROUTER_MODELS[0]["model_id"],
97
+ base_url="https://openrouter.ai/api/v1",
98
+ api_key=OPENROUTER_API_KEY,
99
+ timeout=120,
100
+ )
101
 
102
  def route_question(state: AgentState) -> AgentState:
103
  """Label the task so we know which toolchain to invoke."""
 
120
  matched_obj = re.search(r"https?://\S+", question)
121
 
122
  # ---- attachment (only when a file is actually attached to this task) -----
 
123
  if task_id and file_name:
124
  blob, ctype = _download_task_file(api_url=DEFAULT_API_URL, task_id=task_id)
125
+ if blob:
126
+ print(f"[DEBUG] attachment type={ctype}, size={len(blob)} bytes")
127
+ if "python" in ctype or file_name.endswith(".py"):
 
128
  print("[DEBUG] Working with a Python attachment file")
129
+ state["answer"] = run_python_file.invoke({"code": blob.decode("utf-8", errors="replace")})
130
  state["label"] = "python_script"
131
  return state
132
+ if "audio" in ctype or any(file_name.endswith(ext) for ext in [".mp3", ".wav", ".m4a", ".flac"]):
133
  print("[DEBUG] Working with an audio attachment file")
134
  state["context"] = transcribe_audio.invoke({"audio_bytes": blob})
135
  state["label"] = "audio"
136
  return state
137
+ if "image" in ctype or any(file_name.endswith(ext) for ext in [".png", ".jpg", ".jpeg", ".gif", ".webp"]):
138
  print("[DEBUG] Working with an image attachment file")
139
  state["answer"] = describe_image.invoke({"img_bytes": blob, "question": question})
140
  state["label"] = "image"
141
  return state
142
+ # Excel / CSV / PDF / other binary
143
+ print("[DEBUG] Working with a data file attachment")
144
+ state["context"] = read_task_file.invoke({"xls_bytes": blob})
145
  state["label"] = "other_ext"
146
  return state
147
 
148
+ # ---- label-based routing (when no file was fetched) ----------
149
  if label == "youtube":
150
  print("[TOOL] youtube_transcript")
151
  if matched_obj:
152
  url = re.sub(r'[.,;:!?")\]]+$', '', matched_obj.group(0))
153
  print(f"[TOOL] fetching transcript for: {url}")
154
+ transcript = get_youtube_transcript.invoke({"video_url": url})
155
+ if transcript and transcript != "TRANSCRIPT_UNAVAILABLE":
156
+ state["context"] = transcript
157
+ else:
158
+ # Fallback: search for info about the video
159
+ print("[TOOL] Transcript unavailable — searching web for video info")
160
+ search_json = web_search.invoke({"query": f"youtube {url} transcript content"})
161
+ state["context"] = f"TRANSCRIPT_UNAVAILABLE. Web search results about the video:\n{search_json}"
162
  else:
163
  print("[TOOL] youtube label but no URL found — falling back to web search")
164
  state["context"] = web_search.invoke({"query": question})
165
+
166
  elif label == "research":
167
+ print("[TOOL] research — multi-step search")
168
+ # Step 1: Generate a focused search query
169
  search_query_prompt = (
170
+ "Write a short, precise search query (max 10 words) to answer this question. "
171
+ "Include key proper nouns, dates, and specific terms. "
172
  "Output ONLY the query, nothing else.\n\nQuestion: " + question
173
  )
174
+ focused_query = _llm_router.invoke(search_query_prompt).content.strip().strip('"').strip("'")
175
  print(f"[TOOL] search query: {focused_query}")
176
+
177
+ # Step 2: Run web search + Wikipedia in parallel
178
  search_json = web_search.invoke({"query": focused_query})
179
  wiki_text = wikipedia_search.invoke({"query": focused_query})
180
+
181
+ context_parts = []
182
+
183
+ # Step 3: Visit top search result URLs to get full page content
184
+ if search_json and search_json != "No search results found.":
185
+ context_parts.append(f"WEB SEARCH RESULTS:\n{search_json}")
186
+ try:
187
+ import json as _json
188
+ hits = _json.loads(search_json)
189
+ # Visit top 2 result URLs for detailed content
190
+ visited = 0
191
+ for hit in hits[:4]:
192
+ link = hit.get("link", "")
193
+ if link and visited < 2:
194
+ page_content = visit_webpage.invoke({"url": link})
195
+ if page_content and "Could not fetch" not in page_content:
196
+ context_parts.append(f"\nPAGE CONTENT ({link}):\n{page_content[:15000]}")
197
+ visited += 1
198
+ except Exception as e:
199
+ print(f"[TOOL] Error visiting search results: {e}")
200
+
201
+ if wiki_text and "No Wikipedia results found" not in wiki_text and "failed" not in wiki_text.lower():
202
+ context_parts.append(f"\nWIKIPEDIA:\n{wiki_text}")
203
+
204
+ # Step 4: If initial results are thin, try an alternative query
205
+ if not context_parts or all("No " in p or "error" in p.lower() for p in context_parts):
206
+ print("[TOOL] Initial search thin — trying alternative query")
207
+ alt_query = focused_query.replace('"', '').replace("'", "")
208
+ if alt_query != focused_query:
209
+ alt_results = web_search.invoke({"query": alt_query})
210
+ if alt_results and alt_results != "No search results found.":
211
+ context_parts.append(f"\nALTERNATIVE SEARCH:\n{alt_results}")
212
+
213
+ state["context"] = "\n\n".join(context_parts) if context_parts else "No information found from web search or Wikipedia."
214
+
215
  else:
216
+ # Logic / pure reasoning — no search needed
217
  print("[TOOL] reasoning only (no search)")
218
  state["context"] = ""
219
  return state
220
 
221
  def synthesize_response(state: AgentState) -> AgentState:
222
+ # If a tool produced a direct final answer (python execution), skip reasoning
223
+ if state.get("answer") and state["label"] == "python_script":
224
+ print(f"[SYNTHESIZE] skipped — python output: {state['answer'][:200]}")
225
  return state
226
 
227
+ # For image: the vision model already answered, but wrap it through reasoning
228
+ # to extract the precise answer from the description
229
+ if state.get("answer") and state["label"] == "image":
230
+ state["context"] = f"VISION MODEL OUTPUT:\n{state['answer']}"
231
+ state["answer"] = "" # clear so reasoning runs
232
+
233
+ # For other_ext with context (file data), make sure reasoning runs
234
+ if state["label"] == "other_ext" and state.get("context") and not state.get("answer"):
235
+ pass # context is set, reasoning will run below
236
+
237
  # Pass 1: chain-of-thought reasoning
238
  reasoning_prompt = [
239
  SystemMessage(content=get_prompt("reasoning_system")),
 
302
  class LGAgent:
303
  """Callable wrapper used by run_and_submit_all."""
304
 
305
+ def __init__(self, model_id: str | None = None, answer_model_id: str | None = None) -> None:
306
  global _llm_router, _llm_answer
307
+ # Router: fast Groq model
308
+ router_mid = model_id or GROQ_MODELS[0]["model_id"]
309
  _llm_router = ChatOpenAI(
310
+ model=router_mid,
311
  base_url="https://api.groq.com/openai/v1",
312
  api_key=GROQ_API_KEY,
313
  timeout=60,
314
  )
315
+ # Answering: higher quality OpenRouter model
316
+ answer_mid = answer_model_id or OPENROUTER_MODELS[0]["model_id"]
317
+ _llm_answer = ChatOpenAI(
318
+ model=answer_mid,
319
+ base_url="https://openrouter.ai/api/v1",
320
+ api_key=OPENROUTER_API_KEY,
321
+ timeout=120,
322
+ )
323
  self.graph = build_graph()
324
 
325
  def __call__(self, question: str, task_id: str | None = None, file_name: str | None = None) -> str:
 
363
 
364
 
365
  def _answer_question(item: dict) -> str:
366
+ """Instantiate a fresh agent and answer one question, retrying on errors."""
367
  question_text = item["question"]
368
  task_id = item.get("task_id", "")
369
  file_name = item.get("file_name") or ""
370
 
 
371
  augmented_question = question_text
372
+
373
+ # Try each OpenRouter answer model with Groq router
374
+ for answer_cfg in OPENROUTER_MODELS:
375
+ answer_model_id = answer_cfg["model_id"]
376
+ if answer_model_id in _exhausted_models:
377
+ print(f"[{answer_model_id}] Skipped (previously rate-limited)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  continue
379
  for attempt in range(2):
380
  try:
381
+ result = LGAgent(
382
+ model_id=GROQ_MODELS[0]["model_id"],
383
+ answer_model_id=answer_model_id,
384
+ )(augmented_question, task_id=task_id, file_name=file_name)
385
+ # Pause between questions to respect rate limits
386
+ time.sleep(3)
387
  return result
388
  except Exception as e:
389
  msg = str(e)
 
390
  if "model_decommissioned" in msg or "decommissioned" in msg:
391
+ _exhausted_models.add(answer_model_id)
392
+ print(f"[{answer_model_id}] Model decommissioned — skipping permanently")
393
  break
394
  if "rate_limit_exceeded" in msg or "429" in msg or "413" in msg or "Request too large" in msg:
 
395
  if "on tokens per day" in msg or "TPD" in msg:
396
+ _exhausted_models.add(answer_model_id)
397
+ print(f"[{answer_model_id}] Daily token limit hit — skipping for remaining questions")
398
+ break
399
+ wait = _parse_retry_after(msg)
400
+ print(f"[{answer_model_id}] Rate limited — waiting {wait:.0f}s then retry")
401
+ time.sleep(min(wait, 30))
402
+ continue
403
  else:
404
+ print(f"[{answer_model_id}] Error: {msg[:200]}")
405
+ break # try next model
406
+ return "AGENT ERROR: all models exhausted"
 
 
407
 
408
  def run_and_submit_all( profile: gr.OAuthProfile | None):
409
  """