Paperbag committed on
Commit
3f4fc54
·
1 Parent(s): 21be703

Refactor and add new debugging scripts; update question fetching logic

Browse files

- Modified `app copy.py` to limit question processing to the first two items.
- Introduced `check_questions.py` to fetch and display questions with metadata.
- Added `debug_chess.py` and `debug_chess2.py` for image analysis using the agent.
- Created `debug_issues.py` to analyze specific questions and compare with ground truth.
- Implemented `debug_search.py` for web and Wikipedia searches on a specific question.
- Developed `debug_test.py` for testing agent responses against ground truth.
- Added multiple `debug_wiki` scripts for refined Wikipedia searches.
- Created `debug_youtube.py` to fetch YouTube transcripts for specific videos.
- Enhanced `find_gaia_answers.py` to load metadata from a Parquet file and match answers.
- Introduced `proxy.py` for handling API requests with multiple providers.
- Added various test scripts (`quick_test.py`, `test_5.py`, `test_10.py`, `test_all.py`) for validating agent responses against ground truth.

.claude/settings nvidia.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "ANTHROPIC_BASE_URL": "http://localhost:8082",
4
+ "ANTHROPIC_AUTH_TOKEN": "asd",
5
+ "ANTHROPIC_MODEL": "nvidia_nim/z-ai/glm5",
6
+ "ANTHROPIC_DEFAULT_OPUS_MODEL": "nvidia_nim/z-ai/glm5",
7
+ "ANTHROPIC_DEFAULT_SONNET_MODEL": "nvidia_nim/moonshotai/kimi-k2.5",
8
+ "ANTHROPIC_DEFAULT_HAIKU_MODEL": "nvidia_nim/stepfun-ai/step-3.5-flash",
9
+ "CLAUDE_CODE_SUBAGENT_MODEL": "nvidia_nim/z-ai/glm5"
10
+ }
11
+ }
.claude/settings old.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Open router
2
+ // {
3
+ // "env": {
4
+ // "ANTHROPIC_BASE_URL": "https://openrouter.ai/api",
5
+ // // "ANTHROPIC_AUTH_TOKEN": "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",
6
+ // "ANTHROPIC_AUTH_TOKEN": "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",
7
+ // "ANTHROPIC_API_KEY": "",
8
+ // "ANTHROPIC_MODEL": "qwen/qwen3-coder:free"
9
+
10
+ // }
11
+ // }
12
+
13
+
14
+ // Nvidia
15
+ {
16
+ "env": {
17
+ "ANTHROPIC_BASE_URL": "https://integrate.api.nvidia.com/v1",
18
+ // "ANTHROPIC_AUTH_TOKEN": "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",
19
+ "ANTHROPIC_AUTH_TOKEN": "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",
20
+ "ANTHROPIC_API_KEY": "",
21
+ "ANTHROPIC_MODEL": "nvidia_nim/z-ai/glm4.7"
22
+
23
+ }
24
+ }
.gitignore CHANGED
@@ -1,5 +1,16 @@
1
  .env
 
2
  .cursorignore
3
  .venv_old
4
  .venv
5
- */settings.json
 
 
 
 
 
 
 
 
 
 
 
1
  .env
2
+ .env_old
3
  .cursorignore
4
  .venv_old
5
  .venv
6
+ */settings.json
7
+ */settings.local.json
8
+
9
+ # Python cache / bytecode
10
+ __pycache__/
11
+ *.py[cod]
12
+
13
+ # Common Python tooling caches
14
+ .pytest_cache/
15
+ .mypy_cache/
16
+ .ruff_cache/
.opencode/package-lock.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": ".opencode",
3
+ "lockfileVersion": 3,
4
+ "requires": true,
5
+ "packages": {
6
+ "": {
7
+ "dependencies": {
8
+ "@opencode-ai/plugin": "1.3.15"
9
+ }
10
+ },
11
+ "node_modules/@opencode-ai/plugin": {
12
+ "version": "1.3.15",
13
+ "resolved": "https://registry.npmjs.org/@opencode-ai/plugin/-/plugin-1.3.15.tgz",
14
+ "integrity": "sha512-jZJbuvUXc5Limz8pacQl+ffATjjKGlq+xaA4wTUeW+/spwOf7Yr5Ryyvan8eNlYM8wy6h5SLfznl1rlFpjYC8w==",
15
+ "license": "MIT",
16
+ "dependencies": {
17
+ "@opencode-ai/sdk": "1.3.15",
18
+ "zod": "4.1.8"
19
+ },
20
+ "peerDependencies": {
21
+ "@opentui/core": ">=0.1.96",
22
+ "@opentui/solid": ">=0.1.96"
23
+ },
24
+ "peerDependenciesMeta": {
25
+ "@opentui/core": {
26
+ "optional": true
27
+ },
28
+ "@opentui/solid": {
29
+ "optional": true
30
+ }
31
+ }
32
+ },
33
+ "node_modules/@opencode-ai/sdk": {
34
+ "version": "1.3.15",
35
+ "resolved": "https://registry.npmjs.org/@opencode-ai/sdk/-/sdk-1.3.15.tgz",
36
+ "integrity": "sha512-Uk59C7wsK20wpdr277yx7Xz7TqG5jGqlZUpSW3wDH/7a2K2iBg0lXc2wskHuCXLRXMhXpPZtb4a3SOpPENkkbg==",
37
+ "license": "MIT",
38
+ "dependencies": {
39
+ "cross-spawn": "7.0.6"
40
+ }
41
+ },
42
+ "node_modules/cross-spawn": {
43
+ "version": "7.0.6",
44
+ "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
45
+ "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
46
+ "license": "MIT",
47
+ "dependencies": {
48
+ "path-key": "^3.1.0",
49
+ "shebang-command": "^2.0.0",
50
+ "which": "^2.0.1"
51
+ },
52
+ "engines": {
53
+ "node": ">= 8"
54
+ }
55
+ },
56
+ "node_modules/isexe": {
57
+ "version": "2.0.0",
58
+ "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
59
+ "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
60
+ "license": "ISC"
61
+ },
62
+ "node_modules/path-key": {
63
+ "version": "3.1.1",
64
+ "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
65
+ "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
66
+ "license": "MIT",
67
+ "engines": {
68
+ "node": ">=8"
69
+ }
70
+ },
71
+ "node_modules/shebang-command": {
72
+ "version": "2.0.0",
73
+ "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
74
+ "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
75
+ "license": "MIT",
76
+ "dependencies": {
77
+ "shebang-regex": "^3.0.0"
78
+ },
79
+ "engines": {
80
+ "node": ">=8"
81
+ }
82
+ },
83
+ "node_modules/shebang-regex": {
84
+ "version": "3.0.0",
85
+ "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
86
+ "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
87
+ "license": "MIT",
88
+ "engines": {
89
+ "node": ">=8"
90
+ }
91
+ },
92
+ "node_modules/which": {
93
+ "version": "2.0.2",
94
+ "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
95
+ "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
96
+ "license": "ISC",
97
+ "dependencies": {
98
+ "isexe": "^2.0.0"
99
+ },
100
+ "bin": {
101
+ "node-which": "bin/node-which"
102
+ },
103
+ "engines": {
104
+ "node": ">= 8"
105
+ }
106
+ },
107
+ "node_modules/zod": {
108
+ "version": "4.1.8",
109
+ "license": "MIT",
110
+ "funding": {
111
+ "url": "https://github.com/sponsors/colinhacks"
112
+ }
113
+ }
114
+ }
115
+ }
1htKBjuUWec.en-orig.vtt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WEBVTT
2
+ Kind: captions
3
+ Language: en
4
+
5
+ 00:00:00.030 --> 00:00:03.830 align:start position:0%
6
+
7
+ Wow<00:00:00.539><c> this</c><00:00:00.870><c> coffee's</c><00:00:01.800><c> great</c><00:00:02.129><c> I</c><00:00:02.659><c> was</c><00:00:03.659><c> just</c>
8
+
9
+ 00:00:03.830 --> 00:00:03.840 align:start position:0%
10
+ Wow this coffee's great I was just
11
+
12
+
13
+ 00:00:03.840 --> 00:00:05.410 align:start position:0%
14
+ Wow this coffee's great I was just
15
+ thinking<00:00:04.110><c> that</c>
16
+
17
+ 00:00:05.410 --> 00:00:05.420 align:start position:0%
18
+ thinking that
19
+
20
+
21
+ 00:00:05.420 --> 00:00:17.710 align:start position:0%
22
+ thinking that
23
+ yeah<00:00:06.420><c> is</c><00:00:06.569><c> that</c><00:00:06.720><c> cinnamon</c><00:00:07.639><c> chicory</c>
24
+
25
+ 00:00:17.710 --> 00:00:17.720 align:start position:0%
26
+
27
+
28
+
29
+ 00:00:17.720 --> 00:00:21.530 align:start position:0%
30
+
31
+ tea<00:00:18.720><c> oak</c>
32
+
33
+ 00:00:21.530 --> 00:00:21.540 align:start position:0%
34
+
35
+
36
+
37
+ 00:00:21.540 --> 00:00:24.670 align:start position:0%
38
+
39
+ [Music]
40
+
41
+ 00:00:24.670 --> 00:00:24.680 align:start position:0%
42
+
43
+
44
+
45
+ 00:00:24.680 --> 00:00:26.710 align:start position:0%
46
+
47
+ isn't<00:00:25.680><c> that</c><00:00:25.800><c> hot</c>
48
+
49
+ 00:00:26.710 --> 00:00:26.720 align:start position:0%
50
+ isn't that hot
51
+
52
+
53
+ 00:00:26.720 --> 00:00:29.720 align:start position:0%
54
+ isn't that hot
55
+ extremely
56
+
1htKBjuUWec.en.vtt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WEBVTT
2
+ Kind: captions
3
+ Language: en
4
+
5
+ 00:00:00.030 --> 00:00:03.830 align:start position:0%
6
+
7
+ Wow<00:00:00.539><c> this</c><00:00:00.870><c> coffee's</c><00:00:01.800><c> great</c><00:00:02.129><c> I</c><00:00:02.659><c> was</c><00:00:03.659><c> just</c>
8
+
9
+ 00:00:03.830 --> 00:00:03.840 align:start position:0%
10
+ Wow this coffee's great I was just
11
+
12
+
13
+ 00:00:03.840 --> 00:00:05.410 align:start position:0%
14
+ Wow this coffee's great I was just
15
+ thinking<00:00:04.110><c> that</c>
16
+
17
+ 00:00:05.410 --> 00:00:05.420 align:start position:0%
18
+ thinking that
19
+
20
+
21
+ 00:00:05.420 --> 00:00:17.710 align:start position:0%
22
+ thinking that
23
+ yeah<00:00:06.420><c> is</c><00:00:06.569><c> that</c><00:00:06.720><c> cinnamon</c><00:00:07.639><c> chicory</c>
24
+
25
+ 00:00:17.710 --> 00:00:17.720 align:start position:0%
26
+
27
+
28
+
29
+ 00:00:17.720 --> 00:00:21.530 align:start position:0%
30
+
31
+ tea<00:00:18.720><c> oak</c>
32
+
33
+ 00:00:21.530 --> 00:00:21.540 align:start position:0%
34
+
35
+
36
+
37
+ 00:00:21.540 --> 00:00:24.670 align:start position:0%
38
+
39
+ [Music]
40
+
41
+ 00:00:24.670 --> 00:00:24.680 align:start position:0%
42
+
43
+
44
+
45
+ 00:00:24.680 --> 00:00:26.710 align:start position:0%
46
+
47
+ isn't<00:00:25.680><c> that</c><00:00:25.800><c> hot</c>
48
+
49
+ 00:00:26.710 --> 00:00:26.720 align:start position:0%
50
+ isn't that hot
51
+
52
+
53
+ 00:00:26.720 --> 00:00:29.720 align:start position:0%
54
+ isn't that hot
55
+ extremely
56
+
__pycache__/agent.cpython-312.pyc DELETED
Binary file (30.6 kB)
 
__pycache__/app.cpython-312.pyc CHANGED
Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
 
agent.py CHANGED
@@ -1,615 +1,435 @@
1
  import os
2
- import base64
3
- import requests
4
- import json
5
- import traceback
6
- import datetime
7
  import subprocess
8
  import tempfile
9
- import time
10
- from typing import TypedDict, List, Dict, Any, Optional, Union
11
- from langchain_core import tools
12
- from langgraph.graph import StateGraph, START, END
13
- from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
14
- from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
15
- from langchain_core.tools import tool
16
- from langchain_community.document_loaders import WikipediaLoader
17
  from ddgs import DDGS
18
  from dotenv import load_dotenv
19
- from groq import Groq
 
20
  from langchain_groq import ChatGroq
 
 
21
  from langchain_community.document_loaders.image import UnstructuredImageLoader
22
- from langchain_community.document_loaders import WebBaseLoader
23
- from langchain_google_genai import ChatGoogleGenerativeAI
24
-
25
- try:
26
- import cv2
27
- except ImportError:
28
- cv2 = None
29
 
30
- # os.environ["USER_AGENT"] = "gaia-agent/1.0"
31
-
32
- whisper_model = None
33
- def get_whisper():
34
- global whisper_model
35
- if whisper_model is None:
36
- import whisper
37
- # Lazy load the smallest, fastest model
38
- whisper_model = whisper.load_model("base")
39
- return whisper_model
40
-
41
- load_dotenv(override=True)
42
-
43
- # Base Hugging Face LLM used by the chat wrapper
44
- # base_llm = HuggingFaceEndpoint(
45
- # repo_id="openai/gpt-oss-20b:hyperbolic",
46
- # # deepseek-ai/DeepSeek-OCR:novita
47
- # task="text-generation",
48
- # temperature=0.0,
49
- # huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
50
- # )
51
-
52
- # Model initializations moved to smart_invoke for lazy loading to prevent import errors if keys are missing.
53
-
54
- def smart_invoke(msgs, use_tools=False, start_tier=0):
55
- """
56
- Tiered fallback: OpenRouter -> Gemini -> Groq -> NVIDIA -> Vercel.
57
- Retries next tier if a 429 (rate limit), 402 (credits), or 404 (model found) error occurs.
58
- """
59
-
60
- # Adaptive Gemini names verified via list_models (REST API)
61
- gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]
62
-
63
- tiers_config = [
64
- {"name": "Qwen3-Next-80B", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "qwen/qwen3-next-80b-a3b-instruct:free", "base_url": "https://openrouter.ai/api/v1"},
65
- {"name": "Gemma-3-27B", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
66
- {"name": "NVIDIA-Nemotron-Super", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "nvidia/nemotron-3-super-120b-a12b:free", "base_url": "https://openrouter.ai/api/v1"},
67
- {"name": "OpenRouter-FreeRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "openrouter/free", "base_url": "https://openrouter.ai/api/v1"},
68
- {"name": "DeepSeek-R1", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "deepseek/deepseek-r1:free", "base_url": "https://openrouter.ai/api/v1"},
69
- {"name": "Gemini-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
70
- {"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
71
- ]
72
-
73
- last_exception = None
74
- for i in range(start_tier, len(tiers_config)):
75
- tier = tiers_config[i]
76
- api_key = os.getenv(tier["key"])
77
- if not api_key:
78
- continue
79
-
80
- def create_model_instance(m_name, provider, b_url=None):
81
- if provider == "openai":
82
- from langchain_openai import ChatOpenAI
83
- return ChatOpenAI(model=m_name, openai_api_key=api_key, openai_api_base=b_url, temperature=0)
84
- elif provider == "google":
85
- from langchain_google_genai import ChatGoogleGenerativeAI
86
- return ChatGoogleGenerativeAI(model=m_name, temperature=0)
87
- elif provider == "groq":
88
- from langchain_groq import ChatGroq
89
- return ChatGroq(model=m_name, temperature=0, max_retries=2)
90
- return None
91
-
92
- primary_model = create_model_instance(tier["model_name"], tier["provider"], tier.get("base_url"))
93
- if use_tools:
94
- primary_model = primary_model.bind_tools(tools)
95
-
96
- models_to_try = [primary_model]
97
- if "alternatives" in tier:
98
- for alt_name in tier["alternatives"]:
99
- alt_model = create_model_instance(alt_name, tier["provider"], tier.get("base_url"))
100
- if use_tools:
101
- alt_model = alt_model.bind_tools(tools)
102
- models_to_try.append(alt_model)
103
-
104
- for current_model in models_to_try:
105
- try:
106
- model_name = getattr(current_model, "model", tier["name"])
107
- print(f"--- Calling {tier['name']} ({model_name}) ---")
108
- return current_model.invoke(msgs), i
109
- except Exception as e:
110
- err_str = str(e).lower()
111
- # If it's a 404 (not found) and we have more alternatives, continue to the next alternative
112
- if any(x in err_str for x in ["not_found", "404"]) and current_model != models_to_try[-1]:
113
- print(f"--- {tier['name']} model {model_name} not found. Trying alternative... ---")
114
- continue
115
-
116
- # Catch other fallback triggers
117
- if any(x in err_str for x in ["rate_limit", "429", "500", "503", "overloaded", "not_found", "404", "402", "credits", "decommissioned", "invalid_request_error"]):
118
- print(f"--- {tier['name']} Error: {e}. Trying next model/tier... ---")
119
- last_exception = e
120
- # If this tier has more alternatives, continue to the next one
121
- if current_model != models_to_try[-1]:
122
- continue
123
- break # Move to next tier
124
- raise e
125
-
126
- if last_exception:
127
- print("CRITICAL: All fallback tiers failed.")
128
- raise last_exception
129
- return None, 0
130
 
131
  @tool
132
  def web_search(keywords: str) -> str:
133
- """
134
- Uses duckduckgo to search the top 5 result on web
135
-
136
- Use cases:
137
- - Identify personal information
138
- - Information search
139
- - Finding organisation information
140
- - Obtain the latest news
141
-
142
- Args:
143
- keywords: keywords used to search the web
144
-
145
- Returns:
146
- Search result (Header + body + url)
147
- """
148
- max_retries = 3
149
- for attempt in range(max_retries):
150
- try:
151
- with DDGS() as ddgs:
152
- output = ""
153
- results = ddgs.text(keywords, max_results = 5)
154
- for result in results:
155
- output += f"Results: {result['title']}\n{result['body']}\n{result['href']}\n\n"
156
- return output
157
- except Exception as e:
158
- if attempt < max_retries - 1:
159
- time.sleep(2 ** attempt)
160
- continue
161
- return f"Search failed after {max_retries} attempts: {str(e)}"
162
 
163
- @tool
164
  def wiki_search(query: str) -> str:
165
- """
166
- Search Wikipedia for a query and return up to 3 results.
167
-
168
- Use cases:
169
- When the question requires the use of information from wikipedia
170
-
171
- Args:
172
- query: The search query
173
- """
174
-
175
- search_docs = WikipediaLoader(query=query, load_max_docs=3, doc_content_chars_max=15000).load()
176
-
177
- if not search_docs:
178
- return "No Wikipedia results found."
179
-
180
- formatted_search_docs = "\n\n---\n\n".join(
181
- [
182
- f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("title", "Unknown Title")}"/>\n{doc.page_content}\n</Document>'
183
- for doc in search_docs
184
- ])
185
- return formatted_search_docs
186
-
187
- def get_vision_models():
188
- """Returns a list of vision models to try, in order of preference."""
189
- configs = [
190
- {"name": "OpenRouter-Qwen3-VL", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "qwen/qwen3-vl-235b-thinking:free", "base_url": "https://openrouter.ai/api/v1"},
191
- {"name": "NVIDIA-Nemotron-VL", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "nvidia/nemotron-nano-2-vl:free", "base_url": "https://integrate.api.nvidia.com/v1"},
192
- {"name": "OpenRouter-Gemma-3-27b-it", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
193
- {"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
194
- {"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
195
- ]
196
- models = []
197
- for cfg in configs:
198
- api_key = os.getenv(cfg["key"])
199
- if not api_key:
200
- continue
201
- if cfg["provider"] == "openai":
202
- from langchain_openai import ChatOpenAI
203
- m = ChatOpenAI(model=cfg["model_name"], openai_api_key=api_key, openai_api_base=cfg.get("base_url"), temperature=0)
204
- elif cfg["provider"] == "google":
205
- from langchain_google_genai import ChatGoogleGenerativeAI
206
- m = ChatGoogleGenerativeAI(model=cfg["model_name"], temperature=0)
207
- elif cfg["provider"] == "groq":
208
- from langchain_groq import ChatGroq
209
- m = ChatGroq(model=cfg["model_name"], temperature=0)
210
- models.append({"name": cfg["name"], "model": m})
211
- return models
212
 
213
  @tool
214
- def analyze_image(image_path: str, question: str) -> str:
215
- """
216
- EXTERNAL SIGHT API: Sends an image path to a Vision Model to answer a specific question.
217
- YOU MUST CALL THIS TOOL ANY TIME an image (.png, .jpg, .jpeg) is attached to the prompt.
218
- NEVER claim you cannot see images. Use this tool instead.
219
-
220
- Args:
221
- image_path: The local path or URL to the image file.
222
- question: Specific question describing what you want the vision model to look for.
223
- """
224
  try:
225
- if not os.path.exists(image_path):
226
- return f"Error: Image file not found at {image_path}"
227
-
228
- # If it's a local file, we encode it to base64
229
- with open(image_path, "rb") as image_file:
230
- encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
231
-
232
- message = HumanMessage(
233
- content=[
234
- {"type": "text", "text": question},
235
- {
236
- "type": "image_url",
237
- "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
238
- },
239
- ]
240
- )
241
-
242
- vision_models = get_vision_models()
243
- if not vision_models:
244
- return "Error: No vision models configured (missing API keys)."
245
-
246
- last_err = None
247
- for item in vision_models:
248
- try:
249
- m_name = getattr(item['model'], 'model', 'unknown')
250
- print(f"--- Calling Vision Model: {item['name']} ({m_name}) ---")
251
- response = item['model'].invoke([message])
252
- return extract_text_from_content(response.content)
253
- except Exception as e:
254
- print(f"Vision Model {item['name']} failed.")
255
- traceback.print_exc()
256
- last_err = e
257
- return f"Error analyzing image: All vision models failed. Last error: {str(last_err)}"
258
  except Exception as e:
259
- traceback.print_exc()
260
- return f"Error reading/processing image: {str(e)}"
261
 
262
  @tool
263
- def analyze_audio(audio_path: str, question: str) -> str:
264
- """
265
- Transcribes an audio file (.mp3, .wav, .m4a) to answer questions about what is spoken.
266
-
267
- Args:
268
- audio_path: The local path to the audio file.
269
- question: The specific question to ask.
270
- """
271
  try:
272
- model = get_whisper()
273
- result = model.transcribe(audio_path)
274
- transcript = result["text"]
275
- return f"Audio Transcript:\n{transcript}"
 
 
 
 
 
276
  except Exception as e:
277
- return f"Error analyzing audio: {str(e)}. Tip: You requires 'ffmpeg' installed on your system."
278
 
279
  @tool
280
- def analyze_video(video_path: str, question: str) -> str:
281
- """
282
- EXTERNAL SIGHT/HEARING API: Sends a video file to an external Vision/Audio model.
283
- YOU MUST CALL THIS TOOL ANY TIME a video (.mp4, .avi) is attached to the prompt.
284
- NEVER claim you cannot analyze videos. Use this tool instead.
285
-
286
- Args:
287
- video_path: The local path to the video file.
288
- question: Specific question describing what you want to extract from the video.
289
- """
290
- if cv2 is None:
291
- return "Error: cv2 is not installed. Please install opencv-python."
292
-
293
- temp_dir = tempfile.gettempdir()
294
- downloaded_video = None
295
-
296
- try:
297
- # Check if video_path is a URL
298
- if video_path.startswith("http"):
299
- print(f"Downloading video from URL: {video_path}")
300
- downloaded_video = os.path.join(temp_dir, f"video_{int(time.time())}.mp4")
301
- try:
302
- # Use yt-dlp to download the video
303
- # Note: --ffmpeg-location could be used if we knew where it was, but we assume it's in path or missing
304
- subprocess.run(["yt-dlp", "-f", "best[ext=mp4]/mp4", "-o", downloaded_video, video_path], check=True, timeout=120)
305
- video_path = downloaded_video
306
- except Exception as e:
307
- return f"Error downloading video from URL: {str(e)}. Tip: Check if yt-dlp is installed and the URL is valid."
308
 
309
- # 1. Extract frames evenly spaced throughout the video
310
- cap = cv2.VideoCapture(video_path)
311
- total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
312
- if total_frames == 0:
313
- return "Error: Could not read video frames."
314
-
315
- # Take 5 frames as a summary
316
- frame_indices = [int(i * total_frames / 5) for i in range(5)]
317
- extracted_descriptions = []
318
 
319
- vision_models = get_vision_models()
320
- # Ensure Groq-Llama is at the front for video if preferred, but we'll use the default order for now.
321
 
322
- for idx_num, frame_idx in enumerate(frame_indices):
323
- cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
324
- ret, frame = cap.read()
325
- if ret:
326
- # Convert frame to base64
327
- _, buffer = cv2.imencode('.jpg', frame)
328
- encoded_image = base64.b64encode(buffer).decode('utf-8')
329
-
330
- # Ask a vision model to describe the frame (with fallback)
331
- msg = HumanMessage(
332
- content=[
333
- {"type": "text", "text": f"Describe what is happening in this video frame concisely. Focus on aspects related to: {question}"},
334
- {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
335
- ]
336
- )
337
-
338
- desc = "No description available."
339
- for item in vision_models:
340
- try:
341
- print(f"--- Calling Vision Model for Frame {idx_num+1}: {item['name']} ---")
342
- desc = item['model'].invoke([msg]).content
343
- break
344
- except Exception as e:
345
- print(f"Vision Model {item['name']} failed for frame: {e}")
346
- continue
347
-
348
- extracted_descriptions.append(f"Frame {idx_num + 1}: {desc}")
349
-
350
- cap.release()
351
-
352
- # 2. Compile the context for the agent
353
- video_context = "\n".join(extracted_descriptions)
354
 
355
- # 3. Transcribe audio if possible
356
  try:
357
- whisper_mod = get_whisper()
358
- trans_result = whisper_mod.transcribe(video_path)
359
- transcript = trans_result.get("text", "")
360
- if transcript.strip():
361
- video_context += f"\n\nVideo Audio Transcript:\n{transcript}"
362
- except Exception as e:
363
- video_context += f"\n\n(No audio transcript generated: {e})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
- return f"Video Summary based on extracted frames and audio:\n{video_context}"
366
  except Exception as e:
367
- err_msg = str(e)
368
- if "No address associated with hostname" in err_msg or "Failed to resolve" in err_msg:
369
- return f"Error: The environment cannot access the internet (DNS failure). Please use 'web_search' or 'wiki_search' to find information about this video content instead of trying to download it."
370
- return f"Error analyzing video: {err_msg}"
371
- finally:
372
- if downloaded_video and os.path.exists(downloaded_video):
373
- try:
374
- os.remove(downloaded_video)
375
- except:
376
- pass
377
 
378
  @tool
379
- def read_url(url: str) -> str:
380
- """
381
- Reads and extracts text from a specific webpage URL.
382
- Use this if a web search snippet doesn't contain enough detail.
383
- """
384
  try:
385
- loader = WebBaseLoader(url)
386
- docs = loader.load()
387
- # Truncate to first 15000 characters to fit context
388
- if not docs:
389
- return "No content could be extracted from this URL."
390
- return docs[0].page_content[:15000]
391
  except Exception as e:
392
- return f"Error reading URL: {e}"
393
 
394
  @tool
395
- def run_python_script(code: str) -> str:
396
- """
397
- Executes a Python script locally and returns the stdout and stderr.
398
- Use this to perform complex math, data analysis (e.g. pandas), or file processing.
399
- When given a file path, you can write python code to read and analyze it.
400
- """
401
- with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
402
- f.write(code)
403
- temp_file_name = f.name
404
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  try:
406
- result = subprocess.run(
407
- ["python", temp_file_name],
408
- capture_output=True,
409
- text=True,
410
- timeout=60
411
- )
412
- os.remove(temp_file_name)
413
-
414
- output = result.stdout
415
- if result.stderr:
416
- output += f"\nErrors:\n{result.stderr}"
417
-
418
- return (output or "Script executed successfully with no output.")[:15000]
419
- except subprocess.TimeoutExpired:
420
- os.remove(temp_file_name)
421
- return "Script execution timed out after 60 seconds."
422
- except Exception as e:
423
- if os.path.exists(temp_file_name):
424
- os.remove(temp_file_name)
425
- return f"Failed to execute script: {str(e)}"
426
 
427
- @tool
428
- def read_document(file_path: str) -> str:
429
- """
430
- Reads the text contents of a local document (.txt, .csv, .json, .md).
431
- For binary files like .xlsx or .pdf, use run_python_script to process them instead.
432
- """
433
- try:
434
- with open(file_path, 'r', encoding='utf-8') as f:
435
- content = f.read()
436
- if len(content) > 15000:
437
- return content[:15000] + "... (truncated)"
438
- return content
 
 
 
439
  except Exception as e:
440
- return f"Error reading document: {str(e)}. Tip: You can try running a python script to read it!"
441
 
442
- system_prompt = """
443
- You are a helpful assistant tasked with answering questions using a set of tools.
444
- Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
445
- FINAL ANSWER: [YOUR FINAL ANSWER].
446
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
447
- Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
448
- """
449
 
450
  class AgentState(TypedDict):
451
  messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
452
 
453
- def read_message(state: AgentState) -> AgentState:
454
- messages = state["messages"]
455
- print(f"Processing question: {messages[-1].content if messages else ''}")
456
- # Just pass the messages through to the next node
457
- return {"messages": messages}
458
-
459
- def restart_required(state: AgentState) -> AgentState:
460
- messages = state["messages"]
461
- print(f"Processing question: {messages[-1].content if messages else ''}")
462
- # Just pass the messages through to the next node
463
- return {"messages": messages}
464
-
465
- # def tool_message(state: AgentState) -> AgentState:
466
- # messages = state["messages"]
467
- # prompt = f"""
468
- # You are a GAIA question answering expert.
469
- # Your task is to decide whether to use a tool or not.
470
- # If you need to use a tool, answer ONLY:
471
- # CALL_TOOL: <your tool name>
472
- # If you do not need to use a tool, answer ONLY:
473
- # NO_TOOL
474
- # Here is the question:
475
- # {messages}
476
- # """
477
- # return {"messages": messages}
478
- # response = model_with_tools.invoke(prompt)
479
- # return {"messages": messages + [response]}
480
-
481
- # Augment the LLM with tools
482
- tools = [web_search, wiki_search, analyze_image, analyze_audio, analyze_video, read_url, run_python_script, read_document]
483
- tools_by_name = {tool.name: tool for tool in tools}
484
- def extract_text_from_content(content: Any) -> str:
485
- """Extracts a simple string from various possible AIMessage content formats."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
486
  if isinstance(content, str):
487
- return content
488
- if isinstance(content, list):
489
- text_parts = []
490
- for part in content:
491
- if isinstance(part, str):
492
- text_parts.append(part)
493
- elif isinstance(part, dict) and "text" in part:
494
- text_parts.append(part["text"])
495
- elif isinstance(part, dict) and "type" in part and part["type"] == "text":
496
- text_parts.append(part.get("text", ""))
497
- return "".join(text_parts)
 
 
 
 
 
 
 
498
  return str(content)
499
 
500
- def answer_message(state: AgentState) -> AgentState:
501
  messages = state["messages"]
502
- current_date = datetime.datetime.now().strftime("%Y-%m-%d")
503
 
504
- prompt = [SystemMessage(f"""
505
- You are a master of the GAIA benchmark, a general AI assistant designed to solve complex multi-step tasks.
506
- Think carefully and logically. Use your tools effectively. Use your internal monologue to plan your steps.
507
-
508
- TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
509
-
510
- CRITICAL RULES:
511
- 1. If you see a path like `[Attached File Local Path: ...]` followed by an image, video, or audio file, YOU MUST USE THE CORRESPONDING TOOL (analyze_image, analyze_video, analyze_audio) IMMEDIATELY in your next step.
512
- 2. Plan your steps ahead. 12 steps is your LIMIT for the reasoning loop, so make every step count.
513
- 3. If a tool fails (e.g., 429 or 402), the system will automatically try another model for you, so just keep going!
514
- 4. Be concise and accurate. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
515
- 5. CHAIN-OF-THOUGHT: For complex questions, show your reasoning step by step before giving the final answer.
516
- 6. USE TOOLS AGGRESSIVELY: If a question requires computation, file reading, or web search, use the appropriate tools - don't try to answer from memory.
517
- 7. VERIFY YOUR ANSWER: Double-check calculations and facts using tools when uncertain.
518
- """)]
519
- messages = prompt + messages
520
-
521
- # Force tool usage if image path is detected
522
- for msg in state["messages"]:
523
- if isinstance(msg, HumanMessage) and "[Attached File Local Path:" in msg.content:
524
- messages.append(HumanMessage(content="IMPORTANT: I see an image path in the message. I MUST call the analyze_image tool IMMEDIATELY in my next step to see it."))
525
-
526
- # Multi-step ReAct Loop (Up to 12 reasoning steps)
527
- max_steps = 12
528
- draft_response = None
529
- current_tier = 0
530
 
531
- for step in range(max_steps):
532
- if step > 0:
533
- time.sleep(3)
534
-
535
- print(f"--- ReAct Step {step + 1} ---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
- # Max history truncation to avoid 413 Request Too Large errors
538
- safe_messages = messages[:2] + messages[-6:] if len(messages) > 10 else messages
 
 
 
 
 
539
 
540
- ai_msg, current_tier = smart_invoke(safe_messages, use_tools=True, start_tier=current_tier)
541
- messages.append(ai_msg)
542
-
543
- # Check if the model requested tools
544
- tool_calls = getattr(ai_msg, "tool_calls", None) or []
545
- if not tool_calls:
546
- # Model decided it has enough info to answer
547
- draft_response = ai_msg
548
- print(f"Model found answer or stopped tools: {ai_msg.content}")
549
- break
550
-
551
- # Execute requested tools and append their text output into the conversation
552
- for tool_call in tool_calls:
553
- name = tool_call["name"]
554
- args = tool_call["args"]
555
- tool_call_id = tool_call.get("id")
556
- print(f"Calling tool: {name} with args: {args}")
557
- try:
558
- tool = tools_by_name[name]
559
- tool_result = tool.invoke(args)
560
- except Exception as e:
561
- tool_result = f"Error executing tool {name}: {str(e)}"
562
-
563
- # Using ToolMessage allows the model to map the result back perfectly to its request
564
- messages.append(ToolMessage(content=str(tool_result), tool_call_id=tool_call_id, name=name))
565
-
566
- # If we exhausted all steps without an answer, force a draft response
567
- if draft_response is None:
568
- print("Max reasoning steps reached. Forcing answer extraction.")
569
- forced_msg = HumanMessage(content="You have reached the maximum reasoning steps. Please provide your best final answer based on the current context without any more tool calls.")
570
- messages.append(forced_msg)
571
- draft_response, _ = smart_invoke(messages, use_tools=False)
572
-
573
- # Third pass: strict GAIA formatting extraction
574
- formatting_sys = SystemMessage(
575
- content=(
576
- "You are a strict output formatter for the GAIA benchmark. "
577
- "Given a verbose draft answer, extract ONLY the final exact answer required. "
578
- "Return nothing else. DO NOT include prefixes like 'The answer is'. "
579
- "Strip trailing whitespace only. "
580
- "If the answer is a number, just return the number. "
581
- "If the answer is a list or set of elements, return them as a COMMA-SEPARATED list (e.g., 'a, b, c'). "
582
- "Preserve necessary punctuation within answers (e.g., 'Dr. Smith' should keep the period)."
583
- )
584
- )
585
- final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
586
- print(f"Draft response: {draft_response.content}")
587
- print(f"Strict Final response: {final_response.content}")
588
-
589
- # Return messages including the final AIMessage so BasicAgent reads .content
590
- # Ensure final_response has string content for basic agents
591
- if not isinstance(final_response.content, str):
592
- final_response.content = extract_text_from_content(final_response.content)
593
 
594
- messages.append(draft_response)
595
- messages.append(final_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  return {"messages": messages}
597
 
598
-
599
  def build_graph():
600
- agent_graph = StateGraph(AgentState)
601
-
602
- # Add nodes
603
- agent_graph.add_node("read_message", read_message)
604
- agent_graph.add_node("answer_message", answer_message)
605
-
606
- # Add edges
607
- agent_graph.add_edge(START, "read_message")
608
- agent_graph.add_edge("read_message", "answer_message")
609
-
610
- # Final edge
611
- agent_graph.add_edge("answer_message", END)
612
-
613
- # Compile and return the executable graph for use in app.py
614
- compiled_graph = agent_graph.compile()
615
- return compiled_graph
 
1
  import os
2
+ import re
 
 
 
 
3
  import subprocess
4
  import tempfile
5
+ from pathlib import Path
6
+ from typing import TypedDict, List, Union
7
+
8
+ import pandas as pd
9
+ import fitz
 
 
 
10
  from ddgs import DDGS
11
  from dotenv import load_dotenv
12
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
13
+ from langchain_core.tools import tool
14
  from langchain_groq import ChatGroq
15
+ from langgraph.graph import StateGraph, START, END
16
+ from langchain_community.document_loaders import WikipediaLoader
17
  from langchain_community.document_loaders.image import UnstructuredImageLoader
 
 
 
 
 
 
 
18
 
19
+ load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
@tool
def web_search(keywords: str) -> str:
    """Run a DuckDuckGo text search and return up to five results.

    Each output line is "<title>: <first 300 chars of body>".
    Returns "NO_RESULTS" when nothing matches and "SEARCH_ERROR: ..."
    when the search backend raises.
    """
    try:
        with DDGS() as ddgs:
            hits = ddgs.text(keywords, max_results=5)
            lines = [f"{hit['title']}: {hit['body'][:300]}" for hit in hits]
            return "\n".join(lines) or "NO_RESULTS"
    except Exception as exc:
        return f"SEARCH_ERROR: {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
@tool
def wiki_search(query: str) -> str:
    """Look *query* up on Wikipedia and return up to two article snippets.

    Output is one "<title>: <first 500 chars>" line per document,
    "NO_RESULTS" when nothing is found, or "WIKI_ERROR: ..." on failure.
    """
    try:
        documents = WikipediaLoader(query=query, load_max_docs=2).load()
        snippets = []
        for doc in documents:
            title = doc.metadata.get('title', 'Unknown')
            snippets.append(f"{title}: {doc.page_content[:500]}")
        return "\n".join(snippets) or "NO_RESULTS"
    except Exception as exc:
        return f"WIKI_ERROR: {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
@tool
def read_file(path: str) -> str:
    """Read a local text, spreadsheet, or PDF file and return its content.

    Supported: plain text (.txt/.md/.py/.json/.csv, first 15000 chars),
    Excel (.xlsx/.xls, rendered as CSV), and PDF (text of the first 5
    pages). Any failure is returned as an "ERROR: ..." string so the
    agent loop never crashes on bad input.
    """
    if not path or not os.path.exists(path):
        return "ERROR: File not found"
    try:
        ext = os.path.splitext(path)[1].lower()
        if ext in {".txt", ".md", ".py", ".json", ".csv"}:
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                return f.read()[:15000]
        if ext in {".xlsx", ".xls"}:
            return pd.read_excel(path).to_csv(index=False)[:15000]
        if ext == ".pdf":
            # Close the fitz document explicitly: the previous version
            # returned without closing it, leaking the file handle.
            doc = fitz.open(path)
            try:
                pages = [doc.load_page(i).get_text() for i in range(min(5, doc.page_count))]
            finally:
                doc.close()
            return "\n".join(pages)[:15000]
        return f"Unsupported: {ext}"
    except Exception as e:
        return f"ERROR: {e}"
 
58
 
59
@tool
def get_youtube_transcript(url: str) -> str:
    """Download auto-generated English subtitles for a YouTube URL via yt-dlp.

    Returns up to 15000 chars of subtitle text, "NO_SUBTITLES" when no
    .vtt file was produced, "NO_TRANSCRIPT" when the file held no cue
    text, or "TRANSCRIPT_ERROR: ..." on any failure (including timeout).
    """
    try:
        with tempfile.TemporaryDirectory() as tmp:
            cmd = ["yt-dlp", "--skip-download", "--write-auto-subs", "--sub-lang", "en", "-o", f"{tmp}/video", url]
            subprocess.run(cmd, capture_output=True, timeout=60)
            vtt_files = list(Path(tmp).glob("*.vtt"))
            if vtt_files:
                content = vtt_files[0].read_text(encoding="utf-8", errors="replace")
                lines = []
                for line in content.splitlines():
                    # Skip WEBVTT headers, cue numbers, inline markup, and
                    # timestamp lines ("00:00:01.000 --> 00:00:04.000").
                    # The old startswith('-->') test never matched
                    # timestamps, which begin with the start time rather
                    # than the arrow.
                    if not line or line.isdigit():
                        continue
                    if line.startswith(('<', 'WEBVTT')) or '-->' in line:
                        continue
                    lines.append(line)
                return "\n".join(lines)[:15000] or "NO_TRANSCRIPT"
            return "NO_SUBTITLES"
    except Exception as e:
        return f"TRANSCRIPT_ERROR: {e}"
74
 
75
@tool
def reverse_text(text: str) -> str:
    """Return *text* with its characters in reverse order."""
    return "".join(reversed(text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
@tool
def analyze_image(path: str) -> str:
    """Analyze an image file and describe its contents.

    Strategy, in order:
      1. OCR via pytesseract — returned when it yields meaningful text.
      2. A rough four-corner contrast heuristic to flag chess boards.
      3. A fallback description of pixel size and color mode.
    Returns "IMAGE_ERROR: ..." instead of raising on any failure.
    """
    try:
        from PIL import Image
        import pytesseract

        img = Image.open(path)

        # Try OCR first; more than ~10 non-blank chars is treated as a
        # usable text extraction.
        try:
            text = pytesseract.image_to_string(img)
            if text and len(text.strip()) > 10:
                return f"OCR TEXT:\n{text[:2000]}"
        except Exception as ocr_err:
            print(f"OCR failed: {ocr_err}")

        # Try detecting a chess board: sample four interior corner
        # patches and look for strong light/dark contrast between them.
        try:
            import numpy as np
            img_array = np.array(img)
            if len(img_array.shape) == 3:
                gray = np.mean(img_array, axis=2)
            else:
                gray = img_array

            h, w = gray.shape
            if h > 100 and w > 100:
                corner_check = [
                    gray[50:100, 50:100].mean(),
                    gray[50:100, w-100:w-50].mean(),
                    gray[h-100:h-50, 50:100].mean(),
                    gray[h-100:h-50, w-100:w-50].mean()
                ]
                if min(corner_check) < 100 and max(corner_check) > 150:
                    return "Chess board detected. Cannot parse position without advanced computer vision."
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt and
            # SystemExit still propagate; the heuristic is best-effort.
            pass

        desc = f"Image: {img.size[0]}x{img.size[1]}, Mode: {img.mode}"
        if img.size[0] > 200 and img.size[1] > 200:
            desc += "\nImage appears to be a photograph or diagram"

        return desc
    except Exception as e:
        return f"IMAGE_ERROR: {e}"
 
 
 
 
 
 
 
 
 
126
 
127
@tool
def transcribe_audio(path: str) -> str:
    """Transcribe an audio file to text using the Whisper "base" model.

    Returns at most 5000 characters of transcription,
    "NO_TRANSCRIPTION" when the model produced no text, or
    "AUDIO_TRANSCRIPTION_ERROR: ..." if loading or transcription fails.
    """
    try:
        import whisper
        transcription = whisper.load_model("base").transcribe(path)
        text = transcription["text"][:5000]
        return text or "NO_TRANSCRIPTION"
    except Exception as exc:
        return f"AUDIO_TRANSCRIPTION_ERROR: {exc}"
137
 
138
@tool
def analyze_counting_question(query: str, search_results: str) -> str:
    """Analyze search results to answer a counting/numerical question.

    Classifies the question (sum / highest / lowest / count), extracts an
    optional "YYYY-YYYY" / "YYYY to YYYY" year range, and asks the LLM to
    compute a single number from the supplied search results.
    Returns the LLM's text, or "ANALYSIS_ERROR: ..." on failure.
    """
    question_lower = query.lower()

    # Determine what type of question it is
    is_sum = 'sum' in question_lower or 'total' in question_lower
    is_highest = 'highest' in question_lower or 'maximum' in question_lower or 'max' in question_lower
    is_lowest = 'lowest' in question_lower or 'minimum' in question_lower or 'min' in question_lower
    is_count = 'how many' in question_lower or 'number of' in question_lower

    # Match "2000-2009", "2000 – 2009", or "2000 to 2009". The previous
    # character class [-–to]+ also matched arbitrary runs of stray
    # 't'/'o' characters between the years.
    year_match = re.search(r'(\d{4})\s*(?:[-–—]|to)\s*(\d{4})', query)
    years = year_match.groups() if year_match else None

    year_instruction = ""
    if years:
        year_instruction = f"""
YEAR FILTER: The question asks for items between {years[0]} and {years[1]} (inclusive).
- Only count items with years clearly in this range"""

    question_type = ""
    if is_sum:
        question_type = "SUMMATION: Add up all the numbers found."
    elif is_highest:
        question_type = "HIGHEST: Find the maximum/largest number."
    elif is_lowest:
        question_type = "LOWEST: Find the minimum/smallest number."
    elif is_count:
        question_type = "COUNT: Carefully count items matching the criteria."

    try:
        prompt = f"""Analyze these search results to answer a numerical question.

QUESTION: {query}
SEARCH RESULTS:
{search_results[:3000]}
{year_instruction}

TASK: {question_type}
1. Extract relevant data from the search results
2. Be precise about year filters if applicable
3. Calculate the answer
4. Provide your answer as JUST a number

FINAL ANSWER: """

        response = _invoke_llm([HumanMessage(content=prompt)])
        return response.content if hasattr(response, 'content') else str(response)
    except Exception as e:
        return f"ANALYSIS_ERROR: {e}"
188
 
189
# Tool registry exposed to the agent; tools_by_name allows dispatching a
# model-emitted tool call by its declared name.
tools = [web_search, wiki_search, read_file, get_youtube_transcript, reverse_text, analyze_image, transcribe_audio, analyze_counting_question]
tools_by_name = {t.name: t for t in tools}
 
 
 
 
 
191
 
192
class AgentState(TypedDict):
    """LangGraph state: the running conversation history for one question."""
    # Full message history; the final extracted answer is appended last.
    messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
194
 
195
def _invoke_llm(messages, fallback_count=0):
    """Invoke an LLM with tiered fallback.

    Order: Groq llama-3.3-70b first; on a rate-limit (429) error, fall
    back to Mistral Small via OpenRouter; if that also fails, wait 60s
    and recurse up to two more times. Any unrecoverable failure returns
    a stub object whose .content carries the error string, so callers
    never see an exception from this function.
    """
    # Try Groq first
    try:
        model = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)
        return model.invoke(messages)
    except Exception as e:
        if "rate limit" in str(e).lower() or "429" in str(e):
            # Try OpenRouter fallback
            try:
                from langchain_openai import ChatOpenAI

                # OpenRouter's OpenAI-compatible endpoint expects the bare
                # "vendor/model" id; the previous "openrouter/" prefix is a
                # LiteLLM routing convention and 404s against this API.
                # (load_dotenv already ran at module import; os is a
                # module-level import — no need to re-import here.)
                model = ChatOpenAI(
                    model="mistralai/mistral-small",
                    openai_api_base="https://openrouter.ai/api/v1",
                    openai_api_key=os.getenv("OPENROUTER_API_KEY"),
                    temperature=0
                )
                return model.invoke(messages)
            except Exception as fe:
                print(f"Fallback failed: {fe}")
            if fallback_count < 2:
                import time
                wait_time = 60
                print(f"Rate limited, waiting {wait_time}s...")
                time.sleep(wait_time)
                return _invoke_llm(messages, fallback_count + 1)
        print(f"LLM Error: {e}")
        # Duck-typed stand-in for an AIMessage: only .content is read.
        return type('obj', (object,), {'content': 'ERROR: ' + str(e)})()
226
+
227
def extract_numbers_from_text(text: str) -> List[str]:
    """Extract all numbers from text that could be answers.

    Scans for counts attached to common nouns, labelled totals, isolated
    integers, and year ranges. Returns a deduplicated flat list of digit
    strings (order is not guaranteed because of the set pass).
    """
    patterns = [
        r'(\d+)\s+(?:albums?|songs?|items?|years?|times?|players?|medals?|athletes?|votes?)',
        r'(?:total|count|number)[:\s]+(\d+)',
        r'(?:^|\s)(\d+)(?:\s|$|\.)',
        r'(\d{4})\s*[-–]\s*(\d{4})',
    ]
    numbers = []
    for pattern in patterns:
        for match in re.findall(pattern, text, re.I | re.M):
            # re.findall yields tuples for patterns with multiple groups
            # (the year-range pattern); flatten them so the result really
            # is List[str] as declared, not a mix of str and tuple.
            if isinstance(match, tuple):
                numbers.extend(part for part in match if part)
            else:
                numbers.append(match)
    return list(set(numbers))
240
+
241
def is_counting_question(question: str) -> bool:
    """Return True when *question* asks for a count or total."""
    lowered = question.lower()
    return any(marker in lowered for marker in ('how many', 'number of', 'count', 'total'))
246
+
247
def is_reversed_text(question: str) -> bool:
    """Heuristically detect whether *question* is written backwards.

    Reverses the string and compares how many common English function
    words appear in each direction; the text is judged reversed when the
    flipped version contains strictly more of them.
    """
    words = question.split()
    if len(words) < 3:
        return False
    common_words = {'the', 'is', 'in', 'of', 'and', 'what', 'how', 'for', 'with', 'from', 'this', 'that'}

    def _common_count(text: str) -> int:
        # The previous version only considered words longer than 3
        # characters, which discarded most of the marker words above
        # ('the', 'is', 'of', 'and', ...), crippling the heuristic.
        # Count every word, stripped of surrounding punctuation.
        tokens = {w.strip('.,!?;:"\'').lower() for w in text.split()}
        return len(tokens & common_words)

    return _common_count(question[::-1]) > _common_count(question)
262
+
263
def extract_answer(content) -> str:
    """Pull a concise final answer out of an LLM response.

    Preference order: the value after a "FINAL ANSWER:" marker (reduced
    to its trailing number when one is present), a trailing number in
    the whole text, a short first sentence, then the first 100
    characters. Non-string input is simply stringified.
    """
    if not isinstance(content, str):
        return str(content)

    marker = re.search(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', content, re.IGNORECASE)
    if marker:
        answer = marker.group(1).strip()
        trailing = re.search(r'(\d+)\s*$', answer)
        return trailing.group(1) if trailing else answer

    trailing = re.search(r'(\d+)\s*$', content.strip())
    if trailing:
        return trailing.group(1)

    first_sentence = content.split('.')[0].strip()
    if len(first_sentence) < 50:
        return first_sentence
    return content.strip()[:100]
284
 
285
def answer_question(state: AgentState) -> AgentState:
    """Single-node GAIA pipeline: preprocess, search, then answer.

    Steps: un-reverse mirrored questions, ingest any attached file,
    fetch YouTube transcripts, run web/Wikipedia searches, route
    counting questions through the counting analyzer, and finally ask
    the LLM for a "FINAL ANSWER:"-formatted reply. The extracted answer
    is appended as the last message of the returned state.
    """
    messages = state["messages"]
    user_msg = messages[-1].content if messages else ""

    # Pre-process: detect and fix reversed text
    if is_reversed_text(user_msg):
        fixed_msg = user_msg[::-1]
        messages.append(HumanMessage(content=f"ORIGINAL (REVERSED): {user_msg}\nFIXED: {fixed_msg}"))
        user_msg = fixed_msg

    # Pre-process: check for attached file and ingest it with the
    # matching tool (image / audio / generic reader).
    file_match = re.search(r"\[Attached File Local Path:\s*(.+?)\]", user_msg)
    if file_match:
        file_path = file_match.group(1).strip()
        try:
            ext = os.path.splitext(file_path)[1].lower()
            if ext in {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff"}:
                file_text = analyze_image.invoke({"path": file_path})
            elif ext in {".mp3", ".wav", ".m4a", ".flac", ".ogg"}:
                file_text = transcribe_audio.invoke({"path": file_path})
            else:
                file_text = read_file.invoke({"path": file_path})
            messages.append(HumanMessage(content=f"FILE CONTENT:\n{file_text}"))
        except Exception as e:
            messages.append(HumanMessage(content=f"FILE ERROR: {e}"))

    # Pre-process: check for YouTube links
    yt_match = re.search(r"(youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]+)", user_msg)
    if yt_match:
        video_id = yt_match.group(2)
        url = f"https://www.youtube.com/watch?v={video_id}"

        # Try transcript first
        try:
            transcript = get_youtube_transcript.invoke({"url": url})
            if transcript and transcript != "NO_SUBTITLES" and "ERROR" not in transcript:
                messages.append(HumanMessage(content=f"YOUTUBE TRANSCRIPT:\n{transcript}"))
        except Exception as e:
            messages.append(HumanMessage(content=f"YOUTUBE ERROR: {e}"))

        # Best-effort: search the web for the video's transcript/content
        try:
            yt_search = web_search.invoke({"keywords": f"youtube video {video_id} transcript or script"})
            messages.append(HumanMessage(content=f"YOUTUBE SEARCH:\n{yt_search}"))
        except Exception:
            pass

        # Also search for the video topic
        try:
            topic_search = web_search.invoke({"keywords": f'"{video_id}" youtube video content'})
            messages.append(HumanMessage(content=f"VIDEO CONTENT:\n{topic_search}"))
        except Exception:
            pass

    # For Wikipedia featured-article questions, use more targeted search
    if "wikipedia" in user_msg.lower() and "featured article" in user_msg.lower():
        try:
            search_terms = []
            if "dinosaur" in user_msg.lower():
                search_terms.append('"FunkMonk" Wikipedia featured article dinosaur')
            if "november 2016" in user_msg.lower():
                search_terms.append("Featured Article dinosaur November 2016 nomination")

            for term in search_terms:
                try:
                    result = web_search.invoke({"keywords": term})
                    messages.append(HumanMessage(content=f"WIKI SEARCH {term}:\n{result}"))
                except Exception:
                    pass
        except Exception as e:
            messages.append(HumanMessage(content=f"WIKI SEARCH ERROR: {e}"))

    # General web search on the (possibly un-reversed) question
    try:
        search_result = web_search.invoke({"keywords": user_msg[:200]})
        messages.append(HumanMessage(content=f"WEB SEARCH:\n{search_result}"))
    except Exception as e:
        messages.append(HumanMessage(content=f"WEB SEARCH ERROR: {e}"))

    # Do wiki search if not already done
    if "wikipedia" not in user_msg.lower():
        try:
            wiki_result = wiki_search.invoke({"query": user_msg[:100]})
            messages.append(HumanMessage(content=f"WIKIPEDIA:\n{wiki_result}"))
        except Exception as e:
            messages.append(HumanMessage(content=f"WIKIPEDIA ERROR: {e}"))

    # Collect all search results gathered above for the answering step
    all_search_results = ""
    for msg in messages:
        if hasattr(msg, 'content') and isinstance(msg.content, str):
            if msg.content.startswith(("WEB SEARCH:", "WIKIPEDIA:", "YOUTUBE", "FILE")):
                all_search_results += msg.content + "\n"
            elif "no search results" in msg.content.lower():
                all_search_results += msg.content + "\n"

    # If no useful search results at all, do a fallback web search
    if not all_search_results.strip() or "no search results" in all_search_results.lower():
        try:
            fallback = web_search.invoke({"keywords": user_msg[:200]})
            all_search_results = f"WEB SEARCH:\n{fallback}"
            messages.append(HumanMessage(content=all_search_results))
        except Exception:
            pass

    # For counting questions, use the specialized analysis tool and
    # short-circuit with its extracted answer.
    if is_counting_question(user_msg):
        try:
            analysis_result = analyze_counting_question.invoke({
                "query": user_msg,
                "search_results": all_search_results
            })
            messages.append(HumanMessage(content=f"COUNTING ANALYSIS:\n{analysis_result}"))
            final_answer = extract_answer(analysis_result)
            messages.append(HumanMessage(content=final_answer))
            return {"messages": messages}
        except Exception as e:
            messages.append(HumanMessage(content=f"ANALYSIS ERROR: {e}"))

    # Build prompt for non-counting questions
    prompt = SystemMessage(content="""Answer question based on search results. Format: FINAL ANSWER: answer""")

    # Single LLM call. The previous version invoked the LLM twice in a
    # row (doubling latency/cost) and crashed with an unbound `response`
    # NameError when both calls raised.
    response = None
    try:
        response = _invoke_llm([prompt, HumanMessage(content=f"Question: {user_msg}\n\nSearch results:\n{all_search_results[:6000]}\n\nAnswer:")])
        messages.append(response)
    except Exception as e:
        messages.append(HumanMessage(content=f"LLM ERROR: {e}"))

    # Extract final answer; degrades to "None" text if the call failed.
    final_answer = extract_answer(getattr(response, 'content', str(response)))
    messages.append(HumanMessage(content=final_answer))

    return {"messages": messages}
429
 
 
430
def build_graph():
    """Compile the one-node LangGraph pipeline (START -> answer -> END)."""
    graph = StateGraph(AgentState)
    graph.add_node("answer", answer_question)
    graph.add_edge(START, "answer")
    graph.add_edge("answer", END)
    return graph.compile()
 
 
 
 
 
 
 
 
 
 
 
agent_old.py ADDED
@@ -0,0 +1,615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import requests
4
+ import json
5
+ import traceback
6
+ import datetime
7
+ import subprocess
8
+ import tempfile
9
+ import time
10
+ from typing import TypedDict, List, Dict, Any, Optional, Union
11
+ from langchain_core import tools
12
+ from langgraph.graph import StateGraph, START, END
13
+ from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
14
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
15
+ from langchain_core.tools import tool
16
+ from langchain_community.document_loaders import WikipediaLoader
17
+ from ddgs import DDGS
18
+ from dotenv import load_dotenv
19
+ from groq import Groq
20
+ from langchain_groq import ChatGroq
21
+ from langchain_community.document_loaders.image import UnstructuredImageLoader
22
+ from langchain_community.document_loaders import WebBaseLoader
23
+ from langchain_google_genai import ChatGoogleGenerativeAI
24
+
25
# OpenCV is optional; degrade to None so callers can feature-detect it
# instead of failing at import time.
try:
    import cv2
except ImportError:
    cv2 = None
29
+
30
+ # os.environ["USER_AGENT"] = "gaia-agent/1.0"
31
+
32
# Module-level cache so the Whisper weights are loaded at most once.
whisper_model = None
def get_whisper():
    """Lazily load and cache the Whisper "base" speech-to-text model."""
    global whisper_model
    if whisper_model is None:
        # Import deferred until first use: whisper is heavy to load.
        import whisper
        whisper_model = whisper.load_model("base")
    return whisper_model
40
+
41
+ load_dotenv(override=True)
42
+
43
+ # Base Hugging Face LLM used by the chat wrapper
44
+ # base_llm = HuggingFaceEndpoint(
45
+ # repo_id="openai/gpt-oss-20b:hyperbolic",
46
+ # # deepseek-ai/DeepSeek-OCR:novita
47
+ # task="text-generation",
48
+ # temperature=0.0,
49
+ # huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
50
+ # )
51
+
52
+ # Model initializations moved to smart_invoke for lazy loading to prevent import errors if keys are missing.
53
+
54
def smart_invoke(msgs, use_tools=False, start_tier=0):
    """Invoke a chat model with tiered provider fallback.

    Tier order (see tiers_config below): several OpenRouter free models,
    then Gemini (with alternative model names), then Groq. A tier is
    skipped when its API-key env var is unset. On a retryable error
    (429 rate limit, 402 credits, 404 model-not-found, 5xx, ...) the
    next alternative/tier is tried; any other exception is re-raised.

    Returns:
        (response, tier_index) — the index lets callers resume from the
        tier that last worked via start_tier.
    Raises:
        The last retryable exception when every tier fails.
    """

    # Adaptive Gemini names verified via list_models (REST API)
    gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]

    tiers_config = [
        {"name": "Qwen3-Next-80B", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "qwen/qwen3-next-80b-a3b-instruct:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "Gemma-3-27B", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "NVIDIA-Nemotron-Super", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "nvidia/nemotron-3-super-120b-a12b:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "OpenRouter-FreeRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "openrouter/free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "DeepSeek-R1", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "deepseek/deepseek-r1:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "Gemini-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
        {"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
    ]

    last_exception = None
    for i in range(start_tier, len(tiers_config)):
        tier = tiers_config[i]
        api_key = os.getenv(tier["key"])
        if not api_key:
            continue

        # Factory for a provider-specific LangChain chat model; imports
        # are deferred so a missing SDK only breaks the tier that needs it.
        def create_model_instance(m_name, provider, b_url=None):
            if provider == "openai":
                from langchain_openai import ChatOpenAI
                return ChatOpenAI(model=m_name, openai_api_key=api_key, openai_api_base=b_url, temperature=0)
            elif provider == "google":
                from langchain_google_genai import ChatGoogleGenerativeAI
                return ChatGoogleGenerativeAI(model=m_name, temperature=0)
            elif provider == "groq":
                from langchain_groq import ChatGroq
                return ChatGroq(model=m_name, temperature=0, max_retries=2)
            return None

        primary_model = create_model_instance(tier["model_name"], tier["provider"], tier.get("base_url"))
        if use_tools:
            primary_model = primary_model.bind_tools(tools)

        models_to_try = [primary_model]
        if "alternatives" in tier:
            for alt_name in tier["alternatives"]:
                alt_model = create_model_instance(alt_name, tier["provider"], tier.get("base_url"))
                if use_tools:
                    alt_model = alt_model.bind_tools(tools)
                models_to_try.append(alt_model)

        for current_model in models_to_try:
            try:
                model_name = getattr(current_model, "model", tier["name"])
                print(f"--- Calling {tier['name']} ({model_name}) ---")
                return current_model.invoke(msgs), i
            except Exception as e:
                err_str = str(e).lower()
                # If it's a 404 (not found) and we have more alternatives, continue to the next alternative
                if any(x in err_str for x in ["not_found", "404"]) and current_model != models_to_try[-1]:
                    print(f"--- {tier['name']} model {model_name} not found. Trying alternative... ---")
                    continue

                # Catch other fallback triggers
                if any(x in err_str for x in ["rate_limit", "429", "500", "503", "overloaded", "not_found", "404", "402", "credits", "decommissioned", "invalid_request_error"]):
                    print(f"--- {tier['name']} Error: {e}. Trying next model/tier... ---")
                    last_exception = e
                    # If this tier has more alternatives, continue to the next one
                    if current_model != models_to_try[-1]:
                        continue
                    break  # Move to next tier
                # Non-retryable error: surface it to the caller.
                raise e

    if last_exception:
        print("CRITICAL: All fallback tiers failed.")
        raise last_exception
    return None, 0
130
+
131
@tool
def web_search(keywords: str) -> str:
    """
    Uses duckduckgo to search the top 5 result on web

    Use cases:
    - Identify personal information
    - Information search
    - Finding organisation information
    - Obtain the latest news

    Args:
        keywords: keywords used to search the web

    Returns:
        Search result (Header + body + url)
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            with DDGS() as ddgs:
                output = ""
                results = ddgs.text(keywords, max_results = 5)
                for result in results:
                    output += f"Results: {result['title']}\n{result['body']}\n{result['href']}\n\n"
                return output
        except Exception as e:
            # Exponential backoff (1s, then 2s) between retries; the last
            # attempt returns the failure as a string instead of raising
            # so the agent loop keeps running.
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
                continue
            return f"Search failed after {max_retries} attempts: {str(e)}"
162
+
163
@tool
def wiki_search(query: str) -> str:
    """
    Search Wikipedia for a query and return up to 3 results.

    Use cases:
        When the question requires the use of information from wikipedia

    Args:
        query: The search query

    Returns:
        Results wrapped in <Document source=... page=.../> blocks, or a
        "No Wikipedia results found." message.
    """

    search_docs = WikipediaLoader(query=query, load_max_docs=3, doc_content_chars_max=15000).load()

    if not search_docs:
        return "No Wikipedia results found."

    # Use .get for "source" as well: indexing raised KeyError whenever the
    # loader omitted that metadata key ("title" was already guarded).
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata.get("source", "Unknown")}" page="{doc.metadata.get("title", "Unknown Title")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ])
    return formatted_search_docs
186
+
187
def get_vision_models():
    """Returns a list of vision models to try, in order of preference.

    Each entry is a dict {"name": <human label>, "model": <chat model>}.
    Configs whose API-key environment variable is unset are skipped, so the
    returned list only contains providers that can actually be called.
    """
    configs = [
        {"name": "OpenRouter-Qwen3-VL", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "qwen/qwen3-vl-235b-thinking:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "NVIDIA-Nemotron-VL", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "nvidia/nemotron-nano-2-vl:free", "base_url": "https://integrate.api.nvidia.com/v1"},
        {"name": "OpenRouter-Gemma-3-27b-it", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
        {"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
    ]
    models = []
    for cfg in configs:
        api_key = os.getenv(cfg["key"])
        if not api_key:
            continue
        # Imports are deferred so providers with no key never import their SDK.
        if cfg["provider"] == "openai":
            from langchain_openai import ChatOpenAI
            m = ChatOpenAI(model=cfg["model_name"], openai_api_key=api_key, openai_api_base=cfg.get("base_url"), temperature=0)
        elif cfg["provider"] == "google":
            from langchain_google_genai import ChatGoogleGenerativeAI
            m = ChatGoogleGenerativeAI(model=cfg["model_name"], temperature=0)
        elif cfg["provider"] == "groq":
            from langchain_groq import ChatGroq
            m = ChatGroq(model=cfg["model_name"], temperature=0)
        else:
            # BUG FIX: an unrecognized provider previously fell through and
            # re-appended the stale `m` from the preceding iteration (or raised
            # NameError on the first one). Skip unknown providers instead.
            continue
        models.append({"name": cfg["name"], "model": m})
    return models
212
+
213
@tool
def analyze_image(image_path: str, question: str) -> str:
    """
    EXTERNAL SIGHT API: Sends an image path to a Vision Model to answer a specific question.
    YOU MUST CALL THIS TOOL ANY TIME an image (.png, .jpg, .jpeg) is attached to the prompt.
    NEVER claim you cannot see images. Use this tool instead.

    Args:
        image_path: The local path to the image file. (The os.path.exists
            check below rejects URLs, despite earlier wording.)
        question: Specific question describing what you want the vision model to look for.

    Returns:
        The vision model's textual answer, or an "Error ..." string.
    """
    try:
        if not os.path.exists(image_path):
            return f"Error: Image file not found at {image_path}"

        # If it's a local file, we encode it to base64
        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

        # Multimodal message: the text question plus the inline base64 image.
        # NOTE(review): the data URL always declares image/jpeg even for PNGs;
        # most providers sniff the real format — confirm for strict APIs.
        message = HumanMessage(
            content=[
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"},
                },
            ]
        )

        vision_models = get_vision_models()
        if not vision_models:
            return "Error: No vision models configured (missing API keys)."

        # Try each configured vision model in preference order; first success wins.
        last_err = None
        for item in vision_models:
            try:
                m_name = getattr(item['model'], 'model', 'unknown')
                print(f"--- Calling Vision Model: {item['name']} ({m_name}) ---")
                response = item['model'].invoke([message])
                return extract_text_from_content(response.content)
            except Exception as e:
                print(f"Vision Model {item['name']} failed.")
                traceback.print_exc()
                last_err = e
        return f"Error analyzing image: All vision models failed. Last error: {str(last_err)}"
    except Exception as e:
        traceback.print_exc()
        return f"Error reading/processing image: {str(e)}"
261
+
262
@tool
def analyze_audio(audio_path: str, question: str) -> str:
    """
    Transcribes an audio file (.mp3, .wav, .m4a) to answer questions about what is spoken.

    Args:
        audio_path: The local path to the audio file.
        question: The specific question to ask. (The full transcript is
            returned; the calling agent reasons over it to answer.)

    Returns:
        "Audio Transcript:\\n<text>" on success, or an error message.
    """
    try:
        model = get_whisper()
        result = model.transcribe(audio_path)
        transcript = result["text"]
        return f"Audio Transcript:\n{transcript}"
    except Exception as e:
        # FIX: corrected the broken grammar in the user-facing hint
        # ("You requires" -> "This requires").
        return f"Error analyzing audio: {str(e)}. Tip: This requires 'ffmpeg' installed on your system."
278
+
279
@tool
def analyze_video(video_path: str, question: str) -> str:
    """
    EXTERNAL SIGHT/HEARING API: Sends a video file to an external Vision/Audio model.
    YOU MUST CALL THIS TOOL ANY TIME a video (.mp4, .avi) is attached to the prompt.
    NEVER claim you cannot analyze videos. Use this tool instead.

    Strategy: sample 5 evenly spaced frames, describe each with the first
    vision model that succeeds, then append a Whisper audio transcript.

    Args:
        video_path: The local path (or http(s) URL) to the video file.
        question: Specific question describing what you want to extract from the video.

    Returns:
        A "Video Summary based on extracted frames and audio: ..." string,
        or an "Error ..." string.
    """
    if cv2 is None:
        return "Error: cv2 is not installed. Please install opencv-python."

    temp_dir = tempfile.gettempdir()
    downloaded_video = None  # set only when fetching a URL, so `finally` can clean up

    try:
        # Check if video_path is a URL
        if video_path.startswith("http"):
            print(f"Downloading video from URL: {video_path}")
            downloaded_video = os.path.join(temp_dir, f"video_{int(time.time())}.mp4")
            try:
                # Use yt-dlp to download the video
                # Note: --ffmpeg-location could be used if we knew where it was, but we assume it's in path or missing
                subprocess.run(["yt-dlp", "-f", "best[ext=mp4]/mp4", "-o", downloaded_video, video_path], check=True, timeout=120)
                video_path = downloaded_video
            except Exception as e:
                return f"Error downloading video from URL: {str(e)}. Tip: Check if yt-dlp is installed and the URL is valid."

        # 1. Extract frames evenly spaced throughout the video
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            # NOTE(review): this early return skips cap.release(); the handle is
            # reclaimed on GC, but confirm that is acceptable here.
            return "Error: Could not read video frames."

        # Take 5 frames as a summary
        frame_indices = [int(i * total_frames / 5) for i in range(5)]
        extracted_descriptions = []

        vision_models = get_vision_models()
        # Ensure Groq-Llama is at the front for video if preferred, but we'll use the default order for now.

        for idx_num, frame_idx in enumerate(frame_indices):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if ret:  # frames that fail to decode are silently skipped
                # Convert frame to base64
                _, buffer = cv2.imencode('.jpg', frame)
                encoded_image = base64.b64encode(buffer).decode('utf-8')

                # Ask a vision model to describe the frame (with fallback)
                msg = HumanMessage(
                    content=[
                        {"type": "text", "text": f"Describe what is happening in this video frame concisely. Focus on aspects related to: {question}"},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
                    ]
                )

                desc = "No description available."
                for item in vision_models:
                    try:
                        print(f"--- Calling Vision Model for Frame {idx_num+1}: {item['name']} ---")
                        desc = item['model'].invoke([msg]).content
                        break
                    except Exception as e:
                        print(f"Vision Model {item['name']} failed for frame: {e}")
                        continue

                extracted_descriptions.append(f"Frame {idx_num + 1}: {desc}")

        cap.release()

        # 2. Compile the context for the agent
        video_context = "\n".join(extracted_descriptions)

        # 3. Transcribe audio if possible
        try:
            whisper_mod = get_whisper()
            trans_result = whisper_mod.transcribe(video_path)
            transcript = trans_result.get("text", "")
            if transcript.strip():
                video_context += f"\n\nVideo Audio Transcript:\n{transcript}"
        except Exception as e:
            # Audio is best-effort: a failed transcription is reported inline, not fatal.
            video_context += f"\n\n(No audio transcript generated: {e})"

        return f"Video Summary based on extracted frames and audio:\n{video_context}"
    except Exception as e:
        err_msg = str(e)
        # DNS failures mean the sandbox has no internet; steer the agent to search tools.
        if "No address associated with hostname" in err_msg or "Failed to resolve" in err_msg:
            return f"Error: The environment cannot access the internet (DNS failure). Please use 'web_search' or 'wiki_search' to find information about this video content instead of trying to download it."
        return f"Error analyzing video: {err_msg}"
    finally:
        # Always remove the temporary download, even on error paths.
        if downloaded_video and os.path.exists(downloaded_video):
            try:
                os.remove(downloaded_video)
            except:
                pass
377
+
378
@tool
def read_url(url: str) -> str:
    """
    Reads and extracts text from a specific webpage URL.
    Use this if a web search snippet doesn't contain enough detail.
    """
    try:
        documents = WebBaseLoader(url).load()
    except Exception as e:
        return f"Error reading URL: {e}"
    if not documents:
        return "No content could be extracted from this URL."
    # Truncate to first 15000 characters to fit context
    return documents[0].page_content[:15000]
393
+
394
@tool
def run_python_script(code: str) -> str:
    """
    Executes a Python script locally and returns the stdout and stderr.
    Use this to perform complex math, data analysis (e.g. pandas), or file processing.
    When given a file path, you can write python code to read and analyze it.

    Args:
        code: The Python source code to execute.

    Returns:
        Combined stdout (plus stderr, if any), truncated to 15000 characters,
        or an error message on timeout/failure.
    """
    import sys  # local import keeps the change self-contained

    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
        f.write(code)
        temp_file_name = f.name

    try:
        # FIX: sys.executable guarantees the same interpreter/venv as this
        # process; a bare "python" may resolve differently (or not at all)
        # on PATH, especially on Windows.
        result = subprocess.run(
            [sys.executable, temp_file_name],
            capture_output=True,
            text=True,
            timeout=60
        )
        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"
        return (output or "Script executed successfully with no output.")[:15000]
    except subprocess.TimeoutExpired:
        return "Script execution timed out after 60 seconds."
    except Exception as e:
        return f"Failed to execute script: {str(e)}"
    finally:
        # Single cleanup path replaces the three duplicated os.remove calls.
        if os.path.exists(temp_file_name):
            os.remove(temp_file_name)
426
+
427
@tool
def read_document(file_path: str) -> str:
    """
    Reads the text contents of a local document (.txt, .csv, .json, .md).
    For binary files like .xlsx or .pdf, use run_python_script to process them instead.

    Args:
        file_path: Path to the text file to read.

    Returns:
        The file content (truncated to 15000 chars), or an error message.
    """
    try:
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except UnicodeDecodeError:
            # Robustness: fall back for legacy encodings (e.g. cp1252 CSV
            # exports) instead of failing; latin-1 decodes any byte sequence.
            with open(file_path, 'r', encoding='latin-1') as f:
                content = f.read()
        if len(content) > 15000:
            return content[:15000] + "... (truncated)"
        return content
    except Exception as e:
        return f"Error reading document: {str(e)}. Tip: You can try running a python script to read it!"
441
+
442
+ system_prompt = """
443
+ You are a helpful assistant tasked with answering questions using a set of tools.
444
+ Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
445
+ FINAL ANSWER: [YOUR FINAL ANSWER].
446
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
447
+ Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
448
+ """
449
+
450
class AgentState(TypedDict):
    """Graph state: the running conversation passed between LangGraph nodes."""
    # Full message history (system/human/AI); nodes append and return it.
    messages: List[Union[HumanMessage, AIMessage, SystemMessage]]
452
+
453
def read_message(state: AgentState) -> AgentState:
    """Entry node: logs the incoming question and forwards the state unchanged."""
    msgs = state["messages"]
    latest = msgs[-1].content if msgs else ''
    print(f"Processing question: {latest}")
    # Hand the untouched message list to the next node.
    return {"messages": msgs}
458
+
459
def restart_required(state: AgentState) -> AgentState:
    # NOTE(review): byte-for-byte duplicate of read_message, and not wired into
    # build_graph below — presumably leftover scaffolding; confirm before removal.
    messages = state["messages"]
    print(f"Processing question: {messages[-1].content if messages else ''}")
    # Just pass the messages through to the next node
    return {"messages": messages}
464
+
465
+ # def tool_message(state: AgentState) -> AgentState:
466
+ # messages = state["messages"]
467
+ # prompt = f"""
468
+ # You are a GAIA question answering expert.
469
+ # Your task is to decide whether to use a tool or not.
470
+ # If you need to use a tool, answer ONLY:
471
+ # CALL_TOOL: <your tool name>
472
+ # If you do not need to use a tool, answer ONLY:
473
+ # NO_TOOL
474
+ # Here is the question:
475
+ # {messages}
476
+ # """
477
+ # return {"messages": messages}
478
+ # response = model_with_tools.invoke(prompt)
479
+ # return {"messages": messages + [response]}
480
+
481
# Augment the LLM with tools
tools = [web_search, wiki_search, analyze_image, analyze_audio, analyze_video, read_url, run_python_script, read_document]
# Name -> tool lookup used by the ReAct loop to dispatch tool_calls.
tools_by_name = {entry.name: entry for entry in tools}
484
def extract_text_from_content(content: Any) -> str:
    """Extracts a simple string from various possible AIMessage content formats."""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        # Fallback for anything unexpected (numbers, objects, ...).
        return str(content)
    collected = []
    for chunk in content:
        if isinstance(chunk, str):
            collected.append(chunk)
        elif isinstance(chunk, dict):
            if "text" in chunk:
                collected.append(chunk["text"])
            elif chunk.get("type") == "text":
                # type=="text" without a "text" key contributes nothing.
                collected.append(chunk.get("text", ""))
    return "".join(collected)
499
+
500
def answer_message(state: AgentState) -> AgentState:
    """
    Core reasoning node: runs a bounded ReAct loop (LLM <-> tools), then a
    strict formatting pass that reduces the verbose draft to a bare GAIA answer.

    Returns the state with the draft answer appended, followed by the strictly
    formatted final answer, so callers can read `messages[-1].content`.
    """
    messages = state["messages"]
    current_date = datetime.datetime.now().strftime("%Y-%m-%d")

    # The system prompt is rebuilt on every call so the injected date is current.
    prompt = [SystemMessage(f"""
You are a master of the GAIA benchmark, a general AI assistant designed to solve complex multi-step tasks.
Think carefully and logically. Use your tools effectively. Use your internal monologue to plan your steps.

TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.

CRITICAL RULES:
1. If you see a path like `[Attached File Local Path: ...]` followed by an image, video, or audio file, YOU MUST USE THE CORRESPONDING TOOL (analyze_image, analyze_video, analyze_audio) IMMEDIATELY in your next step.
2. Plan your steps ahead. 12 steps is your LIMIT for the reasoning loop, so make every step count.
3. If a tool fails (e.g., 429 or 402), the system will automatically try another model for you, so just keep going!
4. Be concise and accurate. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
5. CHAIN-OF-THOUGHT: For complex questions, show your reasoning step by step before giving the final answer.
6. USE TOOLS AGGRESSIVELY: If a question requires computation, file reading, or web search, use the appropriate tools - don't try to answer from memory.
7. VERIFY YOUR ANSWER: Double-check calculations and facts using tools when uncertain.
""")]
    messages = prompt + messages

    # Force tool usage if image path is detected
    # NOTE(review): msg.content can be a list of parts for multimodal inputs,
    # in which case the `in` test raises TypeError — confirm inputs are str.
    for msg in state["messages"]:
        if isinstance(msg, HumanMessage) and "[Attached File Local Path:" in msg.content:
            messages.append(HumanMessage(content="IMPORTANT: I see an image path in the message. I MUST call the analyze_image tool IMMEDIATELY in my next step to see it."))

    # Multi-step ReAct Loop (Up to 12 reasoning steps)
    max_steps = 12
    draft_response = None
    current_tier = 0  # remembers which fallback tier smart_invoke last used

    for step in range(max_steps):
        if step > 0:
            time.sleep(3)  # crude spacing between LLM calls to avoid 429s

        print(f"--- ReAct Step {step + 1} ---")

        # Max history truncation to avoid 413 Request Too Large errors
        # NOTE(review): this slice can separate a ToolMessage from the AIMessage
        # that requested it, which some providers reject — verify.
        safe_messages = messages[:2] + messages[-6:] if len(messages) > 10 else messages

        ai_msg, current_tier = smart_invoke(safe_messages, use_tools=True, start_tier=current_tier)
        messages.append(ai_msg)

        # Check if the model requested tools
        tool_calls = getattr(ai_msg, "tool_calls", None) or []
        if not tool_calls:
            # Model decided it has enough info to answer
            draft_response = ai_msg
            print(f"Model found answer or stopped tools: {ai_msg.content}")
            break

        # Execute requested tools and append their text output into the conversation
        for tool_call in tool_calls:
            name = tool_call["name"]
            args = tool_call["args"]
            tool_call_id = tool_call.get("id")
            print(f"Calling tool: {name} with args: {args}")
            try:
                tool = tools_by_name[name]
                tool_result = tool.invoke(args)
            except Exception as e:
                # Feed the failure back to the model instead of crashing the loop.
                tool_result = f"Error executing tool {name}: {str(e)}"

            # Using ToolMessage allows the model to map the result back perfectly to its request
            messages.append(ToolMessage(content=str(tool_result), tool_call_id=tool_call_id, name=name))

    # If we exhausted all steps without an answer, force a draft response
    if draft_response is None:
        print("Max reasoning steps reached. Forcing answer extraction.")
        forced_msg = HumanMessage(content="You have reached the maximum reasoning steps. Please provide your best final answer based on the current context without any more tool calls.")
        messages.append(forced_msg)
        # NOTE(review): this call passes the full, untruncated history — it may
        # hit the same 413 limit the loop above guards against.
        draft_response, _ = smart_invoke(messages, use_tools=False)

    # Third pass: strict GAIA formatting extraction
    formatting_sys = SystemMessage(
        content=(
            "You are a strict output formatter for the GAIA benchmark. "
            "Given a verbose draft answer, extract ONLY the final exact answer required. "
            "Return nothing else. DO NOT include prefixes like 'The answer is'. "
            "Strip trailing whitespace only. "
            "If the answer is a number, just return the number. "
            "If the answer is a list or set of elements, return them as a COMMA-SEPARATED list (e.g., 'a, b, c'). "
            "Preserve necessary punctuation within answers (e.g., 'Dr. Smith' should keep the period)."
        )
    )
    final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
    print(f"Draft response: {draft_response.content}")
    print(f"Strict Final response: {final_response.content}")

    # Return messages including the final AIMessage so BasicAgent reads .content
    # Ensure final_response has string content for basic agents
    if not isinstance(final_response.content, str):
        final_response.content = extract_text_from_content(final_response.content)

    messages.append(draft_response)
    messages.append(final_response)
    return {"messages": messages}
597
+
598
+
599
def build_graph():
    """Assembles the linear LangGraph pipeline: START -> read_message -> answer_message -> END."""
    workflow = StateGraph(AgentState)

    # Register the two processing nodes.
    workflow.add_node("read_message", read_message)
    workflow.add_node("answer_message", answer_message)

    # Wire them in a straight line.
    workflow.add_edge(START, "read_message")
    workflow.add_edge("read_message", "answer_message")
    workflow.add_edge("answer_message", END)

    # Compile into the executable graph consumed by app.py.
    return workflow.compile()
app copy.py CHANGED
@@ -59,7 +59,7 @@ response.raise_for_status()
59
  questions_data = response.json()
60
  import time
61
  print(f"Running agent on {len(questions_data)} questions sequentially to avoid 429 errors...")
62
- for item in questions_data[6:7]:
63
  question_text = item.get("question")
64
  if question_text is None:
65
  continue
 
59
  questions_data = response.json()
60
  import time
61
  print(f"Running agent on {len(questions_data)} questions sequentially to avoid 429 errors...")
62
+ for item in questions_data[:2]:
63
  question_text = item.get("question")
64
  if question_text is None:
65
  continue
check_questions.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
import requests

# Fetch the current GAIA question set from the scoring space and print a
# short preview of each question with its attached file and task id.
# FIX: added a timeout and HTTP status check; guarded against None-valued
# "question"/"task_id" fields (dict.get's default only covers missing keys,
# not explicit nulls, so slicing previously crashed).
resp = requests.get('https://agents-course-unit4-scoring.hf.space/questions', timeout=30)
resp.raise_for_status()
questions = resp.json()
print(f"Total questions: {len(questions)}")
for i, q in enumerate(questions):
    text = q.get('question') or 'N/A'
    print(f"{i+1}. {text[:120]}...")
    print(f"   File: {q.get('file_name', 'None')}")
    print(f"   Task ID: {(q.get('task_id') or 'N/A')[:20]}...")
    print()
debug_chess.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv
load_dotenv(override=True)

from agent import analyze_image

# Use a sample image path (GAIA chess-board screenshot from the local HF cache)
path = r"C:\Users\Admin\.cache\huggingface\hub\datasets--gaia-benchmark--GAIA\snapshots\682dd723ee1e1697e00360edccf2366dc8418dd9\2023\validation\cca530fc-4052-43b2-b130-b30968d8aa44.png"

try:
    # BUG FIX: analyze_image's schema is {"image_path", "question"}; the old
    # call passed {"path": ...}, which fails tool-argument validation before
    # the vision model is ever reached.
    result = analyze_image.invoke({
        "image_path": path,
        "question": "Describe the chess position shown in this image.",
    })
    print("Image analysis:")
    print(result[:500])
except Exception as e:
    print(f"Error: {e}")
debug_chess2.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from huggingface_hub import hf_hub_download
from dotenv import load_dotenv

load_dotenv(override=True)

# Download chess image
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/cca530fc-4052-43b2-b130-b30968d8aa44.png', repo_type='dataset', token=token)
print(f"Image path: {path}")

# Test analyze_image
from agent import analyze_image
# BUG FIX: analyze_image expects {"image_path", "question"}; invoking with
# {"path": ...} fails argument validation before the model is called.
result = analyze_image.invoke({
    "image_path": path,
    "question": "Describe the chess position shown in this image.",
})
print(f"Image analysis: {result[:1000]}")
debug_issues.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Debug script: run the agent on scoring questions 4-8 and compare each
# answer against the GAIA validation ground truth.

# Initialize agent
graph = build_graph()

# Fetch questions 4-8 (where issues are)
resp = requests.get(f"{DEFAULT_API_URL}/questions")
questions = resp.json()[3:8]

# Load ground truth from the gated GAIA parquet (requires an HF token)
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

for i, q in enumerate(questions):
    task_id = q['task_id']
    question = q['question']
    # NOTE(review): file_name is printed but never attached to the prompt, so
    # attachment-based questions run without their file — confirm intent.
    file_name = q.get('file_name')
    ground_truth = answer_map.get(task_id, "NOT FOUND")

    print(f"\nQ{i+4}: {question[:60]}...")
    print(f"File: {file_name}")
    print(f"GT: {ground_truth}")

    result = graph.invoke({"messages": [HumanMessage(content=question)]})

    # Print all messages
    for j, msg in enumerate(result['messages']):
        if hasattr(msg, 'content'):
            # NOTE(review): content may be a list for multimodal messages;
            # slicing then prints a list, not text.
            content = msg.content[:200] if len(msg.content) > 200 else msg.content
            print(f"  Msg {j}: {content}")

    answer = result['messages'][-1].content
    print(f"Final Ans: {answer[:80]}")
debug_search.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests
from langchain_core.messages import HumanMessage
from agent import web_search, wiki_search, analyze_counting_question
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Debug script: run the raw search tools on one GAIA question and eyeball the
# output. NOTE(review): `analyze_counting_question` does not appear in the
# agent.py tool list shown in this commit — confirm it exists, otherwise this
# import fails at startup. os/requests/HumanMessage/hf_hub_download/pq are
# imported but unused here.

# Test Q1
question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use web search."

# Do searches
search = web_search.invoke({"keywords": question[:200]})
print("WEB SEARCH:")
# ASCII-fold output so Windows consoles with legacy codepages don't crash.
print(search[:1000].encode('ascii', 'replace').decode('ascii'))
print()

wiki = wiki_search.invoke({"query": question[:100]})
print("WIKIPEDIA:")
print(wiki[:1000].encode('ascii', 'replace').decode('ascii'))
print()

# Try analysis
all_search = f"WEB SEARCH:\n{search}\nWIKIPEDIA:\n{wiki}"
analysis = analyze_counting_question.invoke({"query": question, "search_results": all_search})
print("ANALYSIS:")
print(analysis.encode('ascii', 'replace').decode('ascii'))
debug_test.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Debug script: run the agent on scoring questions 3-5 and report
# CORRECT/WRONG against GAIA ground truth using an exact (case-insensitive)
# string match.

# Initialize agent
graph = build_graph()

# Fetch questions
resp = requests.get(f"{DEFAULT_API_URL}/questions")
questions = resp.json()

# Load ground truth from the gated GAIA parquet (requires an HF token)
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

# Test questions 3-5 specifically
for i in [2, 3, 4]:
    q = questions[i]
    task_id = q['task_id']
    question = q['question']
    # NOTE(review): file_name is never passed to the agent, so attachment
    # questions run without their file — confirm intent.
    file_name = q.get('file_name')
    ground_truth = answer_map.get(task_id, "NOT FOUND")

    print(f"\nQ{i+1}: {question[:80]}...")
    print(f"File: {file_name}")
    print(f"Ground Truth: {ground_truth}")

    result = graph.invoke({"messages": [HumanMessage(content=question)]})

    # Print all messages
    for j, msg in enumerate(result['messages']):
        if hasattr(msg, 'content'):
            content = msg.content
            if len(content) > 200:
                content = content[:200] + "..."
            print(f"  Msg {j}: {content}")

    answer = result['messages'][-1].content
    print(f"Agent Answer: {answer}")
    # Exact-string comparison; official GAIA scoring normalizes further.
    is_correct = answer.strip().lower() == str(ground_truth).strip().lower()
    print(f"Result: {'CORRECT' if is_correct else 'WRONG'}")
debug_wiki.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from agent import web_search, wiki_search

# Q5 question
question = "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?"

# Web search on the raw question text (capped at 200 chars)
web_results = web_search.invoke({"keywords": question[:200]})
print("WEB SEARCH:")
print(web_results[:1500])
print()

# Try Wikipedia with a more targeted query
wiki_results = wiki_search.invoke({"query": "Giganotosaurus featured article nomination"})
print("WIKI:")
print(wiki_results[:1500])
debug_wiki2.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
from agent import web_search, wiki_search

# Q5 - more specific search
search_terms = "Featured Article dinosaur November 2016 Wikipedia nomination"

# Run the web search tool and show the first 2000 chars of the result.
hits = web_search.invoke({"keywords": search_terms})
print("WEB SEARCH:")
print(hits[:2000])
debug_wiki3.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from agent import web_search

# Better search for Wikipedia question
search_terms = "Wikipedia Featured Article dinosaur November 2016 nominating user"

# Print the first 3000 chars of the tool output.
print(web_search.invoke({"keywords": search_terms})[:3000])
debug_wiki4.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from agent import web_search

# Very specific search
search_terms = '"FunkMonk" Wikipedia featured article dinosaur'

# Print the first 2000 chars of the tool output.
print(web_search.invoke({"keywords": search_terms})[:2000])
debug_youtube.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from agent import get_youtube_transcript, web_search

# Debug script: pull transcripts for the two YouTube-based GAIA questions.
# NOTE(review): `get_youtube_transcript` is not defined in the agent.py section
# shown in this commit — confirm it exists, otherwise this import fails.

# Q2 - YouTube
url2 = "https://www.youtube.com/watch?v=L1vXCYZAYYM"
transcript = get_youtube_transcript.invoke({"url": url2})
print("Q2 Transcript:", transcript[:500])

# Q7 - YouTube
url7 = "https://www.youtube.com/watch?v=1htKBjuUWec"
transcript7 = get_youtube_transcript.invoke({"url": url7})
print("\nQ7 Transcript:", transcript7[:500])

# Also search web for content
ws = web_search.invoke({"keywords": "Stargate SG-1 Urgo Teal'c hot scene response"})
print("\nWeb search:", ws[:500])
find_gaia_answers.py CHANGED
@@ -1,9 +1,10 @@
1
- import requests
2
- import json
3
  import os
4
- from dotenv import load_dotenv
 
5
 
6
- load_dotenv(override=True)
 
 
7
 
8
  # 1. Fetch current questions from the scoring space
9
  QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
@@ -16,31 +17,90 @@ except Exception as e:
16
  print(f"Error fetching questions: {e}")
17
  current_questions = []
18
 
19
- # 2. Try to fetch GAIA Validation metadata from HF
20
- # Note: This file is large and might be gated, but we can try common URLs
21
- GAIA_VAL_URL = "https://huggingface.co/datasets/gaia-benchmark/GAIA/resolve/main/2023/validation/metadata.jsonl"
22
- print(f"Fetching ground truth answers from {GAIA_VAL_URL}...")
23
- # We need a token for gated datasets
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
25
- headers = {"Authorization": f"Bearer {hf_token}"} if hf_token else {}
26
 
27
  try:
28
- resp = requests.get(GAIA_VAL_URL, headers=headers)
29
- if resp.status_code == 200:
30
- lines = resp.text.strip().split("\n")
31
- val_data = [json.loads(line) for line in lines]
32
- # Create a map of task_id -> answer
33
- answer_map = {item["task_id"]: item.get("Final answer") for item in val_data}
34
-
35
- print("\n--- GAIA GROUND TRUTH ANSWERS ---")
36
- for i, q in enumerate(current_questions):
37
- task_id = q.get("task_id")
38
- answer = answer_map.get(task_id, "NOT FOUND")
39
- print(f"{i+1}. [ID: {task_id[:8]}...] Answer: {answer}")
40
- print(f" Q: {q.get('question')[:80]}...")
41
- print("-" * 20)
42
- else:
43
- print(f"Failed to fetch ground truth (Status {resp.status_code}). Likely gated or wrong URL.")
44
- print("Tip: You can find them at https://huggingface.co/datasets/gaia-benchmark/GAIA/viewer/2023/validation")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  except Exception as e:
46
  print(f"Error during matching: {e}")
 
 
 
 
 
1
  import os
2
+ import re
3
+ import sys
4
 
5
+ import pandas as pd
6
+ import requests
7
+ from huggingface_hub import hf_hub_download
8
 
9
  # 1. Fetch current questions from the scoring space
10
  QUESTIONS_URL = "https://agents-course-unit4-scoring.hf.space/questions"
 
17
  print(f"Error fetching questions: {e}")
18
  current_questions = []
19
 
20
+ def _load_simple_dotenv(path: str) -> None:
21
+ """
22
+ Minimal .env loader that ignores non KEY=VALUE lines.
23
+ This avoids python-dotenv parse warnings for non-standard .env entries.
24
+ """
25
+ if not os.path.exists(path):
26
+ return
27
+
28
+ key_re = re.compile(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*)\s*$")
29
+ with open(path, "r", encoding="utf-8") as f:
30
+ for raw in f:
31
+ line = raw.strip()
32
+ if not line or line.startswith("#"):
33
+ continue
34
+ m = key_re.match(line)
35
+ if not m:
36
+ continue
37
+ k, v = m.group(1), m.group(2)
38
+ if (len(v) >= 2) and ((v[0] == v[-1]) and v[0] in ("'", '"')):
39
+ v = v[1:-1]
40
+ os.environ.setdefault(k, v)
41
+
42
+
43
+ # Load .env if present, but tolerate invalid lines
44
+ _load_simple_dotenv(os.path.join(os.path.dirname(__file__), ".env"))
45
+
46
+ # Avoid Windows console encoding crashes on Unicode characters
47
+ try:
48
+ sys.stdout.reconfigure(encoding="utf-8", errors="replace")
49
+ except Exception:
50
+ pass
51
+
52
+ # 2. Fetch GAIA 2023 validation metadata from HF (Parquet)
53
+ GAIA_REPO_ID = "gaia-benchmark/GAIA"
54
+ GAIA_VAL_FILENAME = "2023/validation/metadata.parquet"
55
+ print(f"Fetching ground truth answers from HF dataset {GAIA_REPO_ID} ({GAIA_VAL_FILENAME})...")
56
+
57
+ # Token can be required for gated datasets
58
  hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
 
59
 
60
  try:
61
+ parquet_path = hf_hub_download(
62
+ repo_id=GAIA_REPO_ID,
63
+ filename=GAIA_VAL_FILENAME,
64
+ repo_type="dataset",
65
+ token=hf_token,
66
+ )
67
+ df = pd.read_parquet(parquet_path)
68
+
69
+ # Build a map task_id -> answer with some tolerance to column naming
70
+ task_col = "task_id" if "task_id" in df.columns else None
71
+ answer_col = None
72
+ for c in ["Final answer", "final_answer", "answer", "Final Answer"]:
73
+ if c in df.columns:
74
+ answer_col = c
75
+ break
76
+
77
+ if not task_col or not answer_col:
78
+ raise KeyError(
79
+ f"Expected columns not found. Have columns: {list(df.columns)[:30]}"
80
+ )
81
+
82
+ answer_map = dict(zip(df[task_col].astype(str), df[answer_col].astype(str)))
83
+
84
+ print("\n--- GAIA GROUND TRUTH ANSWERS (matched to scoring questions) ---")
85
+ found = 0
86
+ total = len(current_questions)
87
+ for i, q in enumerate(current_questions):
88
+ task_id = q.get("task_id")
89
+ task_id_str = str(task_id) if task_id is not None else ""
90
+ answer = answer_map.get(task_id_str)
91
+ ok = answer is not None and answer != "nan"
92
+ found += int(ok)
93
+
94
+ task_preview = (task_id_str[:8] + "...") if task_id_str else "MISSING"
95
+ print(f"{i+1}. [ID: {task_preview}] Answer: {answer if ok else 'NOT FOUND'}")
96
+ question = q.get("question") or ""
97
+ print(f" Q: {question[:80]}...")
98
+ print("-" * 20)
99
+
100
+ print(f"\nMatched answers: {found}/{total}")
101
+ if total and found != total:
102
+ print("Some answers were NOT FOUND. This is usually an ID mismatch or missing HF access.")
103
  except Exception as e:
104
  print(f"Error during matching: {e}")
105
+ print("If the GAIA dataset is gated, ensure your HF token is set in HF_TOKEN or HUGGINGFACEHUB_API_TOKEN.")
106
+ print("You can view the files at https://huggingface.co/datasets/gaia-benchmark/GAIA/tree/main/2023/validation")
proxy.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Minimal Anthropic -> OpenAI proxy: accepts Anthropic Messages API
# requests and forwards them to one of several OpenAI-compatible
# providers, selected via environment variables.
import os, json, httpx
from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
import re

load_dotenv()

app = FastAPI()

# --- Provider config ---
# PROVIDER selects the upstream backend; MODEL is the upstream model id
# used for every request, regardless of what the client asked for.
PROVIDER = os.getenv("PROVIDER", "nvidia_nim")
MODEL = os.getenv("MODEL", "mistralai/devstral-2-123b-instruct-2512")

# Each entry points at an OpenAI-compatible /chat/completions endpoint.
PROVIDERS = {
    "nvidia_nim": {
        "base_url": "https://integrate.api.nvidia.com/v1",
        "api_key": os.getenv("NVIDIA_API_KEY"),
    },
    "openrouter": {
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": os.getenv("OPENROUTER_API_KEY"),
    },
    "groq": {
        "base_url": "https://api.groq.com/openai/v1",
        "api_key": os.getenv("GROQ_API_KEY"),
    },
    "google": {
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai",
        "api_key": os.getenv("GOOGLE_API_KEY"),
    },
    "zai": {
        "base_url": "https://api.z.ai/api/paas/v4",
        "api_key": os.getenv("ZAI_API_KEY"),
    },
}

# Fail fast at import time if the configuration is unusable.
provider = PROVIDERS.get(PROVIDER)
if not provider:
    raise ValueError(f"Unknown provider: {PROVIDER}. Choose from: {list(PROVIDERS.keys())}")
if not provider["api_key"]:
    raise ValueError(f"Missing API key for provider: {PROVIDER}")

BASE_URL = provider["base_url"]
API_KEY = provider["api_key"]

print(f"✅ Provider: {PROVIDER}")
print(f"✅ Model: {MODEL}")
51
+ # --- Helpers ---
52
def clean_delta(text):
    """Strip provider-specific control tokens (``<|...|>`` markers) from a
    chunk of model output before forwarding it to the client.

    Whole tool-call sections are removed first, then individual tool-call
    spans, and finally any leftover single control markers.
    """
    for span_pattern in (
        r'<\|tool_calls_section_begin\|>.*?<\|tool_calls_section_end\|>',
        r'<\|tool_call_begin\|>.*?<\|tool_call_end\|>',
    ):
        text = re.sub(span_pattern, '', text, flags=re.DOTALL)
    # Any remaining stray <|...|> token is dropped as well.
    return re.sub(r'<\|[^|]+\|>', '', text)
57
+
58
def anthropic_to_openai(body):
    """Translate an Anthropic Messages API payload into an OpenAI
    chat-completions payload for the configured backend.

    Only text content is forwarded: list-form content is flattened by
    joining its ``text`` blocks with spaces (non-text blocks are dropped).
    The upstream ``MODEL`` overrides whatever model the client requested.
    """
    def flatten(content):
        # Anthropic content is either a plain string or a list of typed blocks.
        if isinstance(content, list):
            return " ".join(b.get("text", "") for b in content if b.get("type") == "text")
        return content

    converted = []
    system = body.get("system")
    if system:
        converted.append({"role": "system", "content": flatten(system)})
    for msg in body.get("messages", []):
        converted.append({"role": msg["role"], "content": flatten(msg["content"])})

    return {
        "model": MODEL,
        "messages": converted,
        "max_tokens": body.get("max_tokens", 8192),
        "stream": body.get("stream", False),
    }
76
+
77
+
78
+ # --- Routes ---
79
+ @app.get("/v1/models")
80
+ async def models():
81
+ return JSONResponse({"data": [{"id": MODEL, "object": "model"}]})
82
+
83
+ @app.post("/v1/messages")
84
+ async def messages(request: Request):
85
+ body = await request.json()
86
+ oai_payload = anthropic_to_openai(body)
87
+ headers = {"Authorization": f"Bearer {API_KEY}"}
88
+
89
+ # Non-streaming
90
+ if not oai_payload["stream"]:
91
+ async with httpx.AsyncClient(timeout=120) as client:
92
+ r = await client.post(f"{BASE_URL}/chat/completions", json=oai_payload, headers=headers)
93
+ data = r.json()
94
+ text = data["choices"][0]["message"]["content"]
95
+ text = clean_delta(text)
96
+ return {
97
+ "id": "msg_1",
98
+ "type": "message",
99
+ "role": "assistant",
100
+ "content": [{"type": "text", "text": text}],
101
+ "model": body.get("model", MODEL),
102
+ "stop_reason": "end_turn",
103
+ "stop_sequence": None,
104
+ "usage": {"input_tokens": 0, "output_tokens": 0}
105
+ }
106
+
107
+ # Streaming
108
+ async def stream():
109
+ yield f"event: message_start\ndata: {json.dumps({'type':'message_start','message':{'id':'msg_1','type':'message','role':'assistant','content':[],'model':MODEL,'stop_reason':None,'stop_sequence':None,'usage':{'input_tokens':0,'output_tokens':0}}})}\n\n"
110
+ yield f"event: content_block_start\ndata: {json.dumps({'type':'content_block_start','index':0,'content_block':{'type':'text','text':''}})}\n\n"
111
+ yield f"event: ping\ndata: {json.dumps({'type':'ping'})}\n\n"
112
+ try:
113
+ async with httpx.AsyncClient(timeout=120) as client:
114
+ async with client.stream("POST", f"{BASE_URL}/chat/completions", json=oai_payload, headers=headers) as r:
115
+ async for line in r.aiter_lines():
116
+ if not line.startswith("data: ") or line.strip() == "data: [DONE]":
117
+ continue
118
+ try:
119
+ chunk = json.loads(line[6:])
120
+ delta = chunk["choices"][0].get("delta", {}).get("content") or ""
121
+ delta = clean_delta(delta)
122
+ if delta:
123
+ yield f"event: content_block_delta\ndata: {json.dumps({'type':'content_block_delta','index':0,'delta':{'type':'text_delta','text':delta}})}\n\n"
124
+ except Exception:
125
+ continue
126
+ except Exception as e:
127
+ yield f"event: content_block_delta\ndata: {json.dumps({'type':'content_block_delta','index':0,'delta':{'type':'text_delta','text':f'[proxy error: {str(e)}]'}})}\n\n"
128
+ yield f"event: content_block_stop\ndata: {json.dumps({'type':'content_block_stop','index':0})}\n\n"
129
+ yield f"event: message_delta\ndata: {json.dumps({'type':'message_delta','delta':{'stop_reason':'end_turn','stop_sequence':None},'usage':{'output_tokens':0}})}\n\n"
130
+ yield f"event: message_stop\ndata: {json.dumps({'type':'message_stop'})}\n\n"
131
+
132
+ return StreamingResponse(stream(), media_type="text/event-stream")
quick_test.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Smoke test: run the agent on the first scoring question and compare
its answer with the GAIA validation ground truth.

Requires HF_TOKEN (or HUGGINGFACEHUB_API_TOKEN) for the gated GAIA
dataset, plus whatever keys the agent itself needs.
"""
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize agent
graph = build_graph()

# Fetch 1 question; fail loudly on HTTP errors instead of choking on bad JSON.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
resp.raise_for_status()
questions = resp.json()[:1]
if not questions:
    raise SystemExit("Scoring API returned no questions.")

# Load ground truth (task_id -> "Final answer") from the GAIA validation split.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

# Test
q = questions[0]
task_id = q['task_id']
question = q['question']
ground_truth = answer_map.get(task_id, "NOT FOUND")

print(f"Question: {question[:100]}...")
print(f"Ground Truth: {ground_truth}")
print("-" * 40)

result = graph.invoke({"messages": [HumanMessage(content=question)]})
answer = result['messages'][-1].content
print(f"Agent Answer: {answer}")
print("-" * 40)

# Case/whitespace-insensitive exact match, mirroring the scoring server.
# str() guards against non-string message content.
is_correct = str(answer).strip().lower() == str(ground_truth).strip().lower()
print(f"Correct: {is_correct}")
test_10.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Run the agent on the first 10 scoring questions and report a score
against the GAIA validation ground truth."""
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize agent
graph = build_graph()

# Fetch 10 questions; fail loudly on HTTP errors.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
resp.raise_for_status()
questions = resp.json()[:10]

# Load ground truth (task_id -> "Final answer") from the GAIA validation split.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

correct = 0
total = 0

for q in questions:
    task_id = q['task_id']
    question = q['question']
    file_name = q.get('file_name')
    ground_truth = answer_map.get(task_id, "NOT FOUND")

    print(f"\nQ{total+1}: {question[:60]}...")
    print(f"File: {file_name}")
    print(f"GT: {ground_truth}")

    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    answer = result['messages'][-1].content
    print(f"Ans: {str(answer)[:50]}")

    # Case/whitespace-insensitive exact match, mirroring the scoring server.
    is_correct = str(answer).strip().lower() == str(ground_truth).strip().lower()
    if is_correct:
        correct += 1
    total += 1
    print(f"{'CORRECT' if is_correct else 'WRONG'}")

# Guard against an empty question list (would otherwise divide by zero).
if total:
    print(f"\n=== Score: {correct}/{total} = {correct/total*100:.0f}% ===")
else:
    print("\nNo questions returned by the scoring API; nothing to score.")
test_5.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Run the agent on the first 5 scoring questions and report a score
against the GAIA validation ground truth."""
import os
import requests
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize agent
graph = build_graph()

# Fetch 5 questions; fail loudly on HTTP errors.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
resp.raise_for_status()
questions = resp.json()[:5]

# Load ground truth (task_id -> "Final answer") from the GAIA validation split.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

correct = 0
total = 0

for q in questions:
    task_id = q['task_id']
    question = q['question']
    ground_truth = answer_map.get(task_id, "NOT FOUND")

    print(f"\nQ{total+1}: {question[:80]}...")
    print(f"Ground Truth: {ground_truth}")

    result = graph.invoke({"messages": [HumanMessage(content=question)]})
    answer = result['messages'][-1].content
    print(f"Agent Answer: {answer}")

    # Case/whitespace-insensitive exact match, mirroring the scoring server.
    is_correct = str(answer).strip().lower() == str(ground_truth).strip().lower()
    if is_correct:
        correct += 1
    total += 1
    print(f"Result: {'CORRECT' if is_correct else 'WRONG'}")

# Guard against an empty question list (would otherwise divide by zero).
if total:
    print(f"\n=== Score: {correct}/{total} = {correct/total*100:.0f}% ===")
else:
    print("\nNo questions returned by the scoring API; nothing to score.")
test_all.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Run the agent on ALL scoring questions and report a final score
against the GAIA validation ground truth.

This is a long run: individual agent failures are caught and counted as
wrong answers instead of aborting the whole loop.
"""
import os
import requests
import time
from langchain_core.messages import HumanMessage
from agent import build_graph
from huggingface_hub import hf_hub_download
import pyarrow.parquet as pq
from dotenv import load_dotenv

load_dotenv(override=True)

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# Initialize agent
graph = build_graph()

# Fetch ALL questions; fail loudly on HTTP errors.
resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
resp.raise_for_status()
questions = resp.json()

# Load ground truth (task_id -> "Final answer") from the GAIA validation split.
token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
df = pq.read_table(path).to_pandas()
answer_map = dict(zip(df['task_id'], df['Final answer']))

correct = 0
total = 0

for q in questions:
    task_id = q['task_id']
    question = q['question']
    file_name = q.get('file_name')
    ground_truth = answer_map.get(task_id, "NOT FOUND")

    print(f"\n[{total+1}/{len(questions)}] {question[:50]}...")

    try:
        result = graph.invoke({"messages": [HumanMessage(content=question)]})
        answer = result['messages'][-1].content
    except Exception as e:
        # Don't let one failing question abort the whole (long) run.
        answer = f"[agent error: {e}]"

    # Case/whitespace-insensitive exact match, mirroring the scoring server.
    is_correct = str(answer).strip().lower() == str(ground_truth).strip().lower()
    if is_correct:
        correct += 1
    total += 1

    status = "✅" if is_correct else "❌"
    print(f" {status} GT: {str(ground_truth)[:30]}")
    print(f" Ans: {str(answer)[:50]}")

    # Light pacing to avoid hammering rate-limited providers.
    time.sleep(1)

# Guard against an empty question list (would otherwise divide by zero).
if total:
    print(f"\n=== FINAL SCORE: {correct}/{total} = {correct/total*100:.0f}% ===")
else:
    print("\nNo questions returned by the scoring API; nothing to score.")
+ print(f"\n=== FINAL SCORE: {correct}/{total} = {correct/total*100:.0f}% ===")