Paperbag committed on
Commit
47b5c71
·
1 Parent(s): 2742715

feat: implement modular tool architecture and expand LLM provider support while cleaning up legacy test scripts.

Browse files
__pycache__/agent.cpython-39.pyc CHANGED
Binary files a/__pycache__/agent.cpython-39.pyc and b/__pycache__/agent.cpython-39.pyc differ
 
agent.py CHANGED
@@ -1,204 +1,25 @@
1
  import os
2
  import re
3
- import subprocess
4
- import tempfile
5
- from pathlib import Path
6
  from typing import TypedDict, List, Union
7
 
8
- import pandas as pd
9
- import fitz
10
- from langchain_tavily import TavilySearch
11
  from dotenv import load_dotenv
12
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
13
- from langchain_core.tools import tool
14
- from langchain_groq import ChatGroq
15
- from langchain_google_genai import ChatGoogleGenerativeAI
16
  from langgraph.graph import StateGraph, START, END
17
- from langchain_community.document_loaders import WikipediaLoader, UnstructuredFileLoader
18
- from langchain_community.document_loaders.image import UnstructuredImageLoader
19
 
20
- load_dotenv()
21
-
22
@tool
def python_repl(code: str) -> str:
    """Execute python code and return the output. Use this for calculations, data analysis, or processing files.
    The code should be a valid python script that prints the final result.
    You can use libraries like pandas, numpy, PIL, etc.
    Example: print(df.head()) or print(2 + 2)"""
    # SECURITY NOTE(review): this exec()s model-generated code inside the agent
    # process with the agent's own privileges. Only run it in a sandboxed
    # environment.
    from contextlib import redirect_stdout
    from io import StringIO

    captured = StringIO()
    try:
        # redirect_stdout restores sys.stdout even when the snippet raises.
        with redirect_stdout(captured):
            # Module globals are reused on purpose so that names defined by one
            # call remain visible to the next one.
            exec(code, globals())
    except (Exception, SystemExit) as e:
        # SystemExit is caught so that an `exit()` in the snippet cannot stop
        # the agent. The exception type is reported because str(e) alone is
        # often ambiguous (e.g. a KeyError prints only the key).
        return f"PYTHON_ERROR: {type(e).__name__}: {e}"
    return captured.getvalue().strip() or "Code executed successfully (no output)."
42
-
43
@tool
def web_search(keywords: str) -> str:
    """Search the web using Tavily. This tool performs a concise, focused search to answer factual questions or gather brief information snippets.
    For deeper research or browsing specific URLs, additional tools may be required.
    """
    try:
        response = TavilySearch(max_results=5).invoke(keywords)
        # NOTE(review): langchain_tavily is expected to return a payload dict
        # ({"results": [...]} on success, {"error": ...} on failure) rather than
        # a bare list; confirm against the installed version. A plain list is
        # still accepted for backward compatibility.
        if isinstance(response, dict):
            if response.get("error"):
                return f"SEARCH_ERROR: {response['error']}"
            hits = response.get("results") or []
        else:
            hits = response or []
        formatted_results = [
            f"Title: {hit['title']}\nURL: {hit['url']}\nContent: {hit['content'][:300]}"
            for hit in hits
        ]
        return "\n".join(formatted_results) or "NO_RESULTS"
    except Exception as e:
        return f"SEARCH_ERROR: {e}"
57
-
58
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia."""
    try:
        loader = WikipediaLoader(query=query, load_max_docs=2)
        summaries = []
        for page in loader.load():
            # Each entry is "<title>: <first 500 characters of the page>".
            heading = page.metadata.get('title', 'Unknown')
            summaries.append(f"{heading}: {page.page_content[:500]}")
        combined = "\n".join(summaries)
        if combined:
            return combined
        return "NO_RESULTS"
    except Exception as e:
        return f"WIKI_ERROR: {e}"
66
-
67
def _load_with_unstructured(path: str, loader_cls) -> str:
    """Load *path* with the given loader class and join the text of every returned document."""
    return "\n\n".join(doc.page_content for doc in loader_cls(path).load())


@tool
def read_file(path: str) -> str:
    """Read a local file using robust parsing for various document types.
    For PDFs, it first tries PyMuPDF (fitz) for high-quality text extraction,
    falling back to UnstructuredFileLoader. For images, it uses UnstructuredImageLoader.
    The content will be truncated to 15000 characters.
    """
    if not path or not os.path.exists(path):
        return "ERROR: File not found"
    try:
        ext = os.path.splitext(path)[1].lower()
        if ext in {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}:
            content = _load_with_unstructured(path, UnstructuredImageLoader)
        elif ext == ".pdf":
            try:
                pdf = fitz.open(path)
                try:
                    content = "\n".join(page.get_text() for page in pdf)
                finally:
                    # Close the document even when text extraction raises.
                    pdf.close()
                if not content.strip():
                    # An empty result is treated as a failure so the fallback
                    # loader below gets a chance.
                    raise ValueError("No text extracted with fitz")
            except Exception:
                content = _load_with_unstructured(path, UnstructuredFileLoader)
        else:
            content = _load_with_unstructured(path, UnstructuredFileLoader)

        return content[:15000] if content else "EMPTY_FILE"
    except Exception as e:
        return f"ERROR: {e}"
101
 
102
@tool
def browse_url(url: str) -> str:
    """Browse a URL and return its clean text content. Use this to read the full content of a webpage identified by web_search.
    If the page content is too large, it will be truncated.
    """
    try:
        import requests
        from bs4 import BeautifulSoup

        response = requests.get(url, timeout=10, headers={"User-Agent": "mozilla/5.0"})
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop elements that carry no readable page content.
        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form']):
            element.extract()

        chunks = []
        for line in soup.get_text().splitlines():
            # Split on runs of two spaces (layout gaps), not on single spaces:
            # splitting on one space would put every word on its own line.
            chunks.extend(phrase.strip() for phrase in line.strip().split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        return text[:15000]  # Truncate to avoid long contexts
    except Exception as e:
        return f"BROWSE_ERROR: {e}"
122
-
123
@tool
def get_youtube_transcript(url: str) -> str:
    """Get YouTube transcript."""
    try:
        with tempfile.TemporaryDirectory() as tmp:
            cmd = ["yt-dlp", "--skip-download", "--write-auto-subs", "--sub-lang", "en", "-o", f"{tmp}/video", url]
            subprocess.run(cmd, capture_output=True, timeout=60)
            vtt_files = list(Path(tmp).glob("*.vtt"))
            if not vtt_files:
                return "NO_SUBTITLES"

            content = vtt_files[0].read_text(encoding="utf-8", errors="replace")
            lines = []
            for raw in content.splitlines():
                # Cue timing lines look like "00:00:01.000 --> 00:00:03.000":
                # the arrow sits in the middle, so test for containment.
                if "-->" in raw or raw.isdigit():
                    continue
                if raw.startswith(("WEBVTT", "Kind:", "Language:")):
                    continue
                # Strip inline markup such as <c> or <00:00:01.500> but keep the words.
                text = re.sub(r"<[^>]+>", "", raw).strip()
                # Auto-generated captions repeat the previous line while rolling.
                if text and (not lines or lines[-1] != text):
                    lines.append(text)
            return "\n".join(lines)[:15000] or "NO_TRANSCRIPT"
    except Exception as e:
        return f"TRANSCRIPT_ERROR: {e}"
138
-
139
@tool
def reverse_text(text: str) -> str:
    """Reverse the given text."""
    # Walk the characters back to front and glue them together again.
    return "".join(reversed(text))
143
-
144
-
145
# Whisper models already loaded by this process, keyed by model name.
_WHISPER_MODEL_CACHE = {}


def _get_whisper_model(name: str = "base"):
    """Return the Whisper model *name*, loading it on first use only."""
    if name not in _WHISPER_MODEL_CACHE:
        # Imported lazily so the module can load without whisper installed.
        import whisper

        _WHISPER_MODEL_CACHE[name] = whisper.load_model(name)
    return _WHISPER_MODEL_CACHE[name]


@tool
def transcribe_audio(path: str) -> str:
    """Transcribe audio file to text."""
    try:
        # Reuse the cached model instead of reloading it for every call.
        result = _get_whisper_model("base").transcribe(path)
        return result["text"][:5000] or "NO_TRANSCRIPTION"
    except Exception as e:
        return f"AUDIO_TRANSCRIPTION_ERROR: {e}"
155
 
156
- # --- Tools Configuration ---
157
# Every tool that gets bound to the LLM.
tools = [
    web_search, wiki_search, read_file, get_youtube_transcript,
    reverse_text, transcribe_audio, python_repl, browse_url,
]
# Name -> tool lookup table.
tools_by_name = {registered.name: registered for registered in tools}
168
 
169
class AgentState(TypedDict):
    """Typed dictionary describing the state handled by the agent."""

    # Conversation messages: human, AI, system and tool messages.
    messages: List[Union[HumanMessage, AIMessage, SystemMessage, ToolMessage]]
    # Integer counter; presumably the number of reflection passes so far — TODO confirm.
    reflection_count: int
172
 
173
- # --- LLM Invocation with Fallback ---
174
def _invoke_llm_with_tools(messages, fallback_count=0):
    """Call a tool-bound chat model, falling back from Gemini to Groq.

    Gemini 1.5 Flash is tried first. If it raises, a Groq model is used:
    Llama 3.3 70B when ``fallback_count`` is 0, Llama 3.1 8B otherwise. When
    Groq reports a rate limit and ``fallback_count`` is below 2, the function
    sleeps ``10 * (fallback_count + 1)`` seconds and calls itself again with
    the counter incremented. Any other Groq failure is returned as an
    ``AIMessage`` whose content starts with ``ERROR:``.
    """
    try:
        gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)
        return gemini.bind_tools(tools).invoke(messages)
    except Exception as e:
        print(f"Gemini Error: {e}. Falling back to Groq...")

    if fallback_count == 0:
        groq_model = "llama-3.3-70b-versatile"
    else:
        groq_model = "llama-3.1-8b-instant"

    try:
        groq = ChatGroq(model=groq_model, temperature=0)
        return groq.bind_tools(tools).invoke(messages)
    except Exception as groq_e:
        failure = groq_e

    lowered = str(failure).lower()
    rate_limited = "rate limit" in lowered or "429" in lowered
    if not (rate_limited and fallback_count < 2):
        print(f"Critical LLM Error: {failure}")
        return AIMessage(content=f"ERROR: All LLM invocations failed: {failure}")

    import time

    # Linear back-off: 10s on the first retry, 20s on the second.
    wait_time = 10 * (fallback_count + 1)
    print(f"Groq Rate limit hit. Waiting {wait_time}s...")
    time.sleep(wait_time)
    return _invoke_llm_with_tools(messages, fallback_count + 1)
202
 
203
  # --- Helper Functions ---
204
  def is_reversed_text(question: str) -> bool:
 
1
  import os
2
  import re
 
 
 
3
  from typing import TypedDict, List, Union
4
 
 
 
 
5
  from dotenv import load_dotenv
6
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
 
 
 
7
  from langgraph.graph import StateGraph, START, END
 
 
8
 
9
+ from tools import __all__ as tools, tools_by_name
10
+ from llm import invoke_llm
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
class AgentState(TypedDict):
    """Typed dictionary describing the state handled by the agent."""

    # Conversation messages: human, AI, system and tool messages.
    messages: List[Union[HumanMessage, AIMessage, SystemMessage, ToolMessage]]
    # Integer counter; presumably the number of reflection passes so far — TODO confirm.
    reflection_count: int
18
 
19
+
20
def _invoke_llm_with_tools(messages, fallback_count=0):
    """Invoke LLM with provider fallback.

    Thin wrapper: forwards ``messages``, the module-level ``tools`` and
    ``fallback_count`` to ``invoke_llm`` and returns its result unchanged.
    """
    # NOTE(review): `tools` is imported here as `tools.__all__`; confirm that it
    # holds tool objects rather than a list of export names.
    return invoke_llm(messages, tools, fallback_count)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # --- Helper Functions ---
25
  def is_reversed_text(question: str) -> bool:
agent_old.py DELETED
@@ -1,615 +0,0 @@
1
- import os
2
- import base64
3
- import requests
4
- import json
5
- import traceback
6
- import datetime
7
- import subprocess
8
- import tempfile
9
- import time
10
- from typing import TypedDict, List, Dict, Any, Optional, Union
11
- from langchain_core import tools
12
- from langgraph.graph import StateGraph, START, END
13
- from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
14
- from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
15
- from langchain_core.tools import tool
16
- from langchain_community.document_loaders import WikipediaLoader
17
- from ddgs import DDGS
18
- from dotenv import load_dotenv
19
- from groq import Groq
20
- from langchain_groq import ChatGroq
21
- from langchain_community.document_loaders.image import UnstructuredImageLoader
22
- from langchain_community.document_loaders import WebBaseLoader
23
- from langchain_google_genai import ChatGoogleGenerativeAI
24
-
25
- try:
26
- import cv2
27
- except ImportError:
28
- cv2 = None
29
-
30
- # os.environ["USER_AGENT"] = "gaia-agent/1.0"
31
-
32
# Process-wide Whisper model; stays None until get_whisper() is first called.
whisper_model = None


def get_whisper():
    """Return the shared Whisper "base" model, loading it on the first call."""
    global whisper_model
    if whisper_model is not None:
        return whisper_model
    # Imported here so that whisper is only required when audio is transcribed.
    import whisper

    whisper_model = whisper.load_model("base")
    return whisper_model
40
-
41
- load_dotenv(override=True)
42
-
43
- # Base Hugging Face LLM used by the chat wrapper
44
- # base_llm = HuggingFaceEndpoint(
45
- # repo_id="openai/gpt-oss-20b:hyperbolic",
46
- # # deepseek-ai/DeepSeek-OCR:novita
47
- # task="text-generation",
48
- # temperature=0.0,
49
- # huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
50
- # )
51
-
52
- # Model initializations moved to smart_invoke for lazy loading to prevent import errors if keys are missing.
53
-
54
def _create_chat_model(provider, model_name, api_key, base_url=None):
    """Build the chat model for one provider/model pair; SDKs are imported only when selected."""
    if provider == "openai":
        from langchain_openai import ChatOpenAI
        return ChatOpenAI(model=model_name, openai_api_key=api_key, openai_api_base=base_url, temperature=0)
    if provider == "google":
        from langchain_google_genai import ChatGoogleGenerativeAI
        return ChatGoogleGenerativeAI(model=model_name, temperature=0)
    if provider == "groq":
        from langchain_groq import ChatGroq
        return ChatGroq(model=model_name, temperature=0, max_retries=2)
    raise ValueError(f"Unsupported LLM provider: {provider}")


def smart_invoke(msgs, use_tools=False, start_tier=0):
    """
    Invoke the first working chat model from a tiered fallback list.

    Tier order: OpenRouter free models -> Gemini -> Groq. Tiers whose API key
    environment variable is unset are skipped. The next model or tier is tried
    when the error text matches a known transient/availability marker
    (rate limit, 429, 500, 503, overloaded, 404 / model not found, 402 /
    credits, decommissioned, invalid request).

    Args:
        msgs: Messages passed unchanged to the model's ``invoke``.
        use_tools: When true, bind the module-level ``tools`` to every model.
        start_tier: Index of the first tier to try.

    Returns:
        ``(response, tier_index)`` for the first successful call, or
        ``(None, 0)`` when no tier was attempted (no API key available or
        ``start_tier`` past the end).

    Raises:
        Exception: The last retryable error once every tier has failed, or
            immediately any error that is not recognised as retryable.
    """
    # Adaptive Gemini names verified via list_models (REST API)
    gemini_alternatives = ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-flash-latest", "gemini-pro-latest"]
    openrouter_url = "https://openrouter.ai/api/v1"

    tiers_config = [
        {"name": "Qwen3-Next-80B", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "qwen/qwen3-next-80b-a3b-instruct:free", "base_url": openrouter_url},
        {"name": "Gemma-3-27B", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": openrouter_url},
        {"name": "NVIDIA-Nemotron-Super", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "nvidia/nemotron-3-super-120b-a12b:free", "base_url": openrouter_url},
        {"name": "OpenRouter-FreeRouter", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "openrouter/free", "base_url": openrouter_url},
        {"name": "DeepSeek-R1", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "deepseek/deepseek-r1:free", "base_url": openrouter_url},
        {"name": "Gemini-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash", "alternatives": gemini_alternatives},
        {"name": "Groq", "key": "GROQ_API_KEY", "provider": "groq", "model_name": "llama-3.3-70b-versatile"},
    ]
    not_found_markers = ("not_found", "404")
    fallback_markers = ("rate_limit", "429", "500", "503", "overloaded", "not_found", "404", "402", "credits", "decommissioned", "invalid_request_error")

    last_exception = None
    for i in range(start_tier, len(tiers_config)):
        tier = tiers_config[i]
        api_key = os.getenv(tier["key"])
        if not api_key:
            continue

        # Primary model first, then any alternative names for the same provider.
        models_to_try = []
        for candidate_name in [tier["model_name"], *tier.get("alternatives", [])]:
            model = _create_chat_model(tier["provider"], candidate_name, api_key, tier.get("base_url"))
            models_to_try.append(model.bind_tools(tools) if use_tools else model)

        last_position = len(models_to_try) - 1
        for position, current_model in enumerate(models_to_try):
            # Position, not object equality, decides whether alternatives remain:
            # two identically configured models may compare equal.
            has_alternative = position < last_position
            model_name = tier["name"]
            try:
                model_name = getattr(current_model, "model", tier["name"])
                print(f"--- Calling {tier['name']} ({model_name}) ---")
                return current_model.invoke(msgs), i
            except Exception as e:
                err_str = str(e).lower()
                # If it's a 404 (not found) and we have more alternatives, try the next one
                if has_alternative and any(x in err_str for x in not_found_markers):
                    print(f"--- {tier['name']} model {model_name} not found. Trying alternative... ---")
                    continue

                # Catch other fallback triggers
                if any(x in err_str for x in fallback_markers):
                    print(f"--- {tier['name']} Error: {e}. Trying next model/tier... ---")
                    last_exception = e
                    if has_alternative:
                        continue
                    break  # Move to next tier
                raise

    if last_exception:
        print("CRITICAL: All fallback tiers failed.")
        raise last_exception
    return None, 0
130
-
131
@tool
def web_search(keywords: str) -> str:
    """
    Uses duckduckgo to search the top 5 result on web

    Use cases:
    - Identify personal information
    - Information search
    - Finding organisation information
    - Obtain the latest news

    Args:
        keywords: keywords used to search the web

    Returns:
        Search result (Header + body + url)
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            with DDGS() as ddgs:
                results = ddgs.text(keywords, max_results=5)
                entries = [
                    f"Results: {result['title']}\n{result['body']}\n{result['href']}\n\n"
                    for result in results
                ]
            # An explicit marker is clearer to the model than a blank tool result.
            return "".join(entries) or "No results found."
        except Exception as e:
            if attempt < max_retries - 1:
                # Exponential back-off: 1s, then 2s.
                time.sleep(2 ** attempt)
                continue
            return f"Search failed after {max_retries} attempts: {str(e)}"
162
-
163
@tool
def wiki_search(query: str) -> str:
    """
    Search Wikipedia for a query and return up to 3 results.

    Use cases:
    When the question requires the use of information from wikipedia

    Args:
        query: The search query
    """
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=3, doc_content_chars_max=15000).load()
    except Exception as e:
        # Report failures as text, like the other tools in this file do.
        return f"Error searching Wikipedia: {str(e)}"

    if not search_docs:
        return "No Wikipedia results found."

    # The opening tag is a real opening tag (not self-closing), so it pairs
    # with the closing </Document>.
    return "\n\n---\n\n".join(
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("title", "Unknown Title")}">\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
186
-
187
def get_vision_models():
    """Returns a list of vision models to try, in order of preference.

    Only entries whose API key environment variable is set are instantiated.
    Each returned item is a dict with the display ``name`` and the ``model``
    instance.
    """
    configs = [
        {"name": "OpenRouter-Qwen3-VL", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "qwen/qwen3-vl-235b-thinking:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "NVIDIA-Nemotron-VL", "key": "NVIDIA_API_KEY", "provider": "openai", "model_name": "nvidia/nemotron-nano-2-vl:free", "base_url": "https://integrate.api.nvidia.com/v1"},
        {"name": "OpenRouter-Gemma-3-27b-it", "key": "OPENROUTER_API_KEY", "provider": "openai", "model_name": "google/gemma-3-27b-it:free", "base_url": "https://openrouter.ai/api/v1"},
        {"name": "Google-Gemini-2.0-Flash", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-2.0-flash"},
        {"name": "Google-Gemini-Flash-Latest", "key": "GOOGLE_API_KEY", "provider": "google", "model_name": "gemini-flash-latest"},
    ]
    models = []
    for cfg in configs:
        api_key = os.getenv(cfg["key"])
        if not api_key:
            continue
        provider = cfg["provider"]
        if provider == "openai":
            from langchain_openai import ChatOpenAI
            m = ChatOpenAI(model=cfg["model_name"], openai_api_key=api_key, openai_api_base=cfg.get("base_url"), temperature=0)
        elif provider == "google":
            from langchain_google_genai import ChatGoogleGenerativeAI
            m = ChatGoogleGenerativeAI(model=cfg["model_name"], temperature=0)
        elif provider == "groq":
            from langchain_groq import ChatGroq
            m = ChatGroq(model=cfg["model_name"], temperature=0)
        else:
            # Unknown provider: skip it instead of re-appending the previous
            # model (or hitting an unbound name on the first entry).
            continue
        models.append({"name": cfg["name"], "model": m})
    return models
212
-
213
@tool
def analyze_image(image_path: str, question: str) -> str:
    """
    EXTERNAL SIGHT API: Sends an image path to a Vision Model to answer a specific question.
    YOU MUST CALL THIS TOOL ANY TIME an image (.png, .jpg, .jpeg) is attached to the prompt.
    NEVER claim you cannot see images. Use this tool instead.

    Args:
        image_path: The local path or URL to the image file.
        question: Specific question describing what you want the vision model to look for.
    """
    try:
        if not os.path.exists(image_path):
            return f"Error: Image file not found at {image_path}"

        import mimetypes

        # Label the data URL with the real image type (PNG, WEBP, ...);
        # fall back to JPEG when the extension is unknown or not an image.
        guessed_type = mimetypes.guess_type(image_path)[0]
        mime_type = guessed_type if guessed_type and guessed_type.startswith("image/") else "image/jpeg"

        # The local file is sent inline as base64.
        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

        message = HumanMessage(
            content=[
                {"type": "text", "text": question},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
                },
            ]
        )

        vision_models = get_vision_models()
        if not vision_models:
            return "Error: No vision models configured (missing API keys)."

        # Try each configured model in order; the first success wins.
        last_err = None
        for item in vision_models:
            try:
                m_name = getattr(item['model'], 'model', 'unknown')
                print(f"--- Calling Vision Model: {item['name']} ({m_name}) ---")
                response = item['model'].invoke([message])
                return extract_text_from_content(response.content)
            except Exception as e:
                print(f"Vision Model {item['name']} failed.")
                traceback.print_exc()
                last_err = e
        return f"Error analyzing image: All vision models failed. Last error: {str(last_err)}"
    except Exception as e:
        traceback.print_exc()
        return f"Error reading/processing image: {str(e)}"
261
-
262
@tool
def analyze_audio(audio_path: str, question: str) -> str:
    """
    Transcribes an audio file (.mp3, .wav, .m4a) to answer questions about what is spoken.

    Args:
        audio_path: The local path to the audio file.
        question: The specific question to ask.
    """
    # `question` is accepted but not used: the full transcript is returned.
    try:
        spoken_text = get_whisper().transcribe(audio_path)["text"]
        return f"Audio Transcript:\n{spoken_text}"
    except Exception as e:
        return f"Error analyzing audio: {str(e)}. Tip: You requires 'ffmpeg' installed on your system."
278
-
279
@tool
def analyze_video(video_path: str, question: str) -> str:
    """
    EXTERNAL SIGHT/HEARING API: Sends a video file to an external Vision/Audio model.
    YOU MUST CALL THIS TOOL ANY TIME a video (.mp4, .avi) is attached to the prompt.
    NEVER claim you cannot analyze videos. Use this tool instead.

    Args:
        video_path: The local path to the video file.
        question: Specific question describing what you want to extract from the video.
    """
    if cv2 is None:
        return "Error: cv2 is not installed. Please install opencv-python."

    temp_dir = tempfile.gettempdir()
    downloaded_video = None
    cap = None

    try:
        # Remote videos are downloaded to a temp file first.
        if video_path.startswith("http"):
            print(f"Downloading video from URL: {video_path}")
            downloaded_video = os.path.join(temp_dir, f"video_{int(time.time())}.mp4")
            try:
                # yt-dlp is assumed to be available on PATH.
                subprocess.run(["yt-dlp", "-f", "best[ext=mp4]/mp4", "-o", downloaded_video, video_path], check=True, timeout=120)
                video_path = downloaded_video
            except Exception as e:
                return f"Error downloading video from URL: {str(e)}. Tip: Check if yt-dlp is installed and the URL is valid."

        # 1. Extract frames evenly spaced throughout the video
        cap = cv2.VideoCapture(video_path)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total_frames == 0:
            return "Error: Could not read video frames."

        # Take 5 frames as a summary
        frame_indices = [int(i * total_frames / 5) for i in range(5)]
        extracted_descriptions = []
        vision_models = get_vision_models()

        for idx_num, frame_idx in enumerate(frame_indices):
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                continue

            # Encode the frame as a base64 JPEG for the vision model.
            _, buffer = cv2.imencode('.jpg', frame)
            encoded_image = base64.b64encode(buffer).decode('utf-8')
            msg = HumanMessage(
                content=[
                    {"type": "text", "text": f"Describe what is happening in this video frame concisely. Focus on aspects related to: {question}"},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}},
                ]
            )

            # Ask the vision models in order; keep the first answer.
            desc = "No description available."
            for item in vision_models:
                try:
                    print(f"--- Calling Vision Model for Frame {idx_num+1}: {item['name']} ---")
                    # Flatten list-style content to text, as analyze_image does.
                    desc = extract_text_from_content(item['model'].invoke([msg]).content)
                    break
                except Exception as e:
                    print(f"Vision Model {item['name']} failed for frame: {e}")

            extracted_descriptions.append(f"Frame {idx_num + 1}: {desc}")

        # Frames are done; release before the (slow) audio transcription.
        cap.release()
        cap = None

        # 2. Compile the context for the agent
        video_context = "\n".join(extracted_descriptions)

        # 3. Transcribe audio if possible
        try:
            trans_result = get_whisper().transcribe(video_path)
            transcript = trans_result.get("text", "")
            if transcript.strip():
                video_context += f"\n\nVideo Audio Transcript:\n{transcript}"
        except Exception as e:
            video_context += f"\n\n(No audio transcript generated: {e})"

        return f"Video Summary based on extracted frames and audio:\n{video_context}"
    except Exception as e:
        err_msg = str(e)
        if "No address associated with hostname" in err_msg or "Failed to resolve" in err_msg:
            return "Error: The environment cannot access the internet (DNS failure). Please use 'web_search' or 'wiki_search' to find information about this video content instead of trying to download it."
        return f"Error analyzing video: {err_msg}"
    finally:
        # Release the capture on every exit path, including early returns.
        if cap is not None:
            cap.release()
        if downloaded_video and os.path.exists(downloaded_video):
            try:
                os.remove(downloaded_video)
            except OSError:
                # Best-effort cleanup of the temp download.
                pass
377
-
378
@tool
def read_url(url: str) -> str:
    """
    Reads and extracts text from a specific webpage URL.
    Use this if a web search snippet doesn't contain enough detail.
    """
    try:
        pages = WebBaseLoader(url).load()
        if pages:
            # Only the first document is used, capped at 15000 characters.
            return pages[0].page_content[:15000]
        return "No content could be extracted from this URL."
    except Exception as e:
        return f"Error reading URL: {e}"
393
-
394
@tool
def run_python_script(code: str) -> str:
    """
    Executes a Python script locally and returns the stdout and stderr.
    Use this to perform complex math, data analysis (e.g. pandas), or file processing.
    When given a file path, you can write python code to read and analyze it.
    """
    # SECURITY NOTE(review): this runs model-generated code with the agent's
    # privileges; run the agent in a sandboxed environment.
    import sys

    # Written as UTF-8 because that is how Python reads source files by default.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as f:
        f.write(code)
        temp_file_name = f.name

    try:
        # sys.executable runs the script with the interpreter (and installed
        # packages) of the agent itself, not whatever "python" is on PATH.
        result = subprocess.run(
            [sys.executable, temp_file_name],
            capture_output=True,
            text=True,
            timeout=60
        )
        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"
        return (output or "Script executed successfully with no output.")[:15000]
    except subprocess.TimeoutExpired:
        return "Script execution timed out after 60 seconds."
    except Exception as e:
        return f"Failed to execute script: {str(e)}"
    finally:
        # Single cleanup point for every exit path.
        try:
            os.remove(temp_file_name)
        except OSError:
            pass
426
-
427
@tool
def read_document(file_path: str) -> str:
    """
    Reads the text contents of a local document (.txt, .csv, .json, .md).
    For binary files like .xlsx or .pdf, use run_python_script to process them instead.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
    except Exception as e:
        return f"Error reading document: {str(e)}. Tip: You can try running a python script to read it!"
    # Short documents are returned whole; longer ones are cut and marked.
    if len(text) <= 15000:
        return text
    return text[:15000] + "... (truncated)"
441
-
442
# Answer-format prompt: asks for reasoning followed by a "FINAL ANSWER: ..." line.
# NOTE(review): not referenced by any code in the part of the file visible here —
# confirm whether it is still used.
system_prompt = """
You are a helpful assistant tasked with answering questions using a set of tools.
Now, I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
Your answer should only start with "FINAL ANSWER: ", then follows with the answer.
"""
449
-
450
class AgentState(TypedDict):
    """Typed dictionary describing the state passed between graph nodes."""

    # Conversation history. ToolMessage is listed because answer_message
    # appends tool results to this same list.
    messages: List[Union[HumanMessage, AIMessage, SystemMessage, ToolMessage]]
452
-
453
def read_message(state: AgentState) -> AgentState:
    """Log the content of the latest message and pass the messages through unchanged."""
    history = state["messages"]
    latest = history[-1].content if history else ''
    print(f"Processing question: {latest}")
    # Nothing is modified here; the next node receives the same messages.
    return {"messages": history}
458
-
459
def restart_required(state: AgentState) -> AgentState:
    """Log the content of the latest message and pass the messages through unchanged."""
    history = state["messages"]
    latest = history[-1].content if history else ''
    print(f"Processing question: {latest}")
    # Nothing is modified here; the next node receives the same messages.
    return {"messages": history}
464
-
465
- # def tool_message(state: AgentState) -> AgentState:
466
- # messages = state["messages"]
467
- # prompt = f"""
468
- # You are a GAIA question answering expert.
469
- # Your task is to decide whether to use a tool or not.
470
- # If you need to use a tool, answer ONLY:
471
- # CALL_TOOL: <your tool name>
472
- # If you do not need to use a tool, answer ONLY:
473
- # NO_TOOL
474
- # Here is the question:
475
- # {messages}
476
- # """
477
- # return {"messages": messages}
478
- # response = model_with_tools.invoke(prompt)
479
- # return {"messages": messages + [response]}
480
-
481
- # Augment the LLM with tools
482
tools = [
    web_search,
    wiki_search,
    analyze_image,
    analyze_audio,
    analyze_video,
    read_url,
    run_python_script,
    read_document,
]
# Name -> tool lookup table.
tools_by_name = {registered.name: registered for registered in tools}
484
def extract_text_from_content(content: Any) -> str:
    """Extracts a simple string from various possible AIMessage content formats.

    Args:
        content: A plain string, a list of parts (strings and/or dicts that
            may carry a ``"text"`` entry), or any other object.

    Returns:
        The string itself, the concatenated text of all list parts (parts
        without text are ignored), or ``str(content)`` for anything else.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        text_parts = []
        for part in content:
            if isinstance(part, str):
                text_parts.append(part)
            elif isinstance(part, dict) and part.get("text") is not None:
                # str() keeps the join below safe when a provider sends a
                # non-string text value.
                text_parts.append(str(part["text"]))
        return "".join(text_parts)
    return str(content)
499
-
500
- def answer_message(state: AgentState) -> AgentState:
501
- messages = state["messages"]
502
- current_date = datetime.datetime.now().strftime("%Y-%m-%d")
503
-
504
- prompt = [SystemMessage(f"""
505
- You are a master of the GAIA benchmark, a general AI assistant designed to solve complex multi-step tasks.
506
- Think carefully and logically. Use your tools effectively. Use your internal monologue to plan your steps.
507
-
508
- TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
509
-
510
- CRITICAL RULES:
511
- 1. If you see a path like `[Attached File Local Path: ...]` followed by an image, video, or audio file, YOU MUST USE THE CORRESPONDING TOOL (analyze_image, analyze_video, analyze_audio) IMMEDIATELY in your next step.
512
- 2. Plan your steps ahead. 12 steps is your LIMIT for the reasoning loop, so make every step count.
513
- 3. If a tool fails (e.g., 429 or 402), the system will automatically try another model for you, so just keep going!
514
- 4. Be concise and accurate. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.
515
- 5. CHAIN-OF-THOUGHT: For complex questions, show your reasoning step by step before giving the final answer.
516
- 6. USE TOOLS AGGRESSIVELY: If a question requires computation, file reading, or web search, use the appropriate tools - don't try to answer from memory.
517
- 7. VERIFY YOUR ANSWER: Double-check calculations and facts using tools when uncertain.
518
- """)]
519
- messages = prompt + messages
520
-
521
- # Force tool usage if image path is detected
522
- for msg in state["messages"]:
523
- if isinstance(msg, HumanMessage) and "[Attached File Local Path:" in msg.content:
524
- messages.append(HumanMessage(content="IMPORTANT: I see an image path in the message. I MUST call the analyze_image tool IMMEDIATELY in my next step to see it."))
525
-
526
- # Multi-step ReAct Loop (Up to 12 reasoning steps)
527
- max_steps = 12
528
- draft_response = None
529
- current_tier = 0
530
-
531
- for step in range(max_steps):
532
- if step > 0:
533
- time.sleep(3)
534
-
535
- print(f"--- ReAct Step {step + 1} ---")
536
-
537
- # Max history truncation to avoid 413 Request Too Large errors
538
- safe_messages = messages[:2] + messages[-6:] if len(messages) > 10 else messages
539
-
540
- ai_msg, current_tier = smart_invoke(safe_messages, use_tools=True, start_tier=current_tier)
541
- messages.append(ai_msg)
542
-
543
- # Check if the model requested tools
544
- tool_calls = getattr(ai_msg, "tool_calls", None) or []
545
- if not tool_calls:
546
- # Model decided it has enough info to answer
547
- draft_response = ai_msg
548
- print(f"Model found answer or stopped tools: {ai_msg.content}")
549
- break
550
-
551
- # Execute requested tools and append their text output into the conversation
552
- for tool_call in tool_calls:
553
- name = tool_call["name"]
554
- args = tool_call["args"]
555
- tool_call_id = tool_call.get("id")
556
- print(f"Calling tool: {name} with args: {args}")
557
- try:
558
- tool = tools_by_name[name]
559
- tool_result = tool.invoke(args)
560
- except Exception as e:
561
- tool_result = f"Error executing tool {name}: {str(e)}"
562
-
563
- # Using ToolMessage allows the model to map the result back perfectly to its request
564
- messages.append(ToolMessage(content=str(tool_result), tool_call_id=tool_call_id, name=name))
565
-
566
- # If we exhausted all steps without an answer, force a draft response
567
- if draft_response is None:
568
- print("Max reasoning steps reached. Forcing answer extraction.")
569
- forced_msg = HumanMessage(content="You have reached the maximum reasoning steps. Please provide your best final answer based on the current context without any more tool calls.")
570
- messages.append(forced_msg)
571
- draft_response, _ = smart_invoke(messages, use_tools=False)
572
-
573
- # Third pass: strict GAIA formatting extraction
574
- formatting_sys = SystemMessage(
575
- content=(
576
- "You are a strict output formatter for the GAIA benchmark. "
577
- "Given a verbose draft answer, extract ONLY the final exact answer required. "
578
- "Return nothing else. DO NOT include prefixes like 'The answer is'. "
579
- "Strip trailing whitespace only. "
580
- "If the answer is a number, just return the number. "
581
- "If the answer is a list or set of elements, return them as a COMMA-SEPARATED list (e.g., 'a, b, c'). "
582
- "Preserve necessary punctuation within answers (e.g., 'Dr. Smith' should keep the period)."
583
- )
584
- )
585
- final_response, _ = smart_invoke([formatting_sys, HumanMessage(content=extract_text_from_content(draft_response.content))], use_tools=False, start_tier=current_tier)
586
- print(f"Draft response: {draft_response.content}")
587
- print(f"Strict Final response: {final_response.content}")
588
-
589
- # Return messages including the final AIMessage so BasicAgent reads .content
590
- # Ensure final_response has string content for basic agents
591
- if not isinstance(final_response.content, str):
592
- final_response.content = extract_text_from_content(final_response.content)
593
-
594
- messages.append(draft_response)
595
- messages.append(final_response)
596
- return {"messages": messages}
597
-
598
-
599
- def build_graph():
600
- agent_graph = StateGraph(AgentState)
601
-
602
- # Add nodes
603
- agent_graph.add_node("read_message", read_message)
604
- agent_graph.add_node("answer_message", answer_message)
605
-
606
- # Add edges
607
- agent_graph.add_edge(START, "read_message")
608
- agent_graph.add_edge("read_message", "answer_message")
609
-
610
- # Final edge
611
- agent_graph.add_edge("answer_message", END)
612
-
613
- # Compile and return the executable graph for use in app.py
614
- compiled_graph = agent_graph.compile()
615
- return compiled_graph
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app copy.py DELETED
@@ -1,264 +0,0 @@
1
- import os
2
- # import gradio as gr
3
- import requests
4
- import inspect
5
- import pandas as pd
6
- from langchain_core.messages import HumanMessage
7
- from agent import build_graph
8
- from huggingface_hub import HfApi, hf_hub_download
9
- import logging
10
-
11
- logger = logging.getLogger(__name__)
12
-
13
- # (Keep Constants as is)
14
- # --- Constants ---
15
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
-
17
- # --- Basic Agent Definition ---
18
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
19
- class BasicAgent:
20
- def __init__(self):
21
- print("BasicAgent initialized.")
22
- self.graph = build_graph()
23
-
24
- def __call__(self, question: str) -> str:
25
- print(f"Agent received question (first 50 chars): {question[:50]}...")
26
- messages = [HumanMessage(content=question)]
27
- result = self.graph.invoke({"messages": messages})
28
- answer = result['messages'][-1].content
29
- return answer
30
-
31
- def file_extract(local_file_path, task_id):
32
- if not local_file_path:
33
- return None
34
-
35
- token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
36
-
37
- # GAIA files are usually placed in date-based subdirectories
38
- prefixes = ["2023/validation/", "2023/test/", "2023/train/", ""]
39
-
40
- for prefix in prefixes:
41
- try:
42
- resolved_path = hf_hub_download(
43
- repo_id="gaia-benchmark/GAIA",
44
- filename=f"{prefix}{local_file_path}",
45
- repo_type="dataset",
46
- token=token
47
- )
48
- return resolved_path
49
- except Exception:
50
- continue
51
-
52
- logger.warning(f"Could not download file '{local_file_path}' for task_id {task_id}. Make sure you accepted GAIA terms on HF and set HF_TOKEN.")
53
- return None
54
-
55
- agent = BasicAgent()
56
- questions_url = f"{DEFAULT_API_URL}/questions"
57
- response = requests.get(questions_url, timeout=15)
58
- response.raise_for_status()
59
- questions_data = response.json()
60
- import time
61
- print(f"Running agent on {len(questions_data)} questions sequentially to avoid 429 errors...")
62
- for item in questions_data[:2]:
63
- question_text = item.get("question")
64
- if question_text is None:
65
- continue
66
- files_text = item.get("files")
67
- task_id = item.get("task_id")
68
- file_name = item.get("file_name")
69
-
70
- if file_name:
71
- # Actually download the file to local cache and get absolute path
72
- resolved_path = file_extract(file_name, task_id)
73
- if resolved_path:
74
- question_text += f"\n\n[Attached File Local Path: {resolved_path}]"
75
- else:
76
- question_text += f"\n\n[Attached File: {file_name} (Download Failed)]"
77
-
78
- print(f"Processing Task ID: {task_id}")
79
- output = agent(question_text)
80
- print("Q:", question_text)
81
- print("A:", output)
82
- print("-" * 40)
83
- # Stagger requests to refill Token bucket and provide space for other concurrent tasks if any
84
- time.sleep(5)
85
-
86
-
87
-
88
-
89
- # def run_and_submit_all( profile: gr.OAuthProfile | None):
90
- # """
91
- # Fetches all questions, runs the BasicAgent on them, submits all answers,
92
- # and displays the results.
93
- # """
94
- # # --- Determine HF Space Runtime URL and Repo URL ---
95
- # space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
96
-
97
- # if profile:
98
- # username= f"{profile.username}"
99
- # print(f"User logged in: {username}")
100
- # else:
101
- # print("User not logged in.")
102
- # return "Please Login to Hugging Face with the button.", None
103
-
104
- # api_url = DEFAULT_API_URL
105
- # questions_url = f"{api_url}/questions"
106
- # submit_url = f"{api_url}/submit"
107
-
108
- # # 1. Instantiate Agent ( modify this part to create your agent)
109
- # try:
110
- # agent = BasicAgent()
111
- # except Exception as e:
112
- # print(f"Error instantiating agent: {e}")
113
- # return f"Error initializing agent: {e}", None
114
- # # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
115
- # agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
116
- # print(agent_code)
117
-
118
- # # 2. Fetch Questions
119
- # print(f"Fetching questions from: {questions_url}")
120
- # try:
121
- # response = requests.get(questions_url, timeout=15)
122
- # response.raise_for_status()
123
- # questions_data = response.json()
124
- # if not questions_data:
125
- # print("Fetched questions list is empty.")
126
- # return "Fetched questions list is empty or invalid format.", None
127
- # print(f"Fetched {len(questions_data)} questions.")
128
- # except requests.exceptions.RequestException as e:
129
- # print(f"Error fetching questions: {e}")
130
- # return f"Error fetching questions: {e}", None
131
- # except requests.exceptions.JSONDecodeError as e:
132
- # print(f"Error decoding JSON response from questions endpoint: {e}")
133
- # print(f"Response text: {response.text[:500]}")
134
- # return f"Error decoding server response for questions: {e}", None
135
- # except Exception as e:
136
- # print(f"An unexpected error occurred fetching questions: {e}")
137
- # return f"An unexpected error occurred fetching questions: {e}", None
138
-
139
- # # 3. Run your Agent
140
- # results_log = []
141
- # answers_payload = []
142
- # # print(f"Running agent on {len(questions_data)} questions...")
143
- # print(f"Running agent on {len(questions_data[:5])} questions temporarily...")
144
- # for item in questions_data[:5]:
145
- # task_id = item.get("task_id")
146
- # question_text = item.get("question")
147
- # if not task_id or question_text is None:
148
- # print(f"Skipping item with missing task_id or question: {item}")
149
- # continue
150
- # try:
151
- # submitted_answer = agent(question_text)
152
- # answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
153
- # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
154
- # except Exception as e:
155
- # print(f"Error running agent on task {task_id}: {e}")
156
- # results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
157
-
158
- # if not answers_payload:
159
- # print("Agent did not produce any answers to submit.")
160
- # return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
161
-
162
- # # 4. Prepare Submission
163
- # submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
164
- # status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
165
- # print(status_update)
166
-
167
- # # 5. Submit
168
- # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
169
- # try:
170
- # response = requests.post(submit_url, json=submission_data, timeout=60)
171
- # response.raise_for_status()
172
- # result_data = response.json()
173
- # final_status = (
174
- # f"Submission Successful!\n"
175
- # f"User: {result_data.get('username')}\n"
176
- # f"Overall Score: {result_data.get('score', 'N/A')}% "
177
- # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
178
- # f"Message: {result_data.get('message', 'No message received.')}"
179
- # )
180
- # print("Submission successful.")
181
- # results_df = pd.DataFrame(results_log)
182
- # return final_status, results_df
183
- # except requests.exceptions.HTTPError as e:
184
- # error_detail = f"Server responded with status {e.response.status_code}."
185
- # try:
186
- # error_json = e.response.json()
187
- # error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
188
- # except requests.exceptions.JSONDecodeError:
189
- # error_detail += f" Response: {e.response.text[:500]}"
190
- # status_message = f"Submission Failed: {error_detail}"
191
- # print(status_message)
192
- # results_df = pd.DataFrame(results_log)
193
- # return status_message, results_df
194
- # except requests.exceptions.Timeout:
195
- # status_message = "Submission Failed: The request timed out."
196
- # print(status_message)
197
- # results_df = pd.DataFrame(results_log)
198
- # return status_message, results_df
199
- # except requests.exceptions.RequestException as e:
200
- # status_message = f"Submission Failed: Network error - {e}"
201
- # print(status_message)
202
- # results_df = pd.DataFrame(results_log)
203
- # return status_message, results_df
204
- # except Exception as e:
205
- # status_message = f"An unexpected error occurred during submission: {e}"
206
- # print(status_message)
207
- # results_df = pd.DataFrame(results_log)
208
- # return status_message, results_df
209
-
210
-
211
- # # --- Build Gradio Interface using Blocks ---
212
- # with gr.Blocks() as demo:
213
- # gr.Markdown("# Basic Agent Evaluation Runner")
214
- # gr.Markdown(
215
- # """
216
- # **Instructions:**
217
-
218
- # 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
219
- # 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
220
- # 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
221
-
222
- # ---
223
- # **Disclaimers:**
224
- # Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
225
- # This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
226
- # """
227
- # )
228
-
229
- # gr.LoginButton()
230
-
231
- # run_button = gr.Button("Run Evaluation & Submit All Answers")
232
-
233
- # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
234
- # # Removed max_rows=10 from DataFrame constructor
235
- # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
236
-
237
- # run_button.click(
238
- # fn=run_and_submit_all,
239
- # outputs=[status_output, results_table]
240
- # )
241
-
242
- # if __name__ == "__main__":
243
- # print("\n" + "-"*30 + " App Starting " + "-"*30)
244
- # # Check for SPACE_HOST and SPACE_ID at startup for information
245
- # space_host_startup = os.getenv("SPACE_HOST")
246
- # space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
247
-
248
- # if space_host_startup:
249
- # print(f"✅ SPACE_HOST found: {space_host_startup}")
250
- # print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
251
- # else:
252
- # print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
253
-
254
- # if space_id_startup: # Print repo URLs if SPACE_ID is found
255
- # print(f"✅ SPACE_ID found: {space_id_startup}")
256
- # print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
257
- # print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
258
- # else:
259
- # print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
260
-
261
- # print("-"*(60 + len(" App Starting ")) + "\n")
262
-
263
- # print("Launching Gradio Interface for Basic Agent Evaluation...")
264
- # demo.launch(debug=True, share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
check_q19.py DELETED
@@ -1,13 +0,0 @@
1
- import os
2
- import requests
3
-
4
- resp = requests.get("https://agents-course-unit4-scoring.hf.space/questions")
5
- questions = resp.json()
6
-
7
- # Check Q19 question content
8
- q19 = questions[18]
9
- print(f"Q19: {q19['question']}")
10
- print()
11
- print(f"'excel' in q19: {'excel' in q19['question'].lower()}")
12
- print(f"'sales' in q19: {'sales' in q19['question'].lower()}")
13
- print(f"'89706' in q19: {'89706' in q19['question']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
check_q5.py DELETED
@@ -1,11 +0,0 @@
1
- import requests
2
-
3
- resp = requests.get('https://agents-course-unit4-scoring.hf.space/questions')
4
- questions = resp.json()
5
-
6
- q5 = questions[4]
7
- print(f"Q5: {q5['question']}")
8
- print()
9
- print(f"'featured article' in q5: {'featured article' in q5['question'].lower()}")
10
- print(f"'dinosaur' in q5: {'dinosaur' in q5['question'].lower()}")
11
- print(f"'FunkMonk' in q5: {'FunkMonk' in q5['question']}")
 
 
 
 
 
 
 
 
 
 
 
 
debug_check.py DELETED
@@ -1,35 +0,0 @@
1
- import os
2
- import requests
3
- from langchain_core.messages import HumanMessage
4
- from agent import build_graph
5
- from huggingface_hub import hf_hub_download
6
- import pyarrow.parquet as pq
7
- from dotenv import load_dotenv
8
-
9
- load_dotenv(override=True)
10
-
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
- graph = build_graph()
14
- resp = requests.get(f"{DEFAULT_API_URL}/questions")
15
- questions = resp.json()
16
-
17
- token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
18
- path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
19
- df = pq.read_table(path).to_pandas()
20
- answer_map = dict(zip(df['task_id'], df['Final answer']))
21
-
22
- # Check Q1, Q5, Q7
23
- for i in [0, 4, 6]:
24
- q = questions[i]
25
- task_id = q['task_id']
26
- question = q['question']
27
- ground_truth = answer_map.get(task_id, "NOT FOUND")
28
-
29
- result = graph.invoke({"messages": [HumanMessage(content=question)]})
30
- answer = result['messages'][-1].content
31
-
32
- print(f"\n=== Q{i+1} ===")
33
- print(f"Q: {question[:80]}...")
34
- print(f"GT: {ground_truth}")
35
- print(f"Ans: {answer[:50]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
debug_files.py DELETED
@@ -1,32 +0,0 @@
1
- import os
2
- import requests
3
- from langchain_core.messages import HumanMessage
4
- from agent import build_graph
5
- from huggingface_hub import hf_hub_download
6
- import pyarrow.parquet as pq
7
- from dotenv import load_dotenv
8
-
9
- load_dotenv(override=True)
10
-
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
- graph = build_graph()
14
- resp = requests.get(f"{DEFAULT_API_URL}/questions")
15
- questions = resp.json()
16
-
17
- token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
18
- path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
19
- df = pq.read_table(path).to_pandas()
20
- answer_map = dict(zip(df['task_id'], df['Final answer']))
21
-
22
- # Show questions with files
23
- for i in [3, 9, 11, 13, 18]:
24
- q = questions[i]
25
- task_id = q['task_id']
26
- question = q['question']
27
- ground_truth = answer_map.get(task_id, "NOT FOUND")
28
- file_name = q.get('file_name', '')
29
-
30
- print(f"\n=== Q{i+1} | File: {file_name} ===")
31
- print(f"Q: {question[:100]}...")
32
- print(f"GT: {ground_truth}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
debug_q19.py DELETED
@@ -1,61 +0,0 @@
1
- import os
2
- import requests
3
- from langchain_core.messages import HumanMessage
4
- from agent import build_graph
5
- from huggingface_hub import hf_hub_download
6
- import pyarrow.parquet as pq
7
- from dotenv import load_dotenv
8
-
9
- load_dotenv(override=True)
10
-
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
- def file_extract(local_file_path, task_id):
14
- if not local_file_path:
15
- return None
16
- token = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
17
- prefixes = ["2023/validation/", "2023/test/", "2023/train/", ""]
18
- for prefix in prefixes:
19
- try:
20
- resolved_path = hf_hub_download(
21
- repo_id="gaia-benchmark/GAIA",
22
- filename=f"{prefix}{local_file_path}",
23
- repo_type="dataset",
24
- token=token
25
- )
26
- return resolved_path
27
- except Exception:
28
- continue
29
- return None
30
-
31
- graph = build_graph()
32
- resp = requests.get(f"{DEFAULT_API_URL}/questions")
33
- questions = resp.json()
34
-
35
- token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
36
- path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
37
- df = pq.read_table(path).to_pandas()
38
- answer_map = dict(zip(df['task_id'], df['Final answer']))
39
-
40
- # Q19
41
- q = questions[18]
42
- task_id = q['task_id']
43
- question = q['question']
44
- file_name = q.get('file_name')
45
- ground_truth = answer_map.get(task_id, "NOT FOUND")
46
-
47
- # Add file path
48
- resolved_path = None
49
- if file_name:
50
- resolved_path = file_extract(file_name, task_id)
51
- if resolved_path:
52
- question += f"\n\n[Attached File Local Path: {resolved_path}]"
53
-
54
- print(f"Q19 File: {file_name}")
55
- print(f"Resolved: {resolved_path}")
56
- print(f"Q19 Question: {question[:100]}...")
57
-
58
- result = graph.invoke({"messages": [HumanMessage(content=question)]})
59
- answer = result['messages'][-1].content
60
- print(f"GT: {ground_truth}")
61
- print(f"Ans: {answer[:80]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
debug_q19_v2.py DELETED
@@ -1,25 +0,0 @@
1
- import os
2
- import requests
3
- from langchain_core.messages import HumanMessage
4
- from agent import build_graph
5
- from huggingface_hub import hf_hub_download
6
- import pyarrow.parquet as pq
7
- from dotenv import load_dotenv
8
-
9
- load_dotenv(override=True)
10
-
11
- graph = build_graph()
12
- resp = requests.get("https://agents-course-unit4-scoring.hf.space/questions")
13
- questions = resp.json()
14
-
15
- # Q19
16
- q = questions[18]
17
- question = q['question']
18
- print(f"Q19: {question}")
19
- print(f"Contains 'excel': {'excel' in question.lower()}")
20
- print(f"Contains 'food': {'food' in question.lower()}")
21
- print(f"Contains 'drinks': {'drinks' in question.lower()}")
22
- print()
23
-
24
- result = graph.invoke({"messages": [HumanMessage(content=question)]})
25
- print(f"Answer: {result['messages'][-1].content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
debug_q1_q14.py DELETED
@@ -1,18 +0,0 @@
1
- import requests
2
- from langchain_core.messages import HumanMessage
3
- from agent import build_graph
4
-
5
- graph = build_graph()
6
- resp = requests.get('https://agents-course-unit4-scoring.hf.space/questions')
7
- questions = resp.json()
8
-
9
- # Q1
10
- q1 = questions[0]
11
- result = graph.invoke({'messages': [HumanMessage(content=q1['question'])]})
12
- print(f"Q1 answer: {result['messages'][-1].content}")
13
- print()
14
-
15
- # Q14
16
- q14 = questions[13]
17
- result = graph.invoke({'messages': [HumanMessage(content=q14['question'])]})
18
- print(f"Q14 answer: {result['messages'][-1].content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llm/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from llm.client import invoke_llm, PROVIDER_ORDER
2
+
3
+ __all__ = ["invoke_llm", "PROVIDER_ORDER"]
llm/client.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List
3
+
4
+ from langchain_core.messages import AIMessage
5
+ from llm.providers import PROVIDERS
6
+
7
+ PROVIDER_ORDER = os.getenv("LLM_PROVIDER_ORDER", "gemini_gemma, gemini, groq").split(",")
8
+
9
+ _degraded_providers = {}
10
+
11
+
12
+ def _get_next_provider():
13
+ """Get next available provider in priority order."""
14
+ for name in PROVIDER_ORDER:
15
+ if name not in _degraded_providers:
16
+ yield name
17
+
18
+
19
+ def invoke_llm(messages: List, tools: List, fallback_count: int = 0) -> AIMessage:
20
+ """Invoke LLM with provider fallback.
21
+
22
+ Args:
23
+ messages: Chat messages to send to LLM
24
+ tools: List of tools to bind
25
+ fallback_count: Current retry attempt
26
+
27
+ Returns:
28
+ AIMessage response from successful provider
29
+ """
30
+ provider_name = None
31
+ provider = None
32
+
33
+ for name in _get_next_provider():
34
+ provider_name = name
35
+ provider = PROVIDERS.get(name)
36
+ if provider:
37
+ break
38
+
39
+ if not provider:
40
+ return AIMessage(content="ERROR: No available LLM providers")
41
+
42
+ try:
43
+ models = provider.get_models()
44
+ model_index = min(fallback_count // 3, len(models) - 1)
45
+ model_name = models[model_index]
46
+
47
+ print(f"Invoking {provider_name} with model {model_name}")
48
+ return provider.invoke(messages, tools, model_name)
49
+
50
+ except Exception as e:
51
+ error_msg = str(e).lower()
52
+
53
+ if "rate limit" in error_msg or "429" in error_msg:
54
+ print(f"{provider_name} rate limit hit. Waiting before retry...")
55
+ import time
56
+ wait_time = 10 * (fallback_count + 1)
57
+ time.sleep(wait_time)
58
+
59
+ print(f"{provider_name} failed: {e}. Marking as degraded.")
60
+ _degraded_providers[provider_name] = True
61
+
62
+ remaining = [n for n in PROVIDER_ORDER if n not in _degraded_providers]
63
+ if remaining:
64
+ return invoke_llm(messages, tools, fallback_count + 1)
65
+
66
+ return AIMessage(content=f"ERROR: All LLM providers failed: {e}")
llm/providers/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from llm.providers import gemini, gemini_gemma, groq
2
+
3
+ PROVIDERS = {
4
+ "gemini": gemini,
5
+ "gemini_gemma": gemini_gemma,
6
+ "groq": groq,
7
+ }
8
+
9
+ __all__ = ["PROVIDERS", "gemini", "gemini_gemma", "groq"]
llm/providers/gemini.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import ChatGoogleGenerativeAI
2
+
3
+
4
+ def invoke(messages, tools, model_name: str = "gemini-2.0-flash"):
5
+ """Invoke Gemini models (free tier)."""
6
+ model = ChatGoogleGenerativeAI(model=model_name, temperature=0)
7
+ model_with_tools = model.bind_tools(tools)
8
+ return model_with_tools.invoke(messages)
9
+
10
+
11
+ def get_models():
12
+ """List available free tier models (best first)."""
13
+ return ["gemini-2.0-flash", "gemini-2.5-flash", "gemini-1.5-flash"]
llm/providers/gemini_gemma.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import ChatGoogleGenerativeAI
2
+
3
+
4
+ def invoke(messages, tools, model_name: str = "gemma-2-27b-it"):
5
+ """Invoke Google Gemma models (free tier)."""
6
+ model = ChatGoogleGenerativeAI(model=model_name, temperature=0)
7
+ model_with_tools = model.bind_tools(tools)
8
+ return model_with_tools.invoke(messages)
9
+
10
+
11
+ def get_models():
12
+ """List available free tier models."""
13
+ return ["gemma-2-27b-it", "gemma-2-9b-it"]
llm/providers/groq.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+
3
+
4
+ def invoke(messages, tools, model_name: str = "llama-3.3-70b-versatile"):
5
+ """Invoke Groq LLM."""
6
+ model = ChatGroq(model=model_name, temperature=0)
7
+ model_with_tools = model.bind_tools(tools)
8
+ return model_with_tools.invoke(messages)
9
+
10
+
11
+ def get_models():
12
+ """List available Groq models for fallback."""
13
+ return ["llama-3.3-70b-versatile", "llama-3.1-8b-instant"]
quick_test.py DELETED
@@ -1,42 +0,0 @@
1
- import os
2
- import requests
3
- from langchain_core.messages import HumanMessage
4
- from agent import build_graph
5
- from huggingface_hub import hf_hub_download
6
- import pyarrow.parquet as pq
7
- from dotenv import load_dotenv
8
-
9
- load_dotenv(override=True)
10
-
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
- # Initialize agent
14
- graph = build_graph()
15
-
16
- # Fetch 1 question
17
- resp = requests.get(f"{DEFAULT_API_URL}/questions")
18
- questions = resp.json()[:1]
19
-
20
- # Load ground truth
21
- token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
22
- path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
23
- df = pq.read_table(path).to_pandas()
24
- answer_map = dict(zip(df['task_id'], df['Final answer']))
25
-
26
- # Test
27
- q = questions[0]
28
- task_id = q['task_id']
29
- question = q['question']
30
- ground_truth = answer_map.get(task_id, "NOT FOUND")
31
-
32
- print(f"Question: {question[:100]}...")
33
- print(f"Ground Truth: {ground_truth}")
34
- print("-" * 40)
35
-
36
- result = graph.invoke({"messages": [HumanMessage(content=question)]})
37
- answer = result['messages'][-1].content
38
- print(f"Agent Answer: {answer}")
39
- print("-" * 40)
40
-
41
- is_correct = answer.strip().lower() == str(ground_truth).strip().lower()
42
- print(f"Correct: {is_correct}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quick_test2.py DELETED
@@ -1,17 +0,0 @@
1
- import requests
2
- from langchain_core.messages import HumanMessage
3
- from agent import build_graph
4
-
5
- graph = build_graph()
6
- resp = requests.get('https://agents-course-unit4-scoring.hf.space/questions')
7
- questions = resp.json()
8
-
9
- # Test Q7
10
- q7 = questions[6]
11
- result = graph.invoke({'messages': [HumanMessage(content=q7['question'])]})
12
- print(f'Q7 answer: {result["messages"][-1].content}')
13
-
14
- # Test Q19
15
- q19 = questions[18]
16
- result = graph.invoke({'messages': [HumanMessage(content=q19['question'])]})
17
- print(f'Q19 answer: {result["messages"][-1].content}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
skills-lock.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 1,
3
+ "skills": {
4
+ "caveman": {
5
+ "source": "mattpocock/skills",
6
+ "sourceType": "github",
7
+ "skillPath": "skills/productivity/caveman/SKILL.md",
8
+ "computedHash": "536908fcfcb232600a5875aa85f1fd50fd13305e9d67379bcd95f07c8c916f3f"
9
+ },
10
+ "diagnose": {
11
+ "source": "mattpocock/skills",
12
+ "sourceType": "github",
13
+ "skillPath": "skills/engineering/diagnose/SKILL.md",
14
+ "computedHash": "1c3c85517ac42116fe5f2bfb5150f7b3e38ad23808e40b33fbb01f1afb611983"
15
+ },
16
+ "grill-me": {
17
+ "source": "mattpocock/skills",
18
+ "sourceType": "github",
19
+ "skillPath": "skills/productivity/grill-me/SKILL.md",
20
+ "computedHash": "daf64ca15f4fa081a6747766db538e2dbd1131725ed4fcdd3d538dc62c7035ba"
21
+ },
22
+ "grill-with-docs": {
23
+ "source": "mattpocock/skills",
24
+ "sourceType": "github",
25
+ "skillPath": "skills/engineering/grill-with-docs/SKILL.md",
26
+ "computedHash": "e95d83038cb68774469932969b060438bc457973657269a479571321c93a9140"
27
+ },
28
+ "improve-codebase-architecture": {
29
+ "source": "mattpocock/skills",
30
+ "sourceType": "github",
31
+ "skillPath": "skills/engineering/improve-codebase-architecture/SKILL.md",
32
+ "computedHash": "2da1d23b8f53cfe67f2e0b68924ab9f4ec400bb6480de097007eeaeb517d1722"
33
+ },
34
+ "setup-matt-pocock-skills": {
35
+ "source": "mattpocock/skills",
36
+ "sourceType": "github",
37
+ "skillPath": "skills/engineering/setup-matt-pocock-skills/SKILL.md",
38
+ "computedHash": "ab6e8143f9237f970435d95e94a0f79703faf125a0b8c583b35ee7fe340eeefe"
39
+ },
40
+ "tdd": {
41
+ "source": "mattpocock/skills",
42
+ "sourceType": "github",
43
+ "skillPath": "skills/engineering/tdd/SKILL.md",
44
+ "computedHash": "78b31b2120c5fe7aced1cebfd4c7c94acb0037fd4f89c83c67584414aa4173bd"
45
+ },
46
+ "to-issues": {
47
+ "source": "mattpocock/skills",
48
+ "sourceType": "github",
49
+ "skillPath": "skills/engineering/to-issues/SKILL.md",
50
+ "computedHash": "7b35050573981106debeb743de355fb18b898660bd643b646aa61a43c3fe1cef"
51
+ },
52
+ "to-prd": {
53
+ "source": "mattpocock/skills",
54
+ "sourceType": "github",
55
+ "skillPath": "skills/engineering/to-prd/SKILL.md",
56
+ "computedHash": "b3ebbc8aad6e91d04aa1b5c0387ce556b32adc8d60d130d61f90a2b84a38addc"
57
+ },
58
+ "triage": {
59
+ "source": "mattpocock/skills",
60
+ "sourceType": "github",
61
+ "skillPath": "skills/engineering/triage/SKILL.md",
62
+ "computedHash": "56ff15b41bbebfa4cb329d96150d9b297c1d919ce30784d883b8755b4bfd8e7e"
63
+ },
64
+ "write-a-skill": {
65
+ "source": "mattpocock/skills",
66
+ "sourceType": "github",
67
+ "skillPath": "skills/productivity/write-a-skill/SKILL.md",
68
+ "computedHash": "3b58a16bde08f84ed490cd449ecdc40289216d660e070c485f53bc2d1ed2b843"
69
+ },
70
+ "zoom-out": {
71
+ "source": "mattpocock/skills",
72
+ "sourceType": "github",
73
+ "skillPath": "skills/engineering/zoom-out/SKILL.md",
74
+ "computedHash": "a8b8ed45609fdfa9f184d0c9f69326e43822a42eebea14db2792d777373de562"
75
+ }
76
+ }
77
+ }
test_react.py DELETED
@@ -1,18 +0,0 @@
1
- from agent import build_graph
2
- from langchain_core.messages import HumanMessage
3
-
4
- def test_agent():
5
- graph = build_graph()
6
- # Simple test: math question that should trigger python_repl
7
- question = "Calculate the square root of 123456789 and multiply it by 42. Provide the final answer."
8
- print(f"Testing with question: {question}")
9
-
10
- messages = [HumanMessage(content=question)]
11
- result = graph.invoke({"messages": messages})
12
-
13
- print("\n--- Final Answer ---")
14
- print(result['messages'][-1].content)
15
- print("--------------------")
16
-
17
- if __name__ == "__main__":
18
- test_agent()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test_status.py DELETED
@@ -1,45 +0,0 @@
1
- import os
2
- import requests
3
- import re
4
- from langchain_core.messages import HumanMessage
5
- from agent import build_graph
6
- from huggingface_hub import hf_hub_download
7
- import pyarrow.parquet as pq
8
- from dotenv import load_dotenv
9
-
10
- load_dotenv(override=True)
11
-
12
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
-
14
- def extract_answer(content) -> str:
15
- if isinstance(content, str):
16
- match = re.search(r'FINAL ANSWER:\s*(.+?)(?:\n|$)', content, re.IGNORECASE)
17
- if match:
18
- return match.group(1).strip()
19
- return content.strip()
20
- return str(content)
21
-
22
- graph = build_graph()
23
- resp = requests.get(f"{DEFAULT_API_URL}/questions")
24
- questions = resp.json()
25
-
26
- token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
27
- path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
28
- df = pq.read_table(path).to_pandas()
29
- answer_map = dict(zip(df['task_id'], df['Final answer']))
30
-
31
- # Test all questions to see current state
32
- for i in range(20):
33
- q = questions[i]
34
- task_id = q['task_id']
35
- question = q['question']
36
- ground_truth = answer_map.get(task_id, "NOT FOUND")
37
- file_name = q.get('file_name', '')
38
-
39
- result = graph.invoke({"messages": [HumanMessage(content=question)]})
40
- answer_raw = result['messages'][-1].content
41
- answer = extract_answer(answer_raw)
42
-
43
- is_correct = answer.strip().lower() == str(ground_truth).strip().lower()
44
- status = "OK" if is_correct else "FAIL"
45
- print(f"[Q{i+1:2d}] {status} | GT: {str(ground_truth)[:20]} | Ans: {answer[:20]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tools.web.search import web_search
2
+ from tools.web.wiki import wiki_search
3
+ from tools.web.browse import browse_url
4
+ from tools.file.reader import read_file
5
+ from tools.python import python_repl
6
+ from tools.reverse import reverse_text
7
+ from tools.youtube import get_youtube_transcript
8
+ from tools.audio import transcribe_audio
9
+
10
# Every tool object this package exposes, in registration order.
# NOTE(review): these entries are tool objects, not name strings. Python
# expects ``__all__`` to hold strings, so ``from tools import *`` would
# raise TypeError. Left unchanged because ``tools_by_name`` below (and
# possibly other callers) iterate it as the list of tools -- confirm
# before converting it to names.
__all__ = [
    web_search, wiki_search, browse_url, read_file,
    python_repl, reverse_text, get_youtube_transcript, transcribe_audio,
]

# Lookup table: each tool's ``name`` attribute -> the tool object.
tools_by_name = dict((entry.name, entry) for entry in __all__)
tools/audio.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+
3
+
4
# Whisper models already loaded by this process, keyed by model size, so
# repeated tool calls reuse the model instead of reloading it every time.
_WHISPER_MODELS = {}


# NOTE: the docstring is kept as-is because ``@tool`` uses it as the tool
# description; implementation notes therefore live in comments.
#
# Returns the transcription truncated to 5000 characters,
# "NO_TRANSCRIPTION" when nothing was recognised, or an
# "AUDIO_TRANSCRIPTION_ERROR: ..." string on any failure.
@tool
def transcribe_audio(path: str) -> str:
    """Transcribe audio file to text."""
    try:
        # Imported inside the try so a missing ``whisper`` package is
        # reported as an error string instead of breaking module import.
        import whisper

        model = _WHISPER_MODELS.get("base")
        if model is None:
            model = whisper.load_model("base")
            _WHISPER_MODELS["base"] = model
        result = model.transcribe(path)
        # Strip before the emptiness check so a whitespace-only result is
        # reported as NO_TRANSCRIPTION rather than returned as blank text.
        return result["text"].strip()[:5000] or "NO_TRANSCRIPTION"
    except Exception as e:
        return f"AUDIO_TRANSCRIPTION_ERROR: {e}"
tools/file/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from tools.file.reader import read_file
2
+
3
# Tools exported by the ``tools.file`` sub-package.
# NOTE(review): the entry is the tool object itself, not its name; Python
# expects ``__all__`` to contain strings, so ``from tools.file import *``
# would raise TypeError -- confirm how this list is consumed.
__all__ = [read_file]
tools/file/reader.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz
3
+ from langchain_community.document_loaders import UnstructuredFileLoader
4
+ from langchain_community.document_loaders.image import UnstructuredImageLoader
5
+ from langchain_core.tools import tool
6
+
7
+
8
# File extensions that are routed to the image loader.
_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}


def _load_text(loader_cls, path):
    """Load ``path`` with ``loader_cls`` and join the documents' page contents."""
    return "\n\n".join(doc.page_content for doc in loader_cls(path).load())


def _read_pdf(path):
    """Extract PDF text with PyMuPDF, falling back to UnstructuredFileLoader."""
    try:
        # The context manager closes the document even when a page fails
        # to render, which the explicit close() call did not guarantee.
        with fitz.open(path) as pdf:
            content = "\n".join(page.get_text() for page in pdf)
        if not content.strip():
            raise ValueError("No text extracted with fitz")
        return content
    except Exception:
        # Deliberate best-effort: any PyMuPDF problem (or an empty result)
        # switches to the generic loader instead of failing the tool call.
        return _load_text(UnstructuredFileLoader, path)


# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
#
# Returns the file's text truncated to 15000 characters, "EMPTY_FILE" when
# nothing was extracted, "ERROR: File not found" for a missing/empty path,
# or "ERROR: <message>" when a loader raises.
@tool
def read_file(path: str) -> str:
    """Read a local file using robust parsing for various document types.
    For PDFs, it first tries PyMuPDF (fitz) for high-quality text extraction,
    falling back to UnstructuredFileLoader. For images, it uses UnstructuredImageLoader.
    The content will be truncated to 15000 characters.
    """
    if not path or not os.path.exists(path):
        return "ERROR: File not found"
    try:
        ext = os.path.splitext(path)[1].lower()
        if ext in _IMAGE_EXTENSIONS:
            content = _load_text(UnstructuredImageLoader, path)
        elif ext == ".pdf":
            content = _read_pdf(path)
        else:
            content = _load_text(UnstructuredFileLoader, path)

        return content[:15000] if content else "EMPTY_FILE"
    except Exception as e:
        return f"ERROR: {e}"
tools/python.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from io import StringIO
3
+ from langchain_core.tools import tool
4
+
5
+
6
# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
#
# Returns whatever the code printed (stripped), a fixed success message
# when it printed nothing, or "PYTHON_ERROR: <ExceptionType>: <message>"
# when the code raised.
@tool
def python_repl(code: str) -> str:
    """Execute python code and return the output. Use this for calculations, data analysis, or processing files.
    The code should be a valid python script that prints the final result.
    You can use libraries like pandas, numpy, PIL, etc.
    Example: print(df.head()) or print(2 + 2)"""
    from contextlib import redirect_stdout

    captured = StringIO()
    try:
        # redirect_stdout restores the previous sys.stdout on exit, even
        # when the executed code raises.
        with redirect_stdout(captured):
            # SECURITY(review): exec() runs the supplied code with this
            # process's full privileges and inside this module's globals
            # (so names persist between calls). Only expose this tool
            # where the code source is trusted or the process is sandboxed.
            exec(code, globals())
    except Exception as e:
        # Include the exception type: messages such as those of a bare
        # ``assert`` or a KeyError are empty or ambiguous on their own.
        return f"PYTHON_ERROR: {type(e).__name__}: {e}"
    return captured.getvalue().strip() or "Code executed successfully (no output)."
tools/reverse.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+
3
+
4
# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
@tool
def reverse_text(text: str) -> str:
    """Reverse the given text."""
    # Walk the string back to front and join the characters again.
    return "".join(reversed(text))
tools/web/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from tools.web.search import web_search
2
+ from tools.web.wiki import wiki_search
3
+ from tools.web.browse import browse_url
4
+
5
# Tools exported by the ``tools.web`` sub-package.
# NOTE(review): the entries are tool objects, not name strings; Python
# expects ``__all__`` to contain strings, so ``from tools.web import *``
# would raise TypeError -- confirm how this list is consumed.
__all__ = [web_search, wiki_search, browse_url]
tools/web/browse.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.tools import tool
2
+
3
+
4
# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
#
# Returns the page's visible text truncated to 15000 characters,
# "NO_CONTENT" when no text remains after cleaning, or
# "BROWSE_ERROR: <message>" on any failure (network, HTTP status, parsing).
@tool
def browse_url(url: str) -> str:
    """Browse a URL and return its clean text content. Use this to read the full content of a webpage identified by web_search.
    If the page content is too large, it will be truncated.
    """
    try:
        # Imported inside the try so a missing dependency is reported as
        # an error string rather than breaking module import.
        import requests
        from bs4 import BeautifulSoup

        response = requests.get(url, timeout=10, headers={"User-Agent": "mozilla/5.0"})
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Drop elements that carry no readable page content.
        for tag in soup(['script', 'style', 'nav', 'header', 'footer', 'aside', 'form']):
            tag.extract()
        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        # Split on runs of two spaces (phrase boundaries). Splitting on a
        # single space would put every word on its own line.
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)
        # Match the sibling tools, which return a sentinel instead of "".
        return text[:15000] or "NO_CONTENT"
    except Exception as e:
        return f"BROWSE_ERROR: {e}"
tools/web/search.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_tavily import TavilySearch
2
+ from langchain_core.tools import tool
3
+
4
+
5
# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
#
# Returns up to five hits formatted as "Title / URL / Content" (content
# cut to 300 characters), "NO_RESULTS" when there are none, or
# "SEARCH_ERROR: <message>" on failure.
@tool
def web_search(keywords: str) -> str:
    """Search the web using Tavily. This tool performs a concise, focused search to answer factual questions or gather brief information snippets.
    For deeper research or browsing specific URLs, additional tools may be required.
    """
    try:
        tavily = TavilySearch(max_results=5)
        response = tavily.invoke(keywords)
        # langchain_tavily returns a dict payload with the hits under
        # "results"; iterating the dict itself would only yield its keys.
        # A plain list of hits is still accepted for older list-style tools.
        if isinstance(response, dict):
            # presumably failures are reported under an "error" key --
            # verify against the installed langchain_tavily version
            if response.get("error"):
                return f"SEARCH_ERROR: {response['error']}"
            hits = response.get("results") or []
        else:
            hits = response or []
        formatted_results = [
            f"Title: {hit.get('title', '')}\n"
            f"URL: {hit.get('url', '')}\n"
            f"Content: {(hit.get('content') or '')[:300]}"
            for hit in hits
        ]
        return "\n".join(formatted_results) or "NO_RESULTS"
    except Exception as e:
        return f"SEARCH_ERROR: {e}"
tools/web/wiki.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import WikipediaLoader
2
+ from langchain_core.tools import tool
3
+
4
+
5
# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
#
# Loads at most two Wikipedia documents for the query and returns one
# "<title>: <first 500 characters>" line per document, "NO_RESULTS" when
# nothing came back, or "WIKI_ERROR: <message>" on failure.
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia."""
    try:
        pages = WikipediaLoader(query=query, load_max_docs=2).load()
        summaries = []
        for page in pages:
            title = page.metadata.get('title', 'Unknown')
            summaries.append(f"{title}: {page.page_content[:500]}")
        joined = "\n".join(summaries)
        if joined:
            return joined
        return "NO_RESULTS"
    except Exception as e:
        return f"WIKI_ERROR: {e}"
tools/youtube.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import tempfile
3
+ from pathlib import Path
4
+ from langchain_core.tools import tool
5
+
6
+
7
# NOTE: the docstring is kept verbatim because ``@tool`` uses it as the
# tool description.
#
# Downloads English auto-generated subtitles with yt-dlp into a temporary
# directory and returns their text truncated to 15000 characters.
# Returns "NO_SUBTITLES" when yt-dlp produced no .vtt file,
# "NO_TRANSCRIPT" when the file held no caption text, or
# "TRANSCRIPT_ERROR: <message>" on failure (including the 60 s timeout).
@tool
def get_youtube_transcript(url: str) -> str:
    """Get YouTube transcript."""
    import re

    try:
        with tempfile.TemporaryDirectory() as tmp:
            # "--" ends option parsing so a URL beginning with "-" cannot
            # be interpreted as a yt-dlp option.
            cmd = ["yt-dlp", "--skip-download", "--write-auto-subs", "--sub-lang", "en", "-o", f"{tmp}/video", "--", url]
            subprocess.run(cmd, capture_output=True, timeout=60)
            vtt_files = list(Path(tmp).glob("*.vtt"))
            if not vtt_files:
                return "NO_SUBTITLES"
            content = vtt_files[0].read_text(encoding="utf-8", errors="replace")

        captions = []
        for raw in content.splitlines():
            # Cue timing lines look like "00:00:01.000 --> 00:00:03.000":
            # they start with digits, so test for "-->" anywhere in the line.
            if "-->" in raw or raw.startswith(("WEBVTT", "Kind:", "Language:")):
                continue
            if raw.strip().isdigit():  # numeric cue identifiers
                continue
            # Remove inline markup such as <c> or <00:00:01.000> tags.
            text = re.sub(r"<[^>]+>", "", raw).strip()
            # Skip blanks and immediate repeats (auto-generated captions
            # tend to repeat the previous line in the next cue).
            if text and (not captions or captions[-1] != text):
                captions.append(text)
        return "\n".join(captions)[:15000] or "NO_TRANSCRIPT"
    except Exception as e:
        return f"TRANSCRIPT_ERROR: {e}"
trace_q19.py DELETED
@@ -1,32 +0,0 @@
1
- import os
2
- import requests
3
- from langchain_core.messages import HumanMessage
4
- from agent import build_graph
5
- from huggingface_hub import hf_hub_download
6
- import pyarrow.parquet as pq
7
- from dotenv import load_dotenv
8
-
9
- load_dotenv(override=True)
10
-
11
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
-
13
- graph = build_graph()
14
- resp = requests.get(f"{DEFAULT_API_URL}/questions")
15
- questions = resp.json()
16
-
17
- token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
18
- path = hf_hub_download(repo_id='gaia-benchmark/GAIA', filename='2023/validation/metadata.parquet', repo_type='dataset', token=token)
19
- df = pq.read_table(path).to_pandas()
20
- answer_map = dict(zip(df['task_id'], df['Final answer']))
21
-
22
- # Q19 with trace
23
- q = questions[18]
24
- question = q['question']
25
-
26
- result = graph.invoke({"messages": [HumanMessage(content=question)]})
27
-
28
- # Print messages
29
- for i, msg in enumerate(result['messages']):
30
- if hasattr(msg, 'content'):
31
- content = msg.content[:400] if len(msg.content) > 400 else msg.content
32
- print(f"\nMsg {i}: {content}")