Commit
·
15a3001
1
Parent(s):
da5af70
Updated space
Browse filesAdded:
- transcriber tools
- GeminiAgent
- LocalAgent (not used in space)
- agent.py +245 -63
- app.py +23 -73
- requirements.txt +14 -0
- tools/audio_tools.py +78 -0
agent.py
CHANGED
|
@@ -1,10 +1,21 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
|
|
|
| 4 |
from smolagents import (
|
| 5 |
CodeAgent,
|
| 6 |
-
InferenceClientModel,
|
|
|
|
|
|
|
| 7 |
Tool,
|
|
|
|
| 8 |
DuckDuckGoSearchTool,
|
| 9 |
VisitWebpageTool,
|
| 10 |
WikipediaSearchTool,
|
|
@@ -15,66 +26,237 @@ from smolagents import (
|
|
| 15 |
# Import your custom tools (to be used in app, not in local notebook)
|
| 16 |
from tools.download_file import download_file_from_url
|
| 17 |
from tools.files_to_text import image_to_text, pdf_to_text, text_file_to_string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Generic agent
|
| 3 |
+
import os
|
| 4 |
+
from typing import Optional
|
| 5 |
import pandas as pd
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
# Local agent specific
|
| 9 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 10 |
|
| 11 |
+
# Smolagents imports
|
| 12 |
from smolagents import (
|
| 13 |
CodeAgent,
|
| 14 |
+
InferenceClientModel,
|
| 15 |
+
TransformersModel,
|
| 16 |
+
LiteLLMModel,
|
| 17 |
Tool,
|
| 18 |
+
tool,
|
| 19 |
DuckDuckGoSearchTool,
|
| 20 |
VisitWebpageTool,
|
| 21 |
WikipediaSearchTool,
|
|
|
|
| 26 |
# Import your custom tools (to be used in app, not in local notebook)
|
| 27 |
from tools.download_file import download_file_from_url
|
| 28 |
from tools.files_to_text import image_to_text, pdf_to_text, text_file_to_string
|
| 29 |
+
from tools.audio_tools import youtube_to_text, transcribe_audio
|
| 30 |
+
|
| 31 |
+
# Define tools
|
| 32 |
+
AGENT_TOOLS = [
|
| 33 |
+
# Default Tools
|
| 34 |
+
DuckDuckGoSearchTool(), # Internet search
|
| 35 |
+
VisitWebpageTool(), # Retrieve webpage content
|
| 36 |
+
PythonInterpreterTool(), # Executes agent-generated Python code
|
| 37 |
+
FinalAnswerTool(), # Ends agent reasoning and returns final answer
|
| 38 |
+
|
| 39 |
+
# Custom Tools
|
| 40 |
+
download_file_from_url, # file downloader
|
| 41 |
+
text_file_to_string, # .txt, .md, .json, etc.
|
| 42 |
+
pdf_to_text, # PyMuPDF-based safe PDF parser
|
| 43 |
+
image_to_text, # OCR for images
|
| 44 |
+
youtube_to_text, # Youtube audio to text
|
| 45 |
+
transcribe_audio, # Audio file to text
|
| 46 |
+
]
|
| 47 |
+
|
| 48 |
+
# System prompt
|
| 49 |
+
SYSTEM_PROMPT = """
|
| 50 |
+
You are an expert **General AI Assistant** and **Python Programmer** tasked with solving complex GAIA benchmark problems.
|
| 51 |
+
|
| 52 |
+
### 1. Reason-Act-Observe
|
| 53 |
+
Follow a **PLAN → ACT → OBSERVE** loop:
|
| 54 |
+
- **PLAN:** Break the task into 1–3 logical steps. Identify tools for each step.
|
| 55 |
+
- **ACT:** Write and run one self-contained Python block per step.
|
| 56 |
+
- **OBSERVE:** Examine outputs or errors before proceeding.
|
| 57 |
+
|
| 58 |
+
### 2. File Handling
|
| 59 |
+
- When a tool like `download_file_from_url` returns a local file path (e.g., `/tmp/data.csv`), you **MUST** save this path to a descriptive variable (e.g., `filepath`) and **immediately use that variable** as the argument for the next file-reading tool.
|
| 60 |
+
|
| 61 |
+
You must select the reading or transcription method **strictly** based on the file type or source, following the rules below.
|
| 62 |
+
|
| 63 |
+
| File Type / Source | Tool / Method to Use |
|
| 64 |
+
| :--- | :--- |
|
| 65 |
+
| `.csv` | `pd.read_csv(filepath)` |
|
| 66 |
+
| `.xlsx`, `.xls` | `pd.read_excel(filepath)` |
|
| 67 |
+
| `.pdf` | `pdf_to_text(filepath)` |
|
| 68 |
+
| `.txt`, `.md`, `.json` | `text_file_to_string(filepath)` |
|
| 69 |
+
| `.png`, `.jpg`, `.jpeg` | `image_to_text(filepath)` |
|
| 70 |
+
| **YouTube URL** | `youtube_to_text(url)` |
|
| 71 |
+
| `.mp3`, `.wav`, `.m4a`, `.flac`, `.ogg` | `transcribe_audio(filepath)` |
|
| 72 |
+
|
| 73 |
+
**Important rules:**
|
| 74 |
+
- When a tool returns a local file path, you **must** store it in a variable (e.g. `filepath`) and pass that variable directly to the next tool.
|
| 75 |
+
- You must **not** mix methods across file types (e.g. do not use Whisper for CSVs or pandas for audio).
|
| 76 |
+
- For YouTube links, always attempt `youtube_to_text` first; it will automatically fall back to Whisper if captions are unavailable.
|
| 77 |
|
| 78 |
+
### 3. Data Analysis & Answer
|
| 79 |
+
- Inspect loaded datasets first (`.head()`, `.info()`, `.describe()`) before analysis.
|
| 80 |
+
- Write clean, idiomatic Python code. Before that, check if there is any pre-made tool that would work for the task.
|
| 81 |
+
- Use `FinalAnswerTool` **only once the problem is fully solved** to give a concise final answer.
|
| 82 |
+
|
| 83 |
+
### 4. Additional instructions for the following tasks provided by GAIA team
|
| 84 |
+
- You are a general AI assistant. I will ask you a question. Do not reveal your internal reasoning. Only the content inside FinalAnswerTool will be evaluated.
|
| 85 |
+
- Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
| 86 |
+
|
| 87 |
+
### 5. To provide the final answer, you MUST call the final_answer tool inside a <code> block.
|
| 88 |
+
|
| 89 |
+
- Example of how to end the task:
|
| 90 |
+
|
| 91 |
+
Thought: I have found the answer. I will now provide it.
|
| 92 |
+
<code>
|
| 93 |
+
final_answer("FINAL ANSWER: The capital of France is Paris")
|
| 94 |
+
</code>
|
| 95 |
+
|
| 96 |
+
\n\n
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
+
class BasicAgent:
    """GAIA agent backed by a hosted model through smolagents' InferenceClientModel.

    Builds a CodeAgent over the shared AGENT_TOOLS list and prepends the shared
    SYSTEM_PROMPT to every question.
    """

    def __init__(self):
        # Shared GAIA system prompt, injected at call time.
        self.system_prompt = SYSTEM_PROMPT
        self.model = InferenceClientModel(
            model_id="Qwen/Qwen3-Next-80B-A3B-Thinking",
            temperature=0.0,
            top_p=1.0,
            max_tokens=8196,
        )
        self.tools = AGENT_TOOLS
        self.basic_agent = CodeAgent(
            name="basic_agent",
            description="Basic smolagents CodeAgent",
            model=self.model,
            tools=self.tools,
            add_base_tools=True,  # probably redundant, but it does not hurt
            max_steps=5,
            additional_authorized_imports=[
                'numpy', 'subprocess', 're', 'pandas',
                'json', 'os', 'datetime', 'tempfile',
            ],
            verbosity_level=1,
            max_print_outputs_length=1_000_000,
        )

        print("✅ Basic agent initialized")

    def __call__(self, question: str, file_path: Optional[str] = None) -> str:
        """Run the agent on `question`, optionally pointing it at a local file.

        When `file_path` is given, the prompt tells the agent where the file
        lives and instructs it to read the file before answering.
        """
        segments = [
            f"{self.system_prompt}\n\n",
            f"Question: {question}\n\n",
        ]
        if file_path:
            segments.append(f"There is an associated file at path: {file_path}.\n")
            segments.append(
                "Use the appropriate tool to download it (if necessary) "
                "and read it before answering"
            )
        return self.basic_agent.run("".join(segments))
|
| 143 |
|
| 144 |
+
class GeminiAgent:
    """GAIA agent backed by Google Gemini (gemini-2.0-flash) through LiteLLM.

    Requires the GOOGLE_API_KEY environment variable; raises RuntimeError
    at construction time if it is missing.
    """

    def __init__(self):
        self.system_prompt = SYSTEM_PROMPT
        google_api_key = os.environ.get("GOOGLE_API_KEY")
        if not google_api_key:
            raise RuntimeError(
                "GOOGLE_API_KEY not found."
            )
        self.model = LiteLLMModel(
            model_id="gemini/gemini-2.0-flash",
            api_key=google_api_key,
            temperature=0.0,
            top_p=1.0,
            max_tokens=8196,
        )
        self.tools = AGENT_TOOLS
        self.gemini_agent = CodeAgent(
            name="gemini_agent",
            description="Gemini CodeAgent",
            model=self.model,
            tools=self.tools,
            add_base_tools=True,  # probably redundant, but it does not hurt
            max_steps=5,
            additional_authorized_imports=[
                'numpy', 'subprocess', 're', 'pandas',
                'json', 'os', 'datetime', 'tempfile',
            ],
            verbosity_level=1,
            max_print_outputs_length=1_000_000,
        )

        print("✅ Gemini agent initialized")

    def __call__(self, question: str, file_path: Optional[str] = None) -> str:
        """Run the agent on `question`; mention `file_path` in the prompt when given."""
        prompt = f"{self.system_prompt}\n\nQuestion: {question}\n\n"
        if file_path:
            prompt += (
                f"There is an associated file at path: {file_path}.\n"
                "Use the appropriate tool to download it (if necessary) and read it before answering"
            )
        return self.gemini_agent.run(prompt)
|
| 194 |
+
|
| 195 |
+
class LocalAgent:
    """GAIA agent running a locally 4-bit-quantized Qwen2.5-7B via smolagents.

    Not used in the Space; intended for local experiments. Construction is
    heavy: it downloads the checkpoint, quantizes it, and re-saves it so
    TransformersModel can load from a local path.
    """

    def __init__(self):
        checkpoint = "Qwen/Qwen2.5-7B-Instruct"
        quantized_model_dir = "./quantized_model"

        # Define the quantized configuration (4-bit NF4 with double quantization).
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )

        # Load quantized model and tokenizer
        temp_model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            quantization_config=bnb_config,
            device_map="auto",  # use multiple GPUs if available
        )
        temp_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

        # Save the model in local path (seems like it's the only way to make
        # it work with TransformersModel)
        temp_model.save_pretrained(quantized_model_dir)
        temp_tokenizer.save_pretrained(quantized_model_dir)

        self.system_prompt = SYSTEM_PROMPT

        self.model = TransformersModel(
            model_path=quantized_model_dir,
            temperature=0.1,
            top_p=0.95,
            device_map="auto",
            # https://github.com/huggingface/smolagents/issues/414
            max_new_tokens=8196,
        )
        self.tools = AGENT_TOOLS

        self.local_agent = CodeAgent(
            model=self.model,
            # BUG FIX: was `tools=tools` — `tools` is not defined in this scope
            # (NameError at construction); the attribute set above is intended.
            tools=self.tools,
            add_base_tools=True,  # probably redundant, but it does not hurt
            max_steps=5,
            additional_authorized_imports=[
                'numpy', 'subprocess', 're', 'pandas',
                'json', 'os', 'pathlib', 'tempfile',
                # 'matplotlib.pyplot', 'seaborn'
            ],
            verbosity_level=1,
            max_print_outputs_length=1_000_000,
        )

        print("✅ Local (quantized) agent initialized.")

    def __call__(self, question: str, file_path: Optional[str] = None) -> str:
        """Run the agent on `question`, optionally with an associated local file."""
        if file_path:
            # Inject system prompt + question and (optional) file path
            prompt = (
                f"{self.system_prompt}\n\n"
                f"Question: {question}\n\n"
                f"There is an associated file at path: {file_path}.\n"
                f"Use the appropriate tool to download it (if necessary) and read it before answering"
            )
        else:
            prompt = (
                f"{self.system_prompt}\n\n"
                f"Question: {question}\n\n"
            )

        return self.local_agent.run(prompt)
|
app.py
CHANGED
|
@@ -1,80 +1,18 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
import inspect
|
| 5 |
import pandas as pd
|
| 6 |
-
from agent import
|
| 7 |
from typing import Optional
|
| 8 |
|
|
|
|
|
|
|
| 9 |
# (Keep Constants as is)
|
| 10 |
# --- Constants ---
|
| 11 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 12 |
|
| 13 |
-
# --- Basic Agent Definition ---
|
| 14 |
-
class BasicAgent:
|
| 15 |
-
def __init__(self):
|
| 16 |
-
self.agent = create_agent()
|
| 17 |
-
self.system_prompt = """
|
| 18 |
-
You are an expert **General AI Assistant** and **Python Programmer** tasked with solving complex GAIA benchmark problems.
|
| 19 |
-
|
| 20 |
-
### 1. Reason-Act-Observe
|
| 21 |
-
Follow a **PLAN → ACT → OBSERVE** loop:
|
| 22 |
-
- **PLAN:** Break the task into 1–3 logical steps. Identify tools for each step.
|
| 23 |
-
- **ACT:** Write and run one self-contained Python block per step.
|
| 24 |
-
- **OBSERVE:** Examine outputs or errors before proceeding.
|
| 25 |
-
|
| 26 |
-
### 2. File Handling
|
| 27 |
-
- When a tool like `download_file_from_url` returns a local file path (e.g., `/tmp/data.csv`), you **MUST** save this path to a descriptive variable (e.g., `filepath`) and **immediately use that variable** as the argument for the next file-reading tool.
|
| 28 |
-
|
| 29 |
-
You must select the reading method based strictly on the file extension:
|
| 30 |
-
| File Extension | Tool / Method to Use |
|
| 31 |
-
| :--- | :--- |
|
| 32 |
-
| .csv | `pd.read_csv(filepath)` |
|
| 33 |
-
| .xlsx, .xls | `pd.read_excel(filepath)` |
|
| 34 |
-
| .pdf | `pdf_to_text(filepath)` |
|
| 35 |
-
| .txt, .md, .json | `text_file_to_string(filepath)` |
|
| 36 |
-
| .png, .jpg, .jpeg | `image_to_text(filepath)` |
|
| 37 |
-
|
| 38 |
-
### 3. Data Analysis & Answer
|
| 39 |
-
- Inspect loaded datasets first (`.head()`, `.info()`, `.describe()`) before analysis.
|
| 40 |
-
- Write clean, idiomatic Python code. Before that, check if there is any pre-made tool that would work for the task.
|
| 41 |
-
- Use `FinalAnswerTool` **only once the problem is fully solved** to give a concise final answer.
|
| 42 |
-
|
| 43 |
-
### 4. Additional instructions for the following tasks provided by GAIA team
|
| 44 |
-
- You are a general AI assistant. I will ask you a question. Do not reveal your internal reasoning. Only the content inside FinalAnswerTool will be evaluated.
|
| 45 |
-
- Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
|
| 46 |
-
|
| 47 |
-
### 5. To provide the final answer, you MUST call the final_answer tool inside a <code> block.
|
| 48 |
-
|
| 49 |
-
- Example of how to end the task:
|
| 50 |
-
|
| 51 |
-
Thought: I have found the answer. I will now provide it.
|
| 52 |
-
<code>
|
| 53 |
-
final_answer("FINAL ANSWER: The capital of France is Paris")
|
| 54 |
-
</code>
|
| 55 |
-
|
| 56 |
-
\n\n
|
| 57 |
-
"""
|
| 58 |
-
# print("Agent initialized.")
|
| 59 |
-
|
| 60 |
-
def __call__(self, question: str, file_path: Optional[str] = None) -> str:
|
| 61 |
-
|
| 62 |
-
if file_path:
|
| 63 |
-
# Inject system prompt + question and (optional) file path
|
| 64 |
-
prompt = (
|
| 65 |
-
f"{self.system_prompt}\n\n"
|
| 66 |
-
f"Question: {question}\n\n"
|
| 67 |
-
f"There is an associated file at path: {file_path}.\n"
|
| 68 |
-
f"Use the appropriate tool to download it (if necessary) and read it before answering"
|
| 69 |
-
)
|
| 70 |
-
else:
|
| 71 |
-
prompt = (
|
| 72 |
-
f"{self.system_prompt}\n\n"
|
| 73 |
-
f"Question: {question}\n\n"
|
| 74 |
-
)
|
| 75 |
-
|
| 76 |
-
return self.agent.run(prompt)
|
| 77 |
-
|
| 78 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 79 |
"""
|
| 80 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
@@ -84,7 +22,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 84 |
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 85 |
|
| 86 |
if profile:
|
| 87 |
-
username= f"{profile.username}"
|
| 88 |
print(f"User logged in: {username}")
|
| 89 |
else:
|
| 90 |
print("User not logged in.")
|
|
@@ -94,15 +32,25 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 94 |
questions_url = f"{api_url}/questions"
|
| 95 |
submit_url = f"{api_url}/submit"
|
| 96 |
|
| 97 |
-
# 1. Instantiate Agent (
|
| 98 |
try:
|
| 99 |
-
agent =
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
# In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
|
| 104 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 105 |
-
print(agent_code)
|
|
|
|
| 106 |
|
| 107 |
# 2. Fetch Questions
|
| 108 |
print(f"Fetching questions from: {questions_url}")
|
|
@@ -139,6 +87,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 139 |
submitted_answer = agent(question_text)
|
| 140 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 141 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
|
|
|
|
|
|
| 142 |
except Exception as e:
|
| 143 |
print(f"Error running agent on task {task_id}: {e}")
|
| 144 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
|
|
|
| 1 |
import os
|
| 2 |
+
import time
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
| 5 |
import inspect
|
| 6 |
import pandas as pd
|
| 7 |
+
from agent import BasicAgent, GeminiAgent
|
| 8 |
from typing import Optional
|
| 9 |
|
| 10 |
+
# (ASK ABOUT ALIGNMENT BETWEEN TIMEOUT ARGUMENTS AND TIME.SLEEP)
|
| 11 |
+
|
| 12 |
# (Keep Constants as is)
|
| 13 |
# --- Constants ---
|
| 14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 17 |
"""
|
| 18 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
|
|
|
| 22 |
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 23 |
|
| 24 |
if profile:
|
| 25 |
+
username = f"{profile.username}"
|
| 26 |
print(f"User logged in: {username}")
|
| 27 |
else:
|
| 28 |
print("User not logged in.")
|
|
|
|
| 32 |
questions_url = f"{api_url}/questions"
|
| 33 |
submit_url = f"{api_url}/submit"
|
| 34 |
|
| 35 |
+
# 1. Instantiate Agent (modify this part to create your agent)
|
| 36 |
try:
|
| 37 |
+
agent = GeminiAgent()
|
| 38 |
+
agent_type = "GeminiAgent"
|
| 39 |
+
except Exception as main_agent_error:
|
| 40 |
+
print(f"{agent_type} failed to initialize: {main_agent_error}.")
|
| 41 |
+
try:
|
| 42 |
+
agent = BasicAgent()
|
| 43 |
+
agent_type = "BasicAgent"
|
| 44 |
+
print(f"Falling back to {agent_type}.")
|
| 45 |
+
except Exception as secondary_agent_error:
|
| 46 |
+
print(f"{agent_type} failed to initialize: {secondary_agent_error}.")
|
| 47 |
+
agent_type = "None"
|
| 48 |
+
return f"Error initializing agent: {e}", None
|
| 49 |
+
|
| 50 |
# In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
|
| 51 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
| 52 |
+
print(f"Agent code: {agent_code}")
|
| 53 |
+
print(f"Active agent: {agent_code}")
|
| 54 |
|
| 55 |
# 2. Fetch Questions
|
| 56 |
print(f"Fetching questions from: {questions_url}")
|
|
|
|
| 87 |
submitted_answer = agent(question_text)
|
| 88 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
| 89 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
| 90 |
+
|
| 91 |
+
time.sleep(60) # to not exceed free limits
|
| 92 |
except Exception as e:
|
| 93 |
print(f"Error running agent on task {task_id}: {e}")
|
| 94 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
requirements.txt
CHANGED
|
@@ -19,5 +19,19 @@ Pillow==11.3.0
|
|
| 19 |
pdfplumber==0.11.8
|
| 20 |
PyMuPDF==1.26.7
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# OCR (OPTIONAL, disabled)
|
| 23 |
# pytesseract==0.3.13
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
pdfplumber==0.11.8
|
| 20 |
PyMuPDF==1.26.7
|
| 21 |
|
| 22 |
+
# Audio transcriber
|
| 23 |
+
youtube-transcript-api==1.2.3
|
| 24 |
+
pytubefix==10.3.6
|
| 25 |
+
openai-whisper==20250625
|
| 26 |
+
|
| 27 |
# OCR (OPTIONAL, disabled)
|
| 28 |
# pytesseract==0.3.13
|
| 29 |
+
|
| 30 |
+
# Additional for LocalAgent (optional) — NOTE: `!pip install` is notebook
# syntax and is not valid in a requirements.txt (pip would fail to parse it).
# Install these manually if running LocalAgent:
#   transformers==4.1.0
#   bitsandbytes==0.49.0
#   torch==2.6.0+cu124 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu124
|
tools/audio_tools.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from smolagents import tool
|
| 2 |
+
import tempfile
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
@tool
def youtube_to_text(url: str) -> str:
    """
    Transcribe a YouTube video.
    First tries to retrieve official captions.
    Falls back to Whisper transcription if captions are unavailable.

    Args:
        url: Full YouTube video URL

    Returns:
        Transcribed text, or an error message string if both paths fail.
    """
    # ---- Step 1: Try official YouTube transcripts ----
    try:
        from youtube_transcript_api import YouTubeTranscriptApi
        from urllib.parse import urlparse, parse_qs

        # BUG FIX: was `orlparse(url)` — a NameError, so the captions path
        # never succeeded and every call silently fell through to Whisper.
        query = parse_qs(urlparse(url).query)
        video_id = query.get("v", [None])[0]

        if video_id:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            text = " ".join([chunk["text"] for chunk in transcript])
            return text
    except Exception:
        pass  # Silent fallback to Whisper

    # ---- Step 2: Fallback to Whisper transcription ----
    try:
        import whisper
        from pytubefix import YouTube

        yt = YouTube(url)
        audio_stream = yt.streams.get_audio_only()

        # Download the audio into the OS temp dir; Whisper reads it from disk.
        temp_dir = tempfile.gettempdir()
        audio_path = audio_stream.download(output_path=temp_dir)

        model = whisper.load_model("base")
        result = model.transcribe(audio_path)

        return result["text"]

    except Exception as e:
        return f"Error transcribing YouTube video: {str(e)}"
|
| 51 |
+
|
| 52 |
+
@tool
def transcribe_audio(file_path: str) -> str:
    """
    Transcribes audio files into text using the Whisper model.
    Supports multiple formats including .mp3, .wav, .m4a, .flac, and .ogg.

    Args:
        file_path: The local path to the audio file to be transcribed.

    Returns:
        The transcribed text as a string.
    """
    try:
        import whisper

        # "base" is the smallest reasonable checkpoint; loaded fresh per call.
        result = whisper.load_model("base").transcribe(file_path)
        return result["text"]
    except ImportError:
        return (
            "Whisper is not installed. "
            "Install it with `pip install openai-whisper` and ensure ffmpeg is available."
        )
    except Exception as e:
        return f"Error transcribing audio file: {str(e)}"
|