VicBeltran commited on
Commit
de814df
·
1 Parent(s): 81917a3

working agent local version

Browse files
__pycache__/agent.cpython-310.pyc ADDED
Binary file (10.9 kB). View file
 
__pycache__/agent_langchain.cpython-310.pyc ADDED
Binary file (7.26 kB). View file
 
agent.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import wiki
3
+ import torch
4
+ import logging
5
+ import requests
6
+ import wikipedia
7
+ import pytesseract
8
+ import pandas as pd
9
+ from PIL import Image
10
+ from io import BytesIO
11
+ import soundfile as sf
12
+ from pytube import YouTube
13
+ from yt_dlp import YoutubeDL
14
+ from transformers import (
15
+ AutoModelForCausalLM,
16
+ AutoTokenizer,
17
+ BitsAndBytesConfig,
18
+ pipeline,
19
+ )
20
+ from smolagents import (
21
+ CodeAgent,
22
+ DuckDuckGoSearchTool,
23
+ PythonInterpreterTool,
24
+ HfApiModel,
25
+ LiteLLMModel,
26
+ Tool,
27
+ TransformersModel
28
+ )
29
+
30
# Local LLM served by Ollama, accessed through LiteLLM's chat interface.
model = LiteLLMModel(
    model_id="ollama_chat/qwen3:14b",   # local Qwen3 14B via the ollama_chat provider
    api_base="http://127.0.0.1:11434",  # default local Ollama endpoint
    num_ctx=8192                        # context window forwarded to Ollama
)
#bnb_config = BitsAndBytesConfig(load_in_8bit=True)
#tokenizer = AutoTokenizer.from_pretrained(model_id)

# model = TransformersModel(
#     model_id=model_id,
#     torch_dtype="bfloat16",
#     device_map="cuda",
#     trust_remote_code=True,
#     max_new_tokens=2048
# )

#model = torch.compile(model, mode="default")
from whisper import load_model as load_whisper

# Whisper "small" checkpoint, loaded once at import time and shared by
# TranscriptionTool below.
whisper_model = load_whisper("small")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
52
+
53
# ——————————————————————————————————————————————————————————
# 1) GAIA system prompt
# ——————————————————————————————————————————————————————————
# Answer-formatting contract required by the GAIA benchmark scorer.
# NOTE(review): "AXULIARY" is a typo for "AUXILIARY", but the identical
# spelling is used when building the prompt in GAIAAgent.__call__ —
# change both together or neither.
GAIA_SYSTEM_PROMPT = """
You are a general AI assistant. I will ask you a question.
Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
All question related files if existant are given to you below as: AXULIARY FILE FOR QUESTION: [FILE_PATH]
"""

# Scoring-server endpoint and the file extensions each handler understands.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
AUDIO_FILES = ["wav", "mp3", "aac", "ogg"]
IMAGE_FILES = ["png", "jpg", "tiff", "jpeg", "bmp"]
TABULAR_FILES = ["csv", "xlsx"]
70
+ # ——————————————————————————————————————————————————————————
71
+ # 2) Custom tools
72
+ # ——————————————————————————————————————————————————————————
73
+ # --- File handler ---
74
def file_handler(task_id: str, file_name: str):
    """Download the auxiliary file for a GAIA task from the scoring server.

    Args:
        task_id: GAIA task identifier; used to build the download URL.
        file_name: Original file name; only its extension is used.

    Returns:
        (data, ext): raw file bytes and the lower-cased file extension.

    Raises:
        requests.RequestException: on network errors or non-2xx responses.
    """
    try:
        # Bounded timeout so a hung scoring server cannot stall the agent forever.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        response.raise_for_status()
        data = response.content
        ext = file_name.split('.')[-1].lower()
        return data, ext
    except Exception as e:
        logger.error(f"Failed to fetch file: {e}")
        raise
84
+
85
+
86
def fetch_file(args: str) -> str:
    """
    Download a binary blob by task_id,file_name via file_handler,
    save it under ./tmp/, and return the local filesystem path.

    Args:
        args: "task_id, file_name". Split on the first comma only, so
              file names that themselves contain commas are preserved.

    Returns:
        Local path of the saved file: ./tmp/<task_id>.<ext>
    """
    # maxsplit=1: only the first comma separates task_id from file_name.
    task_id, file_name = [x.strip() for x in args.split(',', 1)]
    data, ext = file_handler(task_id, file_name)
    local_path = f"./tmp/{task_id}.{ext}"
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    with open(local_path, 'wb') as f:
        f.write(data)
    return local_path
100
+
101
class TranscriptionTool(Tool):
    # Metadata read by smolagents to describe this tool to the LLM.
    name = "TranscriptionTool"
    description = """
    This tool transcribes spoken content from local audio files such as .wav or .mp3.
    It uses OpenAI's Whisper model to convert speech to text.
    It expects a file path to the audio file and returns a string containing the transcription.
    To call the tool on code just use TranscriptionTool(path).
    """

    inputs = {
        "path": {
            "type": "string",
            "description": "The path to a local audio file (.wav, .mp3, etc.)"
        }
    }
    output_type = "string"

    def forward(self, path: str) -> str:
        """Transcribe `path` with the module-level Whisper "small" model."""
        # soundfile decodes to float32 PCM, which whisper accepts directly.
        # NOTE(review): the sample rate `sr` is ignored, but Whisper expects
        # 16 kHz input — confirm files are 16 kHz or add resampling.
        data, sr = sf.read(path, dtype='float32')
        res = whisper_model.transcribe(data, language='en')
        return f"The transcribed audio text is: {res['text']}\n"
122
+
123
class OCRTool(Tool):
    name = "OCRTool"
    description = """
    This tool extracts text from images using Tesseract OCR.
    It takes a path to an image file (e.g., .png or .jpg) and returns any readable text found in the image.
    To call the tool on code just use OCRTool(path).
    """

    inputs = {
        "path": {
            "type": "string",
            "description": "The path to a local image file (.png, .jpg, etc.)"
        }
    }
    output_type = "string"

    def forward(self, path: str) -> str:
        """Run Tesseract OCR over the image at `path` and return its text."""
        extracted = pytesseract.image_to_string(Image.open(path))
        return f"Extracted text from image:\n\n{extracted}"
143
+
144
class TablePreviewTool(Tool):
    # Metadata read by smolagents to describe this tool to the LLM.
    name = "TablePreviewTool"
    description = """
    This tool previews a CSV or Excel spreadsheet file.
    It returns the shape (rows, columns), column names, the first few rows of data and some description of the database.
    Useful for understanding the structure of tabular data before processing it.
    To call the tool on code just use TablePreviewTool(path)"""

    inputs = {
        "path": {
            "type": "string",
            "description": "The path to a .csv or .xlsx file"
        }
    }
    output_type = "string"

    def forward(self, path: str) -> str:
        """Summarize the table at `path`: shape, columns, head and describe()."""
        ext = path.rsplit('.', 1)[-1].lower()
        # Anything that is not .csv is assumed to be an Excel workbook.
        df = pd.read_csv(path) if ext == 'csv' else pd.read_excel(path)
        # NOTE(review): df.head().to_markdown() requires the optional
        # `tabulate` dependency of pandas — verify it is installed.
        return f"""Shape: {df.shape}\n Columns: {list(df.columns)}\n\n
    Head: {df.head().to_markdown()}\n\n Description of dataset: {str(df.describe())}"""
165
+
166
class YouTubeInfoTool(Tool):
    name = "YouTubeInfoTool"
    description = """
    This tool fetches metadata and English captions from a given YouTube video.
    It returns the video's title, description, and the English subtitles if available.
    To call the tool on code just use YouTubeInfoTool(url)"""

    inputs = {
        "url": {
            "type": "string",
            "description": "The full URL to a YouTube video"
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch title, description and English captions for `url` via yt-dlp."""
        ydl_opts = {
            "skip_download": True,
            "quiet": True,
            "writesubtitles": True,
            "writeautomaticsub": True,
        }
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)

        # yt-dlp can return an explicit None value even when the key exists;
        # coerce to the string "None" so formatting below never fails.
        # (Fixed `== None` comparisons to the idiomatic `is None`.)
        title = info.get("title", "")
        if title is None:
            title = "None"
        desc = info.get("description", "")
        if desc is None:
            desc = "None"

        # try manual subtitles first, then auto-generated
        subs = info.get("subtitles", {}) or info.get("automatic_captions", {})
        en_caps = subs.get("en") or subs.get("en-US") or []
        if en_caps:
            cap_url = en_caps[0]["url"]
            # Timeout keeps a slow caption CDN from hanging the agent step.
            captions = requests.get(cap_url, timeout=30).text
        else:
            captions = "No English captions available."

        text = f"Title: {title}\n\nDescription:\n{desc}\n\nCaptions:\n{captions}"
        return f"The Youtube video title, description and captions are respectivelly: {text}"
209
+
210
+
211
class WikiTool(Tool):
    name = "WikiTool"
    description = """
    This tool searches Wikipedia for a given query and returns a concise summary.
    It takes a search term (string) as input and returns the first few sentences
    of the corresponding Wikipedia article (or a notice if multiple or no pages are found).
    To call the tool in code, use: WikiTool(query)
    """
    inputs = {
        "query": {
            "type": "string",
            "description": "The search term for Wikipedia (e.g., 'Python programming language')."
        }
    }
    output_type = "string"

    def setup(self):
        # One-time initialization: force English-language Wikipedia.
        wikipedia.set_lang("en")

    def forward(self, query: str) -> str:
        """Search Wikipedia for `query` and return a summary or a notice."""
        matches = wikipedia.search(query, results=5)
        if not matches:
            return f"No Wikipedia pages found for '{query}'."
        page_title = matches[0]  # take the best-ranked hit
        try:
            summary = wikipedia.summary(page_title, auto_suggest=False)
        except wikipedia.DisambiguationError as e:
            options = ", ".join(e.options[:5])
            return (
                f"Your query '{query}' is ambiguous. "
                f"Here are some options: {options}"
            )
        except Exception as e:
            return f"Error retrieving Wikipedia summary for '{page_title}': {e}"
        return f"Wikipedia summary for '{page_title}':\n\n{summary}"
250
+
251
class TextFileReaderTool(Tool):
    name = "TextFileReaderTool"
    description = """
    This tool reads the full contents of a local text-based file (e.g., .txt, .py, .md).
    It takes a file path as input and returns the entire file as a single string.
    To call the tool in code, use: TextFileReaderTool(path)
    """
    inputs = {
        "path": {
            "type": "string",
            "description": "The path to a local text based file (.txt, .py, .md, etc.), example: ./tmp/f918266a-b3e0-4914-865d-4faa564f1aef.py"
        }
    }
    output_type = "string"

    def forward(self, path: str) -> str:
        """Return the full UTF-8 contents of `path`, or an error message."""
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                body = handle.read()
        except FileNotFoundError:
            return f"Error: File not found at '{path}'."
        except Exception as e:
            return f"Error reading '{path}': {e}"
        return f"Contents of '{path}':\n\n{body}"
275
+
276
# ——————————————————————————————————————————————————————————
# 3) Built-in smolagents tools
# ——————————————————————————————————————————————————————————
# Instantiated for ad-hoc use; note GAIAAgent below constructs its own
# fresh instances of these tools rather than reusing these.
search_tool = DuckDuckGoSearchTool()
python_repl = PythonInterpreterTool()
281
+
282
+ # ——————————————————————————————————————————————————————————
283
+ # 4) GaiaAgent class with file-preloading
284
+ # ——————————————————————————————————————————————————————————
285
class GAIAAgent:
    """smolagents CodeAgent wrapper that answers GAIA benchmark questions."""

    def __init__(self, model_name: str = None):
        """
        Initialize the GAIA inference agent with your system prompt.

        Args:
            model_name: optional HF model identifier (currently unused;
                the module-level `model` is always used).
        """
        self.system_prompt = GAIA_SYSTEM_PROMPT
        self.model = model
        self.agent = CodeAgent(
            model=self.model,
            tools=[
                TextFileReaderTool(),
                WikiTool(),
                DuckDuckGoSearchTool(),
                PythonInterpreterTool(),
                TranscriptionTool(),
                OCRTool(),
                TablePreviewTool(),
                YouTubeInfoTool(),
            ],
            max_steps=10,
            verbosity_level=2,
            add_base_tools=True,
            additional_authorized_imports=["numpy", "pandas", "wikipedia"],
        )

    def __call__(self, question: str, task_id: str = None, file_name: str = None) -> str:
        """
        Run the agent on `question`. If `task_id` and `file_name` are set,
        download the file into ./tmp/ via fetch_file and prefix its local
        path to the prompt. Returns only what's after 'FINAL ANSWER:'.
        """
        prompt = question
        if task_id and file_name:
            local_path = fetch_file(f"{task_id},{file_name}")
            # NOTE: "AXULIARY" is misspelled but intentionally matches the
            # wording in GAIA_SYSTEM_PROMPT; change both together or neither.
            prompt = f"AXULIARY FILE FOR QUESTION: {local_path}\n\n{question}"

        # Prepend the system prompt. BUG FIX: previously full_prompt was
        # built but the bare prompt was passed to agent.run(), so the GAIA
        # answer-format instructions never reached the model.
        full_prompt = f"{self.system_prompt}\n\nQuestion: {prompt}"

        full_resp = self.agent.run(full_prompt)
        if not isinstance(full_resp, str):
            full_resp = str(full_resp)
        # Strip everything before the answer marker. BUG FIX: the original
        # tested for "**Answer**" but split on "**Answer**:", returning the
        # whole unstripped response when the colon variant was absent —
        # always split on the same marker that matched.
        for marker in ("FINAL ANSWER:", "**Answer:**", "**Answer**:"):
            if marker in full_resp:
                return full_resp.split(marker)[-1].strip()
        return full_resp
agent_langchain.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import logging
4
+ import requests
5
+ import pytesseract
6
+ import pandas as pd
7
+ from PIL import Image
8
+ from io import BytesIO
9
+ import soundfile as sf
10
+ from langchain import hub
11
+ from pytube import YouTube
12
+ from transformers import (
13
+ AutoModelForCausalLM,
14
+ AutoTokenizer,
15
+ BitsAndBytesConfig,
16
+ pipeline,
17
+ )
18
+ from duckduckgo_search import DDGS
19
+ from whisper import load_model as load_whisper
20
+ from langchain_huggingface import HuggingFacePipeline
21
+ from langchain.memory import ConversationBufferMemory
22
+ from langchain_experimental.utilities import PythonREPL
23
+ from langchain.agents import initialize_agent, Tool, AgentType, AgentExecutor, create_react_agent
24
+
25
# Scoring-server endpoint and the file extensions each reader understands.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
AUDIO_FILES = ["wav", "mp3", "aac", "ogg"]
IMAGE_FILES = ["png", "jpg", "tiff", "jpeg", "bmp"]
TABULAR_FILES = ["csv", "xlsx"]


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# GAIA benchmark answer-formatting contract; passed as the `instructions`
# input to the AgentExecutor in GAIAAgent.__call__.
GAIA_SYSTEM_PROMPT = (
    "You are a general AI assistant. I will ask you a question. Report your thoughts, "
    "and finish your answer with the following template: "
    "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible "
    "OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write "
    "your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, "
    "don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. "
    "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
)
43
+
44
+
45
+
46
def file_handler(task_id: str, file_name: str):
    """Download the auxiliary file for a GAIA task from the scoring server.

    Args:
        task_id: GAIA task identifier; used to build the download URL.
        file_name: Original file name; only its extension is used.

    Returns:
        (data, ext): raw file bytes and the lower-cased file extension.

    Raises:
        requests.RequestException: on network errors or non-2xx responses.
    """
    try:
        # Bounded timeout so a hung scoring server cannot stall the agent forever.
        response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
        response.raise_for_status()
        data = response.content
        ext = file_name.split('.')[-1].lower()
        return data, ext
    except Exception as e:
        logger.error(f"Failed to fetch file: {e}")
        raise
56
+
57
whisper_model = load_whisper("small")  # shared Whisper "small" checkpoint


# 8-bit quantized DeepSeek distill, served through a local HF text pipeline.
model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    #use_cache=True,
)
# TF32 matmuls: faster on Ampere+ GPUs at slightly reduced precision.
torch.backends.cuda.matmul.allow_tf32 = True

try:
    # NOTE(review): enable_xformers_memory_efficient_attention is not a
    # standard AutoModelForCausalLM method — this likely always falls into
    # the except branch below; confirm or remove.
    model.enable_xformers_memory_efficient_attention()
except Exception as e:
    logger.warning(f"Failed to enable xformers memory optimization: {e}")

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    temperature=0.05,   # near-greedy decoding for predictable tool use
    device_map="auto"
)
llm = HuggingFacePipeline(pipeline=pipe)
84
+
85
def fetch_file(args: str) -> str:
    """Download a task's auxiliary file and save it under ./tmp/.

    Args:
        args: "task_id, file_name". Split on the first comma only, so
              file names containing commas are preserved.

    Returns:
        Local path of the saved file: ./tmp/<task_id>.<ext>
    """
    try:
        # maxsplit=1: only the first comma separates task_id from file_name.
        task_id, file_name = [x.strip() for x in args.split(',', 1)]
        data, ext = file_handler(task_id, file_name)
        local_path = f"./tmp/{task_id}.{ext}"
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(data)
        logger.info(f"File fetched and saved at {local_path}")
        return local_path
    except Exception as e:
        logger.error(f"fetch_file failed: {e}")
        raise
98
+
99
def transcribe(path: str) -> str:
    """Transcribe a local audio file to English text via Whisper."""
    try:
        samples, _rate = sf.read(path, dtype='float32')
        result = whisper_model.transcribe(samples, language='en')
        return result['text']
    except Exception as e:
        logger.error(f"transcribe failed: {e}")
        raise
107
+
108
def ocr(path: str) -> str:
    """Extract text from a local image file via Tesseract OCR."""
    try:
        return pytesseract.image_to_string(Image.open(path))
    except Exception as e:
        logger.error(f"ocr failed: {e}")
        raise
115
+
116
def preview_table(path: str) -> str:
    """Summarize a CSV/XLSX file: shape, column names and the first rows."""
    try:
        suffix = path.split('.')[-1]
        frame = pd.read_csv(path) if suffix == 'csv' else pd.read_excel(path)
        return (
            f"Table Shape: {frame.shape}\n"
            f"Columns: {list(frame.columns)}\n"
            f"Head:\n{frame.head().to_markdown()}"
        )
    except Exception as e:
        logger.error(f"preview_table failed: {e}")
        raise
125
+
126
def youtube_info(url: str) -> str:
    """Return a YouTube video's title, description and English SRT captions.

    NOTE(review): relies on pytube's captions API, which breaks regularly as
    YouTube changes its pages; failures are logged and re-raised.
    """
    try:
        yt = YouTube(url)
        output = f"title: {yt.title}\n\ndescription: {yt.description}\n\n"
        # Only manually-provided English captions; auto-captions are not exposed here.
        if 'en' in yt.captions:
            output += yt.captions['en'].generate_srt_captions()
        return output
    except Exception as e:
        logger.error(f"youtube_info failed: {e}")
        raise
136
+
137
def web_search(query: str) -> str:
    """Return the top-5 DuckDuckGo hits for `query`, one 'title — url' per line."""
    with DDGS() as ddgs:
        hits = [f"{r['title']} — {r['href']}" for r in ddgs.text(query, max_results=5)]
    return '\n'.join(hits)
143
+
144
def read_code_from_file(file_path: str) -> str:
    """Read Python source code from a file.

    Args:
        file_path: path to the file to read.

    Returns:
        The file contents, or an "Error ..." string on failure (callers
        test for the "Error" prefix).
    """
    try:
        # Explicit UTF-8 so the result doesn't depend on the platform locale.
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        return "Error: File not found."
    except Exception as e:
        return f"Error reading file: {e}"
154
+
155
def execute_python_from_file(file_path: str) -> str:
    """Read and execute Python code from a specified file.

    Returns the REPL output, or an "Error ..." string on failure.
    """
    code = read_code_from_file(file_path)
    if code.startswith("Error"):
        return code
    try:
        # BUG FIX: the original referenced a module-level `python_repl` that
        # was never defined (NameError at runtime); instantiate the imported
        # PythonREPL here instead.
        output = PythonREPL().run(code)
        return output
    except Exception as e:
        return f"Error executing code: {e}"
165
+
166
+ # --- Define toolset ---
167
+ tools = [
168
+ Tool(name='fetch_file', func=fetch_file, description='Download file by task_id,file_name'),
169
+ Tool(name='transcribe', func=transcribe, description='Transcribe a downloaded audio file'),
170
+ Tool(name='ocr', func=ocr, description='Extract text from a downloaded image'),
171
+ Tool(name='preview_table', func=preview_table, description='Show summary and first rows of a CSV/XLSX'),
172
+ Tool(name='youtube_info', func=youtube_info, description='Get info & transcript from a YouTube URL'),
173
+ Tool(name='web_search', func=web_search, description='Return top 5 search results for a query'),
174
+ Tool(name="Execute Python File",func=execute_python_from_file,description="Executes Python code from a specified file path. Input should be the full path to the Python file.",)
175
+ ]
176
+
177
+ # --- Create agent using ReAct agent style ---
178
+
179
+ base_prompt = hub.pull("langchain-ai/react-agent-template")
180
+ tool_names = ", ".join([t.name for t in tools])
181
+
182
+ agent = create_react_agent(llm, tools, base_prompt)
183
+ memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
184
+
185
+
186
+ agent_executor = AgentExecutor(
187
+ agent=agent,
188
+ tools=tools,
189
+ memory=memory,
190
+ verbose=True,
191
+ max_iterations=5,
192
+ verbose=True,
193
+ handle_parsing_errors=True,
194
+ return_only_outputs=True
195
+ )
196
+
197
+ # --- 4) GAIAAgent class returning only the FINAL ANSWER ---
198
class GAIAAgent:
    """Wrapper around the module-level AgentExecutor that returns only the
    text after 'FINAL ANSWER:'."""

    def __init__(self):
        # Both names alias the shared module-level executor.
        self.agent = self.executor = agent_executor

    def __call__(self, question: str, task_id: str = None, file_name: str = None) -> str:
        """Answer `question`; if a file is attached, hint the agent to fetch it.

        The "FILE: task_id,file_name" prefix matches the input format of the
        fetch_file tool, which the agent is expected to invoke itself.
        """
        prompt=""
        if task_id and file_name:
            prompt += f"FILE: {task_id},{file_name}\n"
        prompt += question

        # Use executor to get full dict response
        response = self.executor.invoke({"input": prompt, "instructions": GAIA_SYSTEM_PROMPT})
        print("prompt : ", prompt)
        output = response.get("output") if isinstance(response, dict) else str(response)

        # Strip everything before the GAIA answer marker, if present.
        if output and 'FINAL ANSWER:' in output:
            return output.split('FINAL ANSWER:')[-1].strip()
        return output or ""
216
+
217
if __name__ == "__main__":
    # Local smoke test only. BUG FIX: previously this ran on every import —
    # triggering an LLM call as a side effect and rebinding the module-level
    # `agent` (the ReAct agent) to a GAIAAgent instance.
    agent = GAIAAgent()
    agent("Hello how are u?", "1", None)
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import os
2
- import gradio as gr
3
- import requests
4
  import inspect
 
5
  import pandas as pd
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
@@ -10,14 +11,6 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -40,7 +33,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -76,11 +69,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
 
1
  import os
 
 
2
  import inspect
3
+ import requests
4
  import pandas as pd
5
+ import gradio as gr
6
+ from agent import GAIAAgent
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
 
11
 
12
  # --- Basic Agent Definition ---
13
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
 
14
 
15
  def run_and_submit_all( profile: gr.OAuthProfile | None):
16
  """
 
33
 
34
  # 1. Instantiate Agent ( modify this part to create your agent)
35
  try:
36
+ agent = GAIAAgent()
37
  except Exception as e:
38
  print(f"Error instantiating agent: {e}")
39
  return f"Error initializing agent: {e}", None
 
69
  for item in questions_data:
70
  task_id = item.get("task_id")
71
  question_text = item.get("question")
72
+ question_file = item.get("file_name")
73
  if not task_id or question_text is None:
74
  print(f"Skipping item with missing task_id or question: {item}")
75
  continue
76
  try:
77
+ submitted_answer = agent(question_text, task_id, question_file)
78
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
79
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
80
  except Exception as e:
auxiliary_fns.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import subprocess
4
+ import pandas as pd
5
+ from PIL import Image
6
+ from io import BytesIO
7
+ import soundfile as sf
8
+
9
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
+ IMAGE_FILES = ["png", "jpg", "tiff", "jpeg", "bmp"]
11
+ AUDIO_FILES = ["wav", "mp3", "aac", "ogg"]
12
+ TABULAR_FILES = ["csv", "xlsx"]
13
+
14
def read_audio_file(audio_bytes, file_extension):
    """
    Reads audio data from in-memory bytes.

    Args:
        audio_bytes (bytes): The audio data as bytes.
        file_extension (str): The extension of the audio file (e.g., 'wav', 'mp3').

    Returns:
        (data, samplerate) on success, or None if the bytes could not be
        decoded (errors are printed rather than raised — best-effort reader).
    """
    try:
        audio_buffer = BytesIO(audio_bytes)
        format_string = file_extension.lower()
        data, samplerate = sf.read(audio_buffer, format=format_string)
        return (data, samplerate)
    except sf.LibsndfileError:
        print(f"Error: Could not read the audio data from memory with the specified format: {file_extension}")
        return None  # was an implicit None; made explicit for clarity
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return None
33
+
34
+
35
def read_tabular_data(file_bytes, file_extension):
    """Parse CSV/XLSX content into a pandas DataFrame.

    Args:
        file_bytes: raw bytes or a seekable file-like object.
        file_extension: 'csv' or 'xlsx'; any other value returns None.
    """
    # BUG FIX: raw bytes have no .seek(), and file_handler passes raw
    # response.content — wrap bytes in BytesIO so both call styles work.
    if isinstance(file_bytes, (bytes, bytearray)):
        file_bytes = BytesIO(file_bytes)
    file_bytes.seek(0)
    if file_extension == "csv":
        return pd.read_csv(file_bytes)
    elif file_extension == "xlsx":
        return pd.read_excel(file_bytes)
    return None  # unknown extension: keep the original implicit-None contract
41
+
42
+
43
def read_image_data(file_bytes, file_extension):
    """Open image content as a PIL Image.

    Args:
        file_bytes: raw bytes or a file-like object. BUG FIX: Image.open
            needs a file-like object, but file_handler passes raw bytes —
            wrap them in BytesIO.
        file_extension: unused; kept for a uniform reader signature.
    """
    if isinstance(file_bytes, (bytes, bytearray)):
        file_bytes = BytesIO(file_bytes)
    return Image.open(file_bytes)
45
+
46
+
47
def write_and_execute_file(text):
    """Write Python source to file_to_execute.py and run it, returning stdout.

    Args:
        text: the source code, as bytes or str.

    Raises:
        subprocess.CalledProcessError: if the script exits non-zero.
    """
    import sys
    # BUG FIX: the original used an undefined `file_extension` (NameError)
    # in the output name while executing a hard-coded .py file anyway;
    # write the .py file directly and accept str as well as bytes.
    mode = "wb" if isinstance(text, (bytes, bytearray)) else "w"
    with open("file_to_execute.py", mode) as f:
        f.write(text)
    # sys.executable guarantees the same interpreter; a bare "python"
    # command may not exist on the PATH.
    result = subprocess.run(
        [sys.executable, "file_to_execute.py"],
        capture_output=True, text=True, check=True,
    )
    return result.stdout
52
+
53
+
54
def file_handler(task_id, file_name):
    """Fetch a task's auxiliary file and decode it according to extension.

    Returns:
        (file_data, ext): file_data is (audio, rate) for audio files, a
        DataFrame for tabular files, a PIL Image for images, and the raw
        (bytes, ext) pair for .py or any unrecognized extension.

    Raises:
        requests.HTTPError: on a non-2xx response from the scoring server.
    """
    # Bounded timeout so a hung server cannot stall the caller forever.
    response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
    response.raise_for_status()
    data = response.content

    ext = file_name.split(".")[-1]
    if ext in AUDIO_FILES:
        file_data = read_audio_file(data, ext)
    elif ext in TABULAR_FILES:
        # BUG FIX: called nonexistent read_tabular_file/read_image_file —
        # the helpers defined above are read_tabular_data/read_image_data.
        # Wrap bytes in BytesIO since those helpers expect file-like input.
        file_data = read_tabular_data(BytesIO(data), ext)
    elif ext in IMAGE_FILES:
        file_data = read_image_data(BytesIO(data), ext)
    else:
        # BUG FIX: only "py" was handled before and any other extension left
        # file_data unbound (UnboundLocalError); fall back to raw bytes.
        file_data = (data, ext)

    return file_data, ext