mrpe24 committed on
Commit
d75dae7
·
1 Parent(s): 7fa2f25

implemented agent with tools

Browse files
agentcourse_unit4/api/agent_eval_api.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ import requests
4
+
5
+ from agentcourse_unit4.api.answer_data import AnswerData
6
+ from agentcourse_unit4.api.question_response import to_question_response, QuestionResponse
7
+ from agentcourse_unit4.api.submit_answers_response import to_submit_answers_response, SubmitAnswersResponse
8
+
9
+
10
class AgentEvalApi:
    """HTTP client for the Agents Course Unit 4 scoring service.

    Wraps the three endpoints: fetch questions, download a task file,
    and submit answers for scoring.
    """

    def __init__(self):
        # Base endpoint of the scoring service; sub-URLs are derived from it.
        self.api_url = "https://agents-course-unit4-scoring.hf.space"
        self.questions_url = f"{self.api_url}/questions"
        self.submit_url = f"{self.api_url}/submit"
        self.files_url = f"{self.api_url}/files"

    def get_questions(self) -> List[QuestionResponse]:
        """Get complete list of filtered questions with all associated data"""
        response = requests.get(self.questions_url, timeout=15)
        response.raise_for_status()
        # object_hook converts each decoded JSON object into a QuestionResponse.
        # BUG FIX: the original used `| []`, which is a TypeError on lists
        # (JS-style `||`); `or []` gives the intended empty-list fallback.
        return response.json(object_hook=to_question_response) or []

    def download_file(self, task_id: str) -> bytes:
        """Download the file associated with the given task_id"""
        response = requests.get(f"{self.files_url}/{task_id}", timeout=30)
        response.raise_for_status()
        return response.content

    def submit_answers(self, username: str, agent_code: str, answers: List[AnswerData]) -> SubmitAnswersResponse:
        """Submit answers from an agent, calculate score, and update leaderboard on HF"""
        request_body = {
            "username": username.strip(),
            "agent_code": agent_code.strip(),
            "answers": [{"task_id": a.task_id.strip(), "submitted_answer": a.answer.strip()} for a in answers]
        }
        response = requests.post(self.submit_url, json=request_body, timeout=60)
        response.raise_for_status()
        return response.json(object_hook=to_submit_answers_response)
agentcourse_unit4/api/answer_data.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
class AnswerData:
    """A single (task_id, answer) pair prepared for submission to scoring."""

    def __init__(self, task_id: str, answer: str):
        self.task_id = task_id  # ID of the benchmark task being answered
        self.answer = answer    # the agent's final answer text

    def __repr__(self):
        # Added for debuggability when logging payloads before submission.
        return f"AnswerData(task_id={self.task_id!r}, answer={self.answer!r})"
agentcourse_unit4/api/question_response.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class QuestionResponse:
    """One question item returned by the /questions endpoint."""

    def __init__(self, task_id: str, question: str, level: str, file_name: str):
        self.task_id = task_id
        self.question = question
        self.level = level
        # Empty string when the question has no attached file.
        self.file_name = file_name

    def __repr__(self):
        return f"QuestionResponse(task_id={self.task_id!r}, level={self.level!r})"


def to_question_response(json_data) -> QuestionResponse:
    """Build a QuestionResponse from one decoded JSON object.

    Used as a json ``object_hook``, so it must tolerate missing keys.
    BUG FIX: the original bare indexing raised KeyError when the server
    omitted a field; fall back to empty strings instead.
    """
    return QuestionResponse(
        task_id=json_data.get('task_id', ''),
        question=json_data.get('question', ''),
        # The API uses a capitalized 'Level' key for this field.
        level=json_data.get('Level', ''),
        file_name=json_data.get('file_name', '')
    )
agentcourse_unit4/api/submit_answers_response.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class SubmitAnswersResponse:
    """Scoring result returned by the /submit endpoint."""

    def __init__(self, username: str, score: int, correct_count: int, total_attempted: int, message: str, timestamp: str):
        self.username = username
        self.score = score                      # overall percentage score
        self.correct_count = correct_count      # number of correct answers
        self.total_attempted = total_attempted  # number of answers submitted
        self.message = message
        self.timestamp = timestamp


def to_submit_answers_response(json_data) -> SubmitAnswersResponse:
    """Build a SubmitAnswersResponse from a decoded JSON object.

    BUG FIX: the original used ``json_data['key'] | default``, which raises
    KeyError on missing keys and TypeError for ``str | str``; use dict.get
    with the same intended defaults instead.
    """
    return SubmitAnswersResponse(
        username=json_data.get('username', '<unknown>'),
        score=json_data.get('score', 0),
        correct_count=json_data.get('correct_count', 0),
        total_attempted=json_data.get('total_attempted', 0),
        message=json_data.get('message', 'No message received.'),
        timestamp=json_data.get('timestamp', '<unknown>')
    )
agentcourse_unit4/basic_agent.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional
3
+
4
+ from smolagents import LiteLLMModel, CodeAgent, WikipediaSearchTool
5
+
6
+ from agentcourse_unit4.tools.audio_transcriber import AudioTranscriberTool
7
+ from agentcourse_unit4.tools.chess_board_recognizer import ChessBoardRecognizerTool
8
+ from agentcourse_unit4.tools.chess_predictor import ChessPredictorTool
9
+ from agentcourse_unit4.tools.csv_reader import CsvReaderTool
10
+ from agentcourse_unit4.tools.excel_reader import ExcelReaderTool
11
+ from agentcourse_unit4.tools.file_downloader import FileDownloaderTool
12
+ from agentcourse_unit4.tools.image_describer import ImageDescriberTool
13
+ from agentcourse_unit4.tools.image_text_extractor import ImageTextExtractorTool
14
+ from agentcourse_unit4.tools.pdf_reader import PdfReaderTool
15
+ from agentcourse_unit4.tools.py_code_interpreter import PyCodeInterpreterTool
16
+ from agentcourse_unit4.tools.youtube_transcriber import YoutubeTranscriberTool
17
+
18
class BasicAgent:
    """GAIA (Agents Course Unit 4) agent: a smolagents CodeAgent backed by a
    Gemini model through LiteLLM, equipped with file/media/chess tools."""

    def __init__(self):
        # LLM backend via LiteLLM; reads GOOGLE_API_KEY from the environment.
        self.model = LiteLLMModel(
            # model_id="gemini/gemini-2.0-flash",
            model_id="gemini/gemini-2.5-flash-preview-04-17",
            api_key=os.getenv("GOOGLE_API_KEY"),
            max_tokens=8196,  # NOTE(review): presumably meant 8192 — confirm
            temperature=0.5
        )
        self.basic_agent = CodeAgent(
            name="basic_agent",
            description="Basic code agent",
            tools=[
                WikipediaSearchTool(),
                FileDownloaderTool(),
                ExcelReaderTool(),
                CsvReaderTool(),
                PdfReaderTool(),
                PyCodeInterpreterTool(),
                YoutubeTranscriberTool(),
                AudioTranscriberTool(),
                ChessBoardRecognizerTool(),
                ChessPredictorTool(),
                ImageDescriberTool(),
                ImageTextExtractorTool()
            ],
            model=self.model,
            # Also expose smolagents' built-in default tools.
            add_base_tools=True,
            # Modules the generated code is allowed to import at runtime.
            additional_authorized_imports=["pandas", "numpy", "datetime", "json", "csv"],
            # Re-plan every 4 steps; cap the whole run at 5 steps.
            planning_interval=4,
            verbosity_level=1,
            max_steps=5,
        )
        print("==> Agent initialized.")

    def run(self, question: str, file_path: Optional[str] = None) -> str:
        """
        Process the incoming question and then return the answer.

        Args:
            question: The question or task
            file_path: Optional path to a file associated with the question or task

        Returns:
            The final answer to the question
        """
        print(f"==> Agent received question: {question}")

        # The prompt embeds GAIA answer-formatting rules so the model emits a
        # short exact answer instead of its reasoning.
        prompt = f"""
        Question:

        {question}

        Associated file at path:

        {file_path}

        It is required to read file content by proper tool, if associated file is provided.
        Remember that you answer to the question from GAIA benchmark. It requires short, exact and precise answer.
        Don't include: thinking, explanations, steps, reasoning, intermediate or additional text.

        Finish your answer with a number OR as few words as possible OR a comma separated list of numbers and/or strings.
        If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
        If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
        If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

        For instance, if question is "What is the capital of Spain?", respond with "Madrid".
        It is exact and expected answer.
        """
        answer = self.basic_agent.run(prompt)

        print(f"==> Agent answer: {answer}")

        return answer
agentcourse_unit4/data/segmentation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912bbbde63f435106d57c7416c11a49eb3e9cb93dfe71cb6f9bfaafc1a4e3683
3
+ size 6781485
agentcourse_unit4/data/standard.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c19a7f75312af21e9e514f008a05da5ff5624590cc5a8997c977a16d2ac459
3
+ size 114375506
agentcourse_unit4/tools/audio_transcriber.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import whisper
3
+
4
class AudioTranscriberTool(Tool):
    """smolagents tool wrapping OpenAI Whisper for local audio transcription."""

    name = "audio_transcriber"
    description = ("""
    This is a tool that get the transcription of the audio file in the form of text.
    Supported file extensions: .mp3, .wav, .flac, .ogg, .m4a.
    """)
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The file path to the audio file.",
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whisper "base" model is loaded once at construction time.
        self.model = whisper.load_model("base")

    def forward(self, file_path: str) -> str:
        # transcribe() returns a dict; only the joined transcript is needed.
        result = self.model.transcribe(file_path)
        return result['text']
agentcourse_unit4/tools/chess_board_recognizer.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ from typing import Optional
3
+
4
+ import cv2
5
+ import numpy as np
6
+ from smolagents import Tool
7
+ from ultralytics import YOLO
8
+
9
# Maps detector class names to FEN piece letters
# (lowercase = black pieces, uppercase = white pieces).
FEN_MAPPING = {
    "black-pawn": "p", "black-rook": "r", "black-knight": "n", "black-bishop": "b", "black-queen": "q",
    "black-king": "k",
    "white-pawn": "P", "white-rook": "R", "white-knight": "N", "white-bishop": "B", "white-queen": "Q",
    "white-king": "K"
}
# NOTE(review): the grid constants below are not referenced in this module —
# _get_grid_coordinate redefines identical local values; consider consolidating.
GRID_BORDER = 10  # Border size in pixels
GRID_SIZE = 204  # Effective grid size (10px to 214px)
BLOCK_SIZE = GRID_SIZE // 8  # Each block is ~25px
X_LABELS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']  # Labels for x-axis (a to h)
Y_LABELS = [8, 7, 6, 5, 4, 3, 2, 1]  # Reversed labels for y-axis (8 to 1)
20
+
21
+
22
class ChessBoardRecognizerTool(Tool):
    """smolagents tool: detects a chessboard in an image with two YOLO models
    (board segmentation + piece classification) and emits a FEN string."""

    name = "chess_board_recognizer"
    description = "Recognizes the state of chess board from image and returns the position representation in Forsyth-Edwards notation (FEN)"
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path of the chess board image file"
        },
        "is_white_turn": {
            "type": "boolean",
            "description": "Optionally white's turn on the chess board if value not provided",
            "nullable": True

        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # BUG FIX: the original "../data/*.pt" paths resolved against the
        # process working directory and only worked when run from tools/;
        # anchor the weights to this module's location instead.
        from pathlib import Path
        data_dir = Path(__file__).resolve().parent.parent / "data"
        self.model_std = YOLO(str(data_dir / "standard.pt"))
        self.model_seg = YOLO(str(data_dir / "segmentation.pt"))

    def forward(self, image_path: str, is_white_turn: Optional[bool] = None) -> str:
        """Detect pieces on the board image and return a full FEN string."""
        processed_image = self._process_image(image_path)

        if processed_image is None:
            # BUG FIX: the original fell through and returned None from a
            # str-typed tool; give the agent an explicit error message.
            return "Error: could not detect a chess board in the image."

        processed_image = cv2.resize(processed_image, (224, 224))
        height, width, _ = processed_image.shape

        results = self.model_std.predict(source=processed_image, save=False, save_txt=False, conf=0.6)

        # Board as FEN rows; "8" marks a (currently) empty square.
        board = [["8"] * 8 for _ in range(8)]

        # Map each detected piece's bounding-box center to a board square.
        for result in results[0].boxes:
            x1, y1, x2, y2 = result.xyxy[0].tolist()
            class_id = int(result.cls[0])
            class_name = self.model_std.names[class_id]

            fen_piece = FEN_MAPPING.get(class_name)
            if not fen_piece:
                continue

            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2
            pixel_x = int(center_x)
            # Flip Y-axis so the origin is bottom-left, matching rank order.
            pixel_y = int(height - center_y)

            grid_position = self._get_grid_coordinate(pixel_x, pixel_y)
            if grid_position != "Pixel outside grid bounds":
                file = ord(grid_position[0]) - ord('a')  # column index 0-7
                rank = int(grid_position[1]) - 1         # row index 0-7
                # FEN rows run from rank 8 down to rank 1, hence the flip.
                board[7 - rank][file] = fen_piece

        fen_str = "/".join(self._row_to_fen(row) for row in board)
        # BUG FIX: the original emitted "b" whenever is_white_turn was
        # provided — even when it was True. White moves unless the caller
        # explicitly passes False.
        b_or_w_turn = "b" if is_white_turn is False else "w"

        return f"{fen_str} {b_or_w_turn} - - 0 1"

    @staticmethod
    def _row_to_fen(row):
        """Compress one board row into FEN (runs of empty squares -> digit)."""
        fen_row = ""
        empty_count = 0
        for cell in row:
            if cell == "8":
                empty_count += 1
            else:
                if empty_count > 0:
                    fen_row += str(empty_count)
                    empty_count = 0
                fen_row += cell
        if empty_count > 0:
            fen_row += str(empty_count)
        return fen_row

    def _get_grid_coordinate(self, pixel_x, pixel_y):
        """
        Determine the grid coordinate of a pixel, considering a 10px border
        and a grid where bottom-left is (a, 1) and top-left is (a, 8).

        Returns "Pixel outside grid bounds" for out-of-range pixels.
        """
        border = 10          # 10px border around the playing surface
        grid_size = 204      # effective grid size (10px to 214px)
        block_size = grid_size // 8  # each square is ~25px

        x_labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']

        # The grid starts at pixel `border` on both axes.
        adjusted_x = pixel_x - border
        adjusted_y = pixel_y - border

        if adjusted_x < 0 or adjusted_y < 0 or adjusted_x >= grid_size or adjusted_y >= grid_size:
            return "Pixel outside grid bounds"

        x_index = adjusted_x // block_size
        y_index = adjusted_y // block_size

        if x_index >= len(x_labels) or y_index >= 8:
            return "Pixel outside grid bounds"

        # pixel_y already uses a bottom-left origin, so the row index maps
        # directly to rank 1..8. (The original computed 8 - (8 - y) + 1,
        # which reduces to exactly y_index + 1.)
        return f"{x_labels[x_index]}{y_index + 1}"

    def _process_image(self, image_path):
        """Segment the board out of the photo and return the cropped image,
        or None when no confident board detection is found."""
        results = self.model_seg.predict(
            source=image_path,
            conf=0.8  # Confidence threshold
        )

        segmentation_mask = None
        bbox = None

        # Keep the first detection above the confidence threshold.
        for result in results:
            if result.boxes.conf[0] >= 0.8:
                segmentation_mask = result.masks.data.cpu().numpy().astype(np.uint8)[0]
                bbox = result.boxes.xyxy[0].cpu().numpy()
                break

        if segmentation_mask is None:
            print("No segmentation mask with confidence above 0.8 found.")
            return None

        image = cv2.imread(image_path)

        # Resize segmentation mask to match the input image dimensions.
        segmentation_mask_resized = cv2.resize(segmentation_mask, (image.shape[1], image.shape[0]))

        if bbox is None:
            print("No bounding box coordinates found. Skip cropping the image")
            return None

        x1, y1, x2, y2 = bbox
        cropped_segment = image[int(y1):int(y2), int(x1):int(x2)]

        # Debug aid only: persist the crop so it can be inspected manually.
        cropped_image_path = tempfile.NamedTemporaryFile(suffix=".jpg").name
        cv2.imwrite(cropped_image_path, cropped_segment)

        print(f"Cropped segmented image saved to {cropped_image_path}")

        return cropped_segment
agentcourse_unit4/tools/chess_predictor.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from stockfish import Stockfish
3
+ import shutil
4
+
5
class ChessPredictorTool(Tool):
    """smolagents tool: feeds a FEN position to a local Stockfish engine and
    returns the engine's best move."""

    name = "chess_predictor"
    description = "Analyzes a chess state (FEN) and predicts the best move."
    inputs = {
        "fen": {
            "type": "string",
            "description": "FEN (Forsyth-Edwards notation) value",
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

        # BUG FIX: the original `which(...) or "/usr/games/stockfish"` made
        # the subsequent `if not stockfish_path` check dead code, so a
        # missing binary was only discovered when Stockfish() failed.
        # Verify the fallback path actually exists before trusting it.
        import os

        stockfish_path = shutil.which("stockfish")
        if not stockfish_path and os.path.isfile("/usr/games/stockfish"):
            stockfish_path = "/usr/games/stockfish"

        if not stockfish_path:
            raise RuntimeError("Can't find stockfish on PATH or at /usr/games/stockfish.")

        print(f"Stockfish path: {stockfish_path}")

        self.stockfish = Stockfish(
            path=stockfish_path,
            depth=10,
            parameters={"Threads": 2, "Minimum Thinking Time": 30}
        )

    def forward(self, fen: str) -> str:
        """Return the best move for the given FEN, or an error string."""
        if not self.stockfish.is_fen_valid(fen):
            return f"Error: invalid FEN notation: {fen}"

        self.stockfish.set_fen_position(fen)

        print(f"Current chessboard:\n {self.stockfish.get_board_visual()}")

        # get_best_move() returns None when the position has no legal move.
        best_move = self.stockfish.get_best_move()
        return f"{best_move}" if best_move else "No valid move found on chessboard."
42
+
43
+
agentcourse_unit4/tools/csv_reader.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from smolagents import Tool
3
+
4
class CsvReaderTool(Tool):
    """smolagents tool: loads a CSV file with pandas and returns its content
    serialized as JSON."""

    name = "csv_reader"
    description = "Extract CSV file content. Supported file extensions: .csv"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the CSV file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        try:
            df = pd.read_csv(file_path)
            # Summary statistics go to the log only; the agent receives JSON.
            print(f"Describe CSV file:\n {df.describe()}")
            return df.to_json()
        except Exception as e:
            # Failures are reported as a string so the agent can react.
            return f"Error processing CSV file: {str(e)}"
agentcourse_unit4/tools/excel_reader.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from smolagents import Tool
3
+
4
class ExcelReaderTool(Tool):
    """smolagents tool: loads an Excel workbook with pandas and returns its
    content serialized as JSON."""

    name = "excel_reader"
    description = "Extract Excel file content. Supported file extensions: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, .odt"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the Excel file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        try:
            df = pd.read_excel(file_path)
            # Summary statistics go to the log only; the agent receives JSON.
            print(f"Describe Excel file:\n {df.describe()}")
            return df.to_json()
        except Exception as e:
            # BUG FIX: the original printed the error and implicitly returned
            # None from a str-typed tool; return the error string instead,
            # consistent with CsvReaderTool/PdfReaderTool.
            return f"Error processing Excel file: {str(e)}"
agentcourse_unit4/tools/file_downloader.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ from smolagents import Tool
3
+
4
+
5
class FileDownloaderTool(Tool):
    """smolagents tool: downloads a URL to a temporary file and returns the
    local file path."""

    name = "file_downloader"
    description = "Download a file from Internet by URL provided, save it into temp dir and return file path"
    inputs = {
        "url": {
            "type": "string",
            "description": "URL to download from",
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        try:
            # urlretrieve returns (local_path, headers); only the path matters.
            file_path, _headers = urllib.request.urlretrieve(url)
            return file_path
        except Exception as e:
            # BUG FIX: the original returned None on failure despite the
            # -> str annotation; report the error as a string so the agent
            # can see why the download failed.
            print(f"Error downloading file: {str(e)}")
            return f"Error downloading file: {str(e)}"
agentcourse_unit4/tools/image_describer.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from transformers import BlipProcessor, BlipForConditionalGeneration
3
+ from PIL import Image
4
+ import torch
5
+
6
+
7
class ImageDescriberTool(Tool):
    """smolagents tool: captions an image with the BLIP image-captioning
    model (Salesforce/blip-image-captioning-large)."""

    name = "image_describer"
    description = """
    Analyzes image and provide what is represented on it.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file",
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # Use GPU when available; model and inputs must share the device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        model_name = "Salesforce/blip-image-captioning-large"
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device)

    def forward(self, image_path: str) -> str:
        try:
            # Convert to RGB so grayscale/RGBA inputs are accepted by BLIP.
            image = Image.open(image_path).convert('RGB')
            inputs = self.processor(image, return_tensors="pt").to(self.device)
            out = self.model.generate(**inputs)
            img_description = self.processor.decode(out[0], skip_special_tokens=True)
            return img_description
        except Exception as e:
            # Failures are reported as a string so the agent can react.
            return f"Error generating image description: {e}"
agentcourse_unit4/tools/image_text_extractor.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import easyocr
3
+
4
+
5
class ImageTextExtractorTool(Tool):
    """smolagents OCR tool built on EasyOCR (simplified Chinese + English)."""

    name = "image_text_extractor"
    description = """
    Multilingual OCR tool to extract key information or presented text from any image.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file",
        }
    }
    # The tool returns a list of recognized text fragments.
    output_type = "array"

    def __init__(self):
        super().__init__()
        # Reader is constructed once; OCR models are loaded at this point.
        self.reader = easyocr.Reader(['ch_sim', 'en'])

    def forward(self, image_path: str) -> list[str]:
        # detail=False returns only the text strings (no boxes/confidences).
        result = self.reader.readtext(image_path, detail=False)
        return result
agentcourse_unit4/tools/pdf_reader.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pdfminer.high_level import extract_text
2
+ from smolagents import Tool
3
+
4
class PdfReaderTool(Tool):
    """smolagents tool: extracts plain text from a PDF via pdfminer."""

    name = "pdf_reader"
    description = "Extract PDF content. Supported file extensions: .pdf"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the PDF file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        try:
            # pdfminer extracts the text of all pages in one call.
            return extract_text(file_path)
        except Exception as e:
            # Failures are reported as a string so the agent can react.
            return f"Error processing PDF file: {str(e)}"
agentcourse_unit4/tools/py_code_interpreter.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ from smolagents import Tool
4
+
5
class PyCodeInterpreterTool(Tool):
    """smolagents tool: runs a Python script in a subprocess and returns its
    combined stdout/stderr output."""

    name = "py_code_interpreter"
    description = "This is a tool that evaluates python code. It can be used to perform calculations."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The python file with code to run in interpreter",
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, file_path: str) -> str:
        # BUG FIX: the original f"python {file_path}".split() broke on paths
        # containing spaces and could resolve to a different interpreter;
        # run the file with the current interpreter via an argument list.
        cmd = [sys.executable, file_path]
        # sys.stdout.encoding is None when stdout is redirected; fall back.
        encoding = sys.stdout.encoding or "utf-8"

        try:
            # Capture stderr together with stdout; cap runtime at 60 seconds.
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=60.0)
            output_text = output.decode(encoding).strip()
        except subprocess.TimeoutExpired:
            output_text = "Error: code execution timed out."
        except subprocess.CalledProcessError as e:
            # Non-zero exit: still return the script's output to the agent.
            output_text = e.output.decode(encoding).strip()
        except subprocess.SubprocessError as ex:
            output_text = f"Error: {str(ex)}\n"

        return output_text
agentcourse_unit4/tools/youtube_transcriber.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+
3
+ from smolagents import Tool
4
+ import whisper
5
+ from pytubefix import YouTube
6
+ from pytubefix.cli import on_progress
7
+
8
class YoutubeTranscriberTool(Tool):
    """smolagents tool: downloads a YouTube video's audio with pytubefix and
    transcribes it locally with Whisper."""

    name = "youtube_transcriber"
    description = "This is a tool that get the transcription of the YouTube video in the form of text."
    inputs = {
        "url": {
            "type": "string",
            "description": "The link of any youtube video to get the transcription",
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whisper "base" model is loaded once at construction time.
        self.model = whisper.load_model("base")

    def forward(self, url: str) -> str:
        yt = YouTube(url, on_progress_callback=on_progress)
        # Audio-only stream keeps the download small; video is not needed.
        audio_stream = yt.streams.get_audio_only()
        temp_dir = tempfile.gettempdir()
        # NOTE(review): the downloaded audio file is not removed afterwards —
        # confirm whether leaving it in the temp dir is acceptable.
        out_file = audio_stream.download(output_path=temp_dir)
        result = self.model.transcribe(out_file)
        return result['text']
app.py CHANGED
@@ -1,42 +1,49 @@
1
  import os
 
 
2
  import gradio as gr
3
- import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
9
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
-
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
- username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
35
  return "Please Login to Hugging Face with the button.", None
36
 
37
- api_url = DEFAULT_API_URL
38
- questions_url = f"{api_url}/questions"
39
- submit_url = f"{api_url}/submit"
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
@@ -49,22 +56,20 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
49
  print(agent_code)
50
 
51
  # 2. Fetch Questions
52
- print(f"Fetching questions from: {questions_url}")
53
  try:
54
- response = requests.get(questions_url, timeout=15)
55
- response.raise_for_status()
56
- questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -74,40 +79,42 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
76
  for item in questions_data:
77
- task_id = item.get("task_id")
78
- question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
 
 
 
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
99
  # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
  final_status = (
106
  f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
  print("Submission successful.")
113
  results_df = pd.DataFrame(results_log)
@@ -172,10 +179,10 @@ with gr.Blocks() as demo:
172
  )
173
 
174
  if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
  # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +190,14 @@ if __name__ == "__main__":
183
  else:
184
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
 
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
  print(f"✅ SPACE_ID found: {space_id_startup}")
188
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
  else:
191
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
 
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import tempfile
3
+
4
  import gradio as gr
 
 
5
  import pandas as pd
6
+ import requests
7
+
8
+ from agentcourse_unit4.api.agent_eval_api import AgentEvalApi
9
+ from agentcourse_unit4.api.answer_data import AnswerData
10
+ from agentcourse_unit4.basic_agent import BasicAgent
11
+
12
+
13
def save_to_file(content: str) -> str:
    """
    Create temporary file, save content to it and return absolute file path.

    Args:
        content: file content to save
    Returns:
        Path to file
    """
    # BUG FIX: the original left the NamedTemporaryFile handle open (fd leak)
    # and then reopened the same path separately; write through one handle.
    # delete=False keeps the file on disk after the handle closes.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as f:
        f.write(content)
        return f.name
29
+
30
 
31
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  """
33
  Fetches all questions, runs the BasicAgent on them, submits all answers,
34
  and displays the results.
35
  """
36
  # --- Determine HF Space Runtime URL and Repo URL ---
37
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
38
 
39
  if profile:
40
+ username = f"{profile.username}"
41
  print(f"User logged in: {username}")
42
  else:
43
  print("User not logged in.")
44
  return "Please Login to Hugging Face with the button.", None
45
 
46
+ api_client = AgentEvalApi()
 
 
47
 
48
  # 1. Instantiate Agent ( modify this part to create your agent)
49
  try:
 
56
  print(agent_code)
57
 
58
  # 2. Fetch Questions
59
+ print(f"Fetching questions from: {api_client.questions_url}")
60
  try:
61
+ questions_data = api_client.get_questions()
 
 
62
  if not questions_data:
63
+ print("Fetched questions list is empty.")
64
+ return "Fetched questions list is empty or invalid format.", None
65
  print(f"Fetched {len(questions_data)} questions.")
66
  except requests.exceptions.RequestException as e:
67
  print(f"Error fetching questions: {e}")
68
  return f"Error fetching questions: {e}", None
69
  except requests.exceptions.JSONDecodeError as e:
70
+ print(f"Error decoding JSON response from questions endpoint: {e}")
71
+ print(f"Response text: {e.response.text[:500]}")
72
+ return f"Error decoding server response for questions: {e}", None
73
  except Exception as e:
74
  print(f"An unexpected error occurred fetching questions: {e}")
75
  return f"An unexpected error occurred fetching questions: {e}", None
 
79
  answers_payload = []
80
  print(f"Running agent on {len(questions_data)} questions...")
81
  for item in questions_data:
82
+ task_id = item.task_id
83
+ question_text = item.question
84
+ file_name = item.file_name
85
  if not task_id or question_text is None:
86
  print(f"Skipping item with missing task_id or question: {item}")
87
  continue
88
  try:
89
+ file_path = None
90
+ if len(file_name) > 0:
91
+ file_content = api_client.download_file(task_id)
92
+ file_path = save_to_file(str(file_content))
93
+ submitted_answer = agent.run(question_text, file_path)
94
+ answers_payload.append(AnswerData(task_id=task_id, answer=submitted_answer))
95
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
96
  except Exception as e:
97
+ print(f"Error running agent on task {task_id}: {e}")
98
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
99
 
100
  if not answers_payload:
101
  print("Agent did not produce any answers to submit.")
102
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
103
 
104
+ # 4. Prepare Submission
 
105
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
106
  print(status_update)
107
 
108
  # 5. Submit
109
+ print(f"Submitting {len(answers_payload)} answers to: {api_client.submit_url}")
110
  try:
111
+ result_data = api_client.submit_answers(username=username, agent_code=agent_code, answers=answers_payload)
 
 
112
  final_status = (
113
  f"Submission Successful!\n"
114
+ f"User: {result_data.username}\n"
115
+ f"Overall Score: {result_data.score}% "
116
+ f"({result_data.correct_count}/{result_data.total_attempted} correct)\n"
117
+ f"Message: {result_data.message}"
118
  )
119
  print("Submission successful.")
120
  results_df = pd.DataFrame(results_log)
 
179
  )
180
 
181
  if __name__ == "__main__":
182
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
183
  # Check for SPACE_HOST and SPACE_ID at startup for information
184
  space_host_startup = os.getenv("SPACE_HOST")
185
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
186
 
187
  if space_host_startup:
188
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
190
  else:
191
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
192
 
193
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
194
  print(f"✅ SPACE_ID found: {space_id_startup}")
195
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
196
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
197
  else:
198
  print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
199
 
200
+ print("-" * (60 + len(" App Starting ")) + "\n")
201
 
202
  print("Launching Gradio Interface for Basic Agent Evaluation...")
203
+ demo.launch(debug=True, share=False)
requirements.txt CHANGED
@@ -1,2 +1,20 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ smolagents
4
+ smolagents[audio]
5
+ litellm
6
+ python-dotenv
7
+ pandas
8
+ openpyxl
9
+ numpy
10
+ transformers
11
+ openai-whisper
12
+ pytubefix
13
+ ffmpeg-python
14
+ wikipedia-api
15
+ stockfish
16
+ torch
17
+ pillow
18
+ easyocr
19
+ pdfminer.six
20
+ ultralytics