0f3dy committed on
Commit
8c9ad01
·
verified ·
1 Parent(s): fe50a7a

Delete agentcourse_unit4

Browse files
agentcourse_unit4/api/agent_eval_api.py DELETED
@@ -1,47 +0,0 @@
1
- import tempfile
2
- import urllib.request
3
- from typing import List
4
-
5
- import requests
6
-
7
- from agentcourse_unit4.api.answer_data import AnswerData
8
- from agentcourse_unit4.api.question_response import to_question_response, QuestionResponse
9
- from agentcourse_unit4.api.submit_answers_response import to_submit_answers_response, SubmitAnswersResponse
10
-
11
-
12
class AgentEvalApi:
    """HTTP client for the agents-course unit 4 scoring service on HF Spaces."""

    def __init__(self):
        self.api_url = "https://agents-course-unit4-scoring.hf.space"
        self.questions_url = f"{self.api_url}/questions"
        self.submit_url = f"{self.api_url}/submit"
        self.files_url = f"{self.api_url}/files"

    def get_questions(self) -> List[QuestionResponse]:
        """Get complete list of filtered questions with all associated data.

        Raises requests.HTTPError on a non-2xx response.
        """
        response = requests.get(self.questions_url, timeout=15)
        response.raise_for_status()
        # Response.json() forwards kwargs to json.loads, so object_hook turns
        # every JSON object into a QuestionResponse.
        return response.json(object_hook=to_question_response)

    def download_file(self, task_id: str, file_name: str) -> str:
        """Download the file associated with the given task_id to temp dir and return path to file.

        Returns None on failure (best-effort: callers treat a missing file as
        "question has no attachment").
        """
        file_path = None

        try:
            path_to_file = f"{tempfile.gettempdir()}/{file_name}"
            # Use requests (consistent with the rest of this client) with a
            # timeout; the original urllib.request.urlretrieve call had no
            # timeout and could hang the whole agent run on a stalled server.
            response = requests.get(f"{self.files_url}/{task_id}", timeout=60)
            response.raise_for_status()
            with open(path_to_file, "wb") as out_file:
                out_file.write(response.content)
            file_path = path_to_file
        except Exception as e:
            print(f"Error downloading file: {str(e)}")

        return file_path

    def submit_answers(self, username: str, agent_code: str, answers: List[AnswerData]) -> SubmitAnswersResponse:
        """Submit answers from an agent, calculate score, and update leaderboard on HF.

        Raises requests.HTTPError on a non-2xx response.
        """
        request_body = {
            "username": username.strip(),
            "agent_code": agent_code,
            "answers": [{"task_id": a.task_id, "submitted_answer": a.answer} for a in answers]
        }
        response = requests.post(self.submit_url, json=request_body, timeout=60)
        response.raise_for_status()
        return response.json(object_hook=to_submit_answers_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/api/answer_data.py DELETED
@@ -1,7 +0,0 @@
1
class AnswerData:
    """Pairs a GAIA task identifier with the agent's submitted answer."""

    def __init__(self, task_id: str, answer: str):
        self.task_id = task_id  # identifier of the task being answered
        self.answer = answer    # final answer text for that task

    def __repr__(self):
        return f"AnswerData(task_id={self.task_id}, answer='{self.answer}')"
 
 
 
 
 
 
 
 
agentcourse_unit4/api/question_response.py DELETED
@@ -1,15 +0,0 @@
1
class QuestionResponse:
    """One question entry as returned by the scoring API's /questions endpoint."""

    def __init__(self, task_id: str, question: str, level: str, file_name: str):
        self.task_id = task_id      # unique task identifier
        self.question = question    # question text
        self.level = level          # GAIA difficulty level
        self.file_name = file_name  # attachment name ('' when none)


def to_question_response(json_data) -> QuestionResponse:
    """json.loads object_hook converting a raw JSON dict into a QuestionResponse."""
    return QuestionResponse(
        json_data['task_id'],
        json_data['question'],
        json_data['Level'],  # note: the API capitalizes this one key
        json_data['file_name'],
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/api/submit_answers_response.py DELETED
@@ -1,20 +0,0 @@
1
-
2
class SubmitAnswersResponse:
    """Parsed result of the scoring API's /submit endpoint."""

    def __init__(self, username: str, score: float, correct_count: int, total_attempted: int, message: str, timestamp: str):
        self.username = username                  # leaderboard user name
        self.score = score                        # percentage score
        self.correct_count = correct_count        # number of correct answers
        self.total_attempted = total_attempted    # number of answers submitted
        self.message = message                    # human-readable status message
        self.timestamp = timestamp                # server-side submission time


def to_submit_answers_response(json_data) -> SubmitAnswersResponse:
    """json.loads object_hook building a SubmitAnswersResponse from the raw dict.

    Uses .get() so a missing key degrades to the same placeholder as a
    null/empty value; the original indexed the dict directly and raised
    KeyError on partial responses.
    """
    print(f"Original Response:\n {json_data}")
    return SubmitAnswersResponse(
        username=json_data.get('username') or '<unknown>',
        score=json_data.get('score') or 0.0,
        correct_count=json_data.get('correct_count') or 0,
        total_attempted=json_data.get('total_attempted') or 0,
        message=json_data.get('message') or 'No message received.',
        timestamp=json_data.get('timestamp') or '<unknown>'
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/basic_agent.py DELETED
@@ -1,87 +0,0 @@
1
- import os
2
- from typing import Optional
3
-
4
- from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool
5
-
6
- from agentcourse_unit4.tools.audio_transcriber import AudioTranscriberTool
7
- from agentcourse_unit4.tools.chess_board_recognizer import ChessBoardRecognizerTool
8
- from agentcourse_unit4.tools.chess_predictor import ChessPredictorTool
9
- from agentcourse_unit4.tools.csv_reader import CsvReaderTool
10
- from agentcourse_unit4.tools.excel_reader import ExcelReaderTool
11
- from agentcourse_unit4.tools.file_downloader import FileDownloaderTool
12
- from agentcourse_unit4.tools.image_describer import ImageDescriberTool
13
- from agentcourse_unit4.tools.image_text_extractor import ImageTextExtractorTool
14
- from agentcourse_unit4.tools.pdf_reader import PdfReaderTool
15
- from agentcourse_unit4.tools.py_code_interpreter import PyCodeInterpreterTool
16
- from agentcourse_unit4.tools.youtube_transcriber import YoutubeTranscriberTool
17
-
18
-
19
class BasicAgent:
    """smolagents CodeAgent wired with the tool set used for the GAIA (unit 4) benchmark."""

    def __init__(self):
        # Gemini via LiteLLM; the API key comes from the environment.
        self.model = LiteLLMModel(
            # model_id="gemini/gemini-2.0-flash",
            model_id="gemini/gemini-2.5-flash-preview-04-17",
            api_key=os.getenv("GOOGLE_API_KEY"),
            max_tokens=8196,
            temperature=0.9
        )
        # One tool per capability the benchmark questions may require.
        toolbox = [
            PythonInterpreterTool(),
            DuckDuckGoSearchTool(max_results=5),
            VisitWebpageTool(max_output_length=1_000_000),
            FileDownloaderTool(),
            ExcelReaderTool(),
            CsvReaderTool(),
            PdfReaderTool(),
            PyCodeInterpreterTool(),
            YoutubeTranscriberTool(),
            AudioTranscriberTool(),
            ChessBoardRecognizerTool(),
            ChessPredictorTool(),
            ImageDescriberTool(),
            ImageTextExtractorTool()
        ]
        self.basic_agent = CodeAgent(
            name="basic_agent",
            description="Basic code agent",
            tools=toolbox,
            model=self.model,
            add_base_tools=False,
            additional_authorized_imports=["pandas", "numpy", "datetime", "json", "csv"],
            planning_interval=None,
            verbosity_level=1,
            max_steps=5,
            max_print_outputs_length=1_000_000
        )
        print("==> Agent initialized.")

    def run(self, question: str, file_path: Optional[str] = None) -> str:
        """
        Process the incoming question and then return the answer.

        Args:
            question: The question or task
            file_path: Optional path to a file associated with the question or task

        Returns:
            The final answer to the question
        """

        # Only mention an attachment when one actually exists.
        associated_file_prompt = f"\nAssociated file at path: {file_path}\n\nReading file content by proper tool is mandatory." if file_path else ''

        prompt = f"""
        Question:
        \"{question}\"
        {associated_file_prompt}

        Remember that you answer to the question from GAIA benchmark. It requires short, exact and precise answer.
        Don't include: thinking, explanations, steps, reasoning, intermediate or additional text.

        Finish your answer with a number OR as few words as possible OR a comma separated list of numbers and/or strings.
        If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
        If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
        If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

        For instance, if question is "What is the capital of Spain?", respond with "Madrid".
        It is exact and expected answer.
        """
        return self.basic_agent.run(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/data/segmentation.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:912bbbde63f435106d57c7416c11a49eb3e9cb93dfe71cb6f9bfaafc1a4e3683
3
- size 6781485
 
 
 
 
agentcourse_unit4/data/standard.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2c19a7f75312af21e9e514f008a05da5ff5624590cc5a8997c977a16d2ac459
3
- size 114375506
 
 
 
 
agentcourse_unit4/tools/audio_transcriber.py DELETED
@@ -1,24 +0,0 @@
1
- from smolagents import Tool
2
- import whisper
3
-
4
- class AudioTranscriberTool(Tool):
5
- name = "audio_transcriber"
6
- description = ("""
7
- This is a tool that get the transcription of the audio file in the form of text.
8
- Supported file extensions: .mp3, .wav, .flac, .ogg, .m4a.
9
- """)
10
- inputs = {
11
- "file_path": {
12
- "type": "string",
13
- "description": "The file path to the audio file.",
14
- }
15
- }
16
- output_type = "string"
17
-
18
- def __init__(self, *args, **kwargs):
19
- super().__init__(*args, **kwargs)
20
- self.model = whisper.load_model("base")
21
-
22
- def forward(self, file_path: str) -> str:
23
- result = self.model.transcribe(file_path)
24
- return result['text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/chess_board_recognizer.py DELETED
@@ -1,187 +0,0 @@
1
- import os
2
- import tempfile
3
- from typing import Optional
4
-
5
- import cv2
6
- import numpy as np
7
- from smolagents import Tool
8
- from ultralytics import YOLO
9
-
10
# Detector class name -> FEN piece letter (lowercase = black, uppercase = white).
FEN_MAPPING = {
    "black-pawn": "p", "black-rook": "r", "black-knight": "n", "black-bishop": "b", "black-queen": "q",
    "black-king": "k",
    "white-pawn": "P", "white-rook": "R", "white-knight": "N", "white-bishop": "B", "white-queen": "Q",
    "white-king": "K"
}
GRID_BORDER = 10  # Border size in pixels
GRID_SIZE = 204  # Effective grid size (10px to 214px)
BLOCK_SIZE = GRID_SIZE // 8  # Each block is ~25px
X_LABELS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']  # Labels for x-axis (a to h)
Y_LABELS = [8, 7, 6, 5, 4, 3, 2, 1]  # Reversed labels for y-axis (8 to 1)


class ChessBoardRecognizerTool(Tool):
    name = "chess_board_recognizer"
    description = "Recognizes the state of chess board from image and returns the position representation in Forsyth-Edwards notation (FEN)"
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path of the chess board image file"
        },
        "is_white_turn": {
            "type": "boolean",
            "description": "Optionally white's turn on the chess board if value not provided",
            "nullable": True

        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

        # Model weights live in <package>/data, one level above this tools package.
        parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

        self.model_std = YOLO(f"{parent_dir}/data/standard.pt")  # piece detector
        self.model_seg = YOLO(f"{parent_dir}/data/segmentation.pt")  # board segmentation

    def forward(self, image_path: str, is_white_turn: Optional[bool] = None) -> str:
        """Detect the pieces on the photographed board and return a FEN string.

        Returns None when the board cannot be segmented out of the photo
        (original behavior preserved: callers treat None as "no board found").
        """
        processed_image = self._process_image(image_path)

        if processed_image is not None:
            processed_image = cv2.resize(processed_image, (224, 224))
            height, width, _ = processed_image.shape

            results = self.model_std.predict(source=processed_image, save=False, save_txt=False, conf=0.6)

            # Board for FEN generation; "8" marks an empty square and is
            # collapsed by the run-length encoding below.
            board = [["8"] * 8 for _ in range(8)]

            # Map each detection to a square and place its FEN letter.
            for detection in results[0].boxes:
                x1, y1, x2, y2 = detection.xyxy[0].tolist()
                class_name = self.model_std.names[int(detection.cls[0])]

                fen_piece = FEN_MAPPING.get(class_name, None)
                if not fen_piece:
                    continue

                # Bounding-box center, with Y flipped so the origin is the
                # board's bottom-left corner.
                pixel_x = int((x1 + x2) / 2)
                pixel_y = int(height - (y1 + y2) / 2)

                grid_position = self._get_grid_coordinate(pixel_x, pixel_y)

                if grid_position != "Pixel outside grid bounds":
                    file = ord(grid_position[0]) - ord('a')  # column index (0-7)
                    rank = int(grid_position[1]) - 1  # row index (0-7)

                    board[7 - rank][file] = fen_piece  # FEN lists rank 8 first

            # Run-length encode each rank into its FEN row.
            fen_rows = []
            for row in board:
                fen_row = ""
                empty_count = 0
                for cell in row:
                    if cell == "8":
                        empty_count += 1
                    else:
                        if empty_count > 0:
                            fen_row += str(empty_count)
                            empty_count = 0
                        fen_row += cell
                if empty_count > 0:
                    fen_row += str(empty_count)
                fen_rows.append(fen_row)

            fen_str = "/".join(fen_rows)
            # Fix: the original returned "b" whenever is_white_turn was
            # provided, even when it was True. White is to move when the flag
            # is True or omitted; black only when it is explicitly False.
            b_or_w_turn = "b" if is_white_turn is False else "w"

            return f"{fen_str} {b_or_w_turn} - - 0 1"

    def _get_grid_coordinate(self, pixel_x, pixel_y):
        """
        Map a pixel position (bottom-left origin) on the 224x224 board image
        to a square label like 'e4'.

        The playable area is GRID_SIZE x GRID_SIZE, inset GRID_BORDER pixels
        on each side; bottom-left square is 'a1'. Uses the module-level grid
        constants (the original shadowed them with identical locals).
        """
        adjusted_x = pixel_x - GRID_BORDER
        adjusted_y = pixel_y - GRID_BORDER

        # Reject pixels that fall in the border or outside the board.
        if adjusted_x < 0 or adjusted_y < 0 or adjusted_x >= GRID_SIZE or adjusted_y >= GRID_SIZE:
            return "Pixel outside grid bounds"

        x_index = adjusted_x // BLOCK_SIZE  # column index (0-7)
        y_index = adjusted_y // BLOCK_SIZE  # row index (0-7)

        # Guard against the rounding slack at the far edge (BLOCK_SIZE*8 < GRID_SIZE).
        if x_index >= len(X_LABELS) or y_index >= len(Y_LABELS):
            return "Pixel outside grid bounds"

        # Y_LABELS[y_index] equals 8 - y_index, so the original double flip
        # (8 - Y_LABELS[y_index] + 1) reduces to y_index + 1.
        return f"{X_LABELS[x_index]}{y_index + 1}"

    def _process_image(self, image_path):
        """Segment the chess board out of the photo; return the cropped board image or None."""
        results = self.model_seg.predict(
            source=image_path,
            conf=0.8  # Confidence threshold
        )

        segmentation_mask = None
        bbox = None

        # Take the first sufficiently confident segmentation result.
        for result in results:
            if result.boxes.conf[0] >= 0.8:
                segmentation_mask = result.masks.data.cpu().numpy().astype(np.uint8)[0]
                bbox = result.boxes.xyxy[0].cpu().numpy()
                break

        if segmentation_mask is None:
            print("No segmentation mask with confidence above 0.8 found.")
            return None

        image = cv2.imread(image_path)

        # NOTE(review): the resized mask is computed but never used below -
        # the crop relies on the bounding box only; kept for parity with the
        # original behavior.
        segmentation_mask_resized = cv2.resize(segmentation_mask, (image.shape[1], image.shape[0]))

        if bbox is None:
            print("No bounding box coordinates found. Skip cropping the image")
            return None

        x1, y1, x2, y2 = bbox
        cropped_segment = image[int(y1):int(y2), int(x1):int(x2)]

        # Save a debug copy of the crop to a temp file.
        cropped_image_path = tempfile.NamedTemporaryFile(suffix=".jpg").name
        cv2.imwrite(cropped_image_path, cropped_segment)

        print(f"Cropped segmented image saved to {cropped_image_path}")

        return cropped_segment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/chess_predictor.py DELETED
@@ -1,43 +0,0 @@
1
- from smolagents import Tool
2
- from stockfish import Stockfish
3
- import shutil
4
-
5
class ChessPredictorTool(Tool):
    name = "chess_predictor"
    description = "Analyzes a chess state (FEN) and predicts the best move."
    inputs = {
        "fen": {
            "type": "string",
            "description": "FEN (Forsyth-Edwards notation) value",
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

        # Prefer stockfish from PATH, falling back to the Debian/Ubuntu
        # install location. shutil.which() on an absolute path verifies the
        # binary exists and is executable, so the RuntimeError below is
        # reachable - the original `which(...) or "/usr/games/stockfish"`
        # made that check dead code.
        stockfish_path = shutil.which("stockfish") or shutil.which("/usr/games/stockfish")

        if not stockfish_path:
            raise RuntimeError("Can't find stockfish on PATH or at /usr/games/stockfish.")

        print(f"Stockfish path: {stockfish_path}")

        self.stockfish = Stockfish(
            path=stockfish_path,
            depth=10,
            parameters={"Threads": 2, "Minimum Thinking Time": 30}
        )

    def forward(self, fen: str) -> str:
        """Return Stockfish's best move for the position, or an error string."""
        if not self.stockfish.is_fen_valid(fen):
            return f"Error: invalid FEN notation: {fen}"

        self.stockfish.set_fen_position(fen)

        print(f"Current chessboard:\n {self.stockfish.get_board_visual()}")

        best_move = self.stockfish.get_best_move()
        return f"{best_move}" if best_move else "No valid move found on chessboard."
42
-
43
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/csv_reader.py DELETED
@@ -1,21 +0,0 @@
1
- import pandas as pd
2
- from smolagents import Tool
3
-
4
class CsvReaderTool(Tool):
    name = "csv_reader"
    description = "Extract CSV file content. Supported file extensions: .csv"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the CSV file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Read the CSV at file_path and return its content as JSON, or an error message."""
        try:
            frame = pd.read_csv(file_path)
            # Log a quick summary for debugging agent runs.
            print(f"Describe CSV file:\n {frame.describe()}")
            return frame.to_json()
        except Exception as err:
            return f"Error processing CSV file: {str(err)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/excel_reader.py DELETED
@@ -1,21 +0,0 @@
1
- import pandas as pd
2
- from smolagents import Tool
3
-
4
class ExcelReaderTool(Tool):
    name = "excel_reader"
    description = "Extract Excel file content. Supported file extensions: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, .odt"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the Excel file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Read the Excel file at file_path and return its content as JSON, or an error message."""
        try:
            df = pd.read_excel(file_path)
            # Log a quick summary for debugging agent runs.
            print(f"Describe Excel file:\n {df.describe()}")
            return df.to_json()
        except Exception as e:
            # Return the error (the original printed it and implicitly
            # returned None), honoring the declared -> str contract and
            # matching CsvReaderTool's error handling.
            return f"Error: processing Excel file: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/file_downloader.py DELETED
@@ -1,25 +0,0 @@
1
- import urllib.request
2
- from smolagents import Tool
3
-
4
-
5
class FileDownloaderTool(Tool):
    name = "file_downloader"
    description = "Download a file from Internet by URL provided, save it into temp dir and return file path"
    inputs = {
        "url": {
            "type": "string",
            "description": "URL to download from",
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch url into a temporary file; return its path, or None on failure."""
        try:
            downloaded_path, _headers = urllib.request.urlretrieve(url)
        except Exception as e:
            # Best-effort: report and let the agent continue without the file.
            print(f"Error downloading file: {str(e)}")
            return None
        return downloaded_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/image_describer.py DELETED
@@ -1,37 +0,0 @@
1
- from smolagents import Tool
2
- from transformers import BlipProcessor, BlipForConditionalGeneration
3
- from PIL import Image
4
- import torch
5
-
6
-
7
class ImageDescriberTool(Tool):
    name = "image_describer"
    description = """
    Analyzes image and provide what is represented on it.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file",
        }
    }
    output_type = "string"

    def __init__(self):

        super().__init__()
        # Run BLIP on GPU when available, otherwise on CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        model_name = "Salesforce/blip-image-captioning-large"
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device)

    def forward(self, image_path: str) -> str:
        """Caption the image at image_path; return the caption or an error message."""
        try:
            rgb_image = Image.open(image_path).convert('RGB')
            model_inputs = self.processor(rgb_image, return_tensors="pt").to(self.device)
            generated = self.model.generate(**model_inputs)
            return self.processor.decode(generated[0], skip_special_tokens=True)
        except Exception as e:
            return f"Error generating image description: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/image_text_extractor.py DELETED
@@ -1,25 +0,0 @@
1
- from smolagents import Tool
2
- import easyocr
3
-
4
-
5
class ImageTextExtractorTool(Tool):
    name = "image_text_extractor"
    description = """
    Multilingual OCR tool to extract key information or presented text from any image.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file",
        }
    }
    output_type = "array"

    def __init__(self):
        super().__init__()
        # Simplified-Chinese + English reader, loaded once at construction.
        self.reader = easyocr.Reader(['ch_sim', 'en'])

    def forward(self, image_path: str) -> list[str]:
        """Run OCR over the image and return the detected text fragments."""
        return self.reader.readtext(image_path, detail=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/pdf_reader.py DELETED
@@ -1,19 +0,0 @@
1
- from pdfminer.high_level import extract_text
2
- from smolagents import Tool
3
-
4
class PdfReaderTool(Tool):
    name = "pdf_reader"
    description = "Extract PDF content. Supported file extensions: .pdf"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the PDF file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Return the PDF's extracted text, or an error message on failure."""
        try:
            pdf_text = extract_text(file_path)
        except Exception as e:
            return f"Error processing PDF file: {str(e)}"
        return pdf_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/py_code_interpreter.py DELETED
@@ -1,36 +0,0 @@
1
- import subprocess
2
- import sys
3
- from smolagents import Tool
4
-
5
class PyCodeInterpreterTool(Tool):
    name = "py_code_interpreter"
    description = """
    Executes file with python code.
    Strongly use only for answering to the questions from GAIA benchmark.
    Use default python_interpreter for other cases.
    """
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The file with python code to execute.",
        }
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Run the given script in a subprocess and return its combined
        stdout/stderr, or an error message on timeout/failure.
        """
        # Use the running interpreter and pass the path as one argv element:
        # the original f"python {file_path}".split() broke on paths containing
        # spaces and could invoke a different `python` found on PATH.
        cmd = [sys.executable, file_path]

        try:
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=60.0)
            output_text = output.decode(sys.stdout.encoding).strip()
        except subprocess.TimeoutExpired:
            output_text = "Error: code execution timed out."
        except subprocess.CalledProcessError as e:
            # Non-zero exit: return whatever the script printed.
            output_text = e.output.decode(sys.stdout.encoding).strip()
        except subprocess.SubprocessError as ex:
            output_text = f"Error: {str(ex)}\n"

        return output_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentcourse_unit4/tools/youtube_transcriber.py DELETED
@@ -1,30 +0,0 @@
1
- import tempfile
2
-
3
- from smolagents import Tool
4
- import whisper
5
- from pytubefix import YouTube
6
- from pytubefix.cli import on_progress
7
-
8
class YoutubeTranscriberTool(Tool):
    name = "youtube_transcriber"
    description = "This is a tool that get the transcription of the YouTube video in the form of text."
    inputs = {
        "url": {
            "type": "string",
            "description": "The link of any youtube video to get the transcription",
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Load Whisper once at construction so repeated transcriptions are cheap.
        self.model = whisper.load_model("base")


    def forward(self, url: str) -> str:
        """Download the video's audio track and return Whisper's transcription."""
        video = YouTube(url, on_progress_callback=on_progress)
        audio_only = video.streams.get_audio_only()
        audio_path = audio_only.download(output_path=tempfile.gettempdir())
        return self.model.transcribe(audio_path)['text']