0f3dy committed on
Commit
fe50a7a
·
verified ·
1 Parent(s): 9f57eb7

Upload 24 files

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .idea/
2
+ *.iws
3
+ *.iml
4
+ *.ipr
5
+ out/
6
+ gen
7
+
8
+ __pycache__
9
+ *.py[cod]
10
+ *.log
11
+
12
+ # Generated by MacOS
13
+ .DS_Store
README.md CHANGED
@@ -1,15 +1,16 @@
1
- ---
2
- title: Template Final Assignment
3
- emoji: 🕵🏻‍♂️
4
- colorFrom: indigo
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.25.2
8
- app_file: app.py
9
- pinned: false
10
- hf_oauth: true
11
- # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
12
- hf_oauth_expiration_minutes: 480
13
- ---
14
-
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Agent Course - Unit4
3
+ emoji: 🕵🏻‍♂️
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.25.2
8
+ app_file: app.py
9
+ pinned: false
10
+ hf_oauth: true
11
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
12
+ hf_oauth_expiration_minutes: 480
13
+ short_description: agent-course.unit4 practical task
14
+ ---
15
+
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agentcourse_unit4/api/agent_eval_api.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import urllib.request
3
+ from typing import List
4
+
5
+ import requests
6
+
7
+ from agentcourse_unit4.api.answer_data import AnswerData
8
+ from agentcourse_unit4.api.question_response import to_question_response, QuestionResponse
9
+ from agentcourse_unit4.api.submit_answers_response import to_submit_answers_response, SubmitAnswersResponse
10
+
11
+
12
class AgentEvalApi:
    """Thin HTTP client for the agents-course Unit 4 scoring service.

    Wraps the three endpoints used by the app: fetching questions,
    downloading per-task files, and submitting answers.
    """

    def __init__(self):
        # Base URL of the public scoring Space; the endpoints hang off it.
        self.api_url = "https://agents-course-unit4-scoring.hf.space"
        self.questions_url = f"{self.api_url}/questions"
        self.submit_url = f"{self.api_url}/submit"
        self.files_url = f"{self.api_url}/files"

    def get_questions(self) -> List[QuestionResponse]:
        """Get complete list of filtered questions with all associated data.

        Raises:
            requests.HTTPError: if the server answers with a non-2xx status.
        """
        response = requests.get(self.questions_url, timeout=15)
        response.raise_for_status()
        # object_hook converts each decoded JSON object into a
        # QuestionResponse, so the JSON array becomes List[QuestionResponse].
        return response.json(object_hook=to_question_response)

    def download_file(self, task_id: str, file_name: str) -> str:
        """Download the file associated with the given task_id to temp dir and return path to file.

        Returns None when the download fails (the error is printed, not raised).
        NOTE(review): file_name comes from the server; presumably it is a bare
        name — if it ever contained path separators it could escape the temp
        dir. Confirm against the API contract.
        """
        file_path = None

        try:
            path_to_file = f"{tempfile.gettempdir()}/{file_name}"
            # urlretrieve returns (local_path, headers); keep only the path.
            result = urllib.request.urlretrieve(f"{self.files_url}/{task_id}", filename=path_to_file)
            file_path = result[0]
        except Exception as e:
            print(f"Error downloading file: {str(e)}")

        return file_path

    def submit_answers(self, username: str, agent_code: str, answers: List[AnswerData]) -> SubmitAnswersResponse:
        """Submit answers from an agent, calculate score, and update leaderboard on HF.

        Raises:
            requests.HTTPError: if the server answers with a non-2xx status.
        """
        request_body = {
            "username": username.strip(),
            "agent_code": agent_code,
            "answers": [{"task_id": a.task_id, "submitted_answer": a.answer} for a in answers]
        }
        response = requests.post(self.submit_url, json=request_body, timeout=60)
        response.raise_for_status()
        return response.json(object_hook=to_submit_answers_response)
agentcourse_unit4/api/answer_data.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
class AnswerData:
    """Value object pairing a task id with the agent's submitted answer."""

    def __init__(self, task_id: str, answer: str):
        # Both fields are plain strings supplied by the caller.
        self.task_id = task_id
        self.answer = answer

    def __repr__(self):
        # Keep the exact historical format: answer is quoted, task_id is not.
        return "AnswerData(task_id={}, answer='{}')".format(self.task_id, self.answer)
agentcourse_unit4/api/question_response.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class QuestionResponse:
    """One question record as returned by the scoring API."""

    def __init__(self, task_id: str, question: str, level: str, file_name: str):
        # Plain value object; attributes mirror the API payload fields.
        self.task_id = task_id
        self.question = question
        self.level = level
        self.file_name = file_name


def to_question_response(json_data) -> QuestionResponse:
    """Map one decoded JSON object onto a QuestionResponse.

    Note: the API spells the difficulty key with a capital L ("Level");
    all other keys are lowercase.
    """
    fields = (
        json_data['task_id'],
        json_data['question'],
        json_data['Level'],
        json_data['file_name'],
    )
    return QuestionResponse(*fields)
agentcourse_unit4/api/submit_answers_response.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
class SubmitAnswersResponse:
    """Result of a submission: score plus bookkeeping fields from the API."""

    def __init__(self, username: str, score: float, correct_count: int, total_attempted: int, message: str, timestamp: str):
        self.username = username
        self.score = score
        self.correct_count = correct_count
        self.total_attempted = total_attempted
        self.message = message
        self.timestamp = timestamp


def to_submit_answers_response(json_data) -> SubmitAnswersResponse:
    """Map one decoded JSON object onto a SubmitAnswersResponse.

    Uses dict.get so a missing or null field falls back to a sensible
    default instead of raising KeyError — the original direct indexing
    defeated the intent of the `or <default>` fallbacks.
    """
    print(f"Original Response:\n {json_data}")
    return SubmitAnswersResponse(
        username=json_data.get('username') or '<unknown>',
        score=json_data.get('score') or 0.0,
        correct_count=json_data.get('correct_count') or 0,
        total_attempted=json_data.get('total_attempted') or 0,
        message=json_data.get('message') or 'No message received.',
        timestamp=json_data.get('timestamp') or '<unknown>'
    )
agentcourse_unit4/basic_agent.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional
3
+
4
+ from smolagents import LiteLLMModel, CodeAgent, DuckDuckGoSearchTool, PythonInterpreterTool, VisitWebpageTool
5
+
6
+ from agentcourse_unit4.tools.audio_transcriber import AudioTranscriberTool
7
+ from agentcourse_unit4.tools.chess_board_recognizer import ChessBoardRecognizerTool
8
+ from agentcourse_unit4.tools.chess_predictor import ChessPredictorTool
9
+ from agentcourse_unit4.tools.csv_reader import CsvReaderTool
10
+ from agentcourse_unit4.tools.excel_reader import ExcelReaderTool
11
+ from agentcourse_unit4.tools.file_downloader import FileDownloaderTool
12
+ from agentcourse_unit4.tools.image_describer import ImageDescriberTool
13
+ from agentcourse_unit4.tools.image_text_extractor import ImageTextExtractorTool
14
+ from agentcourse_unit4.tools.pdf_reader import PdfReaderTool
15
+ from agentcourse_unit4.tools.py_code_interpreter import PyCodeInterpreterTool
16
+ from agentcourse_unit4.tools.youtube_transcriber import YoutubeTranscriberTool
17
+
18
+
19
class BasicAgent:
    """smolagents CodeAgent configured for GAIA Unit 4 questions.

    Wires a Gemini model (via LiteLLM) to the full tool set: web search,
    file readers, transcription, OCR, and chess helpers.
    """

    def __init__(self):
        # Gemini through LiteLLM; the API key is read from the environment.
        self.model = LiteLLMModel(
            # model_id="gemini/gemini-2.0-flash",
            model_id="gemini/gemini-2.5-flash-preview-04-17",
            api_key=os.getenv("GOOGLE_API_KEY"),
            max_tokens=8196,
            temperature=0.9
        )
        self.basic_agent = CodeAgent(
            name="basic_agent",
            description="Basic code agent",
            tools=[
                PythonInterpreterTool(),
                DuckDuckGoSearchTool(max_results=5),
                VisitWebpageTool(max_output_length=1_000_000),
                FileDownloaderTool(),
                ExcelReaderTool(),
                CsvReaderTool(),
                PdfReaderTool(),
                PyCodeInterpreterTool(),
                YoutubeTranscriberTool(),
                AudioTranscriberTool(),
                ChessBoardRecognizerTool(),
                ChessPredictorTool(),
                ImageDescriberTool(),
                ImageTextExtractorTool()
            ],
            model=self.model,
            add_base_tools=False,  # only the explicit tool list above
            additional_authorized_imports=["pandas", "numpy", "datetime", "json", "csv"],
            planning_interval=None,
            verbosity_level=1,
            max_steps=5,  # keep runs short; each step costs model tokens
            max_print_outputs_length=1_000_000
        )
        print("==> Agent initialized.")

    def run(self, question: str, file_path: Optional[str] = None) -> str:
        """
        Process the incoming question and then return the answer.

        Args:
            question: The question or task
            file_path: Optional path to a file associated with the question or task

        Returns:
            The final answer to the question
        """

        # Only mention the file when one exists; the wording forces the agent
        # to open it with a tool instead of guessing at its contents.
        associated_file_prompt = f"\nAssociated file at path: {file_path}\n\nReading file content by proper tool is mandatory." if file_path else ''

        # GAIA scoring is exact-match, so the prompt pushes hard for a terse,
        # precisely formatted final answer with no reasoning text.
        prompt = f"""
        Question:
        \"{question}\"
        {associated_file_prompt}

        Remember that you answer to the question from GAIA benchmark. It requires short, exact and precise answer.
        Don't include: thinking, explanations, steps, reasoning, intermediate or additional text.

        Finish your answer with a number OR as few words as possible OR a comma separated list of numbers and/or strings.
        If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
        If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
        If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

        For instance, if question is "What is the capital of Spain?", respond with "Madrid".
        It is exact and expected answer.
        """
        return self.basic_agent.run(prompt)
agentcourse_unit4/data/segmentation.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912bbbde63f435106d57c7416c11a49eb3e9cb93dfe71cb6f9bfaafc1a4e3683
3
+ size 6781485
agentcourse_unit4/data/standard.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c19a7f75312af21e9e514f008a05da5ff5624590cc5a8997c977a16d2ac459
3
+ size 114375506
agentcourse_unit4/tools/audio_transcriber.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import whisper
3
+
4
class AudioTranscriberTool(Tool):
    """smolagents Tool: speech-to-text for local audio files via Whisper."""

    name = "audio_transcriber"
    description = ("""
    This is a tool that get the transcription of the audio file in the form of text.
    Supported file extensions: .mp3, .wav, .flac, .ogg, .m4a.
    """)
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The file path to the audio file.",
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whisper "base": small download, CPU-friendly; loaded once per tool.
        self.model = whisper.load_model("base")

    def forward(self, file_path: str) -> str:
        """Transcribe the audio file and return the recognized text."""
        result = self.model.transcribe(file_path)
        return result['text']
agentcourse_unit4/tools/chess_board_recognizer.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from typing import Optional
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from smolagents import Tool
8
+ from ultralytics import YOLO
9
+
10
# Detector class name -> FEN piece letter (black lowercase, white uppercase).
FEN_MAPPING = {
    "black-pawn": "p", "black-rook": "r", "black-knight": "n", "black-bishop": "b", "black-queen": "q",
    "black-king": "k",
    "white-pawn": "P", "white-rook": "R", "white-knight": "N", "white-bishop": "B", "white-queen": "Q",
    "white-king": "K"
}
# Geometry of the 224x224 resized board image used by the recognizer.
GRID_BORDER = 10  # Border size in pixels
GRID_SIZE = 204  # Effective grid size (10px to 214px)
BLOCK_SIZE = GRID_SIZE // 8  # Each block is ~25px
X_LABELS = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']  # Labels for x-axis (a to h)
Y_LABELS = [8, 7, 6, 5, 4, 3, 2, 1]  # Reversed labels for y-axis (8 to 1)
21
+
22
+
23
class ChessBoardRecognizerTool(Tool):
    """smolagents Tool: turn a chessboard photo into a FEN position string.

    Pipeline: segment the board out of the photo (model_seg), resize to
    224x224, detect pieces (model_std), map each detection's centre to a
    square, and serialize the 8x8 board as FEN.
    """

    name = "chess_board_recognizer"
    description = "Recognizes the state of chess board from image and returns the position representation in Forsyth-Edwards notation (FEN)"
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path of the chess board image file"
        },
        "is_white_turn": {
            "type": "boolean",
            "description": "Optionally white's turn on the chess board if value not provided",
            "nullable": True
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

        # Model weights live in ../data relative to this module.
        parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

        self.model_std = YOLO(f"{parent_dir}/data/standard.pt")      # piece detector
        self.model_seg = YOLO(f"{parent_dir}/data/segmentation.pt")  # board segmenter

    def forward(self, image_path: str, is_white_turn: Optional[bool] = None) -> str:
        """Detect pieces on the segmented board image and emit a full FEN string."""
        processed_image = self._process_image(image_path)
        if processed_image is None:
            # Bug fix: the original fell off the end and returned None here,
            # violating the declared string output_type.
            return "Error: could not recognize a chess board in the image."

        processed_image = cv2.resize(processed_image, (224, 224))
        height, width, _ = processed_image.shape

        results = self.model_std.predict(source=processed_image, save=False, save_txt=False, conf=0.6)

        # FEN board, 8x8; "8" marks an empty square and is collapsed later.
        board = [["8"] * 8 for _ in range(8)]

        for result in results[0].boxes:
            x1, y1, x2, y2 = result.xyxy[0].tolist()
            class_id = int(result.cls[0])
            class_name = self.model_std.names[class_id]

            fen_piece = FEN_MAPPING.get(class_name)
            if not fen_piece:
                continue  # detection class we don't map (shouldn't happen)

            # Use the bounding-box centre; flip Y so the origin is bottom-left.
            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2
            pixel_x = int(center_x)
            pixel_y = int(height - center_y)

            grid_position = self._get_grid_coordinate(pixel_x, pixel_y)
            if grid_position != "Pixel outside grid bounds":
                file = ord(grid_position[0]) - ord('a')  # column index (0-7)
                rank = int(grid_position[1]) - 1         # row index (0-7)
                board[7 - rank][file] = fen_piece        # FEN lists rank 8 first

        fen_str = "/".join(self._collapse_row(row) for row in board)

        # Bug fix: the original emitted "b" whenever is_white_turn was
        # provided, ignoring its actual value. Default to white when unknown.
        b_or_w_turn = "b" if is_white_turn is False else "w"

        # Castling/en-passant/clock info is not recoverable from a still image.
        return f"{fen_str} {b_or_w_turn} - - 0 1"

    @staticmethod
    def _collapse_row(row) -> str:
        """Collapse one board row into FEN form (runs of empties -> digit)."""
        fen_row = ""
        empty_count = 0
        for cell in row:
            if cell == "8":
                empty_count += 1
            else:
                if empty_count > 0:
                    fen_row += str(empty_count)
                    empty_count = 0
                fen_row += cell
        if empty_count > 0:
            fen_row += str(empty_count)
        return fen_row

    def _get_grid_coordinate(self, pixel_x, pixel_y):
        """
        Map a pixel (bottom-left origin) to a board square like "e4".

        The board occupies GRID_SIZE pixels inside a GRID_BORDER margin;
        returns "Pixel outside grid bounds" when the pixel misses the board.
        (Uses the module-level constants instead of re-declaring them locally
        as the original did.)
        """
        adjusted_x = pixel_x - GRID_BORDER
        adjusted_y = pixel_y - GRID_BORDER

        if adjusted_x < 0 or adjusted_y < 0 or adjusted_x >= GRID_SIZE or adjusted_y >= GRID_SIZE:
            return "Pixel outside grid bounds"

        x_index = adjusted_x // BLOCK_SIZE
        y_index = adjusted_y // BLOCK_SIZE

        if x_index >= len(X_LABELS) or y_index >= len(Y_LABELS):
            return "Pixel outside grid bounds"

        # With a bottom-left origin the row index maps directly to the rank:
        # the original computed 8 - Y_LABELS[y_index] + 1, which simplifies
        # to y_index + 1.
        return f"{X_LABELS[x_index]}{y_index + 1}"

    def _process_image(self, image_path):
        """Segment the board out of the photo; return the cropped image or None."""
        results = self.model_seg.predict(
            source=image_path,
            conf=0.8  # Confidence threshold
        )

        segmentation_mask = None
        bbox = None

        for result in results:
            if result.boxes.conf[0] >= 0.8:  # Filter results by confidence
                segmentation_mask = result.masks.data.cpu().numpy().astype(np.uint8)[0]
                bbox = result.boxes.xyxy[0].cpu().numpy()  # bounding box coords
                break

        if segmentation_mask is None:
            print("No segmentation mask with confidence above 0.8 found.")
            return None

        if bbox is None:
            print("No bounding box coordinates found. Skip cropping the image")
            return None

        image = cv2.imread(image_path)

        x1, y1, x2, y2 = bbox
        cropped_segment = image[int(y1):int(y2), int(x1):int(x2)]

        # Save a copy for debugging only; the in-memory crop is what gets used.
        cropped_image_path = tempfile.NamedTemporaryFile(suffix=".jpg").name
        cv2.imwrite(cropped_image_path, cropped_segment)
        print(f"Cropped segmented image saved to {cropped_image_path}")

        return cropped_segment
agentcourse_unit4/tools/chess_predictor.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from stockfish import Stockfish
3
+ import shutil
4
+
5
class ChessPredictorTool(Tool):
    """smolagents Tool: ask Stockfish for the best move in a FEN position."""

    name = "chess_predictor"
    description = "Analyzes a chess state (FEN) and predicts the best move."
    inputs = {
        "fen": {
            "type": "string",
            "description": "FEN (Forsyth-Edwards notation) value",
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()

        import os  # local import: only needed for the existence check below

        # Bug fix: the original `shutil.which(...) or "/usr/games/stockfish"`
        # was never falsy, so the RuntimeError branch was unreachable and a
        # missing binary surfaced later as an opaque Stockfish error.
        stockfish_path = shutil.which("stockfish")
        if not stockfish_path and os.path.isfile("/usr/games/stockfish"):
            stockfish_path = "/usr/games/stockfish"

        if not stockfish_path:
            raise RuntimeError("Can't find stockfish on PATH or at /usr/games/stockfish.")

        print(f"Stockfish path: {stockfish_path}")

        self.stockfish = Stockfish(
            path=stockfish_path,
            depth=10,
            parameters={"Threads": 2, "Minimum Thinking Time": 30}
        )

    def forward(self, fen: str) -> str:
        """Return Stockfish's best move (e.g. "e2e4"), or an error string."""
        if not self.stockfish.is_fen_valid(fen):
            return f"Error: invalid FEN notation: {fen}"

        self.stockfish.set_fen_position(fen)

        print(f"Current chessboard:\n {self.stockfish.get_board_visual()}")

        best_move = self.stockfish.get_best_move()
        return f"{best_move}" if best_move else "No valid move found on chessboard."
42
+
43
+
agentcourse_unit4/tools/csv_reader.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from smolagents import Tool
3
+
4
class CsvReaderTool(Tool):
    """smolagents Tool: load a CSV file with pandas and return it as JSON."""

    name = "csv_reader"
    description = "Extract CSV file content. Supported file extensions: .csv"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the CSV file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Return the CSV content serialized via DataFrame.to_json().

        On failure returns an error message string instead of raising.
        """
        try:
            df = pd.read_csv(file_path)
            # describe() goes to the log only; the agent receives the JSON.
            print(f"Describe CSV file:\n {df.describe()}")
            return df.to_json()
        except Exception as e:
            return f"Error processing CSV file: {str(e)}"
agentcourse_unit4/tools/excel_reader.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from smolagents import Tool
3
+
4
class ExcelReaderTool(Tool):
    """smolagents Tool: load an Excel workbook with pandas and return it as JSON."""

    name = "excel_reader"
    description = "Extract Excel file content. Supported file extensions: .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, .odt"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the Excel file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Return the first sheet serialized via DataFrame.to_json().

        On failure returns an error message string instead of raising.
        """
        try:
            df = pd.read_excel(file_path)
            # describe() goes to the log only; the agent receives the JSON.
            print(f"Describe Excel file:\n {df.describe()}")
            return df.to_json()
        except Exception as e:
            # Bug fix: the original only printed the error and implicitly
            # returned None, violating the declared string output_type and
            # diverging from CsvReaderTool. Return the message instead.
            return f"Error: processing Excel file: {str(e)}"
agentcourse_unit4/tools/file_downloader.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import urllib.request
2
+ from smolagents import Tool
3
+
4
+
5
class FileDownloaderTool(Tool):
    """smolagents Tool: fetch a URL into a temp file and return its local path."""

    name = "file_downloader"
    description = "Download a file from Internet by URL provided, save it into temp dir and return file path"
    inputs = {
        "url": {
            "type": "string",
            "description": "URL to download from",
        }
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Download *url* and return the local temp-file path.

        NOTE(review): on failure the error is printed and None is returned,
        which does not match the declared string output_type — confirm
        downstream handling before changing this.
        """
        file_path = None

        try:
            # urlretrieve with no filename writes to an auto-named temp file
            # and returns (local_path, headers); keep only the path.
            result = urllib.request.urlretrieve(url)
            file_path = result[0]
        except Exception as e:
            print(f"Error downloading file: {str(e)}")

        return file_path
agentcourse_unit4/tools/image_describer.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from transformers import BlipProcessor, BlipForConditionalGeneration
3
+ from PIL import Image
4
+ import torch
5
+
6
+
7
class ImageDescriberTool(Tool):
    """smolagents Tool: caption an image with the BLIP captioning model."""

    name = "image_describer"
    description = """
    Analyzes image and provide what is represented on it.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file",
        }
    }
    output_type = "string"

    def __init__(self):
        super().__init__()
        # Run on GPU when available; BLIP-large is slow on CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        model_name = "Salesforce/blip-image-captioning-large"
        self.processor = BlipProcessor.from_pretrained(model_name)
        self.model = BlipForConditionalGeneration.from_pretrained(model_name).to(self.device)

    def forward(self, image_path: str) -> str:
        """Return a short caption for the image, or an error message string."""
        try:
            # Convert to RGB so grayscale/RGBA inputs are accepted by BLIP.
            image = Image.open(image_path).convert('RGB')
            inputs = self.processor(image, return_tensors="pt").to(self.device)
            out = self.model.generate(**inputs)
            img_description = self.processor.decode(out[0], skip_special_tokens=True)
            return img_description
        except Exception as e:
            return f"Error generating image description: {e}"
agentcourse_unit4/tools/image_text_extractor.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import easyocr
3
+
4
+
5
class ImageTextExtractorTool(Tool):
    """smolagents Tool: OCR an image with EasyOCR (simplified Chinese + English)."""

    name = "image_text_extractor"
    description = """
    Multilingual OCR tool to extract key information or presented text from any image.
    Supported image extensions: .png, .jpg, .jpeg, .bmp, .svg.
    """
    inputs = {
        "image_path": {
            "type": "string",
            "description": "The path to the image file",
        }
    }
    output_type = "array"

    def __init__(self):
        super().__init__()
        # The Reader loads its models once per tool instance.
        # NOTE(review): first use presumably downloads model weights — confirm.
        self.reader = easyocr.Reader(['ch_sim', 'en'])

    def forward(self, image_path: str) -> list[str]:
        """Return the recognized text fragments.

        detail=False makes readtext return only the strings, dropping
        bounding boxes and confidence scores.
        """
        result = self.reader.readtext(image_path, detail=False)
        return result
agentcourse_unit4/tools/pdf_reader.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pdfminer.high_level import extract_text
2
+ from smolagents import Tool
3
+
4
class PdfReaderTool(Tool):
    """smolagents Tool: extract plain text from a PDF via pdfminer."""

    name = "pdf_reader"
    description = "Extract PDF content. Supported file extensions: .pdf"
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to the PDF file",
        }
    }
    output_type = "string"

    def forward(self, file_path) -> str:
        """Return the PDF's text, or an error message string on failure."""
        try:
            return extract_text(file_path)
        except Exception as e:
            return f"Error processing PDF file: {str(e)}"
agentcourse_unit4/tools/py_code_interpreter.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import sys
3
+ from smolagents import Tool
4
+
5
class PyCodeInterpreterTool(Tool):
    """smolagents Tool: execute a Python script file and capture its output."""

    name = "py_code_interpreter"
    description = """
    Executes file with python code.
    Strongly use only for answering to the questions from GAIA benchmark.
    Use default python_interpreter for other cases.
    """
    inputs = {
        "file_path": {
            "type": "string",
            "description": "The file with python code to execute.",
        }
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Run the script in a subprocess and return its combined stdout/stderr.

        Returns an error message string on timeout, non-zero exit, or any
        other subprocess failure.
        """
        # Bug fix: the original built the command with
        # f"python {file_path}".split(), which breaks on paths containing
        # spaces and may pick a different interpreter than the running one.
        # Use the current interpreter and pass the path as a single argv item.
        cmd = [sys.executable, file_path]

        try:
            output = subprocess.check_output(cmd, stderr=subprocess.STDOUT, timeout=60.0)
            output_text = output.decode(sys.stdout.encoding).strip()
        except subprocess.TimeoutExpired:
            output_text = "Error: code execution timed out."
        except subprocess.CalledProcessError as e:
            # Non-zero exit: surface whatever the script printed before dying.
            output_text = e.output.decode(sys.stdout.encoding).strip()
        except subprocess.SubprocessError as ex:
            output_text = f"Error: {str(ex)}\n"

        return output_text
agentcourse_unit4/tools/youtube_transcriber.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+
3
+ from smolagents import Tool
4
+ import whisper
5
+ from pytubefix import YouTube
6
+ from pytubefix.cli import on_progress
7
+
8
class YoutubeTranscriberTool(Tool):
    """smolagents Tool: download a YouTube video's audio and transcribe it with Whisper."""

    name = "youtube_transcriber"
    description = "This is a tool that get the transcription of the YouTube video in the form of text."
    inputs = {
        "url": {
            "type": "string",
            "description": "The link of any youtube video to get the transcription",
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whisper "base": small download, CPU-friendly; loaded once per tool.
        self.model = whisper.load_model("base")

    def forward(self, url: str) -> str:
        """Fetch the audio-only stream into the temp dir, then transcribe it."""
        yt = YouTube(url, on_progress_callback=on_progress)
        audio_stream = yt.streams.get_audio_only()
        temp_dir = tempfile.gettempdir()
        out_file = audio_stream.download(output_path=temp_dir)
        result = self.model.transcribe(out_file)
        return result['text']
app.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import requests
7
+
8
+ from agentcourse_unit4.api.agent_eval_api import AgentEvalApi
9
+ from agentcourse_unit4.api.answer_data import AnswerData
10
+ from agentcourse_unit4.basic_agent import BasicAgent
11
+
12
+
13
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
14
+ """
15
+ Fetches all questions, runs the BasicAgent on them, submits all answers,
16
+ and displays the results.
17
+ """
18
+ # --- Determine HF Space Runtime URL and Repo URL ---
19
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
20
+
21
+ if profile:
22
+ username = f"{profile.username}"
23
+ print(f"User logged in: {username}")
24
+ else:
25
+ print("User not logged in.")
26
+ return "Please Login to Hugging Face with the button.", None
27
+
28
+ api_client = AgentEvalApi()
29
+
30
+ # 1. Instantiate Agent ( modify this part to create your agent)
31
+ try:
32
+ agent = BasicAgent()
33
+ except Exception as e:
34
+ print(f"Error instantiating agent: {e}")
35
+ return f"Error initializing agent: {e}", None
36
+ # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
37
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
38
+ print(agent_code)
39
+
40
+ # 2. Fetch Questions
41
+ print(f"Fetching questions from: {api_client.questions_url}")
42
+ try:
43
+ questions_data = api_client.get_questions()
44
+ if not questions_data:
45
+ print("Fetched questions list is empty.")
46
+ return "Fetched questions list is empty or invalid format.", None
47
+ print(f"Fetched {len(questions_data)} questions.")
48
+ except requests.exceptions.RequestException as e:
49
+ print(f"Error fetching questions: {e}")
50
+ return f"Error fetching questions: {e}", None
51
+ except requests.exceptions.JSONDecodeError as e:
52
+ print(f"Error decoding JSON response from questions endpoint: {e}")
53
+ print(f"Response text: {e.response.text[:500]}")
54
+ return f"Error decoding server response for questions: {e}", None
55
+ except Exception as e:
56
+ print(f"An unexpected error occurred fetching questions: {e}")
57
+ return f"An unexpected error occurred fetching questions: {e}", None
58
+
59
+ # 3. Run your Agent
60
+ results_log = []
61
+ answers_payload = []
62
+ print(f"Running agent on {len(questions_data)} questions...")
63
+ for item in questions_data:
64
+ task_id = item.task_id
65
+ question_text = item.question
66
+ file_name = item.file_name
67
+
68
+ if not task_id or question_text is None:
69
+ print(f"Skipping item with missing task_id or question: {item}")
70
+ continue
71
+
72
+ try:
73
+ file_path = api_client.download_file(task_id, file_name) if len(file_name) > 0 else None
74
+ submitted_answer = agent.run(question_text, file_path)
75
+ answers_payload.append(AnswerData(task_id=task_id, answer=str(submitted_answer)))
76
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
77
+
78
+ time.sleep(60) # to not exceed free limits
79
+ except Exception as e:
80
+ print(f"Error running agent on task {task_id}: {e}")
81
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
82
+
83
+ if not answers_payload:
84
+ print("Agent did not produce any answers to submit.")
85
+ return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
86
+
87
+ # 4. Prepare Submission
88
+ status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
89
+ print(status_update)
90
+
91
+ # 5. Submit
92
+ print(f"Submitting {len(answers_payload)} answers to: {api_client.submit_url}")
93
+ try:
94
+ result_data = api_client.submit_answers(username=username, agent_code=agent_code, answers=answers_payload)
95
+ final_status = (
96
+ f"Submission Successful!\n"
97
+ f"User: {result_data.username}\n"
98
+ f"Overall Score: {result_data.score}% "
99
+ f"({result_data.correct_count}/{result_data.total_attempted} correct)\n"
100
+ f"Message: {result_data.message}"
101
+ )
102
+ print("Submission successful.")
103
+ results_df = pd.DataFrame(results_log)
104
+ return final_status, results_df
105
+ except requests.exceptions.HTTPError as e:
106
+ error_detail = f"Server responded with status {e.response.status_code}."
107
+ try:
108
+ error_json = e.response.json()
109
+ error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
110
+ except requests.exceptions.JSONDecodeError:
111
+ error_detail += f" Response: {e.response.text[:500]}"
112
+ status_message = f"Submission Failed: {error_detail}"
113
+ print(status_message)
114
+ results_df = pd.DataFrame(results_log)
115
+ return status_message, results_df
116
+ except requests.exceptions.Timeout:
117
+ status_message = "Submission Failed: The request timed out."
118
+ print(status_message)
119
+ results_df = pd.DataFrame(results_log)
120
+ return status_message, results_df
121
+ except requests.exceptions.RequestException as e:
122
+ status_message = f"Submission Failed: Network error - {e}"
123
+ print(status_message)
124
+ results_df = pd.DataFrame(results_log)
125
+ return status_message, results_df
126
+ except Exception as e:
127
+ status_message = f"An unexpected error occurred during submission: {e}"
128
+ print(status_message)
129
+ results_df = pd.DataFrame(results_log)
130
+ return status_message, results_df
131
+
132
+
133
+ # --- Build Gradio Interface using Blocks ---
134
+ with gr.Blocks() as demo:
135
+ gr.Markdown("# Basic Agent Evaluation Runner")
136
+ gr.Markdown(
137
+ """
138
+ **Instructions:**
139
+
140
+ 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
141
+ 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
142
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
143
+
144
+ ---
145
+ **Disclaimers:**
146
+ Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
147
+ This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
148
+ """
149
+ )
150
+
151
+ gr.LoginButton()
152
+
153
+ run_button = gr.Button("Run Evaluation & Submit All Answers")
154
+
155
+ status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
156
+ # Removed max_rows=10 from DataFrame constructor
157
+ results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
158
+
159
+ run_button.click(
160
+ fn=run_and_submit_all,
161
+ outputs=[status_output, results_table]
162
+ )
163
+
164
+ if __name__ == "__main__":
165
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
166
+ # Check for SPACE_HOST and SPACE_ID at startup for information
167
+ space_host_startup = os.getenv("SPACE_HOST")
168
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
169
+
170
+ if space_host_startup:
171
+ print(f"✅ SPACE_HOST found: {space_host_startup}")
172
+ print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
173
+ else:
174
+ print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
175
+
176
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
177
+ print(f"✅ SPACE_ID found: {space_id_startup}")
178
+ print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
179
+ print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
180
+ else:
181
+ print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
182
+
183
+ print("-" * (60 + len(" App Starting ")) + "\n")
184
+
185
+ print("Launching Gradio Interface for Basic Agent Evaluation...")
186
+ demo.launch(debug=True, share=False)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ stockfish
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ smolagents
4
+ smolagents[audio]
5
+ litellm
6
+ python-dotenv
7
+ pandas
8
+ openpyxl
9
+ numpy
10
+ transformers
11
+ openai-whisper
12
+ pytubefix
13
+ ffmpeg-python
14
+ wikipedia-api
15
+ stockfish
16
+ torch
17
+ pillow
18
+ easyocr
19
+ pdfminer.six
20
+ ultralytics