krzsam committed on
Commit
ad476aa
·
1 Parent(s): 2d695b2
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ /.env
2
+ /venvw/
3
+ /questions.json
4
+ /venv/
5
+ /.idea/
app.py CHANGED
@@ -3,27 +3,121 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
 
 
 
 
 
14
  def __init__(self):
15
- print("BasicAgent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
 
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
@@ -76,10 +170,23 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
 
 
 
82
  try:
 
 
 
 
 
 
83
  submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
@@ -99,17 +206,21 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
99
  # 5. Submit
100
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
 
 
 
 
113
  results_df = pd.DataFrame(results_log)
114
  return final_status, results_df
115
  except requests.exceptions.HTTPError as e:
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from smolagents import CodeAgent, tool, InferenceClientModel, WebSearchTool, load_tool, PromptTemplates, Tool, FinalAnswerTool
7
+ from dotenv import load_dotenv
8
+ from my_prompt_config import PromptConfig
9
+ from my_tools import ReverseStringTool, ImageLoadTool
10
+ from PIL import Image
11
 
12
  # (Keep Constants as is)
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
# --- Local testing configuration --------------------------------------------
# NOTE(review): testing_mode is committed as True, so the app will never
# actually submit answers until this is flipped back to False.
testing_mode = True

# Task ids to execute while testing; uncomment an id to include it.
questions_to_run = [
    # "8e867cd7-cff9-4e6c-867a-ff5ddc2550be",  # OK
    # "a1e91b78-d3d8-4675-bb8d-62741b4b68a6",
    # "2d83110e-a098-4ebb-9987-066c06fa42d0",  # almost OK
    "cca530fc-4052-43b2-b130-b30968d8aa44",
    # "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8",
    # "6f37996b-2ac7-44b0-8e68-6d28256631b4",
    # "9d191bce-651d-4746-be2d-7ef8ecadb9c2",
    # "cabe07ed-9eca-40ea-8ead-410ef5e83f91",
    # "3cef3a44-215e-4aed-8e3b-b1e3f08063b7",
    # "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3",
    # "305ac316-eef6-4446-960a-92d80d542f82",
    # "f918266a-b3e0-4914-865d-4faa564f1aef",
    # "3f57289b-8c60-48be-bd80-01f8099ca449",
    # "1f975693-876d-457b-a649-393859e79bf3",
    # "840bfca7-4f7b-481a-8794-c560c340185d",
    # "bda648d7-d618-4883-88f4-3466eabd860e",
    # "cf106601-ab4f-4af9-b045-5295fe67b37d",
    # "a0c07678-e491-4bbc-8f0b-07405144218f",
    # "7bd855d8-463d-4ed5-93ca-5fe35145f733",
    # "5a0c1adf-205e-4841-a666-7c3ef95def9d",
]
# --- end local testing configuration -----------------------------------------
42
  # --- Basic Agent Definition ---
43
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
44
class BasicAgent:
    """Multi-agent wrapper around smolagents.

    A main reasoning ``CodeAgent`` (equipped with an image-loading tool and a
    final-answer tool) plans the work and may delegate web research to a
    managed web-search ``CodeAgent``. Calling the instance with a question
    string returns the agent's answer as a string.
    """

    # Model driving the managed web-search agent.
    # MODEL_CODER = "Qwen/Qwen2.5-Coder-32B-Instruct"
    MODEL_CODER = "Qwen/Qwen2.5-72B-Instruct"
    # Model driving the main planning/reasoning agent.
    # MODEL_REASONING = "deepseek-ai/DeepSeek-R1"
    MODEL_REASONING = "Qwen/Qwen2.5-72B-Instruct"

    def __init__(self):
        # Load HF credentials from .env before any InferenceClientModel is built.
        load_dotenv()
        print("Agent initialized.")
        self._create_agents()

    def _create_agents(self):
        """Build the managed web-search agent and the main reasoning agent.

        FIX: the previous version also constructed two ``Tool.from_space``
        tools (image generation / captioning) that were never attached to any
        agent — they only cost startup time and network calls, so they were
        removed. Re-add them to the ``tools=`` list if they become needed.
        """
        web_search_agent = CodeAgent(
            tools=[WebSearchTool()],
            model=InferenceClientModel(model_id=self.MODEL_CODER),
            name="agent_websearch",
            description="Agent to browse and search and extract web content"
        )

        image_loading_tool = ImageLoadTool()
        print(f"Image load tool: {image_loading_tool}")

        self.reasoning_agent = CodeAgent(
            tools=[image_loading_tool, FinalAnswerTool()],
            model=InferenceClientModel(model_id=self.MODEL_REASONING),
            planning_interval=3,  # activate periodic re-planning
            prompt_templates=PromptConfig().PROMPT_TEMPLATES,
            managed_agents=[web_search_agent],
            # FIX: dropped the former "my_tools." entry — a trailing dot is
            # not a valid module name, so it could never be imported anyway.
            additional_authorized_imports=["PIL", "chess", "my_tools"],
        )
        print(f"Main agent initialized: {self.reasoning_agent}")

    def __call__(self, question: str) -> str:
        """Run the reasoning agent on *question* and return its answer.

        FIX: ``agent.run()`` may return non-string payloads (numbers, lists);
        coerce with ``str()`` so the annotated ``-> str`` contract holds.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        answer = self.reasoning_agent.run(question)

        print(f"Agent returning answer: {answer}")
        return str(answer)
110
+
111
+
112
+ # -----------------------------------------------------------------------------
113
+ #
114
 
115
  def run_and_submit_all( profile: gr.OAuthProfile | None):
116
  """
117
  Fetches all questions, runs the BasicAgent on them, submits all answers,
118
  and displays the results.
119
  """
120
+
121
  # --- Determine HF Space Runtime URL and Repo URL ---
122
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
123
 
 
170
  for item in questions_data:
171
  task_id = item.get("task_id")
172
  question_text = item.get("question")
173
+ question_file_name = item.get("file_name")
174
+
175
  if not task_id or question_text is None:
176
  print(f"Skipping item with missing task_id or question: {item}")
177
  continue
178
+
179
+ if testing_mode:
180
+ if task_id not in questions_to_run:
181
+ continue
182
+
183
  try:
184
+ if question_file_name is not None:
185
+ ext = question_file_name[-4:]
186
+ if ext == ".png":
187
+ question_text = question_text + (f" . Use available tool to load an image associated with task id: "
188
+ f"{task_id}")
189
+
190
  submitted_answer = agent(question_text)
191
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
192
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
206
  # 5. Submit
207
  print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
208
  try:
209
+ if not testing_mode:
210
+ response = requests.post(submit_url, json=submission_data, timeout=60)
211
+ response.raise_for_status()
212
+ result_data = response.json()
213
+ final_status = (
214
+ f"Submission Successful!\n"
215
+ f"User: {result_data.get('username')}\n"
216
+ f"Overall Score: {result_data.get('score', 'N/A')}% "
217
+ f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
218
+ f"Message: {result_data.get('message', 'No message received.')}"
219
+ )
220
+ print("Submission successful.")
221
+ else:
222
+ final_status = "TESTING, Submission skipped"
223
+
224
  results_df = pd.DataFrame(results_log)
225
  return final_status, results_df
226
  except requests.exceptions.HTTPError as e:
chess_board_tool.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from PIL import Image
3
+ import os
4
+ import cv2
5
+ import numpy as np
6
+ import math
7
+ import numpy
8
+ from .chess_pieces_detection import ChessPiecesRecognition
9
+
10
+
11
class ChessBoard(Tool):
    """smolagents Tool: extract a chess position from a board image.

    Pipeline:
      1. ``extract_pieces_from_image_board``: board image -> 64 tile images (32x32)
      2. ``detect_chess_pieces``: tile images -> 64-char piece-label string
      3. ``convert_pieces_list_to_fen``: label string -> FEN piece-placement field
    """

    name = "_my_chess_board"
    description = """
    Analyze an image representing a chess board and extract board state in FEN notation
    """

    inputs = {
        "img": {
            "type": "image",
            "description": "image of chess board to extract board position",
        }
    }

    output_type = "string"

    def gradientx(self, img):
        """x-direction gradient using a large (31x31) Sobel kernel."""
        return cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=31)

    def gradienty(self, img):
        """y-direction gradient using a large (31x31) Sobel kernel."""
        return cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=31)

    def checkMatch(self, lineset):
        """Return True when the trailing 6 gaps between candidate lines are
        (nearly) equal — the signature of a chessboard's 7 inner grid lines."""
        linediff = np.diff(lineset)
        x = 0
        cnt = 0
        for line in linediff:
            if abs(line - x) < 5:
                cnt += 1
            else:
                cnt = 0
                x = line
        return cnt == 5

    def pruneLines(self, lineset, image_dim, margin=20):
        """Drop candidate lines within *margin* px of the image edges, then
        keep the first run of lines whose spacing is (nearly) constant."""
        lineset = [x for x in lineset if x > margin and x < image_dim - margin]
        if not lineset:
            return lineset
        linediff = np.diff(lineset)
        x = 0
        cnt = 0
        start_pos = 0
        for i, line in enumerate(linediff):
            if abs(line - x) < 5:
                cnt += 1
                if cnt == 5:
                    end_pos = i + 2
                    return lineset[start_pos:end_pos]
            else:
                cnt = 0
                x = line
                start_pos = i
        return lineset

    def skeletonize_1d(self, arr):
        """Thin a 1-D signal to its local maxima (zero everything else)."""
        _arr = arr.copy()
        for i in range(len(_arr) - 1):
            if _arr[i] <= _arr[i + 1]:
                _arr[i] = 0
        for i in range(len(_arr) - 1, 0, -1):
            if _arr[i - 1] > _arr[i]:
                _arr[i] = 0
        return _arr

    def getChessLines(self, hdx, hdy, hdx_thresh, hdy_thresh, image_shape):
        """Locate grid-line positions from Hough-like gradient profiles.

        Returns (lines_x, lines_y, is_match) where is_match is True when
        exactly 7 evenly spaced lines were found in each direction.
        """
        # Gaussian window used to blur the thresholded profiles.
        window_size = 21
        sigma = 8.0
        gausswin = cv2.getGaussianKernel(window_size, sigma, cv2.CV_64F)
        gausswin = gausswin.flatten()
        half_size = window_size // 2

        # Threshold signals.
        hdx_thresh_binary = np.where(hdx > hdx_thresh, 1.0, 0.0)
        hdy_thresh_binary = np.where(hdy > hdy_thresh, 1.0, 0.0)

        # Blur via convolution with the Gaussian window.
        blur_x = np.convolve(hdx_thresh_binary, gausswin, mode='same')
        blur_y = np.convolve(hdy_thresh_binary, gausswin, mode='same')

        # Skeletonize to isolated peaks.
        skel_x = self.skeletonize_1d(blur_x)
        skel_y = self.skeletonize_1d(blur_y)

        # Peak positions -> candidate line coordinates.
        lines_x = np.where(skel_x > 0)[0].tolist()
        lines_y = np.where(skel_y > 0)[0].tolist()

        # Prune margins / keep the evenly spaced run.
        lines_x = self.pruneLines(lines_x, image_shape[1])
        lines_y = self.pruneLines(lines_y, image_shape[0])

        is_match = (len(lines_x) == 7) and (len(lines_y) == 7) and \
                   self.checkMatch(lines_x) and self.checkMatch(lines_y)

        return lines_x, lines_y, is_match

    def getChessTiles(self, img, lines_x, lines_y):
        """Slice the board image into 64 equally sized square tiles using the
        7 inner grid lines (extrapolating one step beyond each edge)."""
        stepx = int(round(np.mean(np.diff(lines_x))))
        stepy = int(round(np.mean(np.diff(lines_y))))

        # Pad the image if the outer ranks/files fall outside its bounds.
        padl_x = 0
        padr_x = 0
        padl_y = 0
        padr_y = 0
        if lines_x[0] - stepx < 0:
            padl_x = abs(lines_x[0] - stepx)
        if lines_x[-1] + stepx > img.shape[1] - 1:
            padr_x = lines_x[-1] + stepx - img.shape[1] + 1
        if lines_y[0] - stepy < 0:
            padl_y = abs(lines_y[0] - stepy)
        if lines_y[-1] + stepy > img.shape[0] - 1:
            padr_y = lines_y[-1] + stepy - img.shape[0] + 1

        img_padded = cv2.copyMakeBorder(img, padl_y, padr_y, padl_x, padr_x, cv2.BORDER_REPLICATE)

        setsx = [lines_x[0] - stepx + padl_x] + [x + padl_x for x in lines_x] + [lines_x[-1] + stepx + padl_x]
        setsy = [lines_y[0] - stepy + padl_y] + [y + padl_y for y in lines_y] + [lines_y[-1] + stepy + padl_y]

        squares = []
        for j in range(8):
            for i in range(8):
                x1 = setsx[i]
                x2 = setsx[i + 1]
                y1 = setsy[j]
                y2 = setsy[j + 1]
                # Force every tile to exactly stepx x stepy.
                if (x2 - x1) != stepx:
                    x2 = x1 + stepx
                if (y2 - y1) != stepy:
                    y2 = y1 + stepy
                squares.append(img_padded[y1:y2, x1:x2])
        return squares

    # Image(PIL) --> list of 32x32 grayscale cv2 tiles
    def extract_pieces_from_image_board(self, image):
        """Detect the board grid and return 64 resized (32x32) tiles.

        FIX: returns [] (not None) when the image is missing or no grid is
        found, so callers can iterate the result unconditionally.
        """
        if image is None:
            print(f"Image not provided")
            return []
        # Convert to grayscale.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Preprocessing: histogram equalization + [0,1] normalization.
        equ = cv2.equalizeHist(gray)
        norm_image = equ.astype(np.float32) / 255.0

        # Gradients in both directions.
        grad_x = self.gradientx(norm_image)
        grad_y = self.gradienty(norm_image)

        # Split into positive/negative parts.
        Dx_pos = np.clip(grad_x, 0, None)
        Dx_neg = np.clip(-grad_x, 0, None)
        Dy_pos = np.clip(grad_y, 0, None)
        Dy_neg = np.clip(-grad_y, 0, None)

        # Hough-like 1-D profiles (strong where both edge polarities align).
        hough_Dx = (np.sum(Dx_pos, axis=0) * np.sum(Dx_neg, axis=0)) / (norm_image.shape[0] ** 2)
        hough_Dy = (np.sum(Dy_pos, axis=1) * np.sum(Dy_neg, axis=1)) / (norm_image.shape[1] ** 2)

        # Adaptive thresholding: raise the threshold until the grid matches.
        a = 1
        is_match = False
        lines_x = []
        lines_y = []

        while a < 5:
            threshold_x = np.max(hough_Dx) * (a / 5.0)
            threshold_y = np.max(hough_Dy) * (a / 5.0)

            lines_x, lines_y, is_match = self.getChessLines(hough_Dx, hough_Dy, threshold_x, threshold_y,
                                                            norm_image.shape)

            if is_match:
                break
            else:
                a += 1

        squares_resized = []
        if is_match:
            squares = self.getChessTiles(gray, lines_x, lines_y)
            for square in squares:
                resized = cv2.resize(square, (32, 32), interpolation=cv2.INTER_AREA)
                squares_resized.append(resized)

        return squares_resized

    def detect_chess_pieces(self, images):
        """Classify tile images into a 64-char piece-label string."""
        recognition = ChessPiecesRecognition()
        return recognition.classify_pieces(images)

    def convert_pieces_list_to_fen(self, pieces):
        """Convert a 64-char piece string into the FEN piece-placement field.

        FIX: this was an empty stub (``print()``), so ``forward`` always
        reported a None FEN. '1' marks an empty square; runs of empties are
        compressed into digit counts and ranks are joined with '/'.
        Tiles are produced top row first (see getChessTiles), which is
        assumed to be rank 8 — TODO confirm board orientation.
        """
        ranks = []
        for r in range(8):
            row = pieces[r * 8:(r + 1) * 8]
            fen_row = ""
            empty = 0
            for ch in row:
                if ch == "1":
                    empty += 1
                else:
                    if empty:
                        fen_row += str(empty)
                        empty = 0
                    fen_row += ch
            if empty:
                fen_row += str(empty)
            ranks.append(fen_row)
        return "/".join(ranks)

    def forward(self, img: Image.Image) -> str:
        """Tool entry point: board image -> FEN description string."""
        print(f"***KS*** Analyzing chess board image")
        cv2_image = cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2BGR)

        # Image(PIL) -> 64 x Image(CV2)(32x32)
        squares_resized = self.extract_pieces_from_image_board(cv2_image)

        # tiles -> piece-label string
        pieces_list = self.detect_chess_pieces(squares_resized)

        # piece-label string -> FEN placement field
        fen = self.convert_pieces_list_to_fen(pieces_list)

        # FIX: the previous return string had an unbalanced quote and a stray
        # newline embedded in the output.
        return f'FEN is: "{fen}"'
chess_pieces_detection/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
# Re-export the recognizer. FIX: use an explicit relative import — the package
# is imported as `from .chess_pieces_detection import ChessPiecesRecognition`
# (see chess_board_tool.py), and the old absolute import only resolved when the
# package directory itself happened to be on sys.path.
from .train_chess_pieces_recognition import ChessPiecesRecognition
chess_pieces_detection/train.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
# Activate the project's virtualenv (one level up), then (re)train the
# chess-piece recognition CNN defined in train_chess_pieces_recognition.py.
. ../venv/bin/activate
python3 train_chess_pieces_recognition.py
chess_pieces_detection/train_chess_pieces_recognition.py ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import torch.optim as optim
5
+ from torch.utils.data import Dataset, DataLoader
6
+ import os
7
+ import numpy as np
8
+ from PIL import Image
9
+
10
+
11
+ # https://en.wikipedia.org/wiki/Convolution
12
+ # https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
13
+ # https://en.wikipedia.org/wiki/Forsyth%E2%80%93Edwards_Notation
14
+
15
+ # piece types
16
+ # - white Rook R
17
+ # - white Knights N
18
+ # - white Bishop B
19
+ # - white Queen Q
20
+ # - white King K
21
+ # - white Pawn P
22
+ # - black Rook r
23
+ # - black Knights n
24
+ # - black Bishop b
25
+ # - black Queen q
26
+ # - black King k
27
+ # - black Pawn p
28
+ # - empty
29
+
30
# Root directory holding labeled 32x32 grayscale training tiles, one
# subdirectory per color plus "empty". Overridable via CHESS_TRAIN_DIR so the
# hard-coded development path is no longer required on other machines.
TRAIN_DIR = os.environ.get(
    "CHESS_TRAIN_DIR",
    "/mnt/c/Users/krzsa/IdeaProjects/Agents-Course-Assignment/chess_pieces_detection/train-data",
)
TRAIN_DIR_BLACK = f"{TRAIN_DIR}/black"
TRAIN_DIR_WHITE = f"{TRAIN_DIR}/white"
TRAIN_DIR_EMPTY = f"{TRAIN_DIR}/empty"

# Class index -> FEN letter ('1' = empty square):
#  0:'1'  1:'K'  2:'Q'  3:'R'  4:'B'  5:'N'  6:'P'
#  7:'k'  8:'q'  9:'r' 10:'b' 11:'n' 12:'p'

# (directory, label) pairs in the original fixed order:
# empty first, then black pieces b,k,n,p,q,r, then white pieces B,K,N,P,Q,R.
_PIECE_DIRS = (
    [(TRAIN_DIR_EMPTY, "1")]
    + [(TRAIN_DIR_BLACK, c) for c in "bknpqr"]
    + [(TRAIN_DIR_WHITE, c) for c in "BKNPQR"]
)

# (file path, label) pairs; two samples per class (files <label>_001/_002.png).
TRAIN_DATA = [
    (f"{directory}/{label}_{i:03d}.png", label)
    for directory, label in _PIECE_DIRS
    for i in (1, 2)
]

# NOTE(review): evaluation reuses the training set, so reported accuracy is
# optimistic — supply a held-out set when one becomes available.
TEST_DATA = TRAIN_DATA
79
+
80
+ # https://docs.pytorch.org/docs/stable/nn.html
81
+ # https://docs.pytorch.org/docs/stable/optim.html
82
+ # https://docs.pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
83
# https://docs.pytorch.org/docs/stable/nn.html
# https://docs.pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module
class CNNModel(nn.Module):
    """Small CNN classifying 32x32 grayscale chess tiles into 13 classes
    ('1KQRBNPkqrbnp': empty square + 6 white + 6 black piece types).

    Weights are restored from saved_models/<name>.pth when the checkpoint
    exists; otherwise they are randomly initialized with truncated normals.
    """

    def __init__(self, _name):
        super().__init__()
        self.name = _name
        print("***KS*** Model: Creating layers")
        # conv1: 1 -> 32 channels, 5x5 kernel, 'same' padding.
        # https://docs.pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, padding=2)
        # conv2: 32 -> 64 channels, 5x5 kernel, 'same' padding.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)
        # Two 2x2 max-pools shrink 32x32 -> 8x8; with 64 channels that is
        # 8*8*64 = 4096 inputs to the fully connected layer.
        self.fc1 = nn.Linear(8 * 8 * 64, 1024)
        self.dropout = nn.Dropout(p=0.5)
        # Output layer: 13 logits (CrossEntropyLoss applies softmax itself).
        self.fc2 = nn.Linear(1024, 13)

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load or randomly initialize weights, then move to device.
        self._initialize_weights()

    def _initialize_weights(self):
        """Restore weights from a checkpoint if present, else random-init;
        finally move the model to the selected device."""
        model_name = f"saved_models/{self.name}.pth"
        print(f"***KS*** Checking pre-trained model: '{model_name}'")
        if os.path.exists(model_name):
            print(f"***KS*** Model '{model_name}' exists, loading weights ...")
            self.load_state_dict(torch.load(model_name, map_location=self.device))
            print("*** KS *** Model loaded.")
        else:
            print(f"*** KS *** Model file '{model_name}' not found. Initializing weights with random values")
            # Truncated-normal weights, small constant bias, for every layer.
            for layer in (self.conv1, self.conv2, self.fc1, self.fc2):
                nn.init.trunc_normal_(layer.weight, std=0.1)
                nn.init.constant_(layer.bias, 0.1)

        self.to(self.device)

    def save_weights(self):
        """Persist the current weights to saved_models/<name>.pth."""
        print(f"***KS*** Saving model ...")
        os.makedirs('saved_models', exist_ok=True)
        model_save_path = f"saved_models/{self.name}.pth"
        torch.save(self.state_dict(), model_save_path)
        print(f'*** KS *** Model saved in file: {model_save_path}')

    def forward(self, x):
        """Forward pass: (B, 1, 32, 32) -> (B, 13) raw logits.

        FIX: removed the per-layer debug prints that executed on EVERY
        forward call — they flooded the log and slowed training; the tensor
        shape after each stage is kept as a comment instead.
        """
        x = F.relu(self.conv1(x))                      # (B, 32, 32, 32)
        x = F.max_pool2d(x, kernel_size=2, stride=2)   # (B, 32, 16, 16)
        x = F.relu(self.conv2(x))                      # (B, 64, 16, 16)
        x = F.max_pool2d(x, kernel_size=2, stride=2)   # (B, 64, 8, 8)
        x = x.view(-1, 8 * 8 * 64)                     # (B, 4096) flattened
        x = F.relu(self.fc1(x))                        # (B, 1024)
        x = self.dropout(x)                            # (B, 1024)
        return self.fc2(x)                             # (B, 13) logits

    def get_device(self):
        """Device the model lives on (cuda when available, else cpu)."""
        return self.device
207
+
208
+
209
+ # Dataset class for PyTorch
210
+ # https://docs.pytorch.org/docs/stable/data.html#torch.utils.data.Dataset
211
# Dataset of labeled 32x32 grayscale chess-tile images used for training and
# evaluation of CNNModel.
# https://docs.pytorch.org/docs/stable/data.html#torch.utils.data.Dataset
class ChessDataset(Dataset):
    # Class index -> FEN letter; position 0 ('1') denotes an empty square.
    CHESS_PIECES = '1KQRBNPkqrbnp'

    def __init__(self, image_train_date):
        """Load every (file_path, label) pair into in-memory uint8 arrays."""
        self.num_images = len(image_train_date)
        # One 32x32 grayscale tile per sample; labels stored as class indices.
        self.images = np.zeros([self.num_images, 32, 32], dtype=np.uint8)
        self.labels = np.zeros([self.num_images], dtype=np.int64)

        for i, (path, label) in enumerate(image_train_date):
            with Image.open(path) as tile:
                grayscale = tile.convert('L')  # ensure single-channel
                self.images[i, :, :] = np.array(grayscale, dtype=np.uint8)
            self.labels[i] = self.__get_piece_index_from_label__(label)

        print("***KS*** Done loading training data")

    def __get_piece_index_from_label__(self, label) -> int:
        """Class index for a FEN letter (-1 when the letter is unknown)."""
        return self.CHESS_PIECES.find(label)

    def get_piece_label(self, idx) -> str:
        """FEN letter for a class index."""
        return self.CHESS_PIECES[idx]

    def __len__(self):
        return self.num_images

    def __getitem__(self, idx):
        """Return (1x32x32 float tensor scaled to [0,1], int class label)."""
        normalized = self.images[idx].astype('float32') / 255.0
        with_channel = np.expand_dims(normalized, axis=0)
        return torch.tensor(with_channel, dtype=torch.float32), self.labels[idx]
249
+
250
+
251
# Dataset wrapping already-extracted 32x32 grayscale tiles (no labels) for
# inference with CNNModel.
class ChessImagesDataset(Dataset):
    # Class index -> FEN letter; kept identical to ChessDataset.CHESS_PIECES.
    # FIX: this attribute was missing, so get_piece_label() raised
    # AttributeError whenever classification results were mapped to letters.
    CHESS_PIECES = '1KQRBNPkqrbnp'

    def __init__(self, images):
        self.num_images = len(images)
        self.images = images

    def __len__(self):
        return self.num_images

    def get_piece_label(self, idx) -> str:
        """FEN letter for a predicted class index."""
        return self.CHESS_PIECES[idx]

    # required to be implemented; returns an item for a given key
    def __getitem__(self, idx):
        """Return (1x32x32 float tensor in [0,1], "").

        The empty-string label is unused at inference time but keeps the
        tuple shape consistent with ChessDataset.
        """
        image = self.images[idx].astype('float32') / 255.0  # normalize
        image = np.expand_dims(image, axis=0)  # add channel dimension
        label = ""  # not needed
        return torch.tensor(image, dtype=torch.float32), label
269
+
270
+
271
class ChessPiecesRecognition:
    """Owns the chess-tile CNN: loads data, trains, evaluates, and classifies
    extracted board tiles into a FEN piece-letter string."""

    def __init__(self):
        print(f"***KS*** Chess pieces recognition initialized")
        self.model = CNNModel("test-1")
        self.__load_train_data__()

    def __load_train_data__(self):
        """Build DataLoaders over the (disk-resident) train/test tile sets."""
        print(f"*** KS *** loading training data")
        print(f"Loading {len(TRAIN_DATA)} Training tiles", end='')
        train_dataset = ChessDataset(TRAIN_DATA)

        print(f"\n*** KS *** Loading {len(TEST_DATA)} Testing tiles", end='')
        test_dataset = ChessDataset(TEST_DATA)
        print()

        batch_size = 64
        # https://docs.pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size)

    def train(self):
        """Run the full training loop, then persist the weights.

        FIX: removed the dead ``do_training`` flag that was hard-coded True.
        """
        print(f"***KS*** Training chess pieces recognition")

        # https://docs.pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html#torch.nn.CrossEntropyLoss
        criterion = nn.CrossEntropyLoss()  # multi-class classification
        # https://docs.pytorch.org/docs/stable/generated/torch.optim.Adam.html#torch.optim.Adam
        optimizer = optim.Adam(self.model.parameters(), lr=1e-4)

        epochs = 100

        self.model.train()  # enable dropout etc.
        print(f"*** KS *** Starting training for {epochs} epochs...")
        for epoch in range(epochs):
            running_loss = 0.0
            print(f"***KS*** Epoch: {epoch}")
            for i, (inputs, labels) in enumerate(self.train_loader):
                inputs = inputs.to(self.model.get_device())
                labels = labels.to(self.model.get_device())

                optimizer.zero_grad()

                outputs = self.model(inputs)
                loss = criterion(outputs, labels)

                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                if (i + 1) % 10 == 0:  # report every 10 batches
                    print(f'*** KS *** Epoch [{epoch + 1}/{epochs}], Step [{i + 1}/{len(self.train_loader)}], '
                          f'Loss: {running_loss / 10:.4f}')
                    running_loss = 0.0

        print('Finished Training')

        self.model.save_weights()

    def eval(self):
        """Report classification accuracy on the test loader."""
        self.model.eval()  # disable dropout
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in self.test_loader:
                inputs = inputs.to(self.model.get_device())
                labels = labels.to(self.model.get_device())

                outputs = self.model(inputs)

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        test_accuracy = correct / total
        print(f'Accuracy on test set: {test_accuracy * 100:.2f}%\n')

    def classify_pieces(self, images):
        """Classify 32x32 grayscale tiles and return their labels as one
        string (board order preserved).

        FIXES vs. the original:
        - It called ``.to(device)`` on the *string* labels yielded by the
          loader, which raises AttributeError (strings have no ``.to``).
        - ``labels_str`` was overwritten on every batch instead of
          accumulated, so any input larger than one batch (64 tiles) lost
          all but the last batch's results.
        - Index -> letter mapping now uses ChessDataset.CHESS_PIECES, since
          ChessImagesDataset historically lacked that attribute.
        """
        dataset = ChessImagesDataset(images)
        loader = DataLoader(dataset, batch_size=64)

        pieces = []
        self.model.eval()  # inference mode
        with torch.no_grad():
            for inputs, _ in loader:  # labels are placeholder "" strings
                inputs = inputs.to(self.model.get_device())

                outputs = self.model(inputs)
                predicted = torch.argmax(outputs, dim=1)
                pieces.extend(ChessDataset.CHESS_PIECES[ix] for ix in predicted.tolist())

        return ''.join(pieces)
393
+
394
+
395
# Manual smoke-test calls, left disabled:
#t = ChessPiecesRecognition()
#t.train()
#t.eval()

# This file is meant to be imported (it defines the recognizer); running it
# directly only prints a notice.
if __name__ == "__main__":
    print("This is a module and should not be executed directly")
install-requirements.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ . ./venv/bin/activate
2
+ pip install -r ./requirements.txt
my_prompt_config.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import PromptTemplates, PlanningPromptTemplate, FinalAnswerPromptTemplate, ManagedAgentPromptTemplate
2
+
3
+
4
class PromptConfig:
    """Holds the smolagents prompt templates used by the course agent.

    The templates are defined as a class attribute so they can be shared
    without instantiating the class.  Planning and managed-agent sections
    are intentionally left blank (empty strings).
    """

    # Complete PromptTemplates bundle: system prompt with the
    # "FINAL ANSWER: ..." contract, empty planning prompts, and a
    # final-answer post-message spelling out the expected answer format.
    PROMPT_TEMPLATES = PromptTemplates(
        system_prompt="""
        You are a general AI assistant. I will ask you a question. Report your thoughts, and finish
        your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
        Describe your initial plan as a set of bullet points.
        Each bullet point should describe in one sentence an action which is to be taken in this step.
        Use the tools provided. If you are going to use a tool, describe in detail why you are going
        to use that particular tool and explain parameters used to invoke the tool.
        Analyze the question provided.
        Describe each step which needs to be taken to answer it.
        """,
        # Planning templates deliberately blank — smolagents will use them as-is.
        planning=PlanningPromptTemplate(
            initial_plan="""

            """,
            update_plan_pre_messages="""

            """,
            update_plan_post_messages="""

            """,
        ),
        managed_agent=ManagedAgentPromptTemplate(task="", report=""),
        # Formatting rules for the final answer (number / short string / CSV list).
        final_answer=FinalAnswerPromptTemplate(
            pre_messages="",
            post_messages="""
            YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of
            numbers and/or strings.
            If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent
            sign unless specified otherwise.
            If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the digits in
            plain text unless specified otherwise.
            If you are asked for a comma separated list, apply the above rules depending of whether the element to be put
            in the list is a number or a string
            """
        ),
    )

    def __init__(self):
        # No per-instance state; constructor only signals initialization.
        print("Prompt Templates initialized")
45
+
46
+
47
+
48
+ #EMPTY_PROMPT_TEMPLATES = PromptTemplates(
49
+ # system_prompt="",
50
+ # planning=PlanningPromptTemplate(
51
+ # initial_plan="",
52
+ # update_plan_pre_messages="",
53
+ # update_plan_post_messages="",
54
+ # ),
55
+ # managed_agent=ManagedAgentPromptTemplate(task="", report=""),
56
+ # final_answer=FinalAnswerPromptTemplate(pre_messages="", post_messages=""),
57
+ #)
my_tools.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from PIL import Image
3
+ import requests
4
+ from io import BytesIO
5
+
6
+ #AUTHORIZED_TYPES = [
7
+ # "string",
8
+ # "boolean",
9
+ # "integer",
10
+ # "number",
11
+ # "image",
12
+ # "audio",
13
+ # "array",
14
+ # "object",
15
+ # "any",
16
+ # "null",
17
+ #]
18
+
19
+
20
class ReverseStringTool(Tool):
    """smolagents tool that decodes a string given in reversed form."""

    name = "_my_reverse_string"
    description = """
    Decode a string which is provided in a reversed form.
    """

    inputs = {
        "_inp": {
            "type": "string",
            "description": "encoded input string",
        }
    }

    output_type = "string"

    def forward(self, _inp: str) -> str:
        """Return *_inp* reversed.

        Uses slice reversal instead of the original character-by-character
        prepend loop, which was O(n^2) in string length.
        """
        return _inp[::-1]
40
+
41
+
42
class ImageLoadTool(Tool):
    """smolagents tool that downloads the attachment image for a task id."""

    name = "_my_image_load"
    description = """
    Load image for the provided task id
    """

    inputs = {
        "task_id": {
            "type": "string",
            "description": "task id to load image",
        }
    }

    output_type = "image"
    # Scoring-service base URL; files are served at /files/<task_id>.
    api_url = "https://agents-course-unit4-scoring.hf.space"

    def forward(self, task_id: str) -> Image.Image:
        """Fetch the file for *task_id* and return it as an RGB PIL image.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            PIL.UnidentifiedImageError: if the response body is not an image.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        }
        url = f"{self.api_url}/files/{task_id}"
        # timeout prevents the agent hanging forever on a dead endpoint;
        # raise_for_status avoids feeding an HTML error page to Image.open.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content)).convert("RGB")
        print(f"***KS*** Loaded image for \n\ttask id: {task_id} \n\timage: {image}")
        return image
requirements.txt CHANGED
@@ -1,2 +1,10 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ smolagents
4
+ gradio[oauth]
5
+ pytest
6
+ matplotlib
7
+ PyQt6
8
+ chess
9
+ opencv-python
10
+ torch
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ . ./venv/bin/activate
2
+ python3 app.py
simple.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
+ from smolagents import CodeAgent, tool, InferenceClientModel, WebSearchTool, load_tool, PromptTemplates, Tool, FinalAnswerTool
7
+ from smolagents import PromptTemplates, PlanningPromptTemplate, FinalAnswerPromptTemplate, ManagedAgentPromptTemplate
8
+
9
+ from dotenv import load_dotenv
10
+ from my_tools import ReverseStringTool, ImageLoadTool
11
+ from chess_board_tool import ChessBoard
12
+ from PIL import Image
13
+
14
+
15
# Sample task id whose attachment is a chess-board image (used for local experiments).
task_id = "cca530fc-4052-43b2-b130-b30968d8aa44"

# Board-recognition approach referenced while building the chess tool:
# https://github.com/kratos606/chessboard-recogniser/tree/main

# Hugging Face model id used for the reasoning CodeAgent.
MODEL_REASONING = "Qwen/Qwen2.5-Coder-32B-Instruct"
#MODEL_REASONING = "Qwen/Qwen2.5-72B-Instruct" not good
#"meta-llama/Meta-Llama-3-70B-Instruct"
# jayasuryajsk/chess-reasoner-qwen
# https://huggingface.co/jayasuryajsk/chess-reasoner-qwen
25
# Prompt bundle for the experimental agent in this script: a system prompt
# listing the available tools and the answer-formatting contract; planning,
# managed-agent, and final-answer sections are left effectively blank.
PROMPT_TEMPLATES = PromptTemplates(
    system_prompt="""
    You are a general AI assistant.

    Answer the following questions as best you can.

    Describe your initial plan as a set of bullet points.
    Each bullet point should describe in one sentence an action which is to be taken in this step.

    Use the tools provided. If you are going to use a tool, describe in detail how you are going
    to use that particular tool and explain parameters used to invoke the tool.

    Tools provided: final_answer , _my_reverse_string , _my_image_load, _my_chess_board

    YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of
    numbers and/or strings.
    If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent
    sign unless specified otherwise.
    If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the digits in
    plain text unless specified otherwise.
    If you are asked for a comma separated list, apply the above rules depending of whether the element to be put
    in the list is a number or a string.

    Report your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].

    """,

    # Planning templates deliberately blank.
    planning=PlanningPromptTemplate(
        initial_plan="""

        """,
        update_plan_pre_messages="""

        """,
        update_plan_post_messages="""

        """,
    ),
    managed_agent=ManagedAgentPromptTemplate(task="", report=""),
    final_answer=FinalAnswerPromptTemplate(
        pre_messages="",
        post_messages="""

        """
    ),
)
71
+
72
# Earlier question variants kept for reference:
#question = f"Load an image for task id {task_id} and describe the chess position shown on the image. "
#question = f"Load an image for task id {task_id} and display it using matplotlib "
# Question driving the agent run below.
question = f"Load an image for task id {task_id} and analyze the chess board "

# CodeAgent wired with the image/reverse-string/chess-board tools and the
# custom prompt templates defined above.
reasoning_agent = CodeAgent(
    name="CourseAssistant",
    description="General AI Assistant",
    tools=[ImageLoadTool(), FinalAnswerTool(), ReverseStringTool(), ChessBoard()],
    model=InferenceClientModel(model_id=MODEL_REASONING),
    planning_interval=3,  # This is where you activate planning!,
    prompt_templates=PROMPT_TEMPLATES,
    #managed_agents=[web_search_agent],
    # Modules the generated agent code is allowed to import.
    additional_authorized_imports=["PIL","chess","my_tools","matplotlib","matplotlib.pyplot","chess_board_tool"],
)

# Kick off one agent run for the question above (side effect: prints agent trace).
reasoning_agent.run(question)
simple.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ . ./venv/bin/activate
2
+ clear
3
+ python3 simple.py
test_tools.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from my_tools import ReverseStringTool, ImageLoadTool
2
+ from chess_board_tool import ChessBoard
3
+ import pytest
4
+ import matplotlib.pyplot as plt
5
+ import matplotlib as mp
6
+
7
+ #pytest --capture=no
8
+
9
@pytest.mark.parametrize("_inp,_exp", [("abc", "cba"), ("ihg fed cba", "abc def ghi")])
def test_tool_reverse_string(_inp, _exp):
    """ReverseStringTool must map the reversed input back to the expected text."""
    tool = ReverseStringTool()
    decoded = tool.forward(_inp)
    assert decoded == _exp
12
+
13
+
14
@pytest.mark.parametrize("_task_id,_exp",[("cca530fc-4052-43b2-b130-b30968d8aa44", "")])
def test_tool_image_load(_task_id,_exp):
    """Smoke-test ImageLoadTool against the live scoring service.

    NOTE(review): performs a real network call and asserts nothing — it only
    verifies the tool runs without raising; consider asserting on the result.
    """
    #assert ReverseStringTool().forward(_inp) == _exp
    print(f"Loading image for task id: {_task_id}")
    t = ImageLoadTool()
    result = t.forward(_task_id)
    print(f"Got result: {result}")
    # Select an interactive Qt backend (display code below is disabled).
    mp.use('QtAgg')
    #plt.imshow(result)
    #plt.show()
24
+
25
+
26
@pytest.mark.parametrize("_task_id,_exp",[("cca530fc-4052-43b2-b130-b30968d8aa44", "")])
def test_tool_chess_board(_task_id,_exp):
    """Smoke-test ChessBoard on an image fetched by ImageLoadTool.

    NOTE(review): network-dependent and assertion-free — `fen` is computed
    but never checked; consider asserting it is a well-formed FEN string.
    """
    #assert ReverseStringTool().forward(_inp) == _exp
    print(f"Loading image for task id: {_task_id}")
    t = ImageLoadTool()
    image = t.forward(_task_id)
    print(f"Got result: {image}")
    board_tool = ChessBoard()
    fen = board_tool.forward(image)