Spaces:

ed-donner
/

connect

Running on CPU Upgrade

App Files Files Community

ed-donner committed on Feb 12, 2025

Commit

683d749

1 Parent(s): c908a5c

Added leaderboard with DB connectivity

Browse files

Files changed (12) hide show

README.md +50 -0
app.py +6 -0
arena/board.py +63 -9
arena/board_view.py +5 -1
arena/c4.py +132 -22
arena/game.py +56 -4
arena/llm.py +54 -64
arena/player.py +31 -7
arena/record.py +178 -0
connect.png +0 -0
prototype.ipynb +0 -0
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -15,3 +15,53 @@ short_description: Arena for playing Four-in-a-row between LLMs
 # Four-in-a-row Arena
 ### A battleground for pitting LLMs against each other in the classic board game

 # Four-in-a-row Arena
 ### A battleground for pitting LLMs against each other in the classic board game
+![Connect](connect.png)
+It has been great fun making this Arena and watching LLMs duke it out!
+Quick links:
+- The [GitHub repo](https://github.com/ed-donner/connect) for the code
+- The [HuggingFace Spaces](https://huggingface.co/spaces/ed-donner/connect) where it's running
+- My [LinkedIn](https://www.linkedin.com/in/eddonner/) - I love connecting!
+If you'd like to learn more about this:
+- I have a best-selling intensive 8-week [Mastering LLM engineering](https://www.udemy.com/course/llm-engineering-master-ai-and-large-language-models/?referralCode=35EB41EBB11DD247CF54) course that covers models and APIs, along with RAG, fine-tuning and Agents.
+- I'm running a number of [Live Events](https://www.oreilly.com/search/?q=author%3A%20%22Ed%20Donner%22) with O'Reilly and Pearson
+## Installing the code
+1. Clone the repo with `git clone https://github.com/ed-donner/connect.git`
+2. Change to the project directory with `cd connect`
+3. Create a Python virtual environment with `python -m venv venv`
+4. Activate your environment with either `venv\Scripts\activate` on Windows, or `source venv/bin/activate` on Mac/Linux
+5. Then run `pip install -r requirements.txt` to install the packages
+## Setting up your API keys
+Please create a file with the exact name `.env` in the project root directory (connect).
+You would typically use Notepad (Windows) or nano (Mac) for this.
+If you're not familiar with setting up a .env file this way, ask ChatGPT! It will give much more eloquent instructions than me. 😂
+Your .env file should contain the following; add whichever keys you would like to use.
+```
+OPENAI_API_KEY=sk-proj-...
+ANTHROPIC_API_KEY=sk-ant-...
+DEEPSEEK_API_KEY=sk...
+GROQ_API_KEY=...
+```
+## Optional - using Ollama
+You can run Ollama locally, and the Arena will connect to run local models.
+1. Download and install Ollama from https://ollama.com, noting that on a PC you might need administrator permissions for the install to work properly
+2. On a PC, start a Command Prompt or PowerShell window (press Win + R, type `cmd`, and press Enter). On a Mac, start a Terminal (Applications > Utilities > Terminal).
+3. Run `ollama run llama3.2` or for smaller machines try `ollama run llama3.2:1b`
+4. If this doesn't work, you may need to run `ollama serve` in another PowerShell window (Windows) or Terminal (Mac), and then try step 3 again

app.py CHANGED Viewed

@@ -1,3 +1,9 @@
 from arena.c4 import make_display
 from dotenv import load_dotenv

+"""
+The main entry-point for the Spaces application
+Create a Gradio app and launch it
+"""
 from arena.c4 import make_display
 from dotenv import load_dotenv

arena/board.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from arena.board_view import to_svg
 RED = 1
 YELLOW = -1
@@ -10,8 +11,15 @@ cols = "ABCDEFG"
 class Board:
     def __init__(self):
         self.cells = [[0 for _ in range(7)] for _ in range(6)]
         self.player = RED
         self.winner = EMPTY
@@ -20,6 +28,9 @@ class Board:
         self.latest_x, self.latest_y = -1, -1
     def __repr__(self):
         result = ""
         for y in range(6):
             for x in range(7):
@@ -29,6 +40,9 @@ class Board:
         return result
     def message(self):
         if self.winner and self.forfeit:
             return f"{show[self.winner]} wins after an illegal move by {show[-1*self.winner]}\n"
         elif self.winner:
@@ -39,16 +53,24 @@ class Board:
             return f"{show[self.player]} to play\n"
     def html(self):
         result = '<div style="text-align: center;font-size:24px">'
         result += self.__repr__().replace("\n", "<br/>")
         result += "</div>"
         return result
     def svg(self):
-        """Convert the board state to an SVG representation"""
         return to_svg(self)
     def json(self):
         result = "{\n"
         result += '    "Column names": ["A", "B", "C", "D", "E", "F", "G"],\n'
         for y in range(6):
@@ -60,6 +82,9 @@ class Board:
         return result
     def alternative(self):
         result = "ABCDEFG\n"
         for y in range(6):
             for x in range(7):
@@ -67,19 +92,32 @@ class Board:
             result += "\n"
         return result
-    def height(self, x):
         height = 0
         while height < 6 and self.cells[height][x] != EMPTY:
             height += 1
         return height
-    def legal_moves(self):
         return [cols[x] for x in range(7) if self.height(x) < 6]
-    def illegal_moves(self):
         return [cols[x] for x in range(7) if self.height(x) == 6]
-    def winning_line(self, x, y, dx, dy):
         color = self.cells[y][x]
         for pointer in range(1, 4):
             xp = x + dx * pointer
@@ -88,20 +126,33 @@ class Board:
                 return EMPTY
         return color
-    def winning_cell(self, x, y):
         for dx, dy in ((0, 1), (1, 1), (1, 0), (1, -1)):
             if winner := self.winning_line(x, y, dx, dy):
                 return winner
         return EMPTY
-    def wins(self):
         for y in range(6):
             for x in range(7):
                 if winner := self.winning_cell(x, y):
                     return winner
         return EMPTY
-    def move(self, x):
         y = self.height(x)
         self.cells[y][x] = self.player
         self.latest_x, self.latest_y = x, y
@@ -113,5 +164,8 @@ class Board:
             self.player = -1 * self.player
         return self
-    def is_active(self):
         return not self.winner and not self.draw

 from arena.board_view import to_svg
+from typing import List
 RED = 1
 YELLOW = -1
 class Board:
+    """
+    A class to represent a Four-in-a-row board
+    """
     def __init__(self):
+        """
+        Initialize this instance, starting with empty cells, RED to play
+        The latest x,y is used to track the most recent move, so it animates on the display
+        """
         self.cells = [[0 for _ in range(7)] for _ in range(6)]
         self.player = RED
         self.winner = EMPTY
         self.latest_x, self.latest_y = -1, -1
     def __repr__(self):
+        """
+        A visual representation
+        """
         result = ""
         for y in range(6):
             for x in range(7):
         return result
     def message(self):
+        """
+        A summary of the status
+        """
         if self.winner and self.forfeit:
             return f"{show[self.winner]} wins after an illegal move by {show[-1*self.winner]}\n"
         elif self.winner:
             return f"{show[self.player]} to play\n"
     def html(self):
+        """
+        Return an HTML representation
+        """
         result = '<div style="text-align: center;font-size:24px">'
         result += self.__repr__().replace("\n", "<br/>")
         result += "</div>"
         return result
     def svg(self):
+        """
+        Return an SVG representation
+        """
         return to_svg(self)
     def json(self):
+        """
+        Return a json representation
+        """
         result = "{\n"
         result += '    "Column names": ["A", "B", "C", "D", "E", "F", "G"],\n'
         for y in range(6):
         return result
     def alternative(self):
+        """
+        An alternative representation, used in prompting so that the LLM sees the board in two different formats
+        """
         result = "ABCDEFG\n"
         for y in range(6):
             for x in range(7):
             result += "\n"
         return result
+    def height(self, x: int) -> int:
+        """
+        Return the height of the given column
+        """
         height = 0
         while height < 6 and self.cells[height][x] != EMPTY:
             height += 1
         return height
+    def legal_moves(self) -> List[str]:
+        """
+        Return the names of columns that are not full
+        """
         return [cols[x] for x in range(7) if self.height(x) < 6]
+    def illegal_moves(self) -> List[str]:
+        """
+        Return the names of columns that are full
+        """
         return [cols[x] for x in range(7) if self.height(x) == 6]
+    def winning_line(self, x: int, y: int, dx: int, dy: int) -> int:
+        """
+        Return RED or YELLOW if this cell is the start of a 4-in-a-row going in the direction (dx, dy)
+        Or EMPTY if not
+        """
         color = self.cells[y][x]
         for pointer in range(1, 4):
             xp = x + dx * pointer
                 return EMPTY
         return color
+    def winning_cell(self, x: int, y: int) -> int:
+        """
+        Return RED or YELLOW if this cell is the start of a 4-in-a-row
+        Or EMPTY if not
+        For performance reasons, only look in 4 of the possible 8 directions,
+        (because this test will run on both sides of the 4-in-a-row)
+        """
         for dx, dy in ((0, 1), (1, 1), (1, 0), (1, -1)):
             if winner := self.winning_line(x, y, dx, dy):
                 return winner
         return EMPTY
+    def wins(self) -> int:
+        """
+        Return RED or YELLOW if there is a 4-in-a-row of that color on the board
+        Or EMPTY if not
+        """
         for y in range(6):
             for x in range(7):
                 if winner := self.winning_cell(x, y):
                     return winner
         return EMPTY
+    def move(self, x: int):
+        """
+        Make a move in the given column
+        """
         y = self.height(x)
         self.cells[y][x] = self.player
         self.latest_x, self.latest_y = x, y
             self.player = -1 * self.player
         return self
+    def is_active(self) -> bool:
+        """
+        Return true if the game has not yet ended
+        """
         return not self.winner and not self.draw

arena/board_view.py CHANGED Viewed

@@ -3,7 +3,11 @@ YELLOW = -1
 EMPTY = 0
 def to_svg(board):
-    """Convert the board state to an SVG representation"""
     svg = '''
     <div style="display: flex; justify-content: center;">
     <svg width="450" height="420" viewBox="0 0 450 420">

 EMPTY = 0
 def to_svg(board):
+    """
+    Create an SVG representation of the board, with the latest piece dropping down via SVG
+    I must confess that this function was written almost entirely by Claude; done in 15 mins,
+    when it would have taken me a couple of hours. Amazing!
+    """
     svg = '''
     <div style="display: flex; justify-content: center;">
     <svg width="450" height="420" viewBox="0 0 450 420">

arena/c4.py CHANGED Viewed

@@ -4,7 +4,13 @@ from arena.llm import LLM
 import gradio as gr
-css = "footer{display:none !important}"
 js = """
 function refresh() {
@@ -18,20 +24,70 @@ function refresh() {
 """
-def message_html(game):
     return (
         f'<div style="text-align: center;font-size:18px">{game.board.message()}</div>'
     )
 def load_callback(red_llm, yellow_llm):
     game = Game(red_llm, yellow_llm)
     enabled = gr.Button(interactive=True)
     message = message_html(game)
-    return game, game.board.svg(), message, "", "", enabled, enabled, enabled
 def move_callback(game):
     game.move()
     message = message_html(game)
     if_active = gr.Button(interactive=game.board.is_active())
@@ -47,8 +103,13 @@ def move_callback(game):
 def run_callback(game):
     enabled = gr.Button(interactive=True)
     disabled = gr.Button(interactive=False)
     message = message_html(game)
     yield game, game.board.svg(), message, game.thoughts(RED), game.thoughts(
         YELLOW
@@ -59,26 +120,39 @@ def run_callback(game):
         yield game, game.board.svg(), message, game.thoughts(RED), game.thoughts(
             YELLOW
         ), disabled, disabled, disabled
     yield game, game.board.svg(), message, game.thoughts(RED), game.thoughts(
         YELLOW
     ), disabled, disabled, enabled
 def model_callback(player_name, game, new_model_name):
     player = game.players[player_name]
     player.switch_model(new_model_name)
     return game
 def red_model_callback(game, new_model_name):
     return model_callback(RED, game, new_model_name)
 def yellow_model_callback(game, new_model_name):
     return model_callback(YELLOW, game, new_model_name)
 def player_section(name, default):
     all_model_names = LLM.all_model_names()
     with gr.Row():
         gr.HTML(f'<div style="text-align: center;font-size:18px">{name} Player</div>')
@@ -94,6 +168,9 @@ def player_section(name, default):
 def make_display():
     with gr.Blocks(
         title="C4 Battle",
         css=css,
@@ -103,31 +180,60 @@ def make_display():
         game = gr.State()
-        with gr.Row():
-            gr.HTML(
-                '<div style="text-align: center;font-size:24px">Four-in-a-row LLM Showdown</div>'
-            )
-        with gr.Row():
-            with gr.Column(scale=1):
-                red_thoughts, red_dropdown = player_section("Red", "gpt-4o-mini")
-            with gr.Column(scale=2):
                 with gr.Row():
-                    message = gr.HTML(
-                        '<div style="text-align: center;font-size:18px">The Board</div>'
                     )
-                with gr.Row():
-                    board_display = gr.HTML()
                 with gr.Row():
                     with gr.Column(scale=1):
-                        move_button = gr.Button("Next move")
                     with gr.Column(scale=1):
-                        run_button = gr.Button("Run game", variant="primary")
                     with gr.Column(scale=1):
-                        reset_button = gr.Button("Start Over", variant="stop")
-            with gr.Column(scale=1):
-                yellow_thoughts, yellow_dropdown = player_section(
-                    "Yellow", "claude-3-5-sonnet-latest"
-                )
         blocks.load(
             load_callback,
@@ -191,4 +297,8 @@ def make_display():
             ],
         )
     return blocks

 import gradio as gr
+css = """
+.dataframe-fix .table-wrap {
+    min-height: 800px;
+    max-height: 800px;
+}
+footer{display:none !important}
+"""
 js = """
 function refresh() {
 """
+def message_html(game) -> str:
+    """
+    Return the message for the top of the UI
+    """
     return (
         f'<div style="text-align: center;font-size:18px">{game.board.message()}</div>'
     )
+def format_records_for_table(games):
+    """
+    Turn the results objects into a list of lists for the Gradio Dataframe
+    """
+    return [
+        [
+            game.when,
+            game.red_player,
+            game.yellow_player,
+            "Red" if game.red_won else "Yellow" if game.yellow_won else "Draw",
+        ]
+        for game in reversed(games)
+    ]
+def format_ratings_for_table(ratings):
+    """
+    Turn the ratings into a List of Lists for the Gradio Dataframe
+    """
+    items = sorted(ratings.items(), key=lambda x: x[1], reverse=True)
+    return [[item[0], int(round(item[1]))] for item in items]
 def load_callback(red_llm, yellow_llm):
+    """
+    Callback called when the game is started. Create a new Game object for the state.
+    """
     game = Game(red_llm, yellow_llm)
     enabled = gr.Button(interactive=True)
     message = message_html(game)
+    return (
+        game,
+        game.board.svg(),
+        message,
+        "",
+        "",
+        enabled,
+        enabled,
+        enabled,
+    )
+def leaderboard_callback(game):
+    """
+    Callback called when the user switches to the Leaderboard tab. Load in the results.
+    """
+    records_df = format_records_for_table(Game.get_games())
+    ratings_df = format_ratings_for_table(Game.get_ratings())
+    return records_df, ratings_df
 def move_callback(game):
+    """
+    Callback called when the user clicks to do a single move.
+    """
     game.move()
     message = message_html(game)
     if_active = gr.Button(interactive=game.board.is_active())
 def run_callback(game):
+    """
+    Callback called when the user runs an entire game. Reset the board, run the game, store results.
+    Yield interim results so the UI updates.
+    """
     enabled = gr.Button(interactive=True)
     disabled = gr.Button(interactive=False)
+    game.reset()
     message = message_html(game)
     yield game, game.board.svg(), message, game.thoughts(RED), game.thoughts(
         YELLOW
         yield game, game.board.svg(), message, game.thoughts(RED), game.thoughts(
             YELLOW
         ), disabled, disabled, disabled
+    game.record()
     yield game, game.board.svg(), message, game.thoughts(RED), game.thoughts(
         YELLOW
     ), disabled, disabled, enabled
 def model_callback(player_name, game, new_model_name):
+    """
+    Callback when the user changes the model
+    """
     player = game.players[player_name]
     player.switch_model(new_model_name)
     return game
 def red_model_callback(game, new_model_name):
+    """
+    Callback when red model is changed
+    """
     return model_callback(RED, game, new_model_name)
 def yellow_model_callback(game, new_model_name):
+    """
+    Callback when yellow model is changed
+    """
     return model_callback(YELLOW, game, new_model_name)
 def player_section(name, default):
+    """
+    Create the left and right sections of the UI
+    """
     all_model_names = LLM.all_model_names()
     with gr.Row():
         gr.HTML(f'<div style="text-align: center;font-size:18px">{name} Player</div>')
 def make_display():
+    """
+    The Gradio UI to show the Game, with event handlers
+    """
     with gr.Blocks(
         title="C4 Battle",
         css=css,
         game = gr.State()
+        with gr.Tabs():
+            with gr.TabItem("Game"):
                 with gr.Row():
+                    gr.HTML(
+                        '<div style="text-align: center;font-size:24px">Four-in-a-row LLM Showdown</div>'
                     )
                 with gr.Row():
                     with gr.Column(scale=1):
+                        red_thoughts, red_dropdown = player_section(
+                            "Red", "gpt-4o-mini"
+                        )
+                    with gr.Column(scale=2):
+                        with gr.Row():
+                            message = gr.HTML(
+                                '<div style="text-align: center;font-size:18px">The Board</div>'
+                            )
+                        with gr.Row():
+                            board_display = gr.HTML()
+                        with gr.Row():
+                            with gr.Column(scale=1):
+                                move_button = gr.Button("Next move")
+                            with gr.Column(scale=1):
+                                run_button = gr.Button("Run game", variant="primary")
+                            with gr.Column(scale=1):
+                                reset_button = gr.Button("Start Over", variant="stop")
                     with gr.Column(scale=1):
+                        yellow_thoughts, yellow_dropdown = player_section(
+                            "Yellow", "claude-3-5-sonnet-latest"
+                        )
+            with gr.TabItem("Leaderboard") as leaderboard_tab:
+                with gr.Row():
                     with gr.Column(scale=1):
+                        ratings_df = gr.Dataframe(
+                            headers=["Player", "ELO"],
+                            label="Ratings",
+                            column_widths=[2, 1],
+                            wrap=True,
+                            col_count=2,
+                            row_count=10,
+                            max_height=800,
+                            elem_classes=["dataframe-fix"],
+                        )
+                    with gr.Column(scale=2):
+                        results_df = gr.Dataframe(
+                            headers=["When", "Red Player", "Yellow Player", "Winner"],
+                            label="Game History",
+                            column_widths=[2, 2, 2, 1],
+                            wrap=True,
+                            col_count=4,
+                            row_count=10,
+                            max_height=800,
+                            elem_classes=["dataframe-fix"],
+                        )
         blocks.load(
             load_callback,
             ],
         )
+        leaderboard_tab.select(
+            leaderboard_callback, inputs=[game], outputs=[results_df, ratings_df]
+        )
     return blocks

arena/game.py CHANGED Viewed

@@ -1,26 +1,78 @@
-from arena.board import Board, RED, YELLOW, EMPTY, pieces
 from arena.player import Player
 class Game:
-    def __init__(self, model_red, model_yellow):
         self.board = Board()
         self.players = {
             RED: Player(model_red, RED),
             YELLOW: Player(model_yellow, YELLOW),
         }
     def move(self):
         self.players[self.board.player].move(self.board)
-    def is_active(self):
         return self.board.is_active()
-    def thoughts(self, player):
         return self.players[player].thoughts()
     def run(self):
         while self.is_active():
             self.move()
             print(self.board)

+from arena.board import Board, RED, YELLOW
 from arena.player import Player
+from arena.record import get_games, Result, record_game, ratings
+from datetime import datetime
+from typing import List
 class Game:
+    """
+    A Game consists of a Board and 2 players
+    """
+    def __init__(self, model_red: str, model_yellow: str):
+        """
+        Initialize this Game; a new board, and new Player objects
+        """
         self.board = Board()
         self.players = {
             RED: Player(model_red, RED),
             YELLOW: Player(model_yellow, YELLOW),
         }
+    def reset(self):
+        """
+        Restart the game by resetting the board; keep players the same
+        """
+        self.board = Board()
     def move(self):
+        """
+        Make the next move. Delegate to the current player to make a move on this board.
+        """
         self.players[self.board.player].move(self.board)
+    def is_active(self) -> bool:
+        """
+        Return true if the game hasn't yet ended
+        """
         return self.board.is_active()
+    def thoughts(self, player) -> str:
+        """
+        Return the inner thoughts of the given player
+        """
         return self.players[player].thoughts()
+    @staticmethod
+    def get_games() -> List:
+        """
+        Return all the games stored in the db
+        """
+        return get_games()
+    @staticmethod
+    def get_ratings():
+        """
+        Return the ELO ratings of all players
+        """
+        return ratings()
+    def record(self):
+        """
+        Store the results of this game in the DB
+        """
+        red_player = self.players[RED].llm.model_name
+        yellow_player = self.players[YELLOW].llm.model_name
+        red_won = self.board.winner == RED
+        yellow_won = self.board.winner == YELLOW
+        result = Result(red_player, yellow_player, red_won, yellow_won, datetime.now())
+        record_game(result)
     def run(self):
+        """
+        If being used outside gradio; move and print in a loop
+        """
         while self.is_active():
             self.move()
             print(self.board)

arena/llm.py CHANGED Viewed

@@ -48,7 +48,11 @@ class LLM(ABC):
         return result
     def protected_send(self, system: str, user: str, max_tokens: int = 3000) -> str:
-        retries = 5
         while retries:
             retries -= 1
             try:
@@ -61,9 +65,27 @@ class LLM(ABC):
         return "{}"
     def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
-        raise NotImplementedError
-    def api_model_name(self):
         if " " in self.model_name:
             return self.model_name.split(" ")[0]
         else:
@@ -83,6 +105,10 @@ class LLM(ABC):
     @classmethod
     def all_model_names(cls) -> List[str]:
         models = list(cls.model_map().keys())
         allowed = os.getenv("MODELS")
         if allowed:
@@ -153,28 +179,10 @@ class GPT(LLM):
         super().__init__(model_name, temperature)
         self.client = OpenAI()
-    def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
-        """
-        Send a message to GPT
-        :param system: the context in which this message is to be taken
-        :param user: the prompt
-        :param max_tokens: max number of tokens to generate
-        :return: the response from the AI
-        """
-        response = self.client.chat.completions.create(
-            model=self.api_model_name(),
-            messages=[
-                {"role": "system", "content": system},
-                {"role": "user", "content": user},
-            ],
-            response_format={"type": "json_object"},
-        )
-        return response.choices[0].message.content
 class O1(LLM):
     """
-    A class to act as an interface to the remote AI, in this case GPT
     """
     model_names = ["o1-mini"]
@@ -188,7 +196,7 @@ class O1(LLM):
     def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
         """
-        Send a message to GPT
         :param system: the context in which this message is to be taken
         :param user: the prompt
         :param max_tokens: max number of tokens to generate
@@ -206,7 +214,7 @@ class O1(LLM):
 class O3(LLM):
     """
-    A class to act as an interface to the remote AI, in this case GPT
     """
     model_names = ["o3-mini"]
@@ -225,7 +233,7 @@ class O3(LLM):
     def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
         """
-        Send a message to GPT
         :param system: the context in which this message is to be taken
         :param user: the prompt
         :param max_tokens: max number of tokens to generate
@@ -241,6 +249,25 @@ class O3(LLM):
         return response.choices[0].message.content
 class Ollama(LLM):
     """
     A class to act as an interface to the remote AI, in this case Ollama via the OpenAI client
@@ -250,7 +277,7 @@ class Ollama(LLM):
     def __init__(self, model_name: str, temperature: float):
         """
-        Create a new instance of the OpenAI client
         """
         super().__init__(model_name, temperature)
         self.client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
@@ -296,25 +323,6 @@ class DeepSeekAPI(LLM):
             api_key=deepseek_api_key, base_url="https://api.deepseek.com"
         )
-    def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
-        """
-        Send a message to DeepSeek
-        :param system: the context in which this message is to be taken
-        :param user: the prompt
-        :param max_tokens: max number of tokens to generate
-        :return: the response from the AI
-        """
-        response = self.client.chat.completions.create(
-            model=self.api_model_name(),
-            messages=[
-                {"role": "system", "content": system},
-                {"role": "user", "content": user},
-            ],
-        )
-        reply = response.choices[0].message.content
-        return reply
 class DeepSeekLocal(LLM):
     """
@@ -367,25 +375,7 @@ class GroqAPI(LLM):
     def __init__(self, model_name: str, temperature: float):
         """
-        Create a new instance of the OpenAI client
         """
         super().__init__(model_name, temperature)
         self.client = Groq()
-    def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
-        """
-        Send a message to GPT
-        :param system: the context in which this message is to be taken
-        :param user: the prompt
-        :param max_tokens: max number of tokens to generate
-        :return: the response from the AI
-        """
-        response = self.client.chat.completions.create(
-            model=self.api_model_name(),
-            messages=[
-                {"role": "system", "content": system},
-                {"role": "user", "content": user},
-            ],
-            response_format={"type": "json_object"},
-        )
-        return response.choices[0].message.content

         return result
     def protected_send(self, system: str, user: str, max_tokens: int = 3000) -> str:
+        """
+        Wrap the send call in an exception handler, giving the LLM 3 chances in total, in case
+        of overload errors. If it fails 3 times, then it forfeits!
+        """
+        retries = 3
         while retries:
             retries -= 1
             try:
         return "{}"
     def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
+        """
+        Send a message to the model - this default implementation follows the OpenAI API structure
+        :param system: the context in which this message is to be taken
+        :param user: the prompt
+        :param max_tokens: max number of tokens to generate
+        :return: the response from the AI
+        """
+        response = self.client.chat.completions.create(
+            model=self.api_model_name(),
+            messages=[
+                {"role": "system", "content": system},
+                {"role": "user", "content": user},
+            ],
+            response_format={"type": "json_object"},
+        )
+        return response.choices[0].message.content
+    def api_model_name(self) -> str:
+        """
+        Return the actual model_name to be used in the call to the API; strip out anything after a space
+        """
         if " " in self.model_name:
             return self.model_name.split(" ")[0]
         else:
     @classmethod
     def all_model_names(cls) -> List[str]:
+        """
+        Return a list of all the model names supported.
+        Use the ones specified in the model_map, but also check if there's an env variable set that restricts the models
+        """
         models = list(cls.model_map().keys())
         allowed = os.getenv("MODELS")
         if allowed:
         super().__init__(model_name, temperature)
         self.client = OpenAI()
 class O1(LLM):
     """
+    A class to act as an interface to the remote AI, in this case O1
     """
     model_names = ["o1-mini"]
     def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
         """
+        Send a message to O1
         :param system: the context in which this message is to be taken
         :param user: the prompt
         :param max_tokens: max number of tokens to generate
 class O3(LLM):
     """
+    A class to act as an interface to the remote AI, in this case O3
     """
     model_names = ["o3-mini"]
     def _send(self, system: str, user: str, max_tokens: int = 3000) -> str:
         """
+        Send a message to O3
         :param system: the context in which this message is to be taken
         :param user: the prompt
         :param max_tokens: max number of tokens to generate
         return response.choices[0].message.content
+class Gemini(LLM):
+    """
+    A class to act as an interface to the remote AI, in this case Gemini
+    """
+    model_names = ["gemini-2.0-flash", "gemini-1.5-flash"]
+    def __init__(self, model_name: str, temperature: float):
+        """
+        Create a new instance of the OpenAI client, pointed at Gemini's OpenAI-compatible endpoint
+        """
+        super().__init__(model_name, temperature)
+        google_api_key = os.getenv("GOOGLE_API_KEY")
+        self.client = OpenAI(
+            api_key=google_api_key,
+            base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
+        )
 class Ollama(LLM):
     """
     A class to act as an interface to the remote AI, in this case Ollama via the OpenAI client
     def __init__(self, model_name: str, temperature: float):
         """
+        Create a new instance of the OpenAI client for Ollama
         """
         super().__init__(model_name, temperature)
         self.client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
             api_key=deepseek_api_key, base_url="https://api.deepseek.com"
         )
 class DeepSeekLocal(LLM):
     """
     def __init__(self, model_name: str, temperature: float):
         """
+        Create a new instance of the Groq client
         """
         super().__init__(model_name, temperature)
         self.client = Groq()

arena/player.py CHANGED Viewed

@@ -5,8 +5,15 @@ import random
 class Player:
-    def __init__(self, model, color):
         self.color = color
         self.model = model
         self.llm = LLM.create(self.model)
@@ -15,7 +22,10 @@ class Player:
         self.opportunities = ""
         self.strategy = ""
-    def system(self, board, legal_moves, illegal_moves):
         return f"""You are playing the board game Connect 4.
 Players take turns to drop counters into one of 7 columns A, B, C, D, E, F, G.
 The winner is the first player to get 4 counters in a row in any direction.
@@ -33,7 +43,10 @@ You should respond in JSON according to this spec:
 You must pick one of these letters for your move_column: {legal_moves}{illegal_moves}"""
-    def user(self, board, legal_moves, illegal_moves):
         return f"""It is your turn to make a move as {pieces[self.color]}.
 Here is the current board, with row 1 at the bottom of the board:
@@ -78,8 +91,10 @@ Now make your decision.
 You must pick one of these letters for your move_column: {legal_moves}{illegal_moves}
 """
-    def process_move(self, reply, board):
-        print(reply)
         try:
             if len(reply) == 3 and reply[0] == "{" and reply[2] == "}":
                 reply = f'{{"move_column": "{reply[1]}"}}'
@@ -100,6 +115,9 @@ You must pick one of these letters for your move_column: {legal_moves}{illegal_m
             board.winner = -1 * board.player
     def move(self, board):
         legal_moves = ", ".join(board.legal_moves())
         if illegal := board.illegal_moves():
             illegal_moves = (
@@ -114,6 +132,9 @@ You must pick one of these letters for your move_column: {legal_moves}{illegal_m
         self.process_move(reply, board)
     def thoughts(self):
         result = '<div style="text-align: left;font-size:14px"><br/>'
         result += f"<b>Evaluation:</b><br/>{self.evaluation}<br/><br/>"
         result += f"<b>Threats:</b><br/>{self.threats}<br/><br/>"
@@ -122,5 +143,8 @@ You must pick one of these letters for your move_column: {legal_moves}{illegal_m
         result += "</div>"
         return result
-    def switch_model(self, new_model_name):
         self.llm = LLM.create(new_model_name)

 class Player:
+    """
+    This class represents one AI player in the game, and is responsible for managing the prompts
+    Delegating to an LLM instance to connect to the LLM
+    """
+    def __init__(self, model: str, color: int):
+        """
+        Set up this instance for the given model and player color
+        """
         self.color = color
         self.model = model
         self.llm = LLM.create(self.model)
         self.opportunities = ""
         self.strategy = ""
+    def system(self, board, legal_moves: str, illegal_moves: str) -> str:
+        """
+        Return the system prompt for this move
+        """
         return f"""You are playing the board game Connect 4.
 Players take turns to drop counters into one of 7 columns A, B, C, D, E, F, G.
 The winner is the first player to get 4 counters in a row in any direction.
 You must pick one of these letters for your move_column: {legal_moves}{illegal_moves}"""
+    def user(self, board, legal_moves: str, illegal_moves: str) -> str:
+        """
+        Return the user prompt for this move
+        """
         return f"""It is your turn to make a move as {pieces[self.color]}.
 Here is the current board, with row 1 at the bottom of the board:
 You must pick one of these letters for your move_column: {legal_moves}{illegal_moves}
 """
+    def process_move(self, reply: str, board):
+        """
+        Interpret the reply and make the move; if the move is illegal, then the current player loses
+        """
         try:
             if len(reply) == 3 and reply[0] == "{" and reply[2] == "}":
                 reply = f'{{"move_column": "{reply[1]}"}}'
             board.winner = -1 * board.player
     def move(self, board):
+        """
+        Have the underlying LLM make a move, and process the result
+        """
         legal_moves = ", ".join(board.legal_moves())
         if illegal := board.illegal_moves():
             illegal_moves = (
         self.process_move(reply, board)
     def thoughts(self):
+        """
+        Return HTML to describe the inner thoughts
+        """
         result = '<div style="text-align: left;font-size:14px"><br/>'
         result += f"<b>Evaluation:</b><br/>{self.evaluation}<br/><br/>"
         result += f"<b>Threats:</b><br/>{self.threats}<br/><br/>"
         result += "</div>"
         return result
+    def switch_model(self, new_model_name: str):
+        """
+        Change the underlying LLM to the new model
+        """
         self.llm = LLM.create(new_model_name)

arena/record.py ADDED Viewed

	@@ -0,0 +1,178 @@

+import logging
+import os
+import math
+from datetime import datetime
+from typing import List, Dict
+from dataclasses import dataclass, asdict
+from pymongo import MongoClient
+from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
@dataclass
class Result:
    """
    The recorded outcome of one game between a red player and a yellow player.

    Instances are converted to plain dictionaries with `asdict` for storage and
    rebuilt from stored documents with `Result(**document)`.
    """

    # Name identifying whoever played the red pieces
    red_player: str
    # Name identifying whoever played the yellow pieces
    yellow_player: str
    # True when red is recorded as a winner of the game
    red_won: bool
    # True when yellow is recorded as a winner of the game
    yellow_won: bool
    # Timestamp stored alongside the game
    when: datetime
COLLECTION = "connect"

# Shared client, created on first use. A MongoClient owns its own connection
# pool and background monitor threads, so one instance is reused for every call
# instead of building (and never closing) a new client each time.
_client = None


def _get_collection():
    """
    Helper function to get the MongoDB collection with error handling.

    The connection string is read from the MONGO_URI environment variable.
    A single MongoClient is created lazily and reused on later calls; each call
    still pings the server so that callers are told when it is unreachable.

    Returns:
        The `COLLECTION` collection of the `outsmart` database, or None when
        MONGO_URI is not set or the database cannot be used.
    """
    global _client
    mongo_uri = os.getenv("MONGO_URI")
    if not mongo_uri:
        # No database configured: make the "unavailable" result explicit
        return None

    # Local import so the PyMongo base exception is in scope for this block
    from pymongo.errors import PyMongoError

    try:
        if _client is None:
            _client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000)
        # Quick check that we can actually connect ("ping" supersedes the
        # deprecated "ismaster" command)
        _client.admin.command("ping")
        return _client.outsmart[COLLECTION]
    except PyMongoError:
        # Covers connection failures and server-selection timeouts, and also
        # malformed URIs and authentication errors, so callers always get None
        # rather than an unexpected exception
        logging.warning("MongoDB is unavailable", exc_info=True)
        return None
def record_game(result: Result) -> bool:
    """
    Persist one game result, when a database is available.

    Returns True once the document has been inserted, and False if there is no
    usable database or the insert raised an error (which is logged).
    """
    games = _get_collection()
    if games is None:
        return False

    # MongoDB stores dictionaries, so flatten the dataclass first
    document = asdict(result)
    try:
        games.insert_one(document)
    except Exception as err:
        logging.error("Failed to record a game in the database")
        logging.exception(err)
        return False
    else:
        return True
def get_games() -> List[Result]:
    """
    Return all games in the order that they were played.

    Returns an empty list if the database is unavailable or the query fails.
    A stored document that cannot be turned into a Result (missing or
    unexpected fields) is logged and skipped, so that one bad record does not
    hide every other game.
    """
    collection = _get_collection()
    if collection is None:
        return []
    try:
        # Sort by _id to maintain insertion order; read everything inside the
        # try so that errors raised while iterating the cursor are handled too
        documents = list(collection.find().sort("_id", 1))
    except Exception:
        logging.exception("Error getting games")
        return []

    # Convert MongoDB documents back to Result objects
    results = []
    for document in documents:
        # Remove MongoDB's _id field, which Result does not declare
        document.pop("_id", None)
        try:
            results.append(Result(**document))
        except TypeError:
            logging.warning("Skipping malformed game record: %r", document)
    return results
class EloCalculator:
    """
    Keeps a table of ELO ratings and adjusts it one game at a time.
    """

    def __init__(self, k_factor: float = 32, default_rating: int = 1000):
        """
        Set up an empty ratings table.
        Args:
            k_factor: Scales how far a single game can move a rating
            default_rating: Rating assumed for a player with no recorded games
        """
        self.k_factor = k_factor
        self.default_rating = default_rating
        self.ratings: Dict[str, float] = {}

    def get_player_rating(self, player: str) -> float:
        """Look up a player's rating, falling back to the default for unknown players."""
        try:
            return self.ratings[player]
        except KeyError:
            return self.default_rating

    def calculate_expected_score(self, rating_a: float, rating_b: float) -> float:
        """
        Return the expected score (win probability) of player A against player B.
        ELO formula: 1 / (1 + 10^((ratingB - ratingA)/400))
        """
        exponent = (rating_b - rating_a) / 400
        return 1 / (1 + math.pow(10, exponent))

    def update_ratings(
        self, player_a: str, player_b: str, score_a: float, score_b: float
    ) -> None:
        """
        Apply the outcome of one game to both players' ratings.
        Args:
            player_a: Name of first player
            player_b: Name of second player
            score_a: Actual score for player A (1 for win, 0.5 for draw, 0 for loss)
            score_b: Actual score for player B (1 for win, 0.5 for draw, 0 for loss)
        """
        # Read both ratings before writing either, so each update uses pre-game values
        before_a = self.get_player_rating(player_a)
        before_b = self.get_player_rating(player_b)
        win_chance_a = self.calculate_expected_score(before_a, before_b)
        win_chance_b = 1 - win_chance_a
        # ELO update rule: R' = R + K * (S - E), with R the current rating,
        # K the k-factor, S the actual score and E the expected score
        after_a = before_a + self.k_factor * (score_a - win_chance_a)
        after_b = before_b + self.k_factor * (score_b - win_chance_b)
        self.ratings[player_a] = after_a
        self.ratings[player_b] = after_b
def calculate_elo_ratings(
    results: List[Result], exclude_self_play: bool = True
) -> Dict[str, float]:
    """
    Feed a list of game results through a fresh EloCalculator, in the order given.
    Args:
        results: List of game results, sorted by date
        exclude_self_play: If True, ignore games where both sides are the same player
    Returns:
        Dictionary mapping player names to their final ELO ratings
    """
    elo = EloCalculator()
    for game in results:
        red, yellow = game.red_player, game.yellow_player
        if exclude_self_play and red == yellow:
            # Self-play games are left out when requested
            continue

        # Translate the win flags into ELO scores (1 win, 0.5 draw, 0 loss)
        red_won, yellow_won = bool(game.red_won), bool(game.yellow_won)
        if red_won == yellow_won:
            # Both or neither flagged as winner: scored as a draw
            scores = (0.5, 0.5)
        elif red_won:
            scores = (1.0, 0.0)
        else:
            scores = (0.0, 1.0)
        elo.update_ratings(red, yellow, *scores)
    return elo.ratings
def ratings() -> Dict[str, float]:
    """
    Compute ELO ratings over every game currently stored in the DB
    """
    return calculate_elo_ratings(get_games())

connect.png ADDED Viewed

prototype.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ gradio
 google.generativeai
 anthropic
 groq
-black

 google.generativeai
 anthropic
 groq
+black
+pymongo