QuadConnect-beta

Sleeping

App Files Files Community

Lyte committed on Feb 20

Commit

5a73433

verified ·

1 Parent(s): d175a25

Update app.py

Browse files

Files changed (1) hide show

app.py +301 -58

app.py CHANGED Viewed

@@ -1,81 +1,324 @@
 import os
 import gradio as gr
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download, login
-#import os
 #login(os.getenv("HF_TOKEN")) my bad now its public
-# Download the GGUF file named by REPO_ID / MODEL_FILE (environment
-# variables, with the defaults below) and load it into llama.cpp once at import.
 model = Llama(
     model_path=hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", "bartowski/HuatuoGPT-o1-7B-v0.1-GGUF"),
-        filename=os.environ.get("MODEL_FILE", "HuatuoGPT-o1-7B-v0.1-Q4_0.gguf"),
     )
 )
-# Markdown shown at the top of the page (passed to gr.Markdown in the UI block).
-DESCRIPTION = '''
-# FreedomIntelligence/HuatuoGPT-o1-7B | Duplicate the space and set it to private for faster & personal inference for free.
-HuatuoGPT-o1 is a medical LLM designed for advanced medical reasoning.
-It generates a complex thought process, reflecting and refining its reasoning, before providing a final response.
-**To start a new chat**, click "clear" and start a new dialog.
-'''
-# Markdown shown at the bottom of the page (passed to gr.Markdown in the UI block).
-LICENSE = """
---- Apache 2.0 License ---
 """
-def user(message, history):
-    """Record a submitted message as a new user turn.
-
-    Returns an empty string for the textbox output together with a new
-    history list; the list passed in is left unmodified.
-    """
-    user_turn = {"role": "user", "content": message}
-    updated_history = history + [user_turn]
-    return "", updated_history
-def generate_text(history, max_tokens=512, temperature=0.9, top_p=0.95):
-    """Stream the model's reply to the last entry of ``history``.
-
-    ``history`` is a list of ``{"role", "content"}`` dicts whose final item
-    is the pending user message. An empty assistant entry is appended to
-    the list and extended in place as chunks arrive; the full list is
-    yielded after every chunk.
-    """
-    prior_turns = [
-        {"role": turn["role"], "content": turn["content"]}
-        for turn in history[:-1]
-    ]
-    pending_turn = {"role": "user", "content": history[-1]['content']}
-    stream = model.create_chat_completion(
-        messages=prior_turns + [pending_turn],
-        temperature=temperature,
-        max_tokens=max_tokens,
-        top_p=top_p,
-        stream=True,
-    )
-    history.append({"role": "assistant", "content": ""})
-    for chunk in stream:
-        # Chunks without a "delta" or without "content" contribute nothing.
-        piece = chunk["choices"][0].get("delta", {}).get("content", "")
-        history[-1]['content'] += piece
-        yield history
-# Page layout: description, chat window, input box, clear button,
-# sampling controls, example prompts and the license footer.
-with gr.Blocks() as demo:
-    gr.Markdown(DESCRIPTION)
-    chatbot = gr.Chatbot(type="messages")
-    msg = gr.Textbox()
-    clear = gr.Button("Clear")
-    # The slider values are passed to generate_text explicitly, so that
-    # function's own keyword defaults are not used by this UI.
-    with gr.Accordion("Adjust Parameters", open=False):
-        max_tokens = gr.Slider(minimum=512, maximum=4096, value=1024, step=1, label="Max Tokens")
-        temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.9, step=0.1, label="Temperature")
-        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-    # On submit: first store the user turn (user() also returns "" for the
-    # textbox), then stream the model's reply into the chat window.
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-        generate_text, [chatbot, max_tokens, temperature, top_p], chatbot
-    )
-    # Sends None to the chatbot output (presumably empties the conversation).
-    clear.click(lambda: None, None, chatbot, queue=False)
-    # Clicking an example fills the input textbox with that prompt.
-    gr.Examples(
-        examples=[
-            ["How many r's are in the word strawberry?"],
-            ['How to stop a cough?'],
-            ['How do I relieve feet pain?'],
-        ],
-        inputs=msg,
-        label="Examples",
-    )
-    gr.Markdown(LICENSE)
-# Start the app only when this file is executed as a script.
 if __name__ == "__main__":
-    demo.launch()

 import os
 import gradio as gr
 from llama_cpp import Llama
+from huggingface_hub import hf_hub_download#, login
+import numpy as np
 #login(os.getenv("HF_TOKEN")) my bad now its public
+# Download the GGUF file named by REPO_ID / MODEL_FILE (environment
+# variables, with the defaults below) and load it into llama.cpp once at import.
 model = Llama(
     model_path=hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "Lyte/QuadConnect2.5-0.5B-GRPO"),
+        filename=os.environ.get("MODEL_FILE", "unsloth.Q8_0.gguf"),
     )
 )
+# System message sent with every model request. It fixes the
+# <reasoning>/<move> reply format that handle_move() parses.
+# NOTE(review): "[... (with 6 rows and 7 columns]" has mismatched brackets.
+# Left verbatim; presumably the model was fine-tuned with this exact text,
+# so confirm before editing the wording.
+SYSTEM_PROMPT = """You are a Connect Four player[Connect Four is played on a 6 x 7 grid (with 6 rows and 7 columns]. Given the current board state, predict the next move. Respond in the following format:
+<reasoning>
+Explain your reasoning for choosing the move, considering the current board state and potential future moves.
+</reasoning>
+<move>
+The column and row of your move in the format 'a1', 'b3', 'g5', 'c6', etc. (column letter followed by row number).
+</move>
 """
+class ConnectFour:
+    """Board state and rules for a 6x7 Connect Four game.
+
+    Cells of ``board`` hold 0 (empty), 1 (human player) or 2 (AI). Row 0 is
+    the top of the board and row 5 the bottom, so a dropped piece settles in
+    the highest free row index of its column.
+    """
+    def __init__(self):
+        self.board = np.zeros((6, 7))
+        self.current_player = 1  # 1 for player, 2 for AI
+        self.game_over = False
+    def make_move(self, col):
+        """Drop the current player's piece into 0-based column ``col``.
+
+        Returns ``(True, row)`` with the row the piece landed in, or
+        ``(False, -1)`` when the game is over, the column is full, or
+        ``col`` is not a whole number inside the board.
+        """
+        if self.game_over:
+            return False, -1
+        # Validate before indexing: numpy rejects float indices, raises on
+        # columns past the edge and silently wraps negative ones.
+        try:
+            col_index = int(col)
+        except (TypeError, ValueError, OverflowError):
+            return False, -1
+        if col_index != col or not 0 <= col_index < self.board.shape[1]:
+            return False, -1
+        # Find the lowest empty row in the selected column
+        for row in range(self.board.shape[0] - 1, -1, -1):
+            if self.board[row][col_index] == 0:
+                self.board[row][col_index] = self.current_player
+                return True, row
+        return False, -1
+    def check_winner(self):
+        """Return 1 or 2 if that player has four in a line, otherwise 0."""
+        rows, cols = self.board.shape
+        # Each line of four is tested once, from one end, in these directions:
+        # right, down, down-right and up-right.
+        directions = ((0, 1), (1, 0), (1, 1), (-1, 1))
+        for row in range(rows):
+            for col in range(cols):
+                piece = self.board[row][col]
+                if piece == 0:
+                    continue
+                for d_row, d_col in directions:
+                    end_row, end_col = row + 3 * d_row, col + 3 * d_col
+                    if not (0 <= end_row < rows and 0 <= end_col < cols):
+                        continue
+                    if all(
+                        self.board[row + k * d_row][col + k * d_col] == piece
+                        for k in range(1, 4)
+                    ):
+                        # Plain int rather than a numpy float scalar.
+                        return int(piece)
+        return 0
+    def board_to_string(self):
+        """Describe the occupied cells as e.g. ``"d2=X, d1=O"``.
+
+        Cells are listed top row first, left to right. Columns are lettered
+        a-g, rows are numbered 1 (bottom) to 6 (top); player 1 is "X" and
+        any other non-empty value is "O". An empty board gives "".
+        """
+        rows, cols = self.board.shape
+        moves = []
+        for row in range(rows):
+            for col in range(cols):
+                if self.board[row][col] != 0:
+                    col_letter = chr(ord('a') + col)
+                    row_num = str(rows - row)  # Convert to 1-based indexing
+                    player = "X" if self.board[row][col] == 1 else "O"
+                    moves.append(f"{col_letter}{row_num}={player}")
+        return ", ".join(moves)
+    def parse_ai_move(self, move_str):
+        """Convert a move such as ``'a1'`` or ``' C4 '`` to a 0-based column.
+
+        Only the leading column letter is used; the row digit is ignored.
+        Raises ``ValueError`` when the text is empty or does not start with
+        a letter from a to g.
+        """
+        text = move_str.strip().lower()
+        col = ord(text[0]) - ord('a') if text else -1
+        if not 0 <= col < self.board.shape[1]:
+            raise ValueError(f"Invalid move: {move_str!r}")
+        return col
+def create_interface():
+    """Build the Gradio Blocks app for a human-vs-model Connect Four game.
+
+    The page has seven column buttons, an HTML board, a status line, a
+    "New Game" button and a panel showing the model's reasoning. Returns the
+    ``gr.Blocks`` object; launching it is left to the caller.
+
+    NOTE: one ``ConnectFour`` instance is created here and captured by the
+    event handlers, so all calls to the handlers share the same board
+    (presumably across browser sessions too; ``gr.State`` would isolate
+    them).
+    """
+    # Imported locally: only the handlers below need it.
+    from html import escape
+    game = ConnectFour()
+    css = """
+    .connect4-board {
+        display: grid;
+        grid-template-columns: repeat(7, 1fr);
+        gap: 8px;
+        max-width: 600px;
+        margin: 10px auto;
+        background: #2196F3;
+        padding: 15px;
+        border-radius: 15px;
+        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+    }
+    .connect4-cell {
+        aspect-ratio: 1;
+        background: white;
+        border-radius: 50%;
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        font-size: 2em;
+    }
+    .player1 { background: #f44336 !important; }
+    .player2 { background: #ffc107 !important; }
+    #ai-status {
+        font-size: 1.2em;
+        margin: 10px 0;
+        color: #2196F3;
+        font-weight: bold;
+    }
+    #ai-reasoning {
+        background: #22004d;
+        border-radius: 10px;
+        padding: 15px;
+        margin: 15px 0;
+        font-family: monospace;
+        min-height: 100px;
+    }
+    .reasoning-box {
+        border-left: 4px solid #2196F3;
+        padding-left: 15px;
+        margin: 10px 0;
+        background: #22004d;
+        border-radius: 0 10px 10px 0;
+    }
+    #column-buttons {
+        display: flex;
+        justify-content: center;
+        align-items: anchor-center;
+        max-width: 600px;
+        margin: 0 auto;
+        padding: 0 15px;
+    }
+    #column-buttons button {
+      margin: 0px 7px;
+    }
+    div.svelte-iyf88w {
+      display: block;
+    }
+    """
+    with gr.Blocks(css=css) as interface:
+        gr.Markdown("# 🎮 Connect Four vs AI")
+        gr.Markdown("### This is just a quick prototype for now, and the current model was trained just for 200 steps to test the concept, the reward functions were flawed, update coming soon!")
+        with gr.Row():
+            with gr.Column(scale=2):
+                # Status display
+                status = gr.Markdown("Your turn! Click a button to drop your piece!", elem_id="ai-status")
+                # Column buttons
+                with gr.Group(elem_id="column-buttons"):
+                    col_buttons = []
+                    for i in range(7):
+                        btn = gr.Button(f"⬇️ {i+1}", scale=1)
+                        col_buttons.append(btn)
+                # Game board
+                board_display = gr.HTML(render_board(), elem_id="board-display")
+                reset_btn = gr.Button("🔄 New Game", variant="primary")
+        # NOTE(review): this column is a sibling of the row above, not a child
+        # of it, so it is presumably laid out below the board rather than
+        # beside it. Confirm that this is intended.
+        with gr.Column(scale=1):
+            # AI reasoning display
+            gr.Markdown("### 🤖 AI's Thoughts")
+            reasoning_display = gr.HTML(
+                value='<div id="ai-reasoning">Waiting for your move...</div>',
+                elem_id="ai-reasoning-container"
+                )
+        def note(text):
+            """Wrap an already-safe message for the reasoning panel."""
+            return f'<div id="ai-reasoning">{text}</div>'
+        def view(status_text, reasoning_html):
+            """Return the [board, status, reasoning] outputs of a handler."""
+            return [render_board(game.board), status_text, reasoning_html]
+        def end_game(status_text, reasoning_html):
+            """Mark the game as finished and return the handler outputs."""
+            game.game_over = True
+            return view(status_text, reasoning_html)
+        def board_full():
+            """Return True when no empty cell is left on the board."""
+            return not (game.board == 0).any()
+        def extract_tag(text, tag):
+            """Return the stripped text between <tag> and </tag>.
+
+            A missing closing tag is tolerated (the rest of the text is
+            used); a missing opening tag raises ValueError.
+            """
+            _, found, rest = text.partition(f"<{tag}>")
+            if not found:
+                raise ValueError(f"model reply has no <{tag}> section")
+            return rest.partition(f"</{tag}>")[0].strip()
+        def ask_ai():
+            """Ask the model for a move; return ``(reasoning, move_str)``."""
+            board_state = game.board_to_string()
+            prompt = f"Current Board: {board_state}. Make a move."
+            response = model.create_chat_completion(
+                messages=[
+                    {"role": "system", "content": SYSTEM_PROMPT},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.7,
+                max_tokens=512
+            )
+            ai_response = response['choices'][0]['message']['content']
+            return extract_tag(ai_response, "reasoning"), extract_tag(ai_response, "move")
+        def handle_move(col):
+            """Play the human's piece in ``col``, then let the model reply.
+
+            Returns ``[board_html, status_text, reasoning_html]``.
+            """
+            if game.game_over:
+                return view(
+                    "Game is over! Click New Game to play again.",
+                    note("Game Over!"),
+                )
+            # Player move. The hidden gr.Number input may deliver the column
+            # as a float, which numpy does not accept as an index.
+            game.current_player = 1
+            success, _ = game.make_move(int(col))
+            if not success:
+                return view("Column is full! Try another one.", note("Invalid move!"))
+            # Check for winner
+            if game.check_winner() == 1:
+                return end_game("🎉 You win! 🎉", note("Congratulations! You won!"))
+            if board_full():
+                return end_game("🤝 It's a draw!", note("The board is full. It's a draw!"))
+            # AI move
+            game.current_player = 2
+            try:
+                reasoning, move_str = ask_ai()
+                ai_col = game.parse_ai_move(move_str)
+            except Exception as e:
+                # UI boundary: a model or parsing failure must not crash the
+                # callback, so it is reported to the player instead.
+                return end_game(
+                    "AI error occurred! You win by default!",
+                    note(f"Error: {escape(str(e))}"),
+                )
+            # Range-check here as well: a negative numpy index would wrap to
+            # the other side of the board instead of failing.
+            in_range = 0 <= ai_col < game.board.shape[1]
+            if not (in_range and game.make_move(ai_col)[0]):
+                # The status line declares the human the winner, so the game
+                # has to end here; otherwise the next click would be played
+                # with current_player still set to the AI.
+                return end_game(
+                    "AI made invalid move! You win by default!",
+                    note("AI made an invalid move!"),
+                )
+            # Format reasoning for display. Model output is untrusted text,
+            # so it is escaped before being placed into HTML.
+            reasoning_html = f'''
+                <div id="ai-reasoning">
+                    <div class="reasoning-box">
+                        <p><strong>🤔 Reasoning:</strong></p>
+                        <p>{escape(reasoning)}</p>
+                        <p><strong>📍 Move chosen:</strong> {escape(move_str)}</p>
+                    </div>
+                </div>
+                '''
+            # Check for AI winner
+            if game.check_winner() == 2:
+                return end_game("🤖 AI wins! Better luck next time!", reasoning_html)
+            if board_full():
+                return end_game("🤝 It's a draw!", reasoning_html)
+            game.current_player = 1
+            return view("Your turn!", reasoning_html)
+        def reset_game():
+            """Clear the board and hand the first move back to the human."""
+            game.board = np.zeros((6, 7))
+            game.current_player = 1
+            game.game_over = False
+            return [
+                render_board(),
+                "Your turn! Click a button to drop your piece!",
+                '<div id="ai-reasoning">New game started! Make your move...</div>'
+            ]
+        # Event handlers: each button passes its own column index through a
+        # hidden Number component.
+        for i, btn in enumerate(col_buttons):
+            btn.click(
+                fn=handle_move,
+                inputs=[gr.Number(value=i, visible=False)],
+                outputs=[board_display, status, reasoning_display]
+            )
+        reset_btn.click(
+            fn=reset_game,
+            outputs=[board_display, status, reasoning_display]
+        )
+    return interface
+def render_board(board=None):
+    """Render a 6x7 board as the HTML grid styled by ``.connect4-board``.
+
+    ``board`` is read as ``board[row][col]``: 1 draws a red piece, 2 a
+    yellow piece and any other value an empty cell. When ``board`` is
+    omitted an empty board is drawn.
+    """
+    if board is None:
+        board = np.zeros((6, 7))
+    def cell_html(value):
+        # One <div> per cell; occupied cells get an extra player class.
+        if value == 1:
+            return '<div class="connect4-cell player1">🔴</div>'
+        if value == 2:
+            return '<div class="connect4-cell player2">🟡</div>'
+        return '<div class="connect4-cell">⚪</div>'
+    # Row-major order, matching the 7-column CSS grid.
+    cells = (cell_html(board[r][c]) for r in range(6) for c in range(7))
+    return '<div class="connect4-board">' + "".join(cells) + "</div>"
+# Launch the interface only when this file is run as a script (not on
+# import); debug=True is passed straight through to Gradio's launch().
 if __name__ == "__main__":
+    interface = create_interface()
+    interface.launch(debug=True)