Spaces:

LLM-course
/

Chess1MChallenge

Running

App Files Files Community

nathanael-fijalkow committed on Jan 7

Commit

19d5912

1 Parent(s): ea5cabf

another fix?

Browse files

Files changed (2) hide show

app.py +11 -11
src/evaluate.py +21 -10

app.py CHANGED Viewed

@@ -281,7 +281,7 @@ def play_move(
             render_board_svg(current_fen if current_fen != "startpos" else None),
             current_fen,
             move_history,
-            f"⚠️ Model generated illegal move: {move_token}",
         )
     except Exception as e:
@@ -289,7 +289,7 @@ def play_move(
             render_board_svg(),
             "startpos",
             "",
-            f"❌ Error: {str(e)}",
         )
@@ -543,7 +543,7 @@ with gr.Blocks(
         with gr.TabItem("🏆 Leaderboard"):
             gr.Markdown("### Current Rankings")
             leaderboard_html = gr.HTML(value=format_leaderboard_html(load_leaderboard()))
-            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
             refresh_btn.click(refresh_leaderboard, outputs=leaderboard_html)
         # Interactive Demo Tab
@@ -566,8 +566,8 @@ with gr.Blocks(
                     )
                     with gr.Row():
-                        play_btn = gr.Button("▶️ Model Move", variant="primary")
-                        reset_btn = gr.Button("🔄 Reset")
                     status_text = gr.Textbox(label="Status", interactive=False)
@@ -618,7 +618,7 @@ with gr.Blocks(
                     label="Number of Positions",
                 )
-            legal_btn = gr.Button("✅ Run Legal Move Evaluation", variant="primary")
             legal_results = gr.Markdown()
             legal_btn.click(
@@ -675,7 +675,7 @@ with gr.Blocks(
 def verify_webhook_secret(secret: str) -> bool:
     """Verify the webhook secret from Hugging Face."""
     if not WEBHOOK_SECRET:
-        print("⚠️ WEBHOOK_SECRET not set - skipping verification")
         return True
     return hmac.compare_digest(WEBHOOK_SECRET, secret)
@@ -714,10 +714,10 @@ def run_auto_evaluation(model_id: str):
         })
         save_leaderboard(leaderboard)
-        print(f"✅ Auto-evaluation complete for {model_id}: legal_rate={results.get('legal_rate_with_retry', 0):.1%}")
     except Exception as e:
-        print(f"❌ Auto-evaluation failed for {model_id}: {e}")
         import traceback
         traceback.print_exc()
@@ -729,7 +729,7 @@ async def handle_webhook(request: Request, background_tasks: BackgroundTasks):
     # Verify secret
     if not verify_webhook_secret(secret):
-        print("❌ Webhook secret verification failed")
         return {"error": "Invalid secret"}, 403
     data = await request.json()
@@ -746,7 +746,7 @@ async def handle_webhook(request: Request, background_tasks: BackgroundTasks):
         if event_type in ["create", "update"]:
             # Check if it's a chess model
             if "chess" in repo_name.lower():
-                print(f"🎯 Queuing evaluation for chess model: {repo_name}")
                 background_tasks.add_task(run_auto_evaluation, repo_name)
                 return {"status": "evaluation_queued", "model": repo_name}
             else:

             render_board_svg(current_fen if current_fen != "startpos" else None),
             current_fen,
             move_history,
+            f"Model generated illegal move: {move_token}",
         )
     except Exception as e:
             render_board_svg(),
             "startpos",
             "",
+            f"Error: {str(e)}",
         )
         with gr.TabItem("🏆 Leaderboard"):
             gr.Markdown("### Current Rankings")
             leaderboard_html = gr.HTML(value=format_leaderboard_html(load_leaderboard()))
+            refresh_btn = gr.Button("Refresh Leaderboard")
             refresh_btn.click(refresh_leaderboard, outputs=leaderboard_html)
         # Interactive Demo Tab
                     )
                     with gr.Row():
+                        play_btn = gr.Button("Model Move", variant="primary")
+                        reset_btn = gr.Button("Reset")
                     status_text = gr.Textbox(label="Status", interactive=False)
                     label="Number of Positions",
                 )
+            legal_btn = gr.Button("Run Legal Move Evaluation", variant="primary")
             legal_results = gr.Markdown()
             legal_btn.click(
 def verify_webhook_secret(secret: str) -> bool:
     """Verify the webhook secret from Hugging Face."""
     if not WEBHOOK_SECRET:
+        print("WEBHOOK_SECRET not set - skipping verification")
         return True
     return hmac.compare_digest(WEBHOOK_SECRET, secret)
         })
         save_leaderboard(leaderboard)
+        print(f"Auto-evaluation complete for {model_id}: legal_rate={results.get('legal_rate_with_retry', 0):.1%}")
     except Exception as e:
+        print(f"Auto-evaluation failed for {model_id}: {e}")
         import traceback
         traceback.print_exc()
     # Verify secret
     if not verify_webhook_secret(secret):
+        print("Webhook secret verification failed")
         return {"error": "Invalid secret"}, 403
     data = await request.json()
         if event_type in ["create", "update"]:
             # Check if it's a chess model
             if "chess" in repo_name.lower():
+                print(f"Queuing evaluation for chess model: {repo_name}")
                 background_tasks.add_task(run_auto_evaluation, repo_name)
                 return {"status": "evaluation_queued", "model": repo_name}
             else:

src/evaluate.py CHANGED Viewed

@@ -477,9 +477,11 @@ def load_model_from_hub(model_id: str, device: str = "auto"):
     Returns:
         Tuple of (model, tokenizer).
     """
     from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-    # Import custom classes - this also triggers registration at module load
     try:
         from src.model import ChessConfig, ChessForCausalLM
         from src.tokenizer import ChessTokenizer
@@ -487,31 +489,40 @@ def load_model_from_hub(model_id: str, device: str = "auto"):
         from .model import ChessConfig, ChessForCausalLM
         from .tokenizer import ChessTokenizer
-    # Explicitly register to ensure it's done before loading
     try:
         AutoConfig.register("chess_transformer", ChessConfig)
     except ValueError:
-        pass  # Already registered
     try:
         AutoModelForCausalLM.register(ChessConfig, ChessForCausalLM)
     except ValueError:
-        pass  # Already registered
-    # Load using our local classes directly (most reliable)
     print(f"Loading model {model_id}...")
-    config = ChessConfig.from_pretrained(model_id, trust_remote_code=True)
     model = ChessForCausalLM.from_pretrained(
         model_id,
         config=config,
         device_map=device,
-        trust_remote_code=True,
     )
-    # Load tokenizer - try custom class first, then generic
     try:
         tokenizer = ChessTokenizer.from_pretrained(model_id)
-    except Exception:
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
     return model, tokenizer

     Returns:
         Tuple of (model, tokenizer).
     """
+    import json
+    from huggingface_hub import hf_hub_download
     from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+    # Import custom classes
     try:
         from src.model import ChessConfig, ChessForCausalLM
         from src.tokenizer import ChessTokenizer
         from .model import ChessConfig, ChessForCausalLM
         from .tokenizer import ChessTokenizer
+    # Register BEFORE any from_pretrained calls
     try:
         AutoConfig.register("chess_transformer", ChessConfig)
     except ValueError:
+        pass
     try:
         AutoModelForCausalLM.register(ChessConfig, ChessForCausalLM)
     except ValueError:
+        pass
     print(f"Loading model {model_id}...")
+    # Download and load config manually to avoid transformers auto-detection issues
+    config_path = hf_hub_download(repo_id=model_id, filename="config.json")
+    with open(config_path, "r") as f:
+        config_dict = json.load(f)
+    # Remove model_type to avoid conflicts, instantiate our config directly
+    config_dict.pop("model_type", None)
+    config_dict.pop("architectures", None)
+    config = ChessConfig(**config_dict)
+    # Load model weights with our config
     model = ChessForCausalLM.from_pretrained(
         model_id,
         config=config,
         device_map=device,
     )
+    # Load tokenizer
     try:
         tokenizer = ChessTokenizer.from_pretrained(model_id)
+    except Exception as e:
+        print(f"ChessTokenizer failed ({e}), trying AutoTokenizer...")
         tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
     return model, tokenizer