# Chess1MChallenge / submit.py
# (Hugging Face page header captured alongside the file — author:
# nathanael-fijalkow, commit cb44915, "First version ready with webhook
# and deterministic eval" — commented out so the script is valid Python.)
#!/usr/bin/env python3
"""
Submission script for the Chess Challenge.
This script validates and uploads your trained model to the Hugging Face Hub
under the LLM-course organization.
Your model directory must contain:
- config.json: Model configuration with auto_map for custom architecture
- model.safetensors (or pytorch_model.bin): Model weights
- tokenizer_config.json: Tokenizer configuration with auto_map
- vocab.json: Vocabulary file
- model.py: Your custom model architecture (for trust_remote_code)
- tokenizer.py: Your custom tokenizer (for trust_remote_code)
Usage:
python submit.py --model_path ./my_model --model_name my-chess-model
"""
import argparse
import os
import sys
from pathlib import Path
# Required files for a valid submission, mapped to the human-readable
# description shown in error messages.
REQUIRED_FILES = {
    "config.json": "Model configuration (must include auto_map)",
    "tokenizer_config.json": "Tokenizer configuration (must include auto_map)",
    "vocab.json": "Vocabulary file",
    "model.py": "Custom model architecture (for trust_remote_code=True)",
    "tokenizer.py": "Custom tokenizer class (for trust_remote_code=True)",
}
# At least one of these weight files must exist
WEIGHT_FILES = ["model.safetensors", "pytorch_model.bin"]


def validate_model_directory(model_path: Path) -> tuple[bool, list[str]]:
    """
    Validate that the model directory contains all required files.

    Args:
        model_path: Directory containing the submission files.

    Returns:
        Tuple of (is_valid, list of error messages).
    """
    errors = []
    # Check required files
    for filename, description in REQUIRED_FILES.items():
        if not (model_path / filename).exists():
            # Name the missing file explicitly so the user knows exactly
            # what to add (the message previously printed a literal
            # "(unknown)" placeholder instead of the filename).
            errors.append(f"Missing {filename}: {description}")
    # Check weight files (need at least one)
    has_weights = any((model_path / f).exists() for f in WEIGHT_FILES)
    if not has_weights:
        errors.append(f"Missing model weights: need {' or '.join(WEIGHT_FILES)}")
    return len(errors) == 0, errors
def validate_auto_map(model_path: Path) -> tuple[bool, list[str]]:
    """
    Validate that config.json and tokenizer_config.json have auto_map fields.

    Files that are absent are skipped here; their presence is enforced by
    validate_model_directory.

    Returns:
        Tuple of (is_valid, list of error messages).
    """
    import json

    problems: list[str] = []

    # config.json must declare auto_map so AutoModelForCausalLM can locate
    # the custom classes via trust_remote_code.
    cfg_file = model_path / "config.json"
    if cfg_file.exists():
        cfg = json.loads(cfg_file.read_text())
        if "auto_map" not in cfg:
            problems.append(
                "config.json missing 'auto_map' field. Add:\n"
                '  "auto_map": {\n'
                '    "AutoConfig": "model.YourConfig",\n'
                '    "AutoModelForCausalLM": "model.YourModel"\n'
                "  }"
            )

    # Same for tokenizer_config.json; additionally, the AutoTokenizer entry
    # must be a [slow, fast] pair, not a bare string.
    tok_file = model_path / "tokenizer_config.json"
    if tok_file.exists():
        tok_cfg = json.loads(tok_file.read_text())
        if "auto_map" not in tok_cfg:
            problems.append(
                "tokenizer_config.json missing 'auto_map' field. Add:\n"
                '  "auto_map": {\n'
                '    "AutoTokenizer": ["tokenizer.YourTokenizer", null]\n'
                "  }\n"
                "Note: AutoTokenizer value must be a list [slow_class, fast_class]."
            )
        elif "AutoTokenizer" in tok_cfg.get("auto_map", {}):
            entry = tok_cfg["auto_map"]["AutoTokenizer"]
            if isinstance(entry, str):
                problems.append(
                    "tokenizer_config.json auto_map.AutoTokenizer must be a list, not a string.\n"
                    'Change from: "AutoTokenizer": "tokenizer.YourTokenizer"\n'
                    'To: "AutoTokenizer": ["tokenizer.YourTokenizer", null]'
                )

    return not problems, problems
def count_parameters(model_path: Path) -> int:
    """Return the total number of parameters of the model at *model_path*.

    Loads the model from local files only, using trust_remote_code so the
    submission's custom architecture (model.py) is honored.
    """
    from transformers import AutoModelForCausalLM

    loaded = AutoModelForCausalLM.from_pretrained(
        model_path,
        trust_remote_code=True,
        local_files_only=True,
    )
    total = 0
    for tensor in loaded.parameters():
        total += tensor.numel()
    return total
def main():
    """Validate a local model directory and submit it to the Hub.

    Pipeline: check required files -> check auto_map config -> count
    parameters -> authenticate with Hugging Face -> upload the folder.

    Returns:
        Process exit code: 0 on success, 1 on any validation,
        authentication, or upload failure.
    """
    parser = argparse.ArgumentParser(
        description="Submit your chess model to the Hugging Face Hub",
        # RawDescriptionHelpFormatter keeps the epilog's line breaks intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Required files in your model directory:
- config.json Model configuration with auto_map
- model.safetensors Model weights (or pytorch_model.bin)
- tokenizer_config.json Tokenizer configuration with auto_map
- vocab.json Vocabulary file
- model.py Custom model architecture
- tokenizer.py Custom tokenizer class
Example:
python submit.py --model_path ./my_model --model_name my-chess-model
"""
    )
    parser.add_argument(
        "--model_path", type=str, required=True,
        help="Path to your trained model directory"
    )
    parser.add_argument(
        "--model_name", type=str, required=True,
        help="Name for your model on the Hub (e.g., 'my-chess-model')"
    )
    args = parser.parse_args()
    model_path = Path(args.model_path)
    # All submissions are uploaded under the course organization.
    organization = "LLM-course"
    print("=" * 60)
    print("CHESS CHALLENGE - MODEL SUBMISSION")
    print("=" * 60)
    # Check model path exists
    if not model_path.exists():
        print(f"\nError: Model path '{model_path}' does not exist.")
        return 1
    # Validate required files
    print("\n[1/5] Checking required files...")
    is_valid, errors = validate_model_directory(model_path)
    if not is_valid:
        print("\nError: Model directory is incomplete:")
        for error in errors:
            print(f" - {error}")
        print("\nSee example_solution/ for a complete example.")
        return 1
    print(" All required files present.")
    # Validate auto_map fields
    print("\n[2/5] Validating auto_map configuration...")
    is_valid, errors = validate_auto_map(model_path)
    if not is_valid:
        print("\nError: Configuration files need auto_map:")
        for error in errors:
            print(f" - {error}")
        return 1
    print(" auto_map configuration valid.")
    # Count parameters
    print("\n[3/5] Counting parameters...")
    try:
        n_params = count_parameters(model_path)
        print(f" Parameters: {n_params:,}")
        # Over-limit models are still uploaded — the warning is advisory;
        # the arena's own parameter check is what rejects them.
        if n_params > 1_000_000:
            print(f"\n WARNING: Model exceeds 1M parameter limit!")
            print(f" Your model has {n_params:,} parameters.")
            print(f" It will fail the evaluation parameter check.")
    except Exception as e:
        # Loading can fail for many reasons (bad auto_map target, broken
        # weights, missing deps), so a broad catch with the message is used.
        print(f"\nError: Could not load model to count parameters: {e}")
        return 1
    # Hugging Face login
    print("\n[4/5] Checking Hugging Face authentication...")
    try:
        from huggingface_hub import HfApi, whoami
    except ImportError:
        print("\nError: huggingface_hub not installed.")
        print("Install with: pip install huggingface_hub")
        return 1
    try:
        # whoami() raises when no valid token is cached.
        user_info = whoami()
        username = user_info["name"]
        print(f" Logged in as: {username}")
    except Exception:
        print("\n Not logged in. Starting login process...")
        print(" You need a Hugging Face account and access token.")
        print(" Get your token at: https://huggingface.co/settings/tokens")
        print()
        # Interactive login
        from huggingface_hub import login
        try:
            login()
            user_info = whoami()
            username = user_info["name"]
            print(f"\n Successfully logged in as: {username}")
        except Exception as e:
            print(f"\nError: Login failed: {e}")
            return 1
    # Upload to Hub
    print("\n[5/5] Uploading to Hugging Face Hub...")
    repo_id = f"{organization}/{args.model_name}"
    print(f" Repository: {repo_id}")
    api = HfApi()
    try:
        # Create repo if it doesn't exist
        api.create_repo(repo_id=repo_id, exist_ok=True)
        # Create a model card (YAML front matter must start at column 0).
        model_card = f"""---
library_name: transformers
tags:
- chess
- llm-course
- chess-challenge
license: mit
---
# {args.model_name}
Chess model submitted to the LLM Course Chess Challenge.
## Submission Info
- **Submitted by**: [{username}](https://huggingface.co/{username})
- **Parameters**: {n_params:,}
- **Organization**: {organization}
## Usage
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("{repo_id}", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("{repo_id}", trust_remote_code=True)
```
## Evaluation
This model is evaluated at the [Chess Challenge Arena](https://huggingface.co/spaces/LLM-course/Chess1MChallenge).
"""
        # Write model card — NOTE: this writes README.md into the user's
        # local model directory so upload_folder picks it up.
        readme_path = model_path / "README.md"
        readme_path.write_text(model_card)
        # Upload all files
        api.upload_folder(
            folder_path=model_path,
            repo_id=repo_id,
            commit_message=f"Chess Challenge submission by {username}",
        )
    except Exception as e:
        print(f"\nError: Upload failed: {e}")
        return 1
    print("\n" + "=" * 60)
    print("SUBMISSION COMPLETE!")
    print("=" * 60)
    print(f"\nYour model is available at:")
    print(f" https://huggingface.co/{repo_id}")
    print(f"\nSubmitted by: {username}")
    print(f"Parameters: {n_params:,}")
    print(f"\nNext step: Go to the Chess Challenge Arena to run evaluation:")
    print(f" https://huggingface.co/spaces/LLM-course/Chess1MChallenge")
    return 0
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    sys.exit(main())