| """ | |
| ChessEcon Training — Model Loader | |
| Downloads and loads Qwen/Llama models from HuggingFace for RL training. | |
| Uses HF_TOKEN from .env for gated models (e.g., Llama-3.2-3B-Instruct). | |
| """ | |
| from __future__ import annotations | |
| import os | |
| import logging | |
| from pathlib import Path | |
| from typing import Tuple, Optional | |
| logger = logging.getLogger(__name__) | |
| def download_model(model_name: str, cache_dir: str, hf_token: Optional[str] = None) -> str: | |
| """ | |
| Download a model from HuggingFace Hub to local cache. | |
| Returns the local path to the downloaded model. | |
| """ | |
| from huggingface_hub import snapshot_download, login | |
| if hf_token: | |
| login(token=hf_token, add_to_git_credential=False) | |
| logger.info("Logged in to HuggingFace Hub") | |
| local_path = Path(cache_dir) / model_name.replace("/", "--") | |
| if local_path.exists() and any(local_path.iterdir()): | |
| logger.info(f"Model already cached at {local_path}") | |
| return str(local_path) | |
| logger.info(f"Downloading {model_name} to {cache_dir} ...") | |
| path = snapshot_download( | |
| repo_id=model_name, | |
| cache_dir=cache_dir, | |
| token=hf_token, | |
| ignore_patterns=["*.msgpack", "*.h5", "flax_model*", "tf_model*", "*.ot"], | |
| ) | |
| logger.info(f"Model downloaded to {path}") | |
| return path | |
| def load_model_and_tokenizer( | |
| model_name: str, | |
| cache_dir: str, | |
| device: str = "cpu", | |
| hf_token: Optional[str] = None, | |
| for_training: bool = True, | |
| ) -> Tuple: | |
| """ | |
| Load a HuggingFace model and tokenizer for RL training. | |
| Returns (model, tokenizer). | |
| """ | |
| try: | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
| except ImportError: | |
| raise ImportError("transformers and torch are required for training. Run: pip install transformers torch") | |
| logger.info(f"Loading model: {model_name} on {device}") | |
| tokenizer = AutoTokenizer.from_pretrained( | |
| model_name, | |
| cache_dir=cache_dir, | |
| token=hf_token, | |
| trust_remote_code=True, | |
| ) | |
| if tokenizer.pad_token is None: | |
| tokenizer.pad_token = tokenizer.eos_token | |
| dtype = None | |
| if device == "cuda": | |
| try: | |
| import torch | |
| dtype = torch.bfloat16 | |
| except Exception: | |
| pass | |
| model = AutoModelForCausalLM.from_pretrained( | |
| model_name, | |
| cache_dir=cache_dir, | |
| token=hf_token, | |
| torch_dtype=dtype, | |
| device_map=device if device != "cpu" else None, | |
| trust_remote_code=True, | |
| ) | |
| if device == "cpu": | |
| model = model.to("cpu") | |
| if for_training: | |
| model.train() | |
| else: | |
| model.eval() | |
| total_params = sum(p.numel() for p in model.parameters()) / 1e6 | |
| logger.info(f"Model loaded: {total_params:.1f}M parameters on {device}") | |
| return model, tokenizer | |
| def generate_move( | |
| model, | |
| tokenizer, | |
| prompt: str, | |
| max_new_tokens: int = 64, | |
| temperature: float = 0.9, | |
| device: str = "cpu", | |
| ) -> str: | |
| """Generate a chess move from the model given a prompt.""" | |
| import torch | |
| inputs = tokenizer(prompt, return_tensors="pt").to(device) | |
| with torch.no_grad(): | |
| outputs = model.generate( | |
| **inputs, | |
| max_new_tokens=max_new_tokens, | |
| temperature=temperature, | |
| do_sample=True, | |
| pad_token_id=tokenizer.pad_token_id, | |
| ) | |
| generated = outputs[0][inputs["input_ids"].shape[1]:] | |
| return tokenizer.decode(generated, skip_special_tokens=True).strip() | |