"""Train on Modal serverless GPU.

Modal lets you rent GPUs by the second, which is cheaper than RunPod for
short jobs.

Setup:
    pip install modal
    modal setup  # login
    modal secret create huggingface HF_TOKEN=<your_token>

Run:
    modal run train_modal.py --model EleutherAI/pythia-1.4b
    modal run train_modal.py --model EleutherAI/pythia-1.4b --epochs 3
"""
| import modal |
|
|
# Modal application that owns the functions declared in this file.
app = modal.App("mel-corpus-training")


# Python packages installed into the container image.
_PIP_PACKAGES = [
    "torch>=2.0.0",
    "transformers>=4.40.0",
    "peft>=0.10.0",
    "accelerate>=0.30.0",
    "datasets>=2.18.0",
    "bitsandbytes>=0.43.0",
    "huggingface_hub>=0.22.0",
]

# Container image: Debian slim with Python 3.11, the pip packages above,
# then git from apt (same layer order as before).
_base_image = modal.Image.debian_slim(python_version="3.11")
image = _base_image.pip_install(_PIP_PACKAGES).apt_install("git")


# Named volume, created on first use; train() mounts it at /workspace.
volume = modal.Volume.from_name("mel-training", create_if_missing=True)
|
|
|
|
@app.function(
    image=image,
    gpu="A100-40GB",
    timeout=60 * 60 * 12,
    volumes={"/workspace": volume},
    secrets=[modal.Secret.from_name("huggingface")],
)
def train(
    model_id: str = "EleutherAI/pythia-1.4b",
    bridge_repo: str = "Melofhell00/claude-bridge",
    output_repo: str = None,
    epochs: int = 3,
):
    """Prepare the corpus and run the training script in a Modal GPU container.

    Steps, in order:
      1. Download the unified corpus text file from the ``bridge_repo``
         dataset repo.
      2. Download the ``Melofhell00/mel-training-package`` snapshot into
         ``/workspace/training_package``.
      3. Run its ``prepare_data.py`` to write ``/workspace/train.jsonl``.
      4. Run its ``train.py`` with ``--use-4bit`` and a target Hub repo.

    Args:
        model_id: Hub id of the base model; also passed as ``--tokenizer``
            to the data-preparation script.
        bridge_repo: Hub dataset repo that holds the corpus file.
        output_repo: Optional output name. A bare name (``"my-model"``) is
            placed under the ``Melofhell00`` namespace; a full
            ``"namespace/name"`` id is used as-is. When omitted, the name is
            ``mel-<last path segment of model_id>``.
        epochs: Forwarded to ``train.py`` as ``--epochs``.

    Returns:
        The Hub repo id handed to ``train.py`` via ``--hf-repo``.

    Raises:
        ValueError: If ``output_repo`` contains a slash but is not of the
            form ``namespace/name``.
        subprocess.CalledProcessError: If either script exits non-zero.
    """
    import os
    import subprocess
    import sys
    from huggingface_hub import hf_hub_download, snapshot_download

    default_namespace = "Melofhell00"
    package_dir = "/workspace/training_package"
    train_file = "/workspace/train.jsonl"

    # Resolve the output names up front so a malformed repo id fails before
    # any download or GPU time is spent.
    if output_repo and "/" in output_repo:
        namespace, _, output_name = output_repo.partition("/")
        if not namespace or not output_name or "/" in output_name:
            raise ValueError(
                f"output_repo must be 'name' or 'namespace/name', "
                f"got {output_repo!r}"
            )
        # A full repo id is used verbatim instead of being prefixed again.
        hf_repo = output_repo
    else:
        output_name = output_repo or f"mel-{model_id.split('/')[-1]}"
        hf_repo = f"{default_namespace}/{output_name}"

    os.chdir("/workspace")

    print(f"Downloading corpus from {bridge_repo}...")
    corpus_path = hf_hub_download(
        repo_id=bridge_repo,
        filename="unified_corpus_2026_05_12/unified_corpus.txt",
        repo_type="dataset",
    )
    print(f"Corpus: {corpus_path}")

    # Fetch the training scripts (prepare_data.py, train.py) onto the volume.
    snapshot_download(
        repo_id="Melofhell00/mel-training-package",
        repo_type="model",
        local_dir=package_dir,
    )

    print("Preparing data...")
    # sys.executable keeps the child on the same interpreter (and therefore
    # the same installed packages) as this function.
    subprocess.run(
        [
            sys.executable, f"{package_dir}/prepare_data.py",
            "--corpus", corpus_path,
            "--output", train_file,
            "--tokenizer", model_id,
        ],
        check=True,
    )

    print("Training...")
    cmd = [
        sys.executable, f"{package_dir}/train.py",
        "--model", model_id,
        "--data", train_file,
        "--output", f"/workspace/{output_name}",
        "--epochs", str(epochs),
        "--use-4bit",
        "--hf-repo", hf_repo,
    ]
    subprocess.run(cmd, check=True)

    # NOTE(review): the upload itself is presumably done by train.py via
    # --hf-repo; this function only reports the target repo id.
    print(f"Done. Pushed to {hf_repo}")
    return hf_repo
|
|
|
|
@app.local_entrypoint()
def main(model: str = "EleutherAI/pythia-1.4b", epochs: int = 3):
    """Local entry point: run ``train`` remotely and print what it returns.

    Args:
        model: Base model id, forwarded to ``train`` as ``model_id``.
        epochs: Number of epochs, forwarded to ``train`` unchanged.
    """
    pushed_repo = train.remote(model_id=model, epochs=epochs)
    print(f"\nResult: {pushed_repo}")
|
|