File size: 2,772 Bytes
fde73f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Train on Modal serverless GPU.

Modal lets you rent GPUs by the second. Cheaper than RunPod for short jobs.

Setup:
    pip install modal
    modal setup  # login
    modal secret create huggingface HF_TOKEN=<your_token>
    
Run:
    modal run train_modal.py --model EleutherAI/pythia-1.4b
"""
import modal

# Modal application that owns the remote training function and the entrypoint.
app = modal.App("mel-corpus-training")

# Python packages installed into the container image.
_pip_requirements = [
    "torch>=2.0.0",
    "transformers>=4.40.0",
    "peft>=0.10.0",
    "accelerate>=0.30.0",
    "datasets>=2.18.0",
    "bitsandbytes>=0.43.0",
    "huggingface_hub>=0.22.0",
]

# Container image: Debian slim with Python 3.11; pip packages are installed
# first, then git via apt (same layer order as before).
_base_image = modal.Image.debian_slim(python_version="3.11")
image = _base_image.pip_install(_pip_requirements).apt_install("git")

# Named volume, created on first use; `train` mounts it at /workspace.
volume = modal.Volume.from_name("mel-training", create_if_missing=True)


@app.function(
    image=image,
    gpu="A100-40GB",  # change to T4, A10, A100-80GB as needed
    timeout=60 * 60 * 12,  # 12 hour max
    volumes={"/workspace": volume},
    secrets=[modal.Secret.from_name("huggingface")],
)
def train(
    model_id: str = "EleutherAI/pythia-1.4b",
    bridge_repo: str = "Melofhell00/claude-bridge",
    output_repo: str = None,
    epochs: int = 3,
):
    """Download the corpus and training scripts, prepare data, and train.

    Everything is written under ``/workspace``, which is the mounted volume.

    Args:
        model_id: Hugging Face model id, passed to ``prepare_data.py`` as
            ``--tokenizer`` and to ``train.py`` as ``--model``.
        bridge_repo: Dataset repo the unified corpus file is downloaded from.
        output_repo: Destination for the trained model. A bare name
            (``"my-model"``) is placed under the ``Melofhell00`` namespace;
            a full repo id (``"user/my-model"``) is used as given. When
            omitted, defaults to ``mel-<last component of model_id>``.
        epochs: Value passed to ``train.py`` as ``--epochs``.

    Returns:
        The repo id that was handed to ``train.py`` via ``--hf-repo``.

    Raises:
        ValueError: If ``output_repo`` has more than one ``/`` separator or
            an empty name component.
        subprocess.CalledProcessError: If either script exits non-zero.
    """
    import os
    import subprocess
    from huggingface_hub import hf_hub_download, snapshot_download

    default_namespace = "Melofhell00"

    # Resolve the destination before any download or GPU work so that a
    # malformed repo id fails immediately instead of at push time.
    requested = (output_repo or "").strip("/")
    if "/" in requested:
        namespace, _, output_name = requested.partition("/")
        if not namespace or not output_name or "/" in output_name:
            raise ValueError(
                f"output_repo must be 'name' or 'namespace/name', got {output_repo!r}"
            )
        # Caller supplied a full repo id: use it as-is rather than prefixing
        # the default namespace a second time.
        hf_repo = requested
    else:
        output_name = requested or f"mel-{model_id.split('/')[-1]}"
        hf_repo = f"{default_namespace}/{output_name}"

    os.chdir("/workspace")

    # Pull unified corpus from bridge
    print(f"Downloading corpus from {bridge_repo}...")
    corpus_path = hf_hub_download(
        repo_id=bridge_repo,
        filename="unified_corpus_2026_05_12/unified_corpus.txt",
        repo_type="dataset",
    )
    print(f"Corpus: {corpus_path}")

    # Pull training scripts from this repo (uploaded separately)
    snapshot_download(
        repo_id="Melofhell00/mel-training-package",
        repo_type="model",
        local_dir="/workspace/training_package",
    )

    # Prepare data
    print("Preparing data...")
    subprocess.run([
        "python", "/workspace/training_package/prepare_data.py",
        "--corpus", corpus_path,
        "--output", "/workspace/train.jsonl",
        "--tokenizer", model_id,
    ], check=True)

    # Train
    print("Training...")
    cmd = [
        "python", "/workspace/training_package/train.py",
        "--model", model_id,
        "--data", "/workspace/train.jsonl",
        # Local output directory uses only the repo's name component, so a
        # namespaced output_repo does not create a nested directory.
        "--output", f"/workspace/{output_name}",
        "--epochs", str(epochs),
        "--use-4bit",
        "--hf-repo", hf_repo,
    ]
    subprocess.run(cmd, check=True)

    print(f"Done. Pushed to {hf_repo}")
    return hf_repo


@app.local_entrypoint()
def main(model: str = "EleutherAI/pythia-1.4b", epochs: int = 3):
    """Run `train` remotely with the given model and epoch count.

    Prints the value returned by the remote call once it finishes.
    """
    pushed_repo = train.remote(epochs=epochs, model_id=model)
    print(f"\nResult: {pushed_repo}")