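"""Interactive CLI for configuring a supervised fine-tuning (SFT) run.

Walks the user through setup mode, GPU layout, base model, dataset, and
training-recipe selection, then writes a config snapshot and a reproduce.sh
script under runs/humigence/.
"""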
import datetime
import json
import os
from pathlib import Path

from InquirerPy import prompt
from rich.console import Console
from rich.table import Table

from utils.device import get_system_info
from utils.validators import detect_datasets

console = Console()

def display_system_summary():
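    # get_system_info() (from utils.device) is assumed to return a flat dict;
    # its "GPUs" key holds a list of {"name": ..., "memory": ...} dicts, which
    # the loop below renders as two rows per GPU.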
    info = get_system_info()

    table = Table(title="🖥️ System Detection Summary", show_lines=True)
    table.add_column("Property", style="cyan", no_wrap=True)
    table.add_column("Value", style="green")

    for key, val in info.items():
        if key == "GPUs":
            for i, gpu in enumerate(val):
                table.add_row(f"GPU {i} Name", gpu['name'])
                table.add_row(f"GPU {i} Memory", gpu['memory'])
        else:
            table.add_row(key, str(val))

    console.print("\n")
    console.print(table)

def get_available_models():
    # Default Hugging Face cache layout: ~/.cache/huggingface/hub holds one
    # directory per cached repo, named "models--{org}--{name}", each with a
    # "snapshots" subdirectory containing the downloaded revisions.
    hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
    model_choices = []

    if os.path.exists(hf_cache):
        for root, dirs, _files in os.walk(hf_cache):
            if "snapshots" in dirs:
                # root is the "models--{org}--{name}" directory itself
                model_dir = os.path.basename(root)
                if model_dir.startswith("models--"):
                    model_choices.append(
                        model_dir.replace("models--", "", 1).replace("--", "/")
                    )

    # Add manually defined models
    model_choices += [
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "microsoft/Phi-2",
        "Qwen/Qwen1.5-0.5B",
    ]

    # De-dupe and sort, keeping the manual-entry escape hatch last
    return sorted(set(model_choices)) + ["manual-entry (custom path/repo)"]

def run():
    console.print("\n[bold magenta]🧪 Supervised Fine-Tuning Setup[/bold magenta]")

    questions = [
        {
            "type": "list",
            "name": "setup_mode",
            "message": "Choose Setup Mode:",
            "choices": ["Basic Setup – Essential configuration only", "Advanced Setup – Full control over all parameters"],
        }
    ]

    answers = prompt(questions)
    setup_mode = answers.get("setup_mode").split(" ")[0].lower()  # 'basic' or 'advanced'
    
    console.print(f"\n[green]✅ You selected:[/green] [yellow]{answers.get('setup_mode')}[/yellow]")

    # Display system summary
    display_system_summary()

    # GPU selection
    gpu_options = []
    info = get_system_info()
    for idx, gpu in enumerate(info.get("GPUs", [])):
        gpu_options.append(f"Single GPU – GPU {idx}: {gpu['name']}")

    # Prompting with an empty choices list would fail, so bail out early
    if not gpu_options:
        console.print("[bold red]⚠️ No GPUs detected; cannot continue.[/bold red]")
        return

    if len(gpu_options) > 1:
        gpu_options.append("Multi-GPU – All")
        gpu_options.append("Multi-GPU – Custom")

    gpu_question = [
        {
            "type": "list",
            "name": "gpu_choice",
            "message": "οΏ½οΏ½ Choose Training Configuration:",
            "choices": gpu_options,
        }
    ]
    gpu_answer = prompt(gpu_question)
    selected_gpu = gpu_answer.get("gpu_choice")

    console.print(f"\n[green]✅ You selected GPU config:[/green] [yellow]{selected_gpu}[/yellow]")

    # Model selection
    model_question = [
        {
            "type": "list",
            "name": "base_model",
            "message": "🧠 Choose Base Model:",
            "choices": get_available_models()
        }
    ]

    model_answer = prompt(model_question)
    selected_model = model_answer.get("base_model")

    # If manual-entry selected
    if selected_model == "manual-entry (custom path/repo)":
        manual_input = prompt([
            {
                "type": "input",
                "name": "custom_model",
                "message": "Enter Hugging Face repo or local model path:"
            }
        ])
        selected_model = manual_input.get("custom_model")

    console.print(f"\n[green]✅ You selected model:[/green] [yellow]{selected_model}[/yellow]")

    # Dataset selection
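    # detect_datasets() (from utils.validators) is assumed to return a list of
    # (display_name, path) tuples; both the prompt choices and the lookup
    # below rely on that shape.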
    dataset_options = detect_datasets()
    if not dataset_options:
        console.print("[bold red]⚠️ No datasets found in ~/humigence_data[/bold red]")
        return

    dataset_question = [
        {
            "type": "list",
            "name": "dataset_path",
            "message": "πŸ“š Choose Dataset to Train On:",
            "choices": [opt[0] for opt in dataset_options]
        }
    ]

    dataset_answer = prompt(dataset_question)
    selected_dataset = next(
        path for name, path in dataset_options if name == dataset_answer["dataset_path"]
    )

    console.print(f"\n[green]✅ You selected dataset:[/green] [yellow]{selected_dataset}[/yellow]")

    # Training recipe selection
    recipe_question = [
        {
            "type": "list",
            "name": "recipe",
            "message": "πŸ§ͺ Choose Training Recipe:",
            "choices": [
                "QLoRA (4-bit NF4)",
                "LoRA (FP16)",
                "LoRA (BF16)",
                "Full Fine-tuning (FP32)"
            ],
        }
    ]

    recipe_answer = prompt(recipe_question)
    selected_recipe = recipe_answer.get("recipe")

    console.print(f"\n[green]✅ Training recipe:[/green] [yellow]{selected_recipe}[/yellow]")

    # Parameter branching - Basic vs Advanced
    if setup_mode == "advanced":
        param_questions = [
            {
                "type": "input",
                "name": "learning_rate",
                "message": "Enter Learning Rate:",
                "default": "2e-5"
            },
            {
                "type": "input",
                "name": "num_train_epochs",
                "message": "Enter Number of Epochs:",
                "default": "3"
            },
            {
                "type": "input",
                "name": "gradient_accumulation_steps",
                "message": "Enter Gradient Accumulation Steps:",
                "default": "4"
            },
            {
                "type": "input",
                "name": "logging_steps",
                "message": "Enter Logging Steps:",
                "default": "10"
            },
            {
                "type": "input",
                "name": "save_steps",
                "message": "Enter Save Steps:",
                "default": "100"
            }
        ]

        param_answers = prompt(param_questions)
    else:
        # Basic mode defaults
        param_answers = {
            "learning_rate": "2e-5",
            "num_train_epochs": "3",
            "gradient_accumulation_steps": "4",
            "logging_steps": "10",
            "save_steps": "100"
        }

    console.print("\n[cyan]📦 Hyperparameters Loaded:[/cyan]")
    for k, v in param_answers.items():
        console.print(f"[bold]{k}[/bold]: {v}")

    # Combine config
    final_config = {
        "setup_mode": setup_mode,
        "gpu_config": selected_gpu,
        "base_model": selected_model,
        "dataset_path": selected_dataset,
        "training_recipe": selected_recipe,
        **param_answers,
        "timestamp": datetime.datetime.now().isoformat()
    }

    # Create directory and write config snapshot
    run_dir = Path("runs/humigence")
    run_dir.mkdir(parents=True, exist_ok=True)
    snapshot_path = run_dir / "config.snapshot.json"

    with open(snapshot_path, "w") as f:
        json.dump(final_config, f, indent=2)

    console.print(f"\n[bold green]✅ Configuration saved to:[/bold green] [cyan]{snapshot_path}[/cyan]")

    # Generate reproduce.sh script
    reproduce_script = f"""#!/bin/bash
# Re-run this exact training config
python3 -m pipelines.lora_trainer --config {snapshot_path}
"""

    reproduce_path = run_dir / "reproduce.sh"
    with open(reproduce_path, "w") as f:
        f.write(reproduce_script)

    # Make executable
    reproduce_path.chmod(0o755)

    console.print(f"[bold green]✅ Reproduction script saved to:[/bold green] [cyan]{reproduce_path}[/cyan]")

    # Final confirmation prompt
    final_prompt = prompt([
        {
            "type": "confirm",
            "name": "confirm_training",
            "message": "πŸš€ Proceed with training now?",
            "default": True
        }
    ])

    if not final_prompt["confirm_training"]:
        console.print("[bold yellow]❌ Training cancelled.[/bold yellow]")
        return

    console.print("[bold green]Starting training...[/bold green]")
    # Call training engine next (Step 13)
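    # One possible hand-off (a sketch only; Step 13 may wire this differently)
    # is to invoke the same entry point reproduce.sh uses:
    #
    #   import subprocess
    #   subprocess.run(
    #       ["python3", "-m", "pipelines.lora_trainer", "--config", str(snapshot_path)],
    #       check=True,
    #   )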

if __name__ == "__main__":
    run()