# (extraction metadata: 5,477 bytes, commit 7275aef)
# cli/atomic_eval.py
import typer
import sys
from pathlib import Path
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Confirm
# Add the parent directory to the path
sys.path.insert(0, str(Path(__file__).parent.parent))
from pipelines.production_pipeline import ProductionPipeline
# Module-level CLI application and the shared Rich console used by every command.
app = typer.Typer()
console = Console()
@app.command()
def run(
    dataset_path: str = typer.Option(..., help="Path to the dataset file"),
    model_name: str = typer.Option("microsoft/DialoGPT-medium", help="Model name to fine-tune"),
    output_dir: str = typer.Option("runs/humigence", help="Output directory for results"),
    gpu_id: int = typer.Option(0, help="GPU ID to use for atomic evaluation"),
    use_atomic_evaluation: bool = typer.Option(True, help="Use atomic evaluation (true process isolation)"),
    per_device_train_batch_size: int = typer.Option(4, help="Training batch size per device"),
    per_device_eval_batch_size: int = typer.Option(8, help="Evaluation batch size per device"),
    num_train_epochs: int = typer.Option(3, help="Number of training epochs"),
    learning_rate: float = typer.Option(5e-5, help="Learning rate"),
    max_length: int = typer.Option(512, help="Maximum sequence length"),
) -> None:
    """Run the Humigence pipeline with atomic evaluation for guaranteed device isolation.

    Displays the resolved configuration, asks for interactive confirmation,
    assembles a plain-dict config and delegates to ``ProductionPipeline.run()``,
    then renders the result (success or failure) in a Rich panel.

    Raises:
        typer.Exit: with exit code 1 when the pipeline raises any exception.
    """
    # Show the effective configuration before doing any work so the user can
    # abort at the confirmation prompt below.
    # NOTE(review): the lone 'π' glyphs in panel titles look mojibake'd
    # (likely emoji in the original source) — confirm the intended characters.
    config_panel = Panel(
        f"""[bold cyan]Humigence Pipeline Configuration[/bold cyan]
[bold]Dataset:[/bold] {dataset_path}
[bold]Model:[/bold] {model_name}
[bold]Output Directory:[/bold] {output_dir}
[bold]GPU ID:[/bold] {gpu_id}
[bold]Atomic Evaluation:[/bold] {'✅ Enabled' if use_atomic_evaluation else '❌ Disabled'}
[bold]Training Batch Size:[/bold] {per_device_train_batch_size}
[bold]Evaluation Batch Size:[/bold] {per_device_eval_batch_size}
[bold]Epochs:[/bold] {num_train_epochs}
[bold]Learning Rate:[/bold] {learning_rate}
[bold]Max Length:[/bold] {max_length}""",
        title="π Configuration",
        border_style="cyan",
    )
    console.print(config_panel)

    if use_atomic_evaluation:
        # Extra banner explaining what atomic evaluation guarantees.
        atomic_panel = Panel(
            """[bold green]Atomic Evaluation Enabled[/bold green]
✅ True process isolation
✅ No device contamination
✅ Guaranteed single-GPU evaluation
✅ Clean environment separation""",
            title="π Atomic Evaluation",
            border_style="green",
        )
        console.print(atomic_panel)

    # Interactive gate: nothing is executed until the user confirms.
    if not Confirm.ask("\n[bold]Proceed with pipeline execution?[/bold]"):
        console.print("[yellow]Pipeline cancelled by user[/yellow]")
        return

    # Build the pipeline config. CLI options are passed through verbatim;
    # the remaining keys are fixed training defaults not exposed on the CLI.
    config = {
        "dataset_path": dataset_path,
        "model_name": model_name,
        "output_dir": output_dir,
        "gpu_id": gpu_id,
        "use_atomic_evaluation": use_atomic_evaluation,
        "per_device_train_batch_size": per_device_train_batch_size,
        "per_device_eval_batch_size": per_device_eval_batch_size,
        "num_train_epochs": num_train_epochs,
        "learning_rate": learning_rate,
        "max_length": max_length,
        "warmup_steps": 100,
        "logging_steps": 10,
        "save_steps": 500,
        "eval_steps": 500,
        "save_total_limit": 3,
        "load_best_model_at_end": True,
        "metric_for_best_model": "eval_loss",
        "greater_is_better": False,
        "fp16": True,
        "dataloader_num_workers": 4,
        "remove_unused_columns": False,
    }

    try:
        console.print("\n[bold cyan]π Starting Humigence Pipeline...[/bold cyan]")
        pipeline = ProductionPipeline(config)
        result = pipeline.run()

        # ProductionPipeline.run() reports its outcome via a status dict
        # rather than raising, so branch on result["status"] here.
        if result["status"] == "success":
            success_panel = Panel(
                f"""[bold green]Pipeline Completed Successfully![/bold green]
[bold]Status:[/bold] {result['status']}
[bold]Dataset Info:[/bold] {result.get('dataset_info', 'N/A')}
[bold]Report Path:[/bold] {result.get('report_path', 'N/A')}
[bold]Evaluation Results:[/bold]
{_format_evaluation_results(result.get('evaluation_results', {}))}""",
                title="π Success",
                border_style="green",
            )
            console.print(success_panel)
        else:
            error_panel = Panel(
                f"""[bold red]Pipeline Failed[/bold red]
[bold]Status:[/bold] {result['status']}
[bold]Error:[/bold] {result.get('error', 'Unknown error')}""",
                title="β Error",
                border_style="red",
            )
            console.print(error_panel)
    except Exception as e:
        # Unexpected failure (not a status dict): report and exit non-zero.
        console.print(f"\n[bold red]β Pipeline execution failed: {e}[/bold red]")
        raise typer.Exit(1)
def _format_evaluation_results(results):
"""Format evaluation results for display"""
if not results:
return "No evaluation results available"
formatted = []
for key, value in results.items():
if isinstance(value, dict):
if 'eval_loss' in value and 'perplexity' in value:
formatted.append(f" {key}: Loss={value['eval_loss']:.4f}, Perplexity={value['perplexity']:.2f}")
else:
formatted.append(f" {key}: {value}")
else:
formatted.append(f" {key}: {value}")
return "\n".join(formatted) if formatted else "No metrics available"
# Allow running this module directly (e.g. `python cli/atomic_eval.py run ...`).
if __name__ == "__main__":
    app()
|