|
|
|
|
|
|
|
|
import typer |
|
|
import sys |
|
|
from pathlib import Path |
|
|
from rich.console import Console |
|
|
from rich.panel import Panel |
|
|
from rich.prompt import Confirm |
|
|
|
|
|
|
|
|
# Make the repository root importable so `pipelines.production_pipeline`
# resolves when this file is executed directly as a script.
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
|
from pipelines.production_pipeline import ProductionPipeline |
|
|
|
|
|
# Typer application exposing the `run` command defined below.
app = typer.Typer()

# Shared Rich console used for all panel/status output in this module.
console = Console()
|
|
|
|
|
@app.command()
def run(
    dataset_path: str = typer.Option(..., help="Path to the dataset file"),
    model_name: str = typer.Option("microsoft/DialoGPT-medium", help="Model name to fine-tune"),
    output_dir: str = typer.Option("runs/humigence", help="Output directory for results"),
    gpu_id: int = typer.Option(0, help="GPU ID to use for atomic evaluation"),
    use_atomic_evaluation: bool = typer.Option(True, help="Use atomic evaluation (true process isolation)"),
    per_device_train_batch_size: int = typer.Option(4, help="Training batch size per device"),
    per_device_eval_batch_size: int = typer.Option(8, help="Evaluation batch size per device"),
    num_train_epochs: int = typer.Option(3, help="Number of training epochs"),
    learning_rate: float = typer.Option(5e-5, help="Learning rate"),
    max_length: int = typer.Option(512, help="Maximum sequence length"),
):
    """
    Run the Humigence pipeline with atomic evaluation for guaranteed device isolation.

    Shows the resolved configuration, asks the user to confirm, then assembles
    a config dict and hands it to ProductionPipeline. Prints a success or error
    panel based on the pipeline's result dict; exits with code 1 on any
    unexpected exception.
    """
    # NOTE(review): several emoji in the original source were mojibake-corrupted
    # (a bare 'β' even split a string literal across physical lines, which was a
    # SyntaxError inside this f-string). Glyphs below are reconstructed from
    # context — confirm the intended originals.
    config_panel = Panel(
        f"""[bold cyan]Humigence Pipeline Configuration[/bold cyan]

[bold]Dataset:[/bold] {dataset_path}
[bold]Model:[/bold] {model_name}
[bold]Output Directory:[/bold] {output_dir}
[bold]GPU ID:[/bold] {gpu_id}
[bold]Atomic Evaluation:[/bold] {'✅ Enabled' if use_atomic_evaluation else '❌ Disabled'}
[bold]Training Batch Size:[/bold] {per_device_train_batch_size}
[bold]Evaluation Batch Size:[/bold] {per_device_eval_batch_size}
[bold]Epochs:[/bold] {num_train_epochs}
[bold]Learning Rate:[/bold] {learning_rate}
[bold]Max Length:[/bold] {max_length}""",
        title="🚀 Configuration",
        border_style="cyan"
    )

    console.print(config_panel)

    if use_atomic_evaluation:
        atomic_panel = Panel(
            """[bold green]Atomic Evaluation Enabled[/bold green]

✅ True process isolation
✅ No device contamination
✅ Guaranteed single-GPU evaluation
✅ Clean environment separation""",
            title="🔒 Atomic Evaluation",
            border_style="green"
        )
        console.print(atomic_panel)

    # Give the user a chance to bail out before any heavy work starts.
    if not Confirm.ask("\n[bold]Proceed with pipeline execution?[/bold]"):
        console.print("[yellow]Pipeline cancelled by user[/yellow]")
        return

    # CLI options plus fixed training hyper-parameters expected by
    # ProductionPipeline. The hard-coded values (warmup, save/eval cadence,
    # fp16, ...) are this command's opinionated defaults.
    config = {
        "dataset_path": dataset_path,
        "model_name": model_name,
        "output_dir": output_dir,
        "gpu_id": gpu_id,
        "use_atomic_evaluation": use_atomic_evaluation,
        "per_device_train_batch_size": per_device_train_batch_size,
        "per_device_eval_batch_size": per_device_eval_batch_size,
        "num_train_epochs": num_train_epochs,
        "learning_rate": learning_rate,
        "max_length": max_length,
        "warmup_steps": 100,
        "logging_steps": 10,
        "save_steps": 500,
        "eval_steps": 500,
        "save_total_limit": 3,
        "load_best_model_at_end": True,
        "metric_for_best_model": "eval_loss",
        "greater_is_better": False,
        "fp16": True,
        "dataloader_num_workers": 4,
        "remove_unused_columns": False,
    }

    try:
        console.print("\n[bold cyan]🚀 Starting Humigence Pipeline...[/bold cyan]")

        pipeline = ProductionPipeline(config)
        result = pipeline.run()

        # Pipeline reports its outcome via a result dict with a "status" key.
        if result["status"] == "success":
            success_panel = Panel(
                f"""[bold green]Pipeline Completed Successfully![/bold green]

[bold]Status:[/bold] {result['status']}
[bold]Dataset Info:[/bold] {result.get('dataset_info', 'N/A')}
[bold]Report Path:[/bold] {result.get('report_path', 'N/A')}

[bold]Evaluation Results:[/bold]
{_format_evaluation_results(result.get('evaluation_results', {}))}""",
                title="🎉 Success",
                border_style="green"
            )
            console.print(success_panel)
        else:
            error_panel = Panel(
                f"""[bold red]Pipeline Failed[/bold red]

[bold]Status:[/bold] {result['status']}
[bold]Error:[/bold] {result.get('error', 'Unknown error')}""",
                title="❌ Error",
                border_style="red"
            )
            console.print(error_panel)

    except Exception as e:
        # Surface the failure to the shell via a non-zero exit code.
        console.print(f"\n[bold red]❌ Pipeline execution failed: {e}[/bold red]")
        raise typer.Exit(1)
|
|
|
|
|
def _format_evaluation_results(results): |
|
|
"""Format evaluation results for display""" |
|
|
if not results: |
|
|
return "No evaluation results available" |
|
|
|
|
|
formatted = [] |
|
|
for key, value in results.items(): |
|
|
if isinstance(value, dict): |
|
|
if 'eval_loss' in value and 'perplexity' in value: |
|
|
formatted.append(f" {key}: Loss={value['eval_loss']:.4f}, Perplexity={value['perplexity']:.2f}") |
|
|
else: |
|
|
formatted.append(f" {key}: {value}") |
|
|
else: |
|
|
formatted.append(f" {key}: {value}") |
|
|
|
|
|
return "\n".join(formatted) if formatted else "No metrics available" |
|
|
|
|
|
# Script entry point: hand control to the Typer CLI.
if __name__ == "__main__":
    app()
|
|
|