| """Configuration settings for model fine-tuning.""" | |
| from pathlib import Path | |
| # Directory paths | |
| FINE_TUNING_DIR = Path(__file__).parent | |
| MODELS_DIR = FINE_TUNING_DIR / "models" | |
| DATA_DIR = FINE_TUNING_DIR / "data" | |
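
# Note (illustrative, an assumption not stated in this module): callers are
# presumed to create these directories before writing to them, e.g.
#     MODELS_DIR.mkdir(parents=True, exist_ok=True)
#     DATA_DIR.mkdir(parents=True, exist_ok=True)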

# Fine-tuning parameters
DEFAULT_FINE_TUNING_CONFIG = {
    "min_training_samples": 1000,
    "training_interval_hours": 24,
    "epochs": 5,  # Reduced from 10 to 5
    "batch_size": 64,  # Increased from 32 to 64 for faster processing
    "learning_rate": 2e-5,
    "max_seq_length": 512,  # Maximum sequence length for tokenization (note: CrossEncoder uses its own model.max_length)
    "warmup_steps": 500,  # Reduced from 1000 to 500
    "use_mixed_precision": True,  # Enable mixed precision training
    "gradient_accumulation_steps": 2,  # Reduced from 4 to 2 for faster updates
    "dataloader_num_workers": 2,  # Reduced from 4 to 2 to prevent memory issues
    "pin_memory": True,  # Pin memory for faster data transfer to the GPU
    "weight_decay": 0.01,  # L2 regularization
    "num_cycles": 2,  # Reduced from 3 to 2
    "early_stopping_patience": 2,  # Reduced from 3 to 2
    "early_stopping_min_delta": 1e-4,  # Minimum improvement for early stopping
    "adam_epsilon": 1e-8,  # Adam optimizer epsilon
    "max_grad_norm": 1.0,  # Maximum gradient norm for clipping
    "adam_betas": (0.9, 0.999),  # Adam optimizer betas
    # Parameters for hard negative mining (names aligned with trainer.py's .get() calls)
    "hard_negatives_top_k": 2,  # Reduced from 3 to 2
    "hard_negatives_weight": 1.2,  # Weight for hard negative examples
    # Note: The following parameters from the original config are either not
    # directly used by the provided trainer.py or are redundant, so they have
    # been removed for clarity:
    # embedding_batch_size, use_ipex, use_amp, scheduler, validation_split,
    # save_best_model, fp16_opt_level, scheduler_num_cycles, scheduler_power,
    # num_folds (trainer uses a hardcoded 5), min_feedback_confidence,
    # ensemble_aggregation, feedback_weight_scale, min_samples_per_class,
    # temperature, use_weighted_sampling, augmentation_strength,
    # confidence_threshold, ensemble_diversity_weight, label_smoothing,
    # dropout_rate, warmup_ratio, max_train_steps, eval_steps, logging_steps,
    # save_steps, save_total_limit (utils.py has MAX_OLD_MODELS).
}
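
# Illustrative helper, not part of the original module: the note above says
# trainer.py reads these keys via dict .get() calls, so per-run overrides can
# be layered on top of the defaults without mutating them. A minimal sketch:
def get_fine_tuning_config(overrides=None):
    """Return a copy of DEFAULT_FINE_TUNING_CONFIG with optional overrides applied."""
    config = dict(DEFAULT_FINE_TUNING_CONFIG)
    if overrides:
        config.update(overrides)
    return config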

# Model versioning
MODEL_STATUS = {
    "BASE": "base",
    "FINE_TUNED": "fine_tuned",
    "TRAINING": "training",
}
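
# Hypothetical usage sketch (an assumption, not confirmed by this module):
# entries in MODEL_METADATA_FILE are presumed to carry a "status" field drawn
# from MODEL_STATUS, so a guard like this keeps writes to the canonical values.
def is_valid_status(status):
    """Return True if `status` is one of the canonical MODEL_STATUS values."""
    return status in MODEL_STATUS.values()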

# File names
TRAINING_DATA_FILE = "reranker_training_data.jsonl"
MODEL_METADATA_FILE = "model_metadata.json"
USER_FEEDBACK_FILE = "user_feedback.jsonl"
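
# Illustrative resolved paths. The mapping of each file onto MODELS_DIR vs.
# DATA_DIR is an assumption; the original module does not join these itself.
TRAINING_DATA_PATH = DATA_DIR / TRAINING_DATA_FILE
MODEL_METADATA_PATH = MODELS_DIR / MODEL_METADATA_FILE
USER_FEEDBACK_PATH = DATA_DIR / USER_FEEDBACK_FILE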