"""Configuration settings for model fine-tuning."""

from pathlib import Path

# Directory paths
FINE_TUNING_DIR = Path(__file__).parent
MODELS_DIR = FINE_TUNING_DIR / "models"
DATA_DIR = FINE_TUNING_DIR / "data"
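
# The models/ and data/ directories are assumed to be created by the training
# code rather than here; a minimal sketch of that setup (hypothetical, not
# part of this module):
#
#     for d in (MODELS_DIR, DATA_DIR):
#         d.mkdir(parents=True, exist_ok=True)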

# Fine-tuning parameters
DEFAULT_FINE_TUNING_CONFIG = {
    "min_training_samples": 1000,
    "training_interval_hours": 24,
    "epochs": 5,  # Reduced from 10 to 5 epochs
    "batch_size": 64,  # Increased from 32 to 64 for faster processing
    "learning_rate": 2e-5,
    "max_seq_length": 512, # Maximum sequence length for tokenization (Note: CrossEncoder uses its own model.max_length)
    "warmup_steps": 500,  # Reduced from 1000 to 500
    "use_mixed_precision": True,  # Enable mixed precision training
    "gradient_accumulation_steps": 2,  # Reduced from 4 to 2 for faster updates
    "dataloader_num_workers": 2,  # Reduced from 4 to 2 to prevent memory issues
    "pin_memory": True,  # Pin memory for faster data transfer to GPU
    "weight_decay": 0.01,  # L2 regularization
    "num_cycles": 2,  # Reduced from 3 to 2
    "early_stopping_patience": 2,  # Reduced from 3 to 2
    "early_stopping_min_delta": 1e-4,  # Minimum improvement for early stopping
    "adam_epsilon": 1e-8,  # Adam optimizer epsilon
    "max_grad_norm": 1.0,  # Maximum gradient norm for clipping
    "adam_betas": (0.9, 0.999),  # Adam optimizer betas

    # Parameters for hard negative mining (names aligned with trainer.py's .get() calls)
    "hard_negatives_top_k": 2,  # Reduced from 3 to 2
    "hard_negatives_weight": 1.2,  # Weight for hard negative examples

    # Note: The following parameters from the original config are not directly used by
    # the provided trainer.py or are redundant, so they have been removed for clarity:
    # embedding_batch_size, use_ipex, use_amp, scheduler, validation_split,
    # save_best_model, fp16_opt_level, scheduler_num_cycles, scheduler_power,
    # num_folds (trainer.py hardcodes 5), min_feedback_confidence, ensemble_aggregation,
    # feedback_weight_scale, min_samples_per_class, temperature, use_weighted_sampling,
    # augmentation_strength, confidence_threshold, ensemble_diversity_weight,
    # label_smoothing, dropout_rate, warmup_ratio, max_train_steps, eval_steps,
    # logging_steps, save_steps, save_total_limit (utils.py has MAX_OLD_MODELS).
}
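
# Hypothetical convenience accessor (a sketch; trainer.py is assumed to read
# values via DEFAULT_FINE_TUNING_CONFIG.get(...) directly, per the note above):
def get_fine_tuning_param(name: str, default=None):
    """Return a fine-tuning parameter, falling back to `default` when unset."""
    return DEFAULT_FINE_TUNING_CONFIG.get(name, default)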

# Model versioning
MODEL_STATUS = {
    "BASE": "base",
    "FINE_TUNED": "fine_tuned",
    "TRAINING": "training"
}
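
# Example of how these status strings might appear on disk (an assumption
# about the metadata schema; the actual contents of MODEL_METADATA_FILE are
# defined by the training code):
#
#     {"version": "v3", "status": MODEL_STATUS["FINE_TUNED"]}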

# File names
TRAINING_DATA_FILE = "reranker_training_data.jsonl"
MODEL_METADATA_FILE = "model_metadata.json"
USER_FEEDBACK_FILE = "user_feedback.jsonl"
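
# Full paths composed from the constants above. These names are an
# illustrative sketch (which file lives in which directory is an assumption),
# not constants the rest of the codebase necessarily imports:
TRAINING_DATA_PATH = DATA_DIR / TRAINING_DATA_FILE
MODEL_METADATA_PATH = MODELS_DIR / MODEL_METADATA_FILE
USER_FEEDBACK_PATH = DATA_DIR / USER_FEEDBACK_FILE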