Rulga committed on
Commit
b47770f
·
1 Parent(s): c93708c

fine tuned

Browse files
Files changed (2) hide show
  1. config/settings.py +8 -3
  2. src/training/fine_tuner.py +35 -0
config/settings.py CHANGED
@@ -22,16 +22,21 @@ API_CONFIG = {
22
  DATASET_ID = "Rulga/status-law-knowledge-base"
23
  CHAT_HISTORY_PATH = "chat_history"
24
  VECTOR_STORE_PATH = "vector_store"
25
- FINE_TUNED_PATH = "fine_tuned_models" # новый путь
26
 
27
  # Paths configuration
28
  MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
29
- TRAINING_OUTPUT_DIR = os.path.join(CHAT_HISTORY_PATH, FINE_TUNED_PATH) # изменённый путь
30
 
31
  # Create necessary directories if they don't exist
32
  os.makedirs(MODEL_PATH, exist_ok=True)
33
  os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
34
- MODELS_REGISTRY_PATH = os.path.join(CHAT_HISTORY_PATH, "models_registry.json") # перемещаем registry.json
 
 
 
 
 
35
 
36
  # Models configuration with detailed information
37
  MODELS = {
 
22
  DATASET_ID = "Rulga/status-law-knowledge-base"
23
  CHAT_HISTORY_PATH = "chat_history"
24
  VECTOR_STORE_PATH = "vector_store"
25
+ FINE_TUNED_PATH = "fine_tuned_models"
26
 
27
  # Paths configuration
28
  MODEL_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
29
+ TRAINING_OUTPUT_DIR = os.path.join(CHAT_HISTORY_PATH, FINE_TUNED_PATH)
30
 
31
  # Create necessary directories if they don't exist
32
  os.makedirs(MODEL_PATH, exist_ok=True)
33
  os.makedirs(TRAINING_OUTPUT_DIR, exist_ok=True)
34
+
35
+ # Dataset paths
36
+ DATASET_CHAT_HISTORY_PATH = f"{DATASET_ID}/chat_history"
37
+ DATASET_VECTOR_STORE_PATH = f"{DATASET_ID}/vector_store"
38
+ DATASET_FINE_TUNED_PATH = f"{DATASET_ID}/fine_tuned_models"
39
+ MODELS_REGISTRY_PATH = os.path.join(CHAT_HISTORY_PATH, "models_registry.json")
40
 
41
  # Models configuration with detailed information
42
  MODELS = {
src/training/fine_tuner.py CHANGED
@@ -441,6 +441,41 @@ def finetune_from_chat_history(epochs: int = 3,
441
 
442
  return success, message
443
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  if __name__ == "__main__":
445
  # Usage example
446
  success, message = finetune_from_chat_history()
 
441
 
442
  return success, message
443
 
444
+ def finetune_from_file(
445
+ training_file: str,
446
+ epochs: int = 3,
447
+ batch_size: int = 4,
448
+ learning_rate: float = 2e-4
449
+ ) -> Tuple[bool, str]:
450
+ """
451
+ Fine-tune model using training data from file
452
+
453
+ Args:
454
+ training_file: Path to JSONL file with training data
455
+ epochs: Number of training epochs
456
+ batch_size: Batch size for training
457
+ learning_rate: Learning rate
458
+
459
+ Returns:
460
+ (success, message)
461
+ """
462
+ try:
463
+ # Create fine tuner instance
464
+ tuner = FineTuner()
465
+
466
+ # Start training process
467
+ success, message = tuner.train(
468
+ training_data_path=training_file,
469
+ num_train_epochs=epochs,
470
+ per_device_train_batch_size=batch_size,
471
+ learning_rate=learning_rate
472
+ )
473
+
474
+ return success, message
475
+
476
+ except Exception as e:
477
+ return False, f"Error during fine-tuning: {str(e)}"
478
+
479
  if __name__ == "__main__":
480
  # Usage example
481
  success, message = finetune_from_chat_history()