Rulga committed on
Commit
9b0f151
·
1 Parent(s): c564619

Refactor fine-tuning process: Update function to utilize evaluated chat history and improve error handling for training data preparation.

Browse files
app.py CHANGED
@@ -1233,7 +1233,7 @@ with gr.Blocks(css="""
1233
  interactive=False,
1234
  show_label=True
1235
  )
1236
- refresh_status_btn = gr.Button("Refresh Status")
1237
 
1238
  # Moved refresh status and evaluation report here
1239
  refresh_data_status = gr.Textbox(
 
1233
  interactive=False,
1234
  show_label=True
1235
  )
1236
+ refresh_status_btn = gr.Button("Refresh Status and Chat History")
1237
 
1238
  # Moved refresh status and evaluation report here
1239
  refresh_data_status = gr.Textbox(
src/analytics/chat_evaluator.py CHANGED
@@ -442,3 +442,4 @@ class ChatEvaluator:
442
  metrics["improvement_rate"] = (improved_count / len(annotations)) * 100
443
 
444
  return metrics
 
 
442
  metrics["improvement_rate"] = (improved_count / len(annotations)) * 100
443
 
444
  return metrics
445
+
src/training/fine_tuner.py CHANGED
@@ -391,7 +391,7 @@ def finetune_from_chat_history(epochs: int = 3,
391
  batch_size: int = 4,
392
  learning_rate: float = 2e-4) -> Tuple[bool, str]:
393
  """
394
- Function to start fine-tuning process based on chat history
395
 
396
  Args:
397
  epochs: Number of training epochs
@@ -401,45 +401,58 @@ def finetune_from_chat_history(epochs: int = 3,
401
  Returns:
402
  (success, message)
403
  """
404
- # Analyze chats and prepare data
405
- analyzer = ChatAnalyzer()
406
- report = analyzer.analyze_chats()
407
-
408
- if not report or "Failed to load chat history" in report:
409
- return False, "Failed to load chat history for training"
410
-
411
- # Extract QA pairs for training
412
- qa_pairs = analyzer.extract_question_answer_pairs()
413
-
414
- if len(qa_pairs) < 10:
415
- return False, f"Insufficient data for fine-tuning. Only {len(qa_pairs)} QA pairs found."
416
-
417
- # Create temporary file for training data
418
- with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.jsonl') as f:
419
- for pair in qa_pairs:
420
- json.dump({
421
- "messages": [
422
- {"role": "user", "content": pair["question"]},
423
- {"role": "assistant", "content": pair["answer"]}
424
- ]
425
- }, f, ensure_ascii=False)
426
- f.write('\n')
427
- training_data_path = f.name
428
-
429
- # Create and start fine-tuning process
430
- tuner = FineTuner()
431
- success, message = tuner.prepare_and_train(
432
- training_data_path=training_data_path,
433
- num_train_epochs=epochs,
434
- per_device_train_batch_size=batch_size,
435
- learning_rate=learning_rate
436
- )
437
-
438
- # Cleanup
439
- if os.path.exists(training_data_path):
440
- os.remove(training_data_path)
441
-
442
- return success, message
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  def finetune_from_file(
445
  training_file: str,
 
391
  batch_size: int = 4,
392
  learning_rate: float = 2e-4) -> Tuple[bool, str]:
393
  """
394
+ Function to start fine-tuning process based on evaluated chat history
395
 
396
  Args:
397
  epochs: Number of training epochs
 
401
  Returns:
402
  (success, message)
403
  """
404
+ try:
405
+ # Create evaluator instance
406
+ evaluator = ChatEvaluator(
407
+ hf_token=HF_TOKEN,
408
+ dataset_id=DATASET_ID,
409
+ chat_history_path=CHAT_HISTORY_PATH
410
+ )
411
+
412
+ # Create temporary file for training data
413
+ with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.jsonl') as f:
414
+ training_data_path = f.name
415
+
416
+ # Export evaluated data
417
+ success, message = evaluator.export_training_data(
418
+ output_file=training_data_path,
419
+ min_rating=3 # Use a more lenient rating threshold
420
+ )
421
+
422
+ if not success:
423
+ if os.path.exists(training_data_path):
424
+ os.remove(training_data_path)
425
+ return False, f"Failed to prepare training data: {message}"
426
+
427
+ # Count examples
428
+ with open(training_data_path, 'r') as f:
429
+ example_count = sum(1 for _ in f)
430
+
431
+ if example_count == 0:
432
+ if os.path.exists(training_data_path):
433
+ os.remove(training_data_path)
434
+ return False, "No evaluated examples found for fine-tuning"
435
+
436
+ # Create and start fine-tuning process
437
+ tuner = FineTuner()
438
+ success, message = tuner.prepare_and_train(
439
+ training_data_path=training_data_path,
440
+ num_train_epochs=epochs,
441
+ per_device_train_batch_size=batch_size,
442
+ learning_rate=learning_rate
443
+ )
444
+
445
+ # Cleanup
446
+ if os.path.exists(training_data_path):
447
+ os.remove(training_data_path)
448
+
449
+ if success:
450
+ return True, f"Successfully fine-tuned model with {example_count} evaluated examples: {message}"
451
+ else:
452
+ return False, f"Fine-tuning failed: {message}"
453
+
454
+ except Exception as e:
455
+ return False, f"Error during fine-tuning: {str(e)}"
456
 
457
  def finetune_from_file(
458
  training_file: str,