Refactor fine-tuning process: update finetune_from_chat_history to use evaluated chat history and improve error handling for training data preparation.
- app.py +1 -1
- src/analytics/chat_evaluator.py +1 -0
- src/training/fine_tuner.py +53 -40
app.py
CHANGED

@@ -1233,7 +1233,7 @@ with gr.Blocks(css="""
             interactive=False,
             show_label=True
         )
-        refresh_status_btn = gr.Button("Refresh Status")
+        refresh_status_btn = gr.Button("Refresh Status and Chat History")

         # Moved refresh status and evaluation report here
         refresh_data_status = gr.Textbox(
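The new label implies that one click now refreshes both the data status and the chat history. A minimal wiring sketch of how such a handler could be attached, assuming a hypothetical handler refresh_all_data and a hypothetical chat_history_display component (only refresh_status_btn and refresh_data_status appear in this diff):

# Sketch only: refresh_all_data and chat_history_display are assumed names,
# not part of this commit; refresh_status_btn and refresh_data_status are.
def refresh_all_data():
    status_text = "Status refreshed; chat history reloaded"          # placeholder
    history_rows = [["example user message", "example bot reply"]]   # placeholder
    return status_text, history_rows

refresh_status_btn.click(
    fn=refresh_all_data,
    inputs=[],
    outputs=[refresh_data_status, chat_history_display],
)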
src/analytics/chat_evaluator.py
CHANGED

@@ -442,3 +442,4 @@ class ChatEvaluator:
         metrics["improvement_rate"] = (improved_count / len(annotations)) * 100

         return metrics
+
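As context for this hunk, improvement_rate is simply the share of annotations flagged as improved, expressed as a percentage. A tiny illustrative calculation, assuming a simplified annotation shape (the real fields on ChatEvaluator's annotations may differ):

# Illustrative only: assumes each annotation carries an "improved" boolean.
annotations = [
    {"improved": True},
    {"improved": False},
    {"improved": True},
    {"improved": True},
]
improved_count = sum(1 for a in annotations if a["improved"])
improvement_rate = (improved_count / len(annotations)) * 100  # 75.0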
src/training/fine_tuner.py
CHANGED

@@ -391,7 +391,7 @@ def finetune_from_chat_history(epochs: int = 3,
                                batch_size: int = 4,
                                learning_rate: float = 2e-4) -> Tuple[bool, str]:
     """
-    Function to start fine-tuning process based on chat history
+    Function to start fine-tuning process based on evaluated chat history

     Args:
         epochs: Number of training epochs

@@ -401,45 +401,58 @@ def finetune_from_chat_history(epochs: int = 3,
     Returns:
         (success, message)
     """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Create evaluator instance
+        evaluator = ChatEvaluator(
+            hf_token=HF_TOKEN,
+            dataset_id=DATASET_ID,
+            chat_history_path=CHAT_HISTORY_PATH
+        )
+
+        # Create temporary file for training data
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.jsonl') as f:
+            training_data_path = f.name
+
+        # Export evaluated data
+        success, message = evaluator.export_training_data(
+            output_file=training_data_path,
+            min_rating=3  # Use a more lenient rating threshold
+        )
+
+        if not success:
+            if os.path.exists(training_data_path):
+                os.remove(training_data_path)
+            return False, f"Failed to prepare training data: {message}"
+
+        # Count examples
+        with open(training_data_path, 'r') as f:
+            example_count = sum(1 for _ in f)
+
+        if example_count == 0:
+            if os.path.exists(training_data_path):
+                os.remove(training_data_path)
+            return False, "No evaluated examples found for fine-tuning"
+
+        # Create and start fine-tuning process
+        tuner = FineTuner()
+        success, message = tuner.prepare_and_train(
+            training_data_path=training_data_path,
+            num_train_epochs=epochs,
+            per_device_train_batch_size=batch_size,
+            learning_rate=learning_rate
+        )
+
+        # Cleanup
+        if os.path.exists(training_data_path):
+            os.remove(training_data_path)
+
+        if success:
+            return True, f"Successfully fine-tuned model with {example_count} evaluated examples: {message}"
+        else:
+            return False, f"Fine-tuning failed: {message}"
+
+    except Exception as e:
+        return False, f"Error during fine-tuning: {str(e)}"

 def finetune_from_file(
     training_file: str,
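Because the refactored function handles its own data export, training, and temp-file cleanup and returns a (success, message) tuple, callers only need to unpack the result. A minimal usage sketch; the import path and handler name are assumptions rather than part of this commit:

# Usage sketch (import path assumed; adjust to the project's actual layout).
from src.training.fine_tuner import finetune_from_chat_history

def start_finetuning(epochs: int = 3, batch_size: int = 4,
                     learning_rate: float = 2e-4) -> str:
    # The function already performs export, training, and cleanup,
    # so the caller just formats the returned status for display.
    success, message = finetune_from_chat_history(
        epochs=epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
    )
    prefix = "Success: " if success else "Error: "
    return prefix + message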