Rulga committed on
Commit
b0e22d6
·
1 Parent(s): 2984e21

Refactor FineTuner class: Remove commented-out code for model directory creation and the finetune_from_annotations function, clean up comments, and enhance code readability.

Browse files
Files changed (1) hide show
  1. src/training/fine_tuner.py +3 -73
src/training/fine_tuner.py CHANGED
@@ -47,7 +47,6 @@ class FineTuner:
47
  self.model = None
48
  self.chat_analyzer = ChatAnalyzer()
49
 
50
- # Создаём директорию для сохранения моделей в датасете
51
  os.makedirs(self.output_dir, exist_ok=True)
52
 
53
  def prepare_training_data(self, output_file: Optional[str] = None) -> str:
@@ -204,76 +203,7 @@ class FineTuner:
204
  )
205
 
206
  return tokenized_dataset
207
-
208
- # Добавить этот метод в класс fine_tuner.py или в функции модуля:
209
-
210
- def finetune_from_annotations(epochs=3, batch_size=4, learning_rate=2e-4, min_rating=4):
211
- """
212
- Fine-tune model using annotated QA pairs
213
-
214
- Args:
215
- epochs: Number of training epochs
216
- batch_size: Batch size for training
217
- learning_rate: Learning rate
218
- min_rating: Minimum average rating for including examples
219
-
220
- Returns:
221
- (success, message)
222
- """
223
- try:
224
- import tempfile
225
- import os
226
- from src.analytics.chat_evaluator import ChatEvaluator
227
- from config.settings import HF_TOKEN, DATASET_ID, CHAT_HISTORY_PATH
228
-
229
- # Create evaluator
230
- evaluator = ChatEvaluator(
231
- hf_token=HF_TOKEN,
232
- dataset_id=DATASET_ID,
233
- chat_history_path=CHAT_HISTORY_PATH
234
- )
235
-
236
- # Create temporary file for training data
237
- with tempfile.NamedTemporaryFile(mode='w+', suffix='.jsonl', delete=False) as temp_file:
238
- temp_path = temp_file.name
239
-
240
- # Export high-quality examples
241
- success, message = evaluator.export_training_data(temp_path, min_rating)
242
-
243
- if not success:
244
- return False, f"Failed to export training data: {message}"
245
 
246
- # Count examples
247
- with open(temp_path, 'r') as f:
248
- example_count = sum(1 for _ in f)
249
-
250
- if example_count == 0:
251
- return False, "No high-quality examples found for fine-tuning"
252
-
253
- # Run actual fine-tuning using the export file
254
- from src.training.fine_tuner import finetune_from_file
255
-
256
- success, message = finetune_from_file(
257
- training_file=temp_path,
258
- epochs=epochs,
259
- batch_size=batch_size,
260
- learning_rate=learning_rate
261
- )
262
-
263
- # Clean up temporary file
264
- try:
265
- os.unlink(temp_path)
266
- except:
267
- pass
268
-
269
- if success:
270
- return True, f"Successfully fine-tuned model with {example_count} annotated examples: {message}"
271
- else:
272
- return False, f"Fine-tuning failed: {message}"
273
-
274
- except Exception as e:
275
- return False, f"Error during fine-tuning from annotations: {str(e)}"
276
-
277
  def train(
278
  self,
279
  training_data_path: str,
@@ -286,7 +216,7 @@ def finetune_from_annotations(epochs=3, batch_size=4, learning_rate=2e-4, min_ra
286
  ) -> Tuple[bool, str]:
287
  """
288
  Train the model using provided data
289
-
290
  Args:
291
  training_data_path: Path to training data file
292
  num_train_epochs: Number of training epochs
@@ -295,7 +225,7 @@ def finetune_from_annotations(epochs=3, batch_size=4, learning_rate=2e-4, min_ra
295
  learning_rate: Learning rate
296
  logging_steps: Number of steps between logging
297
  save_strategy: When to save checkpoints
298
-
299
  Returns:
300
  (success, message)
301
  """
@@ -404,7 +334,7 @@ def finetune_from_chat_history(epochs: int = 3,
404
  (success, message)
405
  """
406
  try:
407
- # Create evaluator instance - убираем лишний параметр
408
  evaluator = ChatEvaluator(
409
  hf_token=HF_TOKEN,
410
  dataset_id=DATASET_ID
 
47
  self.model = None
48
  self.chat_analyzer = ChatAnalyzer()
49
 
 
50
  os.makedirs(self.output_dir, exist_ok=True)
51
 
52
  def prepare_training_data(self, output_file: Optional[str] = None) -> str:
 
203
  )
204
 
205
  return tokenized_dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  def train(
208
  self,
209
  training_data_path: str,
 
216
  ) -> Tuple[bool, str]:
217
  """
218
  Train the model using provided data
219
+
220
  Args:
221
  training_data_path: Path to training data file
222
  num_train_epochs: Number of training epochs
 
225
  learning_rate: Learning rate
226
  logging_steps: Number of steps between logging
227
  save_strategy: When to save checkpoints
228
+
229
  Returns:
230
  (success, message)
231
  """
 
334
  (success, message)
335
  """
336
  try:
337
+ # Create evaluator instance
338
  evaluator = ChatEvaluator(
339
  hf_token=HF_TOKEN,
340
  dataset_id=DATASET_ID