b2u committed on
Commit
62c0df2
·
1 Parent(s): 41785d5

Instead of trying to parse the completions string, we use the Label Studio interface to get tasks directly

Browse files
Files changed (1) hide show
  1. model.py +24 -21
model.py CHANGED
@@ -210,12 +210,19 @@ class BertClassifier(LabelStudioMLBase):
210
  # Extract training data
211
  texts, labels = [], []
212
 
213
- # Process completions directly from Label Studio
214
  try:
215
- for task in completions:
 
 
 
 
 
 
 
216
  try:
217
  # Get text from task
218
- text = task.get('data', {}).get('text', '')
219
  if not text:
220
  continue
221
 
@@ -231,26 +238,26 @@ class BertClassifier(LabelStudioMLBase):
231
  if not result:
232
  continue
233
 
234
- choices = result[0].get('value', {}).get('choices', [])
235
- if not choices:
236
- continue
237
-
238
- label = choices[0]
239
-
240
- logger.info(f"Successfully extracted: Text='{text}', Label='{label}'")
241
- texts.append(text)
242
- labels.append(label)
243
-
244
  except Exception as e:
245
  logger.error(f"Error processing annotation: {str(e)}")
246
  continue
247
-
248
  except Exception as e:
249
  logger.error(f"Error processing task: {str(e)}")
250
  continue
251
 
252
  except Exception as e:
253
- logger.error(f"Error processing completions: {str(e)}")
254
 
255
  logger.info(f"Prepared {len(texts)} examples for training")
256
 
@@ -280,13 +287,9 @@ class BertClassifier(LabelStudioMLBase):
280
 
281
  tokenized_dataset = train_dataset.map(tokenize_function, batched=True)
282
 
283
- # Define output directory
284
- output_dir = os.path.join(self.model_dir, "results")
285
- os.makedirs(output_dir, exist_ok=True)
286
-
287
  # Define training arguments
288
  training_args = TrainingArguments(
289
- output_dir=output_dir,
290
  num_train_epochs=3,
291
  per_device_train_batch_size=8,
292
  per_device_eval_batch_size=8,
@@ -331,7 +334,7 @@ class BertClassifier(LabelStudioMLBase):
331
 
332
  except Exception as e:
333
  logger.error(f"Training failed: {str(e)}")
334
- logger.error('Full error details:', exc_info=True)
335
  return {
336
  'status': 'error',
337
  'error': str(e),
 
210
  # Extract training data
211
  texts, labels = [], []
212
 
213
+ # Get annotations from Label Studio
214
  try:
215
+ # Get interface info
216
+ from_name, to_name, value = self.label_interface.get_first_tag_occurence('Choices', 'Text')
217
+
218
+ # Get tasks from Label Studio
219
+ tasks = self.label_interface.get_tasks()
220
+ logger.info(f"Found {len(tasks)} tasks")
221
+
222
+ for task in tasks:
223
  try:
224
  # Get text from task
225
+ text = task.get('data', {}).get(value)
226
  if not text:
227
  continue
228
 
 
238
  if not result:
239
  continue
240
 
241
+ for r in result:
242
+ if r.get('from_name') == from_name and r.get('to_name') == to_name:
243
+ choices = r.get('value', {}).get('choices', [])
244
+ if choices:
245
+ label = choices[0]
246
+ logger.info(f"Successfully extracted: Text='{text}', Label='{label}'")
247
+ texts.append(text)
248
+ labels.append(label)
249
+ break
250
+
251
  except Exception as e:
252
  logger.error(f"Error processing annotation: {str(e)}")
253
  continue
254
+
255
  except Exception as e:
256
  logger.error(f"Error processing task: {str(e)}")
257
  continue
258
 
259
  except Exception as e:
260
+ logger.error(f"Error getting tasks: {str(e)}")
261
 
262
  logger.info(f"Prepared {len(texts)} examples for training")
263
 
 
287
 
288
  tokenized_dataset = train_dataset.map(tokenize_function, batched=True)
289
 
 
 
 
 
290
  # Define training arguments
291
  training_args = TrainingArguments(
292
+ output_dir=os.path.join(self.model_dir, "results"),
293
  num_train_epochs=3,
294
  per_device_train_batch_size=8,
295
  per_device_eval_batch_size=8,
 
334
 
335
  except Exception as e:
336
  logger.error(f"Training failed: {str(e)}")
337
+ logger.error("Full error details:", exc_info=True)
338
  return {
339
  'status': 'error',
340
  'error': str(e),