Spaces:

CreatorIQ-org
/

rlhf_docker

Sleeping

App Files Files Community

b2u committed on Nov 29, 2024

Commit

3178a98

1 Parent(s): fa5ac26

keep debugging

Browse files

Files changed (1) hide show

model.py +15 -7

model.py CHANGED Viewed

@@ -76,17 +76,17 @@ class BertClassifier(LabelStudioMLBase):
         logger.info(f"Initializing BertClassifier with project_id: {project_id}")
         logger.info(f"Label config: {label_config}")
         # Initialize Label Studio client
         self.label_studio_client = self.connect_to_label_studio()
         self.label_encoder = LabelEncoder()
-        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         self.instruction_template = os.getenv('MODEL_INSTRUCTIONS', '{text}')
-        # Define model directory
-        self.model_dir = os.path.join(os.path.dirname(__file__), 'model')
-        os.makedirs(self.model_dir, exist_ok=True)
         # Define your categories
         self.categories = [
             'affiliate_classification', 'brand', 'business_and_career',
@@ -166,7 +166,7 @@ class BertClassifier(LabelStudioMLBase):
                         },
                         'score': 0.5  # Confidence score between 0 and 1
                     }],
-                    'model_version': self.model_dir
                 })
         except Exception as e:
@@ -175,7 +175,7 @@ class BertClassifier(LabelStudioMLBase):
             # Return empty predictions in case of error
             predictions = [{
                 'result': [],
-                'model_version': self.model_dir
             } for _ in tasks]
         return predictions
@@ -206,6 +206,10 @@ class BertClassifier(LabelStudioMLBase):
         logger.info('Starting model training...')
         try:
             # Debug completions
             logger.info("=== DEBUG COMPLETIONS START ===")
             logger.info(f"Type of completions: {type(completions)}")
@@ -233,6 +237,10 @@ class BertClassifier(LabelStudioMLBase):
                     # Get annotations
                     annotations = task.get('annotations', [])
                     if annotations:
                         logger.info(f"Found {len(annotations)} annotations for task {task.get('id')}")
                         logger.info(f"Annotation content: {json.dumps(annotations[0], indent=2)}")

         logger.info(f"Initializing BertClassifier with project_id: {project_id}")
         logger.info(f"Label config: {label_config}")
+        # Initialize basic attributes
+        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.version = 'v0.0.1'  # Define version explicitly
+        self.model_dir = f'BertClassifier-{self.version}'  # Use versioned model directory
         # Initialize Label Studio client
         self.label_studio_client = self.connect_to_label_studio()
         self.label_encoder = LabelEncoder()
         self.instruction_template = os.getenv('MODEL_INSTRUCTIONS', '{text}')
         # Define your categories
         self.categories = [
             'affiliate_classification', 'brand', 'business_and_career',
                         },
                         'score': 0.5  # Confidence score between 0 and 1
                     }],
+                    'model_version': self.version
                 })
         except Exception as e:
             # Return empty predictions in case of error
             predictions = [{
                 'result': [],
+                'model_version': self.version
             } for _ in tasks]
         return predictions
         logger.info('Starting model training...')
         try:
+            # Get use_ground_truth parameter
+            use_ground_truth = kwargs.get('use_ground_truth', True)
+            logger.info(f"Training with use_ground_truth={use_ground_truth}")
             # Debug completions
             logger.info("=== DEBUG COMPLETIONS START ===")
             logger.info(f"Type of completions: {type(completions)}")
                     # Get annotations
                     annotations = task.get('annotations', [])
+                    if use_ground_truth:
+                        # Also include ground truth annotations
+                        annotations.extend(task.get('ground_truth', []))
                     if annotations:
                         logger.info(f"Found {len(annotations)} annotations for task {task.get('id')}")
                         logger.info(f"Annotation content: {json.dumps(annotations[0], indent=2)}")