Spaces:

CreatorIQ-org
/

rlhf_docker

Sleeping

b2u committed on Nov 29, 2024

Commit

6de324b

1 Parent(s): f234852

Reverting the OOP updates: the AI wasn't able to handle the module imports.

Files changed (4) hide show

__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-"""
-Root package initialization.
-"""

model.py CHANGED Viewed

@@ -10,13 +10,11 @@ from torch.optim import AdamW
 from sklearn.preprocessing import LabelEncoder
 import sys
 from pathlib import Path
 # Get the directory containing model.py
 current_dir = Path(__file__).parent
-# Import TextDataset directly from the file
-from utils.dataset import TextDataset
 logger = logging.getLogger(__name__)
 # Add these debug lines
@@ -26,6 +24,20 @@ logger.info(f"Current directory: {os.getcwd()}")
 logger.info(f"Directory contents: {os.listdir('.')}")
 logger.info("=== END DEBUG INFO ===")
 class BertClassifier(LabelStudioMLBase):
     def __init__(self, project_id=None, label_config=None, **kwargs):
         super(BertClassifier, self).__init__(project_id=project_id, label_config=label_config)

 from sklearn.preprocessing import LabelEncoder
 import sys
 from pathlib import Path
+from torch.utils.data import Dataset
 # Get the directory containing model.py
 current_dir = Path(__file__).parent
 logger = logging.getLogger(__name__)
 # Add these debug lines
 logger.info(f"Directory contents: {os.listdir('.')}")
 logger.info("=== END DEBUG INFO ===")
+# Move TextDataset class here
+class TextDataset(Dataset):
+    def __init__(self, texts, labels, tokenizer, max_length=128):
+        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
+        self.labels = labels
+    def __getitem__(self, idx):
+        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
+        item['labels'] = torch.tensor(self.labels[idx])
+        return item
+    def __len__(self):
+        return len(self.labels)
 class BertClassifier(LabelStudioMLBase):
     def __init__(self, project_id=None, label_config=None, **kwargs):
         super(BertClassifier, self).__init__(project_id=project_id, label_config=label_config)

utils/__init__.py DELETED Viewed

@@ -1,4 +0,0 @@
-"""
-Utils package initialization.
-Contains dataset utilities for BERT classifier.
-"""

utils/dataset.py DELETED Viewed

@@ -1,25 +0,0 @@
-import torch
-from torch.utils.data import Dataset
-class TextDataset(Dataset):
-    def __init__(self, texts, labels, tokenizer, max_length=128):
-        """
-        Initialize dataset for text classification
-        Args:
-            texts: list of input texts
-            labels: list of corresponding labels
-            tokenizer: HuggingFace tokenizer
-            max_length: maximum sequence length
-        """
-        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
-        self.labels = labels
-    def __getitem__(self, idx):
-        """Return a single training example"""
-        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
-        item['labels'] = torch.tensor(self.labels[idx])
-        return item
-    def __len__(self):
-        """Return the number of examples in dataset"""
-        return len(self.labels)