b2u committed on
Commit
6de324b
·
1 Parent(s): f234852

Reverting the OOP updates. The AI wasn't able to handle the module imports.

Browse files
Files changed (4) hide show
  1. __init__.py +0 -3
  2. model.py +15 -3
  3. utils/__init__.py +0 -4
  4. utils/dataset.py +0 -25
__init__.py DELETED
@@ -1,3 +0,0 @@
1
- """
2
- Root package initialization.
3
- """
 
 
 
 
model.py CHANGED
@@ -10,13 +10,11 @@ from torch.optim import AdamW
10
  from sklearn.preprocessing import LabelEncoder
11
  import sys
12
  from pathlib import Path
 
13
 
14
  # Get the directory containing model.py
15
  current_dir = Path(__file__).parent
16
 
17
- # Import TextDataset directly from the file
18
- from utils.dataset import TextDataset
19
-
20
  logger = logging.getLogger(__name__)
21
 
22
  # Add these debug lines
@@ -26,6 +24,20 @@ logger.info(f"Current directory: {os.getcwd()}")
26
  logger.info(f"Directory contents: {os.listdir('.')}")
27
  logger.info("=== END DEBUG INFO ===")
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  class BertClassifier(LabelStudioMLBase):
30
  def __init__(self, project_id=None, label_config=None, **kwargs):
31
  super(BertClassifier, self).__init__(project_id=project_id, label_config=label_config)
 
10
  from sklearn.preprocessing import LabelEncoder
11
  import sys
12
  from pathlib import Path
13
+ from torch.utils.data import Dataset
14
 
15
  # Get the directory containing model.py
16
  current_dir = Path(__file__).parent
17
 
 
 
 
18
  logger = logging.getLogger(__name__)
19
 
20
  # Add these debug lines
 
24
  logger.info(f"Directory contents: {os.listdir('.')}")
25
  logger.info("=== END DEBUG INFO ===")
26
 
27
+ # Move TextDataset class here
28
+ class TextDataset(Dataset):
29
+ def __init__(self, texts, labels, tokenizer, max_length=128):
30
+ self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
31
+ self.labels = labels
32
+
33
+ def __getitem__(self, idx):
34
+ item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
35
+ item['labels'] = torch.tensor(self.labels[idx])
36
+ return item
37
+
38
+ def __len__(self):
39
+ return len(self.labels)
40
+
41
  class BertClassifier(LabelStudioMLBase):
42
  def __init__(self, project_id=None, label_config=None, **kwargs):
43
  super(BertClassifier, self).__init__(project_id=project_id, label_config=label_config)
utils/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- """
2
- Utils package initialization.
3
- Contains dataset utilities for BERT classifier.
4
- """
 
 
 
 
 
utils/dataset.py DELETED
@@ -1,25 +0,0 @@
1
- import torch
2
- from torch.utils.data import Dataset
3
-
4
- class TextDataset(Dataset):
5
- def __init__(self, texts, labels, tokenizer, max_length=128):
6
- """
7
- Initialize dataset for text classification
8
- Args:
9
- texts: list of input texts
10
- labels: list of corresponding labels
11
- tokenizer: HuggingFace tokenizer
12
- max_length: maximum sequence length
13
- """
14
- self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
15
- self.labels = labels
16
-
17
- def __getitem__(self, idx):
18
- """Return a single training example"""
19
- item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
20
- item['labels'] = torch.tensor(self.labels[idx])
21
- return item
22
-
23
- def __len__(self):
24
- """Return the number of examples in dataset"""
25
- return len(self.labels)