Spaces:
Sleeping
Sleeping
Reverting the OOP updates: the AI wasn't able to handle the module imports.
Browse files
- __init__.py +0 -3
- model.py +15 -3
- utils/__init__.py +0 -4
- utils/dataset.py +0 -25
__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Root package initialization.
|
| 3 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
model.py
CHANGED
|
@@ -10,13 +10,11 @@ from torch.optim import AdamW
|
|
| 10 |
from sklearn.preprocessing import LabelEncoder
|
| 11 |
import sys
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
|
| 14 |
# Get the directory containing model.py
|
| 15 |
current_dir = Path(__file__).parent
|
| 16 |
|
| 17 |
-
# Import TextDataset directly from the file
|
| 18 |
-
from utils.dataset import TextDataset
|
| 19 |
-
|
| 20 |
logger = logging.getLogger(__name__)
|
| 21 |
|
| 22 |
# Add these debug lines
|
|
@@ -26,6 +24,20 @@ logger.info(f"Current directory: {os.getcwd()}")
|
|
| 26 |
logger.info(f"Directory contents: {os.listdir('.')}")
|
| 27 |
logger.info("=== END DEBUG INFO ===")
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
class BertClassifier(LabelStudioMLBase):
|
| 30 |
def __init__(self, project_id=None, label_config=None, **kwargs):
|
| 31 |
super(BertClassifier, self).__init__(project_id=project_id, label_config=label_config)
|
|
|
|
| 10 |
from sklearn.preprocessing import LabelEncoder
|
| 11 |
import sys
|
| 12 |
from pathlib import Path
|
| 13 |
+
from torch.utils.data import Dataset
|
| 14 |
|
| 15 |
# Get the directory containing model.py
|
| 16 |
current_dir = Path(__file__).parent
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
| 20 |
# Add these debug lines
|
|
|
|
| 24 |
logger.info(f"Directory contents: {os.listdir('.')}")
|
| 25 |
logger.info("=== END DEBUG INFO ===")
|
| 26 |
|
| 27 |
+
# TextDataset is defined inline here (moved from utils/dataset.py)
|
| 28 |
+
class TextDataset(Dataset):
    """Dataset that tokenizes all texts once and serves one example per index.

    Each item is a dict holding one tensor per tokenizer output field plus a
    ``'labels'`` tensor for the example's label.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        """Tokenize ``texts`` eagerly and keep ``labels`` alongside them.

        Args:
            texts: sequence of input texts.
            labels: sequence of labels, one per text.
            tokenizer: callable invoked as
                ``tokenizer(texts, truncation=True, padding=True,
                max_length=max_length)``; its result must expose ``.items()``
                yielding per-field sequences indexable by example
                (e.g. a HuggingFace tokenizer).
            max_length: maximum sequence length forwarded to the tokenizer.

        Raises:
            ValueError: if ``texts`` and ``labels`` differ in length.
        """
        # Fail fast: a mismatch would otherwise silently truncate the dataset
        # (fewer labels) or raise IndexError later in __getitem__ (more labels).
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {len(texts)} and {len(labels)}"
            )
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
        self.labels = labels

    def __getitem__(self, idx):
        """Return the example at ``idx`` as a dict of tensors."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples (the number of labels)."""
        return len(self.labels)
|
| 40 |
+
|
| 41 |
class BertClassifier(LabelStudioMLBase):
|
| 42 |
def __init__(self, project_id=None, label_config=None, **kwargs):
|
| 43 |
super(BertClassifier, self).__init__(project_id=project_id, label_config=label_config)
|
utils/__init__.py
DELETED
|
@@ -1,4 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Utils package initialization.
|
| 3 |
-
Contains dataset utilities for BERT classifier.
|
| 4 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/dataset.py
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
from torch.utils.data import Dataset
|
| 3 |
-
|
| 4 |
-
class TextDataset(Dataset):
    """Text-classification dataset backed by pre-computed tokenizer encodings."""

    def __init__(self, texts, labels, tokenizer, max_length=128):
        """
        Initialize dataset for text classification

        Args:
            texts: list of input texts
            labels: list of corresponding labels (same length as ``texts``)
            tokenizer: HuggingFace tokenizer, called once on all texts with
                truncation and padding enabled
            max_length: maximum sequence length

        Raises:
            ValueError: if ``texts`` and ``labels`` have different lengths
        """
        # Reject misaligned inputs up front; otherwise extra texts are silently
        # ignored or extra labels trigger an IndexError during iteration.
        n_texts, n_labels = len(texts), len(labels)
        if n_texts != n_labels:
            raise ValueError(
                f"texts and labels must have the same length, "
                f"got {n_texts} and {n_labels}"
            )
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
        self.labels = labels

    def __getitem__(self, idx):
        """Return a single training example"""
        # One tensor per encoding field, sliced at this example's index.
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Return the number of examples in dataset"""
        return len(self.labels)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|