Fix bad README and remove warnings about uninitialized tensors
Browse files
- README.md +8 -8
- modeling.py +18 -14
README.md
CHANGED
|
@@ -7,20 +7,20 @@ tags:
|
|
| 7 |
library_name: transformers
|
| 8 |
paper: https://arxiv.org/abs/2201.05601
|
| 9 |
---
|
| 10 |
-
```python
|
| 11 |
-
from transformers import AutoModel, AutoTokenizer
|
| 12 |
-
|
| 13 |
-
model = AutoModel.from_pretrained("mideind/IceBERT-PoS", trust_remote_code=True)
|
| 14 |
-
tokenizer = AutoTokenizer.from_pretrained("mideind/IceBERT-PoS")
|
| 15 |
-
|
| 16 |
## Prediction Methods
|
| 17 |
|
| 18 |
The model provides two prediction methods:
|
| 19 |
|
| 20 |
-
- **`predict_labels_from_text()`**: Returns structured predictions as (category, [attributes]) tuples.
|
| 21 |
- **`predict_ifd_labels_from_text()`**: Returns predictions in IFD (Icelandic Frequency Dictionary) format. Use this for evaluation against MIM-GOLD datasets or when you need compatibility with traditional Icelandic POS taggers.
|
| 22 |
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
sentence = "Ég veit að þú kemur í kvöld til mín ."
|
| 25 |
|
| 26 |
# Get predictions in (category, [attributes]) format
|
|
|
|
| 7 |
library_name: transformers
|
| 8 |
paper: https://arxiv.org/abs/2201.05601
|
| 9 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
## Prediction Methods
|
| 11 |
|
| 12 |
The model provides two prediction methods:
|
| 13 |
|
| 14 |
+
- **`predict_labels_from_text()`**: Returns structured predictions as (category, [attributes]) tuples. This format is easier to read and is convenient when you need the category and its attributes as separate values.
|
| 15 |
- **`predict_ifd_labels_from_text()`**: Returns predictions in IFD (Icelandic Frequency Dictionary) format. Use this for evaluation against MIM-GOLD datasets or when you need compatibility with traditional Icelandic POS taggers.
|
| 16 |
|
| 17 |
+
```python
|
| 18 |
+
from transformers import AutoModel, AutoTokenizer
|
| 19 |
+
|
| 20 |
+
model = AutoModel.from_pretrained("mideind/IceBERT-PoS", trust_remote_code=True)
|
| 21 |
+
tokenizer = AutoTokenizer.from_pretrained("mideind/IceBERT-PoS")
|
| 22 |
+
|
| 23 |
+
# Example sentence
|
| 24 |
sentence = "Ég veit að þú kemur í kvöld til mín ."
|
| 25 |
|
| 26 |
# Get predictions in (category, [attributes]) format
|
modeling.py
CHANGED
|
@@ -91,22 +91,26 @@ class IceBertPosForTokenClassification(PreTrainedModel):
|
|
| 91 |
"""Setup label mappings using schema methods."""
|
| 92 |
schema = self.config.label_schema
|
| 93 |
|
| 94 |
-
#
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
self.register_buffer("group_mask", schema.get_group_masks(device=device))
|
| 99 |
-
|
| 100 |
-
# Register group attribute indices as buffers
|
| 101 |
-
group_attr_indices = schema.get_group_name_to_group_attr_indices(device=device)
|
| 102 |
-
self.group_name_to_group_attr_indices = {}
|
| 103 |
-
for group_name, indices in group_attr_indices.items():
|
| 104 |
-
buffer_name = f"group_attr_indices_{group_name}"
|
| 105 |
-
self.register_buffer(buffer_name, indices)
|
| 106 |
-
self.group_name_to_group_attr_indices[group_name] = getattr(self, buffer_name)
|
| 107 |
-
|
| 108 |
# Category name to index mapping (regular dict, no device movement needed)
|
| 109 |
self.category_name_to_index = schema.get_category_name_to_index()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def forward(
|
| 112 |
self,
|
|
|
|
| 91 |
"""Setup label mappings using schema methods."""
|
| 92 |
schema = self.config.label_schema
|
| 93 |
|
| 94 |
+
# Create tensors as regular attributes rather than buffers, to avoid uninitialized-tensor warnings
|
| 95 |
+
self.group_mask = schema.get_group_masks()
|
| 96 |
+
self.group_name_to_group_attr_indices = schema.get_group_name_to_group_attr_indices()
|
| 97 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
# Category name to index mapping (regular dict, no device movement needed)
|
| 99 |
self.category_name_to_index = schema.get_category_name_to_index()
|
| 100 |
+
|
| 101 |
+
def _apply(self, fn):
|
| 102 |
+
"""Override _apply to move our custom tensors with the model."""
|
| 103 |
+
super()._apply(fn)
|
| 104 |
+
|
| 105 |
+
# Move our custom tensors when model.to(device) is called
|
| 106 |
+
if hasattr(self, 'group_mask'):
|
| 107 |
+
self.group_mask = fn(self.group_mask)
|
| 108 |
+
|
| 109 |
+
if hasattr(self, 'group_name_to_group_attr_indices'):
|
| 110 |
+
for group_name, tensor in self.group_name_to_group_attr_indices.items():
|
| 111 |
+
self.group_name_to_group_attr_indices[group_name] = fn(tensor)
|
| 112 |
+
|
| 113 |
+
return self
|
| 114 |
|
| 115 |
def forward(
|
| 116 |
self,
|