Initial upload of FreeChunk model with custom code
Browse files
- config.json +1 -1
- encoder.py +8 -3
- freechunker.py +1 -1
config.json
CHANGED
|
@@ -27,6 +27,6 @@
|
|
| 27 |
"max_power": 4,
|
| 28 |
"auto_map": {
|
| 29 |
"AutoConfig": "configuration_freechunker.FreeChunkerConfig",
|
| 30 |
-
"AutoModel": "
|
| 31 |
}
|
| 32 |
}
|
|
|
|
| 27 |
"max_power": 4,
|
| 28 |
"auto_map": {
|
| 29 |
"AutoConfig": "configuration_freechunker.FreeChunkerConfig",
|
| 30 |
+
"AutoModel": "encoder.UnifiedEncoder"
|
| 31 |
}
|
| 32 |
}
|
encoder.py
CHANGED
|
@@ -9,9 +9,10 @@ import numpy as np
|
|
| 9 |
import pickle
|
| 10 |
import os
|
| 11 |
from typing import List, Tuple, Union
|
| 12 |
-
from sentenizer import Sentenceizer
|
| 13 |
-
from freechunker import FreeChunkerModel
|
| 14 |
-
from aggregator import TextAggregator
|
|
|
|
| 15 |
|
| 16 |
class UnifiedEncoder:
|
| 17 |
"""
|
|
@@ -80,6 +81,10 @@ class UnifiedEncoder:
|
|
| 80 |
|
| 81 |
return cls(model_name=model_name, model_name_or_path=model_name_or_path, **kwargs)
|
| 82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
def encode(self, text: str, show_progress: bool = True) -> Tuple[List[str], np.ndarray, List[List[str]]]:
|
| 84 |
"""
|
| 85 |
Split text and encode, return results grouped by shift_matrix
|
|
|
|
| 9 |
import pickle
|
| 10 |
import os
|
| 11 |
from typing import List, Tuple, Union
|
| 12 |
+
from .sentenizer import Sentenceizer
|
| 13 |
+
from .freechunker import FreeChunkerModel
|
| 14 |
+
from .aggregator import TextAggregator
|
| 15 |
+
from . import utils
|
| 16 |
|
| 17 |
class UnifiedEncoder:
|
| 18 |
"""
|
|
|
|
| 81 |
|
| 82 |
return cls(model_name=model_name, model_name_or_path=model_name_or_path, **kwargs)
|
| 83 |
|
| 84 |
+
@classmethod
|
| 85 |
+
def register_for_auto_class(cls, auto_class="AutoModel"):
|
| 86 |
+
return
|
| 87 |
+
|
| 88 |
def encode(self, text: str, show_progress: bool = True) -> Tuple[List[str], np.ndarray, List[List[str]]]:
|
| 89 |
"""
|
| 90 |
Split text and encode, return results grouped by shift_matrix
|
freechunker.py
CHANGED
|
@@ -35,7 +35,7 @@ from transformers.utils import (
|
|
| 35 |
get_torch_version,
|
| 36 |
logging
|
| 37 |
)
|
| 38 |
-
from
|
| 39 |
|
| 40 |
|
| 41 |
logger = logging.get_logger(__name__)
|
|
|
|
| 35 |
get_torch_version,
|
| 36 |
logging
|
| 37 |
)
|
| 38 |
+
from .configuration_freechunker import FreeChunkerConfig
|
| 39 |
|
| 40 |
|
| 41 |
logger = logging.get_logger(__name__)
|