Sadjad Alikhani
committed on
Delete tokenizer.py
Browse files- tokenizer.py +0 -33
tokenizer.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
-
Created on Fri Sep 13 19:15:23 2024
|
| 4 |
-
|
| 5 |
-
@author: salikha4
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from transformers import PreTrainedTokenizer
|
| 9 |
-
from input_preprocess import gen_tokens
|
| 10 |
-
|
| 11 |
-
class WirelessChannelTokenizer(PreTrainedTokenizer):
    """
    A Hugging Face-compatible tokenizer for wireless channels.

    Instead of text tokenization, it delegates to ``gen_tokens`` (from
    ``input_preprocess``) to perform segmentation (patching) and masking of
    wireless channel data.

    Parameters
    ----------
    patch_size : int, optional
        Patch size forwarded to ``gen_tokens`` (default 16).
    max_len : int, optional
        Maximum sequence length (default 129). Stored on the instance;
        not consumed by the code visible here — presumably read downstream.
    **kwargs
        Forwarded unchanged to ``PreTrainedTokenizer.__init__``.
    """

    def __init__(self, patch_size=16, max_len=129, **kwargs):
        super().__init__(**kwargs)
        self.patch_size = patch_size
        self.max_len = max_len

    def preprocess_channels(self, scenario_idxs):
        """Preprocess the wireless channel data for the given scenarios.

        Parameters
        ----------
        scenario_idxs
            Scenario indices passed through to ``gen_tokens``.

        Returns
        -------
        The preprocessed (tokenized) channel data produced by ``gen_tokens``.
        """
        # gen_tokens returns (data, sequence_length, element_length); only
        # the data is needed here, so the lengths are deliberately discarded
        # (named with a leading underscore to make that explicit).
        preprocessed_data, _seq_len, _elem_len = gen_tokens(
            scenario_idxs, patch_gen=True, patch_size=self.patch_size,
            gen_deepMIMO_data=True, gen_raw=True, save_data=False
        )
        return preprocessed_data

    def __call__(self, scenario_idxs):
        """Make the tokenizer callable, mirroring the HF tokenizer API."""
        return self.preprocess_channels(scenario_idxs)

    def save_pretrained(self, save_directory):
        """Save the tokenizer via the standard Hugging Face mechanism.

        Fix: propagate the parent's return value (the saved file paths)
        instead of silently discarding it, so this override stays
        API-compatible with ``PreTrainedTokenizer.save_pretrained``.
        """
        return super().save_pretrained(save_directory)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|