geologist387 committed on
Commit
eef0ae4
·
1 Parent(s): 8e5f345

Added an onnx model

Browse files
.gitattributes CHANGED
@@ -14,6 +14,7 @@
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
  *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
@@ -32,4 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
14
  *.npy filter=lfs diff=lfs merge=lfs -text
15
  *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
18
  *.ot filter=lfs diff=lfs merge=lfs -text
19
  *.parquet filter=lfs diff=lfs merge=lfs -text
20
  *.pb filter=lfs diff=lfs merge=lfs -text
 
33
  *.xz filter=lfs diff=lfs merge=lfs -text
34
  *.zip filter=lfs diff=lfs merge=lfs -text
35
  *.zst filter=lfs diff=lfs merge=lfs -text
36
+ *.jpg filter=lfs diff=lfs merge=lfs -text
37
  *tfevents* filter=lfs diff=lfs merge=lfs -text
38
+ onnx/frida-onnx/FRIDA.onnx.data filter=lfs diff=lfs merge=lfs -text
39
+ onnx/frida-onnx/FRIDA.onnx filter=lfs diff=lfs merge=lfs -text
40
+ img.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .idea
12
+ uv.lock
13
+ onnx_to_trt.py
README.md CHANGED
@@ -1,3 +1,65 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - ru
5
+ - en
6
+ tags:
7
+ - mteb
8
+ - transformers
9
+ - sentence-transformers
10
+ base_model:
11
+ - ai-forever/FRIDA
12
+ pipeline_tag: feature-extraction
13
+ ---
14
+
15
+ # FRIDA transformed to onnx
16
+ Link to the [original](https://huggingface.co/ai-forever/FRIDA) repository for this model.
17
+ This ONNX version supports batching.
18
+
19
+ ### Transform FRIDA to onnx and tensorrt
20
+ This repository contains the FRIDA model in ONNX format (a TensorRT version is upcoming).
21
+ The Python conversion scripts for this model are included here as well. `onnx_to_trt.py` is untested as of now.
22
+
23
+ # Model Card for FRIDA ONNX/TRT
24
+
25
+ <figure>
26
+ <img src="img.jpg">
27
+ </figure>
28
+
29
+ FRIDA is a full-scale finetuned general text embedding model inspired by a denoising architecture based on T5. The model is based on the encoder part of the [FRED-T5](https://arxiv.org/abs/2309.10931) model and continues the research on text embedding models ([ruMTEB](https://arxiv.org/abs/2408.12503), [ru-en-RoSBERTa](https://huggingface.co/ai-forever/ru-en-RoSBERTa)). It has been pre-trained on a Russian-English dataset and fine-tuned for improved performance on the target task.
30
+
31
+ For more model details please refer to this [article](https://habr.com/ru/companies/sberdevices/articles/909924/) (RU).
32
+
33
+ ## Usage
34
+
35
+ The model can be used as is with prefixes. It is recommended to use CLS pooling. The choice of prefix and pooling depends on the task.
36
+
37
+ We use the following basic rules to choose a prefix:
38
+ - `"search_query: "` and `"search_document: "` prefixes are for answer or relevant paragraph retrieval
39
+ - `"paraphrase: "` prefix is for symmetric paraphrasing related tasks (STS, paraphrase mining, deduplication)
40
+ - `"categorize: "` prefix is for asymmetric matching of document title and body (e.g. news, scientific papers, social posts)
41
+ - `"categorize_sentiment: "` prefix is for any tasks that rely on sentiment features (e.g. hate, toxic, emotion)
42
+ - `"categorize_topic: "` prefix is intended for tasks where you need to group texts by topic
43
+ - `"categorize_entailment: "` prefix is for textual entailment task (NLI)
44
+
45
+ To better tailor the model to your needs, you can fine-tune it with relevant high-quality Russian and English datasets.
46
+
47
+ Below are examples of text encoding using the Transformers and SentenceTransformers libraries.
48
+
49
+ ## Authors
50
+ + [SaluteDevices](https://sberdevices.ru/) AI for B2C RnD Team.
51
+ + Artem Snegirev: [HF profile](https://huggingface.co/artemsnegirev), [Github](https://github.com/artemsnegirev);
52
+ + Anna Maksimova: [HF profile](https://huggingface.co/anpalmak);
53
+ + Aleksandr Abramov: [HF profile](https://huggingface.co/Andrilko), [Github](https://github.com/Ab1992ao), [Kaggle Competitions Master](https://www.kaggle.com/andrilko)
54
+
55
+
56
+ ## Citation
57
+
58
+ ```
59
+ @misc{TODO
60
+ }
61
+ ```
62
+
63
+ ## Limitations
64
+
65
+ The model is designed to process texts in Russian; the quality in English is unknown. The maximum input text length is limited to 512 tokens.
check_input_dims.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import onnxruntime as ort
2
+
3
+ session = ort.InferenceSession("onnx/frida-onnx/FRIDA.onnx")
4
+
5
+ for i, inp in enumerate(session.get_inputs()):
6
+ print(f"Input {i}:")
7
+ print(f" Name: {inp.name}")
8
+ print(f" Type: {inp.type}")
9
+ print(f" Shape: {inp.shape}")
10
+ print(f" Is dynamic? {'Yes' if -1 in inp.shape else 'No'}")
11
+
12
+ for i, out in enumerate(session.get_outputs()):
13
+ print(f"Output {i}:")
14
+ print(f" Name: {out.name}")
15
+ print(f" Type: {out.type}")
16
+ print(f" Shape: {out.shape}")
17
+ print(f" Is dynamic? {'Yes' if -1 in out.shape else 'No'}")
check_inputs.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import onnx
2
+
3
+ model = onnx.load("onnx/frida-onnx/FRIDA.onnx")
4
+ for inp in model.graph.input:
5
+ shape = [dim.dim_value if dim.dim_value > 0 else str(dim.dim_param) for dim in inp.type.tensor_type.shape.dim]
6
+ print(f"Input '{inp.name}': {shape}")
img.jpg ADDED

Git LFS Details

  • SHA256: e65e89ee08ce90245957919e1ae2ff2376ecede4ef48da4e03a9f2df48f0badf
  • Pointer size: 131 Bytes
  • Size of remote file: 107 kB
onnx/frida-onnx/FRIDA.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeaf461a810600c0489632bb3582b384bc531121cc3d7866a12ce0bcd6429461
3
+ size 2514640
onnx/frida-onnx/FRIDA.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c01127d7452d91a0a5e596d292db84205f9c5672ac8ca43a5bab28f72b712e7
3
+ size 3293628416
onnx/frida-onnx/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
onnx/frida-onnx/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
onnx/frida-onnx/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
onnx/frida-onnx/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "extra_special_tokens": {},
51
+ "mask_token": "<mask>",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "repo_type": "model",
55
+ "sep_token": "</s>",
56
+ "tokenizer_class": "RobertaTokenizer",
57
+ "trim_offsets": true,
58
+ "unk_token": "<unk>"
59
+ }
onnx/frida-onnx/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "frida-transformed"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13, <3.14"
7
+ dependencies = [
8
+ 'onnx == 1.20.0',
9
+ 'onnxruntime == 1.23.2',
10
+ 'onnxscript == 0.5.7',
11
+ 'onnx-safetensors == 1.2.0',
12
+ 'torch == 2.9.1',
13
+ 'torchvision == 0.24.1',
14
+ 'transformers == 4.57.3',
15
+ 'tensorrt == 10.14.1.48.post1',
16
+ 'pycuda == 2025.1.2'
17
+ ]
18
+
19
+ [tool.uv.workspace]
20
+ members = [
21
+ "frida-transformed",
22
+ ]
safetensors_to_onnx.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.export import Dim
3
+ from transformers import T5EncoderModel, AutoTokenizer
4
+ from pathlib import Path
5
+ import onnxruntime as ort
6
+ import numpy as np
7
+
8
+
9
+ # MODEL_SOURCE_ID = "ai-forever/FRIDA"
10
+ MODEL_SOURCE_ID = "../FRIDA"
11
+ MODEL_TARGET_PATH = Path("onnx/frida-onnx")
12
+ ONNX_FILE_NAME = "FRIDA.onnx"
13
+
14
+ print("="*50)
15
+ print(f"Подготовка директории: {MODEL_TARGET_PATH}")
16
+ MODEL_TARGET_PATH.mkdir(parents=True, exist_ok=True)
17
+
18
+ # 1. Загружаем модель и токенизатор
19
+ print(f"Загрузка модели и токенизатора из '{MODEL_SOURCE_ID}'...")
20
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_SOURCE_ID, repo_type="model")
21
+ model = T5EncoderModel.from_pretrained(MODEL_SOURCE_ID)
22
+ model.eval()
23
+
24
+ # 2. Создаем тестовые входы
25
+ print("Создание тестовых входных данных...")
26
+ test_texts = [
27
+ "paraphrase: В Ярославской области разрешили работу бань, но без посетителей",
28
+ "search_query: Сколько программистов нужно, чтобы вкрутить лампочку?",
29
+ "categorize_entailment: Женщину доставили в больницу, за ее жизнь сейчас борются врачи."
30
+ ]
31
+
32
+ dummy_inputs = tokenizer(
33
+ test_texts,
34
+ max_length=512,
35
+ padding="max_length",
36
+ truncation=True,
37
+ return_tensors="pt"
38
+ )
39
+
40
+ # 3. Экспорт с двумя входами
41
+ onnx_model_path = MODEL_TARGET_PATH / ONNX_FILE_NAME
42
+ print(f"Экспорт модели в ONNX формат: {onnx_model_path}")
43
+
44
+ # For dynamic_shapes
45
+ batch_size = Dim("batch_size", min=1, max=64) # Optional: add min/max constraints
46
+ sequence_length = Dim("sequence_length", min=2, max=512)
47
+
48
+ # dynamic_shapes = {
49
+ # "input_ids": {0: batch_size, 1: sequence_length},
50
+ # "attention_mask": {0: batch_size, 1: sequence_length},
51
+ # "last_hidden_state": {0: batch_size, 1: sequence_length}
52
+ # }
53
+
54
+ # In case of issues use dynamo_export instead of dynamo=True
55
+ torch.onnx.export(
56
+ model,
57
+ (dummy_inputs["input_ids"], dummy_inputs["attention_mask"]),
58
+ onnx_model_path.as_posix(),
59
+ input_names=["input_ids", "attention_mask"],
60
+ output_names=["last_hidden_state"],
61
+ opset_version=20, # Maybe update
62
+ dynamic_shapes = {
63
+ "input_ids": {0: batch_size, 1: sequence_length},
64
+ "attention_mask": {0: batch_size, 1: sequence_length}
65
+ },
66
+ verbose=False,
67
+ dynamo=True
68
+ )
69
+
70
+ # 4. Сохраняем токенизатор
71
+ print(f"Сохранение токенизатора в '{MODEL_TARGET_PATH}'...")
72
+ tokenizer.save_pretrained(MODEL_TARGET_PATH)
73
+
74
+ print("Конвертация завершена успешно!")
75
+
76
+ # 5. Тестирование и сравнение результатов
77
+ print("\n" + "="*50)
78
+ print("ТЕСТИРОВАНИЕ РЕЗУЛЬТАТОВ")
79
+
80
+ def cls_pooling(hidden_state, attention_mask):
81
+ """CLS pooling для получения эмбеддингов"""
82
+ return hidden_state[:, 0]
83
+
84
+ def normalize_embeddings(embeddings):
85
+ """Нормализация эмбеддингов"""
86
+ return embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
87
+
88
+ # Тест с оригинальной моделью
89
+ print("Тестирование оригинальной модели...")
90
+ with torch.no_grad():
91
+ original_inputs = tokenizer(
92
+ test_texts,
93
+ max_length=512,
94
+ padding=True,
95
+ truncation=True,
96
+ return_tensors="pt"
97
+ )
98
+ original_outputs = model(**original_inputs)
99
+ original_embeddings = cls_pooling(
100
+ original_outputs.last_hidden_state,
101
+ original_inputs["attention_mask"]
102
+ )
103
+ original_embeddings = torch.nn.functional.normalize(original_embeddings, p=2, dim=1)
104
+
105
+ # Тест с ONNX моделью
106
+ print("Тестирование ONNX модели...")
107
+ onnx_session = ort.InferenceSession(onnx_model_path.as_posix())
108
+
109
+ onnx_inputs = tokenizer(
110
+ test_texts,
111
+ max_length=512,
112
+ padding=True,
113
+ truncation=True,
114
+ return_tensors="np"
115
+ )
116
+
117
+
118
+ onnx_inputs_int64 = {
119
+ "input_ids": onnx_inputs["input_ids"].astype(np.int64),
120
+ "attention_mask": onnx_inputs["attention_mask"].astype(np.int64)
121
+ }
122
+
123
+ onnx_outputs = onnx_session.run(None, onnx_inputs_int64)[0]
124
+
125
+ onnx_embeddings = onnx_outputs[:, 0]
126
+ onnx_embeddings = normalize_embeddings(onnx_embeddings)
127
+
128
+ cosine_similarity = np.sum(original_embeddings.numpy() * onnx_embeddings, axis=1)
129
+ print(f"\nCosine similarity между оригинальной и ONNX моделью:")
130
+ for i, sim in enumerate(cosine_similarity):
131
+ print(f" Текст {i+1}: {sim:.6f}")
132
+ print(f"Средняя схожесть: {np.mean(cosine_similarity):.6f}")
133
+
134
+ print("\n" + "="*50)
135
+ print("ГОТОВО! Модель успешно конвертирована и протестирована.")
136
+ print(f"Путь к модели: {MODEL_TARGET_PATH.resolve()}")