Commit ·
ac03f85
1
Parent(s): d67226c
add model
Browse files- .gitattributes +5 -0
- 1_Pooling/config.json +7 -0
- README.md +126 -0
- config.json +32 -0
- config_sentence_transformers.json +7 -0
- modules.json +14 -0
- pytorch_model.bin +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +15 -0
- training_assets/2_train_sts_cross_bm25.py +43 -0
- training_assets/cross_silver_scores_v3.pkl +3 -0
- training_assets/gold_eval_dataloader.pkl +3 -0
- training_assets/gold_sample_index.txt +825 -0
- training_assets/gold_train_dataloader.pkl +3 -0
- training_assets/request_solr.py +64 -0
- training_assets/silver_cross_samples.pkl +3 -0
- training_assets/silver_data.pkl +3 -0
- training_assets/train_augmented_bert.ipynb +0 -0
- vocab.txt +0 -0
.gitattributes
CHANGED
|
@@ -25,3 +25,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
training_assets/cross_silver_scores_v3.pkl filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
training_assets/silver_cross_samples.pkl filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
training_assets/silver_data.pkl filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
training_assets/gold_eval_dataloader.pkl filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
training_assets/gold_train_dataloader.pkl filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false
|
| 7 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
pipeline_tag: sentence-similarity
|
| 3 |
+
tags:
|
| 4 |
+
- sentence-transformers
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- sentence-similarity
|
| 7 |
+
- transformers
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# {MODEL_NAME}
|
| 12 |
+
|
| 13 |
+
This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.
|
| 14 |
+
|
| 15 |
+
<!--- Describe your model here -->
|
| 16 |
+
|
| 17 |
+
## Usage (Sentence-Transformers)
|
| 18 |
+
|
| 19 |
+
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
|
| 20 |
+
|
| 21 |
+
```
|
| 22 |
+
pip install -U sentence-transformers
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
Then you can use the model like this:
|
| 26 |
+
|
| 27 |
+
```python
|
| 28 |
+
from sentence_transformers import SentenceTransformer
|
| 29 |
+
sentences = ["This is an example sentence", "Each sentence is converted"]
|
| 30 |
+
|
| 31 |
+
model = SentenceTransformer('{MODEL_NAME}')
|
| 32 |
+
embeddings = model.encode(sentences)
|
| 33 |
+
print(embeddings)
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
## Usage (HuggingFace Transformers)
|
| 39 |
+
Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: First, you pass your input through the transformer model, then you have to apply the right pooling-operation on-top of the contextualized word embeddings.
|
| 40 |
+
|
| 41 |
+
```python
|
| 42 |
+
from transformers import AutoTokenizer, AutoModel
|
| 43 |
+
import torch
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
#Mean Pooling - Take attention mask into account for correct averaging
|
| 47 |
+
def mean_pooling(model_output, attention_mask):
|
| 48 |
+
token_embeddings = model_output[0] #First element of model_output contains all token embeddings
|
| 49 |
+
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
|
| 50 |
+
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
# Sentences we want sentence embeddings for
|
| 54 |
+
sentences = ['This is an example sentence', 'Each sentence is converted']
|
| 55 |
+
|
| 56 |
+
# Load model from HuggingFace Hub
|
| 57 |
+
tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
|
| 58 |
+
model = AutoModel.from_pretrained('{MODEL_NAME}')
|
| 59 |
+
|
| 60 |
+
# Tokenize sentences
|
| 61 |
+
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
|
| 62 |
+
|
| 63 |
+
# Compute token embeddings
|
| 64 |
+
with torch.no_grad():
|
| 65 |
+
model_output = model(**encoded_input)
|
| 66 |
+
|
| 67 |
+
# Perform pooling. In this case, mean pooling.
|
| 68 |
+
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
|
| 69 |
+
|
| 70 |
+
print("Sentence embeddings:")
|
| 71 |
+
print(sentence_embeddings)
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
## Evaluation Results
|
| 77 |
+
|
| 78 |
+
<!--- Describe how your model was evaluated -->
|
| 79 |
+
|
| 80 |
+
For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
## Training
|
| 84 |
+
The model was trained with the parameters:
|
| 85 |
+
|
| 86 |
+
**DataLoader**:
|
| 87 |
+
|
| 88 |
+
`torch.utils.data.dataloader.DataLoader` of length 80805 with parameters:
|
| 89 |
+
```
|
| 90 |
+
{'batch_size': 8, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
**Loss**:
|
| 94 |
+
|
| 95 |
+
`sentence_transformers.losses.CosineSimilarityLoss.CosineSimilarityLoss`
|
| 96 |
+
|
| 97 |
+
Parameters of the fit()-Method:
|
| 98 |
+
```
|
| 99 |
+
{
|
| 100 |
+
"epochs": 3,
|
| 101 |
+
"evaluation_steps": 10000,
|
| 102 |
+
"evaluator": "__main__.EmbeddingSimilarityEvaluator",
|
| 103 |
+
"max_grad_norm": 1,
|
| 104 |
+
"optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
|
| 105 |
+
"optimizer_params": {
|
| 106 |
+
"lr": 2e-05
|
| 107 |
+
},
|
| 108 |
+
"scheduler": "WarmupLinear",
|
| 109 |
+
"steps_per_epoch": null,
|
| 110 |
+
"warmup_steps": 24242,
|
| 111 |
+
"weight_decay": 0.01
|
| 112 |
+
}
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
## Full Model Architecture
|
| 117 |
+
```
|
| 118 |
+
SentenceTransformer(
|
| 119 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
|
| 120 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
|
| 121 |
+
)
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
## Citing & Authors
|
| 125 |
+
|
| 126 |
+
<!--- Describe where people can find more information -->
|
config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/content/drive/MyDrive/Colab_Notebooks/Anatel-gdrive/anatel_train_sts_cross_bm25/augmented-bert-portuguese-anatel-last-train/",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.21.0",
|
| 29 |
+
"type_vocab_size": 2,
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"vocab_size": 29794
|
| 32 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "2.2.2",
|
| 4 |
+
"transformers": "4.21.0",
|
| 5 |
+
"pytorch": "1.12.0+cu113"
|
| 6 |
+
}
|
| 7 |
+
}
|
modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4c080e8cd37d8c8cb7b923fd7e780bfb4ad3ea20ab40edc4083f8124c8e29bd
|
| 3 |
+
size 435761969
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"do_basic_tokenize": true,
|
| 4 |
+
"do_lower_case": false,
|
| 5 |
+
"mask_token": "[MASK]",
|
| 6 |
+
"name_or_path": "/content/drive/MyDrive/Colab_Notebooks/Anatel-gdrive/anatel_train_sts_cross_bm25/augmented-bert-portuguese-anatel-last-train/",
|
| 7 |
+
"never_split": null,
|
| 8 |
+
"pad_token": "[PAD]",
|
| 9 |
+
"sep_token": "[SEP]",
|
| 10 |
+
"special_tokens_map_file": "/root/.cache/huggingface/transformers/eecc45187d085a1169eed91017d358cc0e9cbdd5dc236bcd710059dbf0a2f816.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d",
|
| 11 |
+
"strip_accents": null,
|
| 12 |
+
"tokenize_chinese_chars": true,
|
| 13 |
+
"tokenizer_class": "BertTokenizer",
|
| 14 |
+
"unk_token": "[UNK]"
|
| 15 |
+
}
|
training_assets/2_train_sts_cross_bm25.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#%%
|
| 2 |
+
from tqdm import tqdm
|
| 3 |
+
from request_solr import SilverDataset
|
| 4 |
+
from sentence_transformers.cross_encoder import CrossEncoder
|
| 5 |
+
import joblib
|
| 6 |
+
from solr_query_params import params
|
| 7 |
+
|
| 8 |
+
############################################################################
|
| 9 |
+
#
|
| 10 |
+
# https://github.com/UKPLab/sentence-transformers/tree/master/examples/training/data_augmentation/train_sts_indomain_bm25.py
|
| 11 |
+
# Step 2: Label BM25 sampled STSb (silver dataset) using cross-encoder model
|
| 12 |
+
#
|
| 13 |
+
############################################################################
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
cross_encoder_path = 'ennioferreirab/cross-encoder-pt-anatel-metadados-assunto'
|
| 17 |
+
gold_sample_index = set()
|
| 18 |
+
with open('gold_sample_index.txt', 'r') as f:
|
| 19 |
+
for line in f:
|
| 20 |
+
gold_sample_index.add(line.strip())
|
| 21 |
+
7
|
| 22 |
+
try:
|
| 23 |
+
joblib.load('silver_data_v2.pkl')
|
| 24 |
+
except:
|
| 25 |
+
print('Creating silver data...')
|
| 26 |
+
silver_data = SilverDataset(query_params=params,duplicated=gold_sample_index).run()
|
| 27 |
+
joblib.dump(silver_data, 'silver_data_v2.pkl')
|
| 28 |
+
print('Done!')
|
| 29 |
+
|
| 30 |
+
sentences = [(sent_1,sent_2) for sent_1, sent_2, _ in silver_data]
|
| 31 |
+
|
| 32 |
+
cross_encoder = CrossEncoder(cross_encoder_path,max_length=512)
|
| 33 |
+
cross_silver_scores = []
|
| 34 |
+
for i in tqdm(sentences):
|
| 35 |
+
cross_silver_scores.append(cross_encoder.predict(i))
|
| 36 |
+
|
| 37 |
+
import numpy as np
|
| 38 |
+
cross_silver_data = np.c_[np.array(silver_data),np.array(cross_silver_scores)]
|
| 39 |
+
|
| 40 |
+
# All model predictions should be between [0,1]
|
| 41 |
+
assert all(0.0 <= score <= 1.0 for score in cross_silver_scores)
|
| 42 |
+
|
| 43 |
+
joblib.dump(cross_silver_data, 'cross_silver_scores_2.pkl')
|
training_assets/cross_silver_scores_v3.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd9d6a0296f0a1e9589ac8550d6095d9f53985ecd3fc3a8f1e4398426acb84d0
|
| 3 |
+
size 239383791
|
training_assets/gold_eval_dataloader.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8901155d353af2a1fad078daafb7eabab5bc6779d69ddb3768a359ac2b50bdad
|
| 3 |
+
size 127396
|
training_assets/gold_sample_index.txt
ADDED
|
@@ -0,0 +1,825 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4460666
|
| 2 |
+
4646076
|
| 3 |
+
4740485
|
| 4 |
+
4841542
|
| 5 |
+
5137102
|
| 6 |
+
5433651
|
| 7 |
+
5619579
|
| 8 |
+
6730044
|
| 9 |
+
6960718
|
| 10 |
+
6983639
|
| 11 |
+
7331138
|
| 12 |
+
7460176
|
| 13 |
+
8137405
|
| 14 |
+
5133708
|
| 15 |
+
5591405
|
| 16 |
+
6098115
|
| 17 |
+
6200408
|
| 18 |
+
6816332
|
| 19 |
+
7073711
|
| 20 |
+
8252830
|
| 21 |
+
5333260
|
| 22 |
+
5545291
|
| 23 |
+
5903862
|
| 24 |
+
6070901
|
| 25 |
+
6269728
|
| 26 |
+
6391200
|
| 27 |
+
6710641
|
| 28 |
+
6763601
|
| 29 |
+
5808966
|
| 30 |
+
6164703
|
| 31 |
+
5809430
|
| 32 |
+
6379512
|
| 33 |
+
8182456
|
| 34 |
+
5177401
|
| 35 |
+
5510278
|
| 36 |
+
6083218
|
| 37 |
+
6270034
|
| 38 |
+
6543759
|
| 39 |
+
5369628
|
| 40 |
+
6099099
|
| 41 |
+
6174942
|
| 42 |
+
7902136
|
| 43 |
+
5340149
|
| 44 |
+
5401404
|
| 45 |
+
5981567
|
| 46 |
+
6465290
|
| 47 |
+
6643010
|
| 48 |
+
7835158
|
| 49 |
+
7889240
|
| 50 |
+
8084176
|
| 51 |
+
5433923
|
| 52 |
+
5498005
|
| 53 |
+
5516542
|
| 54 |
+
5809052
|
| 55 |
+
5855590
|
| 56 |
+
5985223
|
| 57 |
+
6110110
|
| 58 |
+
6151666
|
| 59 |
+
6153056
|
| 60 |
+
6439888
|
| 61 |
+
6525429
|
| 62 |
+
6556589
|
| 63 |
+
6635224
|
| 64 |
+
6986996
|
| 65 |
+
7080918
|
| 66 |
+
7114149
|
| 67 |
+
7128483
|
| 68 |
+
7886766
|
| 69 |
+
8131931
|
| 70 |
+
4682622
|
| 71 |
+
5164923
|
| 72 |
+
5503680
|
| 73 |
+
5920921
|
| 74 |
+
6209634
|
| 75 |
+
6422257
|
| 76 |
+
6872536
|
| 77 |
+
7427806
|
| 78 |
+
7986978
|
| 79 |
+
7994954
|
| 80 |
+
5774177
|
| 81 |
+
5989286
|
| 82 |
+
7508398
|
| 83 |
+
5591454
|
| 84 |
+
6102860
|
| 85 |
+
5637632
|
| 86 |
+
5859904
|
| 87 |
+
4370203
|
| 88 |
+
4949468
|
| 89 |
+
5426346
|
| 90 |
+
5859209
|
| 91 |
+
6266408
|
| 92 |
+
5387595
|
| 93 |
+
5465052
|
| 94 |
+
5521946
|
| 95 |
+
5991666
|
| 96 |
+
6209282
|
| 97 |
+
6491862
|
| 98 |
+
6548583
|
| 99 |
+
7494666
|
| 100 |
+
7595318
|
| 101 |
+
8133086
|
| 102 |
+
5634836
|
| 103 |
+
5850384
|
| 104 |
+
5863288
|
| 105 |
+
6398949
|
| 106 |
+
6635184
|
| 107 |
+
6904804
|
| 108 |
+
6975680
|
| 109 |
+
7969196
|
| 110 |
+
8163454
|
| 111 |
+
6887423
|
| 112 |
+
4479704
|
| 113 |
+
5804564
|
| 114 |
+
4646057
|
| 115 |
+
4649560
|
| 116 |
+
4895445
|
| 117 |
+
5182219
|
| 118 |
+
5205994
|
| 119 |
+
5504036
|
| 120 |
+
5864758
|
| 121 |
+
6960731
|
| 122 |
+
7337160
|
| 123 |
+
8158609
|
| 124 |
+
8165279
|
| 125 |
+
8192206
|
| 126 |
+
5766461
|
| 127 |
+
6112431
|
| 128 |
+
8179460
|
| 129 |
+
3674750
|
| 130 |
+
5230043
|
| 131 |
+
6781048
|
| 132 |
+
6862078
|
| 133 |
+
8133237
|
| 134 |
+
5608073
|
| 135 |
+
5640398
|
| 136 |
+
5987322
|
| 137 |
+
6103319
|
| 138 |
+
7348722
|
| 139 |
+
7599312
|
| 140 |
+
7889440
|
| 141 |
+
7954697
|
| 142 |
+
8151933
|
| 143 |
+
6081741
|
| 144 |
+
5261625
|
| 145 |
+
5856321
|
| 146 |
+
6464888
|
| 147 |
+
6520857
|
| 148 |
+
6779057
|
| 149 |
+
6913683
|
| 150 |
+
7090008
|
| 151 |
+
7348871
|
| 152 |
+
7410868
|
| 153 |
+
8091065
|
| 154 |
+
8131423
|
| 155 |
+
8231168
|
| 156 |
+
5550518
|
| 157 |
+
8163535
|
| 158 |
+
7783020
|
| 159 |
+
8150022
|
| 160 |
+
5992730
|
| 161 |
+
6866793
|
| 162 |
+
8139859
|
| 163 |
+
7016421
|
| 164 |
+
8213148
|
| 165 |
+
4740150
|
| 166 |
+
5805453
|
| 167 |
+
4646084
|
| 168 |
+
5174687
|
| 169 |
+
5550543
|
| 170 |
+
6010237
|
| 171 |
+
6875009
|
| 172 |
+
7030819
|
| 173 |
+
7421517
|
| 174 |
+
7492466
|
| 175 |
+
7493941
|
| 176 |
+
7522041
|
| 177 |
+
7602469
|
| 178 |
+
7650684
|
| 179 |
+
7659691
|
| 180 |
+
7991553
|
| 181 |
+
8090644
|
| 182 |
+
4486736
|
| 183 |
+
5482615
|
| 184 |
+
5551413
|
| 185 |
+
5208352
|
| 186 |
+
5546726
|
| 187 |
+
5683883
|
| 188 |
+
5689258
|
| 189 |
+
5871416
|
| 190 |
+
5920031
|
| 191 |
+
4574013
|
| 192 |
+
4888346
|
| 193 |
+
5183619
|
| 194 |
+
5504640
|
| 195 |
+
5984725
|
| 196 |
+
6026816
|
| 197 |
+
6415398
|
| 198 |
+
8141389
|
| 199 |
+
7114643
|
| 200 |
+
4620965
|
| 201 |
+
5164684
|
| 202 |
+
5406485
|
| 203 |
+
5586960
|
| 204 |
+
5855047
|
| 205 |
+
6209691
|
| 206 |
+
6392602
|
| 207 |
+
8183810
|
| 208 |
+
5276276
|
| 209 |
+
5795308
|
| 210 |
+
5327818
|
| 211 |
+
4596269
|
| 212 |
+
5272298
|
| 213 |
+
5430104
|
| 214 |
+
5613420
|
| 215 |
+
5779154
|
| 216 |
+
6294837
|
| 217 |
+
6860757
|
| 218 |
+
8154715
|
| 219 |
+
8163261
|
| 220 |
+
8258448
|
| 221 |
+
4007242
|
| 222 |
+
5326368
|
| 223 |
+
6035394
|
| 224 |
+
6108765
|
| 225 |
+
6317765
|
| 226 |
+
6791523
|
| 227 |
+
6974773
|
| 228 |
+
8152457
|
| 229 |
+
8190516
|
| 230 |
+
6712858
|
| 231 |
+
6465138
|
| 232 |
+
6664825
|
| 233 |
+
8163904
|
| 234 |
+
7892776
|
| 235 |
+
5756069
|
| 236 |
+
8205392
|
| 237 |
+
7203259
|
| 238 |
+
5599430
|
| 239 |
+
5350003
|
| 240 |
+
5855756
|
| 241 |
+
6270070
|
| 242 |
+
7145585
|
| 243 |
+
5091384
|
| 244 |
+
5876381
|
| 245 |
+
5957940
|
| 246 |
+
6108724
|
| 247 |
+
6421665
|
| 248 |
+
6620134
|
| 249 |
+
7209796
|
| 250 |
+
8151360
|
| 251 |
+
4087034
|
| 252 |
+
4531381
|
| 253 |
+
4596045
|
| 254 |
+
4855347
|
| 255 |
+
5162839
|
| 256 |
+
5235040
|
| 257 |
+
5253056
|
| 258 |
+
5327778
|
| 259 |
+
5329186
|
| 260 |
+
5401428
|
| 261 |
+
5617203
|
| 262 |
+
5625555
|
| 263 |
+
5675553
|
| 264 |
+
5773205
|
| 265 |
+
5843647
|
| 266 |
+
5872380
|
| 267 |
+
5989087
|
| 268 |
+
6215135
|
| 269 |
+
6439752
|
| 270 |
+
6482537
|
| 271 |
+
6537688
|
| 272 |
+
6604972
|
| 273 |
+
6729495
|
| 274 |
+
8133210
|
| 275 |
+
5272072
|
| 276 |
+
5516186
|
| 277 |
+
7629938
|
| 278 |
+
7423420
|
| 279 |
+
5159992
|
| 280 |
+
6980613
|
| 281 |
+
6627386
|
| 282 |
+
3913260
|
| 283 |
+
5515029
|
| 284 |
+
7630311
|
| 285 |
+
5685194
|
| 286 |
+
6033758
|
| 287 |
+
6409793
|
| 288 |
+
4595837
|
| 289 |
+
3862776
|
| 290 |
+
5540750
|
| 291 |
+
6032434
|
| 292 |
+
5930452
|
| 293 |
+
5971699
|
| 294 |
+
6548457
|
| 295 |
+
5511835
|
| 296 |
+
5563594
|
| 297 |
+
6405254
|
| 298 |
+
6958561
|
| 299 |
+
5408482
|
| 300 |
+
5502073
|
| 301 |
+
5659569
|
| 302 |
+
6072736
|
| 303 |
+
6418523
|
| 304 |
+
7126761
|
| 305 |
+
4418008
|
| 306 |
+
4460858
|
| 307 |
+
5345610
|
| 308 |
+
1825442
|
| 309 |
+
2051705
|
| 310 |
+
7650052
|
| 311 |
+
5314776
|
| 312 |
+
5571539
|
| 313 |
+
4165786
|
| 314 |
+
4551806
|
| 315 |
+
3973194
|
| 316 |
+
4304284
|
| 317 |
+
4663854
|
| 318 |
+
5211035
|
| 319 |
+
5935558
|
| 320 |
+
5852416
|
| 321 |
+
3866461
|
| 322 |
+
1984797
|
| 323 |
+
4348571
|
| 324 |
+
2667288
|
| 325 |
+
2875287
|
| 326 |
+
2963171
|
| 327 |
+
3961144
|
| 328 |
+
4096818
|
| 329 |
+
4209438
|
| 330 |
+
4409058
|
| 331 |
+
4602656
|
| 332 |
+
5238188
|
| 333 |
+
5240637
|
| 334 |
+
5481769
|
| 335 |
+
5486463
|
| 336 |
+
5497117
|
| 337 |
+
5502194
|
| 338 |
+
5502389
|
| 339 |
+
5502390
|
| 340 |
+
5627659
|
| 341 |
+
5641429
|
| 342 |
+
5673797
|
| 343 |
+
5674040
|
| 344 |
+
5677426
|
| 345 |
+
5722966
|
| 346 |
+
5858953
|
| 347 |
+
6030890
|
| 348 |
+
6057740
|
| 349 |
+
6081452
|
| 350 |
+
6092841
|
| 351 |
+
6397111
|
| 352 |
+
6758807
|
| 353 |
+
7760525
|
| 354 |
+
7889990
|
| 355 |
+
8187830
|
| 356 |
+
6733763
|
| 357 |
+
5256783
|
| 358 |
+
5454894
|
| 359 |
+
6254439
|
| 360 |
+
5430395
|
| 361 |
+
5564934
|
| 362 |
+
5790594
|
| 363 |
+
7407966
|
| 364 |
+
8120458
|
| 365 |
+
3809160
|
| 366 |
+
3947410
|
| 367 |
+
4280956
|
| 368 |
+
4950329
|
| 369 |
+
5159954
|
| 370 |
+
7245076
|
| 371 |
+
7994916
|
| 372 |
+
5476366
|
| 373 |
+
5392611
|
| 374 |
+
7296149
|
| 375 |
+
4675474
|
| 376 |
+
7771746
|
| 377 |
+
4162522
|
| 378 |
+
4540151
|
| 379 |
+
5516359
|
| 380 |
+
5586556
|
| 381 |
+
5512844
|
| 382 |
+
5557217
|
| 383 |
+
4712460
|
| 384 |
+
6593592
|
| 385 |
+
8050832
|
| 386 |
+
8235082
|
| 387 |
+
8283799
|
| 388 |
+
7236674
|
| 389 |
+
5887216
|
| 390 |
+
5990578
|
| 391 |
+
7523737
|
| 392 |
+
7880724
|
| 393 |
+
8199567
|
| 394 |
+
5637600
|
| 395 |
+
4201008
|
| 396 |
+
4406618
|
| 397 |
+
8249957
|
| 398 |
+
5591666
|
| 399 |
+
6022264
|
| 400 |
+
1827408
|
| 401 |
+
2605566
|
| 402 |
+
3091273
|
| 403 |
+
3691667
|
| 404 |
+
5807405
|
| 405 |
+
5034084
|
| 406 |
+
6428700
|
| 407 |
+
5568958
|
| 408 |
+
6021279
|
| 409 |
+
6275467
|
| 410 |
+
7075117
|
| 411 |
+
6088741
|
| 412 |
+
6446079
|
| 413 |
+
3678407
|
| 414 |
+
3892052
|
| 415 |
+
3802952
|
| 416 |
+
4660263
|
| 417 |
+
5523325
|
| 418 |
+
5476893
|
| 419 |
+
5956550
|
| 420 |
+
6074852
|
| 421 |
+
6391212
|
| 422 |
+
0888264
|
| 423 |
+
1226636
|
| 424 |
+
1325045
|
| 425 |
+
1796335
|
| 426 |
+
3863377
|
| 427 |
+
4190087
|
| 428 |
+
4872263
|
| 429 |
+
5639415
|
| 430 |
+
4598502
|
| 431 |
+
4938680
|
| 432 |
+
5394225
|
| 433 |
+
5941839
|
| 434 |
+
5986252
|
| 435 |
+
6831056
|
| 436 |
+
7389491
|
| 437 |
+
5518440
|
| 438 |
+
5328013
|
| 439 |
+
5795782
|
| 440 |
+
6405189
|
| 441 |
+
6910804
|
| 442 |
+
7428552
|
| 443 |
+
3912783
|
| 444 |
+
4334946
|
| 445 |
+
4750544
|
| 446 |
+
5428246
|
| 447 |
+
5534921
|
| 448 |
+
8232931
|
| 449 |
+
5627612
|
| 450 |
+
5882930
|
| 451 |
+
5924285
|
| 452 |
+
5564068
|
| 453 |
+
6115268
|
| 454 |
+
6603458
|
| 455 |
+
5503797
|
| 456 |
+
5591747
|
| 457 |
+
6009845
|
| 458 |
+
8161801
|
| 459 |
+
4287397
|
| 460 |
+
6508273
|
| 461 |
+
6707346
|
| 462 |
+
8196196
|
| 463 |
+
5608601
|
| 464 |
+
6020852
|
| 465 |
+
6105662
|
| 466 |
+
6529304
|
| 467 |
+
4024170
|
| 468 |
+
8132569
|
| 469 |
+
5236024
|
| 470 |
+
5403472
|
| 471 |
+
5397274
|
| 472 |
+
6410230
|
| 473 |
+
6939796
|
| 474 |
+
8182990
|
| 475 |
+
6538571
|
| 476 |
+
3717823
|
| 477 |
+
4141332
|
| 478 |
+
4475895
|
| 479 |
+
6015699
|
| 480 |
+
6090450
|
| 481 |
+
6594521
|
| 482 |
+
6969505
|
| 483 |
+
8131792
|
| 484 |
+
7759380
|
| 485 |
+
5532712
|
| 486 |
+
5886827
|
| 487 |
+
3174130
|
| 488 |
+
3714303
|
| 489 |
+
5016494
|
| 490 |
+
5745123
|
| 491 |
+
6091768
|
| 492 |
+
8164430
|
| 493 |
+
5878425
|
| 494 |
+
4791647
|
| 495 |
+
7517583
|
| 496 |
+
5662890
|
| 497 |
+
3974603
|
| 498 |
+
4086799
|
| 499 |
+
4282564
|
| 500 |
+
5344043
|
| 501 |
+
5414706
|
| 502 |
+
5416591
|
| 503 |
+
5808227
|
| 504 |
+
0975861
|
| 505 |
+
1538893
|
| 506 |
+
1591036
|
| 507 |
+
1838447
|
| 508 |
+
5138501
|
| 509 |
+
6715682
|
| 510 |
+
5863991
|
| 511 |
+
5135182
|
| 512 |
+
5221262
|
| 513 |
+
4922845
|
| 514 |
+
6731643
|
| 515 |
+
7907513
|
| 516 |
+
8053350
|
| 517 |
+
5228986
|
| 518 |
+
6415154
|
| 519 |
+
5141202
|
| 520 |
+
4646096
|
| 521 |
+
5167166
|
| 522 |
+
4772438
|
| 523 |
+
5309058
|
| 524 |
+
5342240
|
| 525 |
+
6400759
|
| 526 |
+
6881639
|
| 527 |
+
6075097
|
| 528 |
+
6614341
|
| 529 |
+
5468988
|
| 530 |
+
5551297
|
| 531 |
+
3503104
|
| 532 |
+
3963399
|
| 533 |
+
4274953
|
| 534 |
+
6825998
|
| 535 |
+
7434061
|
| 536 |
+
7668958
|
| 537 |
+
4870966
|
| 538 |
+
5110222
|
| 539 |
+
5418746
|
| 540 |
+
5418901
|
| 541 |
+
5489424
|
| 542 |
+
6005690
|
| 543 |
+
6106680
|
| 544 |
+
6450841
|
| 545 |
+
6022525
|
| 546 |
+
5171532
|
| 547 |
+
7504807
|
| 548 |
+
7752278
|
| 549 |
+
6420131
|
| 550 |
+
4779688
|
| 551 |
+
4898526
|
| 552 |
+
5134283
|
| 553 |
+
5262888
|
| 554 |
+
4648476
|
| 555 |
+
7521721
|
| 556 |
+
5208120
|
| 557 |
+
6792659
|
| 558 |
+
8085896
|
| 559 |
+
4848964
|
| 560 |
+
7435248
|
| 561 |
+
6161277
|
| 562 |
+
7887662
|
| 563 |
+
6742588
|
| 564 |
+
5626956
|
| 565 |
+
5618086
|
| 566 |
+
5734885
|
| 567 |
+
5090949
|
| 568 |
+
5116239
|
| 569 |
+
5124741
|
| 570 |
+
5159732
|
| 571 |
+
5219306
|
| 572 |
+
5591519
|
| 573 |
+
5676606
|
| 574 |
+
5690219
|
| 575 |
+
5786819
|
| 576 |
+
6041528
|
| 577 |
+
6072192
|
| 578 |
+
7884624
|
| 579 |
+
7981332
|
| 580 |
+
8084930
|
| 581 |
+
5612301
|
| 582 |
+
4731346
|
| 583 |
+
5623910
|
| 584 |
+
6091875
|
| 585 |
+
6281436
|
| 586 |
+
5213783
|
| 587 |
+
6009557
|
| 588 |
+
7039607
|
| 589 |
+
7679918
|
| 590 |
+
5176028
|
| 591 |
+
5560512
|
| 592 |
+
5874435
|
| 593 |
+
6520636
|
| 594 |
+
5468132
|
| 595 |
+
5556839
|
| 596 |
+
6075388
|
| 597 |
+
6167589
|
| 598 |
+
6402939
|
| 599 |
+
6439732
|
| 600 |
+
6604418
|
| 601 |
+
5205819
|
| 602 |
+
5335110
|
| 603 |
+
5384794
|
| 604 |
+
6173780
|
| 605 |
+
7613196
|
| 606 |
+
5161886
|
| 607 |
+
5426013
|
| 608 |
+
5520987
|
| 609 |
+
5559035
|
| 610 |
+
5931478
|
| 611 |
+
4540213
|
| 612 |
+
5167812
|
| 613 |
+
5364524
|
| 614 |
+
5400717
|
| 615 |
+
5547227
|
| 616 |
+
5605539
|
| 617 |
+
5622937
|
| 618 |
+
5639412
|
| 619 |
+
5795790
|
| 620 |
+
6906068
|
| 621 |
+
6947670
|
| 622 |
+
7963695
|
| 623 |
+
6178278
|
| 624 |
+
5101120
|
| 625 |
+
5862886
|
| 626 |
+
5470473
|
| 627 |
+
6002283
|
| 628 |
+
5186421
|
| 629 |
+
5196824
|
| 630 |
+
5226107
|
| 631 |
+
5231329
|
| 632 |
+
5235885
|
| 633 |
+
5245418
|
| 634 |
+
5247129
|
| 635 |
+
5328934
|
| 636 |
+
5338196
|
| 637 |
+
5344932
|
| 638 |
+
5347088
|
| 639 |
+
5355720
|
| 640 |
+
5362812
|
| 641 |
+
5365107
|
| 642 |
+
5376109
|
| 643 |
+
5376117
|
| 644 |
+
5380762
|
| 645 |
+
5393873
|
| 646 |
+
5394467
|
| 647 |
+
5395955
|
| 648 |
+
5415840
|
| 649 |
+
5425280
|
| 650 |
+
5449732
|
| 651 |
+
5503636
|
| 652 |
+
5541083
|
| 653 |
+
5743983
|
| 654 |
+
5747840
|
| 655 |
+
5792689
|
| 656 |
+
5794851
|
| 657 |
+
5839502
|
| 658 |
+
5887460
|
| 659 |
+
5926330
|
| 660 |
+
6009768
|
| 661 |
+
7039563
|
| 662 |
+
8044928
|
| 663 |
+
8216267
|
| 664 |
+
3730523
|
| 665 |
+
4786125
|
| 666 |
+
5211966
|
| 667 |
+
5469180
|
| 668 |
+
6897698
|
| 669 |
+
5138518
|
| 670 |
+
5401485
|
| 671 |
+
5753393
|
| 672 |
+
6180767
|
| 673 |
+
4106645
|
| 674 |
+
6164679
|
| 675 |
+
6391213
|
| 676 |
+
6401090
|
| 677 |
+
7373471
|
| 678 |
+
6916874
|
| 679 |
+
5103812
|
| 680 |
+
5744044
|
| 681 |
+
6021351
|
| 682 |
+
8010147
|
| 683 |
+
4813869
|
| 684 |
+
7949719
|
| 685 |
+
5349964
|
| 686 |
+
5846693
|
| 687 |
+
8064168
|
| 688 |
+
6812336
|
| 689 |
+
5674311
|
| 690 |
+
5153043
|
| 691 |
+
5159853
|
| 692 |
+
5399855
|
| 693 |
+
5424931
|
| 694 |
+
5444239
|
| 695 |
+
5727887
|
| 696 |
+
5945666
|
| 697 |
+
6092852
|
| 698 |
+
2010236
|
| 699 |
+
2097934
|
| 700 |
+
3756637
|
| 701 |
+
3874160
|
| 702 |
+
3976877
|
| 703 |
+
4301951
|
| 704 |
+
5091159
|
| 705 |
+
5420402
|
| 706 |
+
5466778
|
| 707 |
+
5476112
|
| 708 |
+
6158436
|
| 709 |
+
5545661
|
| 710 |
+
6726156
|
| 711 |
+
5920353
|
| 712 |
+
6538842
|
| 713 |
+
6519579
|
| 714 |
+
6880547
|
| 715 |
+
5206272
|
| 716 |
+
6106472
|
| 717 |
+
6516584
|
| 718 |
+
6732221
|
| 719 |
+
3872348
|
| 720 |
+
5116412
|
| 721 |
+
5563281
|
| 722 |
+
3856904
|
| 723 |
+
4428505
|
| 724 |
+
5345839
|
| 725 |
+
6094897
|
| 726 |
+
6419864
|
| 727 |
+
5945139
|
| 728 |
+
4706163
|
| 729 |
+
3725580
|
| 730 |
+
4003539
|
| 731 |
+
4027979
|
| 732 |
+
4736809
|
| 733 |
+
5025370
|
| 734 |
+
5084254
|
| 735 |
+
5211319
|
| 736 |
+
5257759
|
| 737 |
+
5429093
|
| 738 |
+
5441857
|
| 739 |
+
5505327
|
| 740 |
+
5560235
|
| 741 |
+
5602619
|
| 742 |
+
5606581
|
| 743 |
+
5793811
|
| 744 |
+
5807776
|
| 745 |
+
6010825
|
| 746 |
+
6059919
|
| 747 |
+
6081869
|
| 748 |
+
6137061
|
| 749 |
+
6549955
|
| 750 |
+
6614527
|
| 751 |
+
7246382
|
| 752 |
+
4741669
|
| 753 |
+
0908100
|
| 754 |
+
4486339
|
| 755 |
+
5162489
|
| 756 |
+
5493532
|
| 757 |
+
5636837
|
| 758 |
+
6049972
|
| 759 |
+
4047135
|
| 760 |
+
4385386
|
| 761 |
+
8248665
|
| 762 |
+
1402356
|
| 763 |
+
4967941
|
| 764 |
+
5092529
|
| 765 |
+
5214722
|
| 766 |
+
5228115
|
| 767 |
+
5291887
|
| 768 |
+
5519267
|
| 769 |
+
5537326
|
| 770 |
+
5795976
|
| 771 |
+
5796440
|
| 772 |
+
5803348
|
| 773 |
+
5810185
|
| 774 |
+
5852507
|
| 775 |
+
5909067
|
| 776 |
+
5977933
|
| 777 |
+
6796500
|
| 778 |
+
6863557
|
| 779 |
+
7428664
|
| 780 |
+
8152906
|
| 781 |
+
4404049
|
| 782 |
+
2607396
|
| 783 |
+
3183641
|
| 784 |
+
3184217
|
| 785 |
+
3449359
|
| 786 |
+
3727592
|
| 787 |
+
3734172
|
| 788 |
+
3966722
|
| 789 |
+
4137035
|
| 790 |
+
4224913
|
| 791 |
+
4902962
|
| 792 |
+
4969387
|
| 793 |
+
5163826
|
| 794 |
+
5206856
|
| 795 |
+
5239793
|
| 796 |
+
5269996
|
| 797 |
+
5311495
|
| 798 |
+
5338031
|
| 799 |
+
5370069
|
| 800 |
+
5399410
|
| 801 |
+
5425394
|
| 802 |
+
5440378
|
| 803 |
+
5446751
|
| 804 |
+
5456903
|
| 805 |
+
5458292
|
| 806 |
+
5476100
|
| 807 |
+
5482266
|
| 808 |
+
5504203
|
| 809 |
+
5504582
|
| 810 |
+
5524801
|
| 811 |
+
5551055
|
| 812 |
+
5553070
|
| 813 |
+
5554819
|
| 814 |
+
5567170
|
| 815 |
+
5568809
|
| 816 |
+
5669114
|
| 817 |
+
5717111
|
| 818 |
+
5992402
|
| 819 |
+
6000491
|
| 820 |
+
6059906
|
| 821 |
+
6146140
|
| 822 |
+
6351943
|
| 823 |
+
6467278
|
| 824 |
+
6780508
|
| 825 |
+
8197798
|
training_assets/gold_train_dataloader.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c53ce2f26aab328f08d2db38d11718bb3579048ced91a9acb6a607b95228eaa2
|
| 3 |
+
size 3586422
|
training_assets/request_solr.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import httpx
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
import tqdm
|
| 4 |
+
'''
|
| 5 |
+
gold_sample_index = set()
|
| 6 |
+
with open('gold_sample_index.txt', 'r') as f:
|
| 7 |
+
for line in f:
|
| 8 |
+
gold_sample_index.add(line.strip())
|
| 9 |
+
'''
|
| 10 |
+
|
| 11 |
+
class SilverDataset(BaseModel):
    '''
    Build the "silver" dataset used by the Augmented SBERT strategy.

    For every document indexed in the Solr core, a MoreLikeThis (MLT) query
    retrieves similar documents; each (query_text, doc_text) pair is scored
    with the BM25 score normalised by the query's maximum score.
    ref: https://github.com/UKPLab/sentence-transformers/tree/master/examples/training/data_augmentation/train_sts_indomain_bm25.py

    Attributes:
        query_params: Solr MLT request parameters; must contain 'mlt.qf'
            (the field used both for matching and as the query text).
        duplicated: ids of documents already present in the gold dataset;
            they are filtered out of every MLT result list.
            (pydantic copies this default per instance, so the shared
            mutable-default pitfall does not apply here.)
        solr_url: base URL of the Solr core.
        timeout: per-request timeout in seconds. httpx defaults to 5 s,
            which is too short for the huge `rows` pull in get_ids_list.
    '''
    query_params: dict
    duplicated: set = set()
    solr_url: str = "http://localhost:8983/solr/sei_similaridade_augmented_sbert"
    timeout: float = 60.0

    def get_ids_list(self):
        '''Return the ids of every document in the Solr core.'''
        query = f'{self.solr_url}/select?q=*&fl=id&rows=999999999'
        response = httpx.get(query, timeout=self.timeout)
        # Fail loudly on HTTP errors instead of a cryptic KeyError below.
        response.raise_for_status()
        return [doc['id'] for doc in response.json()['response']['docs']]

    def get_data(self, id):
        '''
        Run a MoreLikeThis query for *id*.

        Returns a dict with the query id, the query document text (taken
        from the 'mlt.qf' field), the similar documents (gold duplicates
        removed) and the maximum BM25 score of the result set.
        '''
        self.query_params['q'] = f"id:{id}"
        response = httpx.post(f'{self.solr_url}/mlt',
                              data=self.query_params,
                              timeout=self.timeout)
        response.raise_for_status()
        r = response.json()
        maxscore = r['response']['maxScore']
        response_docs = self.remove_duplicated(r['response']['docs'])
        return {'query_id': id,
                'query_doc': r['match']['docs'][0][self.query_params['mlt.qf']],
                'docs': response_docs,
                'maxscore': maxscore}

    def remove_duplicated(self, docs):
        '''
        Drop documents whose id is in ``self.duplicated`` (i.e. documents
        that already belong to the gold dataset).
        '''
        return [doc for doc in docs if doc['id'] not in self.duplicated]

    @staticmethod
    def create_sentence_pairs(queries):
        '''
        Build the set of (query_text, doc_text, normalised_score) silver
        pairs from the MLT query results.

        Queries whose maxscore is falsy (no hits, or Solr returned a null
        maxScore) are skipped to avoid a ZeroDivisionError/TypeError.
        '''
        pairs = set()
        for query in queries:
            maxscore = query['maxscore']
            if not maxscore:
                continue
            for doc in query['docs']:
                pairs.add(
                    (query['query_doc'],
                     doc['assunto_text'],
                     doc['score'] / maxscore))
        return pairs

    def run(self):
        '''Fetch MLT results for every document and return the silver pairs.'''
        queries = []
        list_ids = self.get_ids_list()
        for id in tqdm.tqdm(list_ids):
            queries.append(self.get_data(id))
        return self.create_sentence_pairs(queries)
|
training_assets/silver_cross_samples.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6a9119d292328000dd27b5f674e0cf86c708d1b9042a9b8911c03a6726c2e50
|
| 3 |
+
size 239072747
|
training_assets/silver_data.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68e0f9acd5aea86b2e45ae6eee6840c1df4f70705b21b1b9a535ecae0580e5fc
|
| 3 |
+
size 303024365
|
training_assets/train_augmented_bert.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|