test
Browse files- data/._data_text_default-952efb7fdd4e970d_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock +0 -0
- data/._data_text_default-a93e22393ff62787_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock +0 -0
- data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock +0 -0
- data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-53e75324430950d8.arrow +3 -0
- data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json +1 -0
- data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow +3 -0
- data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock +0 -0
- data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock +0 -0
- data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-0b2eee437c5abe6d.arrow +3 -0
- data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json +1 -0
- data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow +3 -0
- data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock +0 -0
- run_unsup_GPU.sh +31 -0
data/._data_text_default-952efb7fdd4e970d_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock
ADDED
|
File without changes
|
data/._data_text_default-a93e22393ff62787_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock
ADDED
|
File without changes
|
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock
ADDED
|
File without changes
|
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-53e75324430950d8.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21163fe350f8123f395134d1e35fa04005898f9dd27b9964d000823314013cdd
|
| 3 |
+
size 325905944
|
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"description": "", "citation": "", "homepage": "", "license": "", "features": {"text": {"dtype": "string", "_type": "Value"}}, "builder_name": "text", "config_name": "default", "version": {"version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 123038621, "num_examples": 1000000, "dataset_name": "text"}}, "download_checksums": {"/home/perk/models/SimCSE-test/data/wiki1m_for_simcse.txt": {"num_bytes": 120038621, "checksum": "7b1825863a99aa76479b0456f7c210539dfaeeb69598b41fb4de4f524dd5a706"}}, "download_size": 120038621, "dataset_size": 123038621, "size_in_bytes": 243077242}
|
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4cd7d749ccccf59a58dc1f2c4349440ee844c40554214559b7a1f91638f6051
|
| 3 |
+
size 123059952
|
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock
ADDED
|
File without changes
|
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock
ADDED
|
File without changes
|
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-0b2eee437c5abe6d.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17254ba04c4c3369f474d8f1cff7af78edd23370a34463384dd511100f3ede4b
|
| 3 |
+
size 320928696
|
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"description": "", "citation": "", "homepage": "", "license": "", "features": {"text": {"dtype": "string", "_type": "Value"}}, "builder_name": "text", "config_name": "default", "version": {"version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 94417493, "num_examples": 1000000, "dataset_name": "text"}}, "download_checksums": {"/home/perk/models/SimCSE-test/data/nor_news_1998_2019_sentences_1M.txt": {"num_bytes": 91417493, "checksum": "a11b0487ea13419b7902c6890723469a3c540ec0143053e82312642509adfaae"}}, "download_size": 91417493, "dataset_size": 94417493, "size_in_bytes": 185834986}
|
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c4d0f3761a50480d487ee5fd4c43ffc579d0158e0ada0823207dbad44e97d75
|
| 3 |
+
size 94438104
|
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock
ADDED
|
File without changes
|
run_unsup_GPU.sh
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
|
| 3 |
+
# In this example, we show how to train SimCSE on unsupervised Wikipedia data.
|
| 4 |
+
# If you want to train it with multiple GPU cards, see "run_sup_example.sh"
|
| 5 |
+
# about how to use PyTorch's distributed data parallel.
|
| 6 |
+
export ZONE=us-central2-b
|
| 7 |
+
export XRT_TPU_CONFIG="localservice;0;localhost:51011"
|
| 8 |
+
export TPU_NUM_DEVICES=4
|
| 9 |
+
export ALLOW_MULTIPLE_LIBTPU_LOAD=1
|
| 10 |
+
|
| 11 |
+
python3 ../../SimCSE/train.py \
|
| 12 |
+
--model_name_or_path NbAiLab/nb-bert-base \
|
| 13 |
+
--train_file data/wiki1m_for_simcse.txt \
|
| 14 |
+
--output_dir result/unsup-simcse-nb-bert-bert-base-GPU \
|
| 15 |
+
--num_train_epochs 1 \
|
| 16 |
+
--per_device_train_batch_size 64 \
|
| 17 |
+
--learning_rate 3e-5 \
|
| 18 |
+
--max_seq_length 32 \
|
| 19 |
+
--evaluation_strategy steps \
|
| 20 |
+
--metric_for_best_model stsb_spearman \
|
| 21 |
+
--load_best_model_at_end \
|
| 22 |
+
--eval_steps 125 \
|
| 23 |
+
--pooler_type cls \
|
| 24 |
+
--mlp_only_train \
|
| 25 |
+
--overwrite_output_dir \
|
| 26 |
+
--temp 0.05 \
|
| 27 |
+
--do_train \
|
| 28 |
+
--do_eval \
|
| 29 |
+
--fp16 \
|
| 30 |
+
"$@"
|
| 31 |
+
|