pere committed on
Commit
607790d
·
1 Parent(s): e049c30
Files changed (13) hide show
  1. data/._data_text_default-952efb7fdd4e970d_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock +0 -0
  2. data/._data_text_default-a93e22393ff62787_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock +0 -0
  3. data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock +0 -0
  4. data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-53e75324430950d8.arrow +3 -0
  5. data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json +1 -0
  6. data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow +3 -0
  7. data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock +0 -0
  8. data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock +0 -0
  9. data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-0b2eee437c5abe6d.arrow +3 -0
  10. data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json +1 -0
  11. data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow +3 -0
  12. data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock +0 -0
  13. run_unsup_GPU.sh +31 -0
data/._data_text_default-952efb7fdd4e970d_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock ADDED
File without changes
data/._data_text_default-a93e22393ff62787_0.0.0_21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.lock ADDED
File without changes
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock ADDED
File without changes
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-53e75324430950d8.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21163fe350f8123f395134d1e35fa04005898f9dd27b9964d000823314013cdd
3
+ size 325905944
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"description": "", "citation": "", "homepage": "", "license": "", "features": {"text": {"dtype": "string", "_type": "Value"}}, "builder_name": "text", "config_name": "default", "version": {"version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 123038621, "num_examples": 1000000, "dataset_name": "text"}}, "download_checksums": {"/home/perk/models/SimCSE-test/data/wiki1m_for_simcse.txt": {"num_bytes": 120038621, "checksum": "7b1825863a99aa76479b0456f7c210539dfaeeb69598b41fb4de4f524dd5a706"}}, "download_size": 120038621, "dataset_size": 123038621, "size_in_bytes": 243077242}
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4cd7d749ccccf59a58dc1f2c4349440ee844c40554214559b7a1f91638f6051
3
+ size 123059952
data/text/default-952efb7fdd4e970d/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock ADDED
File without changes
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad.incomplete_info.lock ADDED
File without changes
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/cache-0b2eee437c5abe6d.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17254ba04c4c3369f474d8f1cff7af78edd23370a34463384dd511100f3ede4b
3
+ size 320928696
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/dataset_info.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"description": "", "citation": "", "homepage": "", "license": "", "features": {"text": {"dtype": "string", "_type": "Value"}}, "builder_name": "text", "config_name": "default", "version": {"version_str": "0.0.0", "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 94417493, "num_examples": 1000000, "dataset_name": "text"}}, "download_checksums": {"/home/perk/models/SimCSE-test/data/nor_news_1998_2019_sentences_1M.txt": {"num_bytes": 91417493, "checksum": "a11b0487ea13419b7902c6890723469a3c540ec0143053e82312642509adfaae"}}, "download_size": 91417493, "dataset_size": 94417493, "size_in_bytes": 185834986}
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad/text-train.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4d0f3761a50480d487ee5fd4c43ffc579d0158e0ada0823207dbad44e97d75
3
+ size 94438104
data/text/default-a93e22393ff62787/0.0.0/21a506d1b2b34316b1e82d0bd79066905d846e5d7e619823c0dd338d6f1fa6ad_builder.lock ADDED
File without changes
run_unsup_GPU.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # In this example, we show how to train SimCSE on unsupervised Wikipedia data.
4
+ # To train on multiple GPU cards instead, see "run_sup_example.sh"
5
+ # for how to use PyTorch's distributed data parallel.
6
+ export ZONE=us-central2-b
7
+ export XRT_TPU_CONFIG="localservice;0;localhost:51011"
8
+ export TPU_NUM_DEVICES=4
9
+ export ALLOW_MULTIPLE_LIBTPU_LOAD=1
10
+
11
+ python3 ../../SimCSE/train.py \
12
+ --model_name_or_path NbAiLab/nb-bert-base \
13
+ --train_file data/wiki1m_for_simcse.txt \
14
+ --output_dir result/unsup-simcse-nb-bert-bert-base-GPU \
15
+ --num_train_epochs 1 \
16
+ --per_device_train_batch_size 64 \
17
+ --learning_rate 3e-5 \
18
+ --max_seq_length 32 \
19
+ --evaluation_strategy steps \
20
+ --metric_for_best_model stsb_spearman \
21
+ --load_best_model_at_end \
22
+ --eval_steps 125 \
23
+ --pooler_type cls \
24
+ --mlp_only_train \
25
+ --overwrite_output_dir \
26
+ --temp 0.05 \
27
+ --do_train \
28
+ --do_eval \
29
+ --fp16 \
30
+ "$@"
31
+