koichi12 committed on
Commit
aa4fca9
·
verified ·
1 Parent(s): e26479f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. llm_tutorial/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document.idx +0 -0
  3. llm_tutorial/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document/cache/GPTDataset_indices/80d7dc88bf2d24bda8780c0ade9232eb-GPTDataset-document_index.npy +3 -0
  4. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document.bin +3 -0
  5. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document.idx +3 -0
  6. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-description.txt +10 -0
  7. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-document_index.npy +3 -0
  8. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-sample_index.npy +3 -0
  9. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-shuffle_index.npy +3 -0
  10. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-description.txt +10 -0
  11. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-document_index.npy +3 -0
  12. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-sample_index.npy +3 -0
  13. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-shuffle_index.npy +3 -0
  14. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-description.txt +10 -0
  15. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-document_index.npy +3 -0
  16. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-sample_index.npy +3 -0
  17. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-shuffle_index.npy +3 -0
  18. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-description.txt +10 -0
  19. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-document_index.npy +3 -0
  20. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-sample_index.npy +3 -0
  21. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-shuffle_index.npy +3 -0
  22. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-description.txt +10 -0
  23. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-document_index.npy +3 -0
  24. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-sample_index.npy +3 -0
  25. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-shuffle_index.npy +3 -0
  26. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-description.txt +10 -0
  27. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-document_index.npy +3 -0
  28. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-sample_index.npy +3 -0
  29. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-shuffle_index.npy +3 -0
  30. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/592590386f9491eebb2140b0b7a5a7aa-GPTDataset-description.txt +10 -0
  31. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-description.txt +10 -0
  32. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-document_index.npy +3 -0
  33. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-sample_index.npy +3 -0
  34. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-shuffle_index.npy +3 -0
  35. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-description.txt +10 -0
  36. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-document_index.npy +3 -0
  37. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-sample_index.npy +3 -0
  38. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-shuffle_index.npy +3 -0
  39. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-description.txt +10 -0
  40. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-document_index.npy +3 -0
  41. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-sample_index.npy +3 -0
  42. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-shuffle_index.npy +3 -0
  43. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-description.txt +10 -0
  44. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-document_index.npy +3 -0
  45. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-sample_index.npy +3 -0
  46. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-shuffle_index.npy +3 -0
  47. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-description.txt +10 -0
  48. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-document_index.npy +3 -0
  49. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-sample_index.npy +3 -0
  50. llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-shuffle_index.npy +3 -0
.gitattributes CHANGED
@@ -287,3 +287,7 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cublas/lib
287
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/lib/libtorch_cpu.so filter=lfs diff=lfs merge=lfs -text
288
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/triton/_C/libtriton.so filter=lfs diff=lfs merge=lfs -text
289
  llm_tutorial/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document.idx filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
287
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/lib/libtorch_cpu.so filter=lfs diff=lfs merge=lfs -text
288
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/triton/_C/libtriton.so filter=lfs diff=lfs merge=lfs -text
289
  llm_tutorial/llm_recipes/datasets/bin/sample/llm_jp_corpus_v1_ja_wiki_train_0/data_text_document.idx filter=lfs diff=lfs merge=lfs -text
290
+ llm_tutorial/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train_0/data_text_document.idx filter=lfs diff=lfs merge=lfs -text
291
+ llm_tutorial/llm_recipes/datasets/bin/sample/llm_jp_corpus_v2_ja_wiki_train/data_text_document.idx filter=lfs diff=lfs merge=lfs -text
292
+ llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document.idx filter=lfs diff=lfs merge=lfs -text
293
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/lib/libtorch_cuda.so filter=lfs diff=lfs merge=lfs -text
llm_tutorial/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document.idx ADDED
Binary file (22.7 kB). View file
 
llm_tutorial/llm_recipes/datasets/bin/baseline/llm_jp_corpus_v2_ja_wiki_validation_0/data_text_document/cache/GPTDataset_indices/80d7dc88bf2d24bda8780c0ade9232eb-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3984439db692dbc4f2333e39e8ac90fec1b0ccd9290e92c23d230827b4856ed4
3
+ size 5747240
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f6b7e6187060ef902222fca36be65f2185555aa2ad5e1d207d37675d4610010
3
+ size 8594368
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document.idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90dbef9fe4b0c6019720513510a42844aa61217422985834df4c8057b8774db8
3
+ size 200042
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 1624080,
5
+ "index_split": "valid",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012599946530e516a7037d31471e5df17effc857633ae66d5d717e7b5e685724
3
+ size 8280128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7652a8e1ba313956df814b7241d4a30fcf870ed1c21b2211d3a6745d59c6a6b6
3
+ size 13013776
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/02eca412ecb6292af266f09e6facab79-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dd70dd1e3a94cd995adbfe62a08a6e186c91acbb870eb214187c674d0c54a85
3
+ size 6506948
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 35376,
5
+ "index_split": "valid",
6
+ "random_seed": 1234,
7
+ "sequence_length": 256,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae24d87e59589370fafb89f6d1375718dee305d8245efb6606f621e51b66bf6
3
+ size 120128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2344c62fcaeb89a2655bb78c74df2063328636d16326cfdec87bbf69943e1399
3
+ size 377336
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/08ff1eb63b2207c65cce43be54c00e54-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54fe5d057a74f2128db38752566ba125e84b3930f7b7af54825279305f90ad04
3
+ size 188728
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 16080,
5
+ "index_split": "test",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae24d87e59589370fafb89f6d1375718dee305d8245efb6606f621e51b66bf6
3
+ size 120128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deda1d81893431f8c565b3b3c1827c6173655253304b825a8fca32d65a35e161
3
+ size 188736
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/33ded67e093b2391ae3c697433b4284f-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96712567ccc2b29d6f0f88bd0554a49fe6c8012b511ae5e7b85e158f87710235
3
+ size 94428
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 324816,
5
+ "index_split": "valid",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efbef7b827bba33d87aaa0ff3b4e2f730f7a8306df5b555e98ee0371fef932ae
3
+ size 1680128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:459258f66f7eca3b35144e1e79eb59de4af4d24f37900b2c6ffb81700d08f00b
3
+ size 2640584
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/350fe6b65ba7fd88a7114cedb7cbd446-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00d334cba2f4a54e8b7e52d256efaa416a2ef753c03c0950579f080158fd189d
3
+ size 1320352
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 35376,
5
+ "index_split": "valid",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f2718b1e39405c4204e595ffc50244888e2db41a3bc28b8294d85af9170e0e
3
+ size 200128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa533a35db6b87804daeaf99bcfcb19a055f2a53ea42d1bb1c3dca126066720b
3
+ size 314472
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/46d01ceb7d2cf1ad9e20542382e986d1-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9288446077edc3a2cf85ee4f0cef902811cf283bb22e91eaa289b0427ca7db
3
+ size 157296
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 324816,
5
+ "index_split": "valid",
6
+ "random_seed": 1234,
7
+ "sequence_length": 1024,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb77b72fa1e026bfbc648e6df847c9919a1100ae35fd50ca62293a0769053d51
3
+ size 3320128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d80b241c69ca9e0ff16b845b1b1f6265591ff1164be0c7f2a6741ba2511b1dd
3
+ size 2609144
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/48d213b6ea09663ec4b8fe09aadea1d0-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bfcbccee470988632a4121c64f60cb34c67d710e629d1170516e037159f2de7
3
+ size 1304632
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/592590386f9491eebb2140b0b7a5a7aa-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 32160000,
5
+ "index_split": "train",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 643200,
5
+ "index_split": "train",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0348455e23ca090e51bf303077cf89f1c59054eb39bfd847009b2955c6da7a20
3
+ size 3280128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f31c6f7f94616a03e11b54dd04cbf5ed1b0da88dabe4d2f53130d8c96dd5e60
3
+ size 5155296
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/5fde86a30edbf3f3b847378216442630-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:634556f26bc65185bb848178f9d6444661384c0b2c9321b58dfcdb59460329fe
3
+ size 2577708
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 3216,
5
+ "index_split": "test",
6
+ "random_seed": 1234,
7
+ "sequence_length": 1024,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae60cf8fc9161a5ccff34c147058dbfd2fd1c3b44ddd68fcf42619465a8570e
3
+ size 40128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68751056019b52d36647f552bf0f10341b0299a5737d7e06f31599494e403dc
3
+ size 31568
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/676905a6d4ca63142a18409e64a50828-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b2dbdf4a1a9e6fbc7fce63ad51b5684cef1fafb381bfa21d33ade05456e1c4
3
+ size 15844
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 3216,
5
+ "index_split": "test",
6
+ "random_seed": 1234,
7
+ "sequence_length": 256,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae60cf8fc9161a5ccff34c147058dbfd2fd1c3b44ddd68fcf42619465a8570e
3
+ size 40128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d07bdb6e4934dfaae5110ca54e8bf1230288c6d0531524a1e0b9787d493522
3
+ size 125864
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6f81b6b1e641d082a358ab714ec3ef80-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9253864331017f64dbb28cf31ece07df6945b178c2fd17aa8193e18a80476797
3
+ size 62992
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 3216,
5
+ "index_split": "test",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae60cf8fc9161a5ccff34c147058dbfd2fd1c3b44ddd68fcf42619465a8570e
3
+ size 40128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bdbba1d7493cd4c8e9d879258b79824068b00f60667f12647328483747a15bb
3
+ size 63000
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/6fea953a1d3b9960c6ae25d0b4b50cff-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63d260926e2777d393dc245cf26ae95dc0f69e8bd34d8c3adb61703d5d10234
3
+ size 31560
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-description.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "class": "GPTDataset",
3
+ "path_prefix": "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document",
4
+ "num_samples": 51456,
5
+ "index_split": "valid",
6
+ "random_seed": 1234,
7
+ "sequence_length": 512,
8
+ "split": null,
9
+ "split_matrix": null
10
+ }
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-document_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:531827519c1334cec7467a1e027b2215d93cb2ff1dbbb171398c6a770b503ee9
3
+ size 280128
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-sample_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e5c1213e055d47372cce2197b396527f98f7e73419cc0621a75a1f2e4c9f892
3
+ size 440208
llm_tutorial/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document/cache/GPTDataset_indices/7dad590c16d66ac471a2be17aa829a7d-GPTDataset-shuffle_index.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f040b32caeb937b0b42461a0237b0c0cef30a6e9c01db4efca3f6769da429712
3
+ size 220164