geoore Claude Sonnet 4.6 committed on
Commit
a0d4fce
·
1 Parent(s): 43a2621

Add data, outputs, docs, and notebooks folders

Browse files

- Track large files (*.pt, *.jsonl, *.arrow, etc.) via git-lfs
- Add training data (raw datasets, processed splits, STEM data)
- Add model checkpoints and GRPO output
- Add docs/ and notebooks/ placeholders

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (48) hide show
  1. .gitattributes +7 -0
  2. .gitignore +0 -5
  3. data/json_library/code_patterns.json +1 -0
  4. data/json_library/conversation_history.json +24 -0
  5. data/json_library/important_facts.json +22 -0
  6. data/json_library/learned_skills.json +1 -0
  7. data/json_library/project_context.json +1 -0
  8. data/json_library/user_preferences.json +1 -0
  9. data/processed/grpo_prompts.jsonl +3 -0
  10. data/processed/stats.json +6 -0
  11. data/processed/train.jsonl +3 -0
  12. data/processed/train_large.jsonl +3 -0
  13. data/processed/val.jsonl +3 -0
  14. data/processed/val_large.jsonl +3 -0
  15. data/raw/codealpaca/data-00000-of-00001.arrow +3 -0
  16. data/raw/codealpaca/dataset_info.json +52 -0
  17. data/raw/codealpaca/state.json +13 -0
  18. data/raw/codealpaca_full/data-00000-of-00001.arrow +3 -0
  19. data/raw/codealpaca_full/dataset_info.json +52 -0
  20. data/raw/codealpaca_full/state.json +13 -0
  21. data/raw/gsm8k/data-00000-of-00001.arrow +3 -0
  22. data/raw/gsm8k/dataset_info.json +52 -0
  23. data/raw/gsm8k/state.json +13 -0
  24. data/raw/gsm8k_full/data-00000-of-00001.arrow +3 -0
  25. data/raw/gsm8k_full/dataset_info.json +52 -0
  26. data/raw/gsm8k_full/state.json +13 -0
  27. data/raw/mathinstruct_50k/data-00000-of-00001.arrow +3 -0
  28. data/raw/mathinstruct_50k/dataset_info.json +46 -0
  29. data/raw/mathinstruct_50k/state.json +13 -0
  30. data/raw/openorca_50k/data-00000-of-00001.arrow +3 -0
  31. data/raw/openorca_50k/dataset_info.json +59 -0
  32. data/raw/openorca_50k/state.json +13 -0
  33. data/raw/simple_math.json +1 -0
  34. data/raw/wikitext/data-00000-of-00001.arrow +3 -0
  35. data/raw/wikitext/dataset_info.json +58 -0
  36. data/raw/wikitext/state.json +13 -0
  37. data/stem/stem_train.jsonl +3 -0
  38. data/stem/stem_val.jsonl +3 -0
  39. docs/.gitkeep +0 -0
  40. notebooks/.gitkeep +0 -0
  41. outputs/checkpoints/epoch_1.pt +3 -0
  42. outputs/final_4bit/shorekeeper-4b.pt +3 -0
  43. outputs/grpo/shorekeeper-4b-grpo.pt +3 -0
  44. outputs/shorekeeper-4b-final.pt +3 -0
  45. outputs/shorekeeper_epoch_1.pt +3 -0
  46. outputs/shorekeeper_epoch_2.pt +3 -0
  47. outputs/shorekeeper_epoch_3.pt +3 -0
  48. outputs/shorekeeper_final.pt +3 -0
.gitattributes ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ *.pt filter=lfs diff=lfs merge=lfs -text
2
+ *.pth filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
5
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
6
+ *.arrow filter=lfs diff=lfs merge=lfs -text
7
+ *.parquet filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,12 +1,7 @@
1
  venv/
2
  .venv/
3
- outputs/
4
- data/
5
  __pycache__/
6
  *.py[cod]
7
- *.pth
8
- *.pt
9
- *.bin
10
  .env
11
  *.egg-info/
12
  dist/
 
1
  venv/
2
  .venv/
 
 
3
  __pycache__/
4
  *.py[cod]
 
 
 
5
  .env
6
  *.egg-info/
7
  dist/
data/json_library/code_patterns.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
data/json_library/conversation_history.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "b64a9728",
4
+ "timestamp": "2026-03-30T02:24:23.382877",
5
+ "content": {
6
+ "user": "Hello! What can you do?",
7
+ "assistant": "I remember our past conversations. How can I help you with: Hello! What can you do?"
8
+ },
9
+ "category": "conversation_history",
10
+ "access_count": 0,
11
+ "last_accessed": null
12
+ },
13
+ {
14
+ "id": "6519e6c2",
15
+ "timestamp": "2026-03-30T02:25:33.563521",
16
+ "content": {
17
+ "user": "exit",
18
+ "assistant": "I remember our past conversations. How can I help you with: exit"
19
+ },
20
+ "category": "conversation_history",
21
+ "access_count": 0,
22
+ "last_accessed": null
23
+ }
24
+ ]
data/json_library/important_facts.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "fb99f3c9",
4
+ "timestamp": "2026-03-30T02:23:06.121987",
5
+ "content": {
6
+ "fact": "Test fact: SHOREKEEPER is working"
7
+ },
8
+ "category": "important_facts",
9
+ "access_count": 0,
10
+ "last_accessed": null
11
+ },
12
+ {
13
+ "id": "0fa3a9e1",
14
+ "timestamp": "2026-03-30T02:23:44.130766",
15
+ "content": {
16
+ "fact": "Test fact: SHOREKEEPER is working"
17
+ },
18
+ "category": "important_facts",
19
+ "access_count": 0,
20
+ "last_accessed": null
21
+ }
22
+ ]
data/json_library/learned_skills.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
data/json_library/project_context.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
data/json_library/user_preferences.json ADDED
@@ -0,0 +1 @@
 
 
1
+ []
data/processed/grpo_prompts.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e113ebf3334858a25aa21f563456479c9e174e7ddbb48b69ec0dbc5beb150056
3
+ size 236237
data/processed/stats.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "total_examples": 1361,
3
+ "train_size": 1224,
4
+ "val_size": 137,
5
+ "grpo_prompts": 1000
6
+ }
data/processed/train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b994e1d48172708a533641d23d00d23333ac8b1a5f6a41f3ddb5fe77426ee7e
3
+ size 388679
data/processed/train_large.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c9aa6b985d4a68d200062cb3852c03e545c274f255e8465eba6ef3435aa871e
3
+ size 173758516
data/processed/val.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c693d015c7579cbb9fd97846dc8ada7ea04da641f0f26fb97d55a2600958d0b0
3
+ size 44082
data/processed/val_large.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0303b0d1260090208d9e3e32e150f07dde16a69c23be7705e4212c106416ebe9
3
+ size 19152642
data/raw/codealpaca/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33978d77e122e41e4dd9b9e0664eb228e0852151ac9746510c3ba64236bfdad2
3
+ size 153912
data/raw/codealpaca/dataset_info.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "code_alpaca_20k",
6
+ "dataset_size": 6072584,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/HuggingFaceH4/CodeAlpaca_20k@798c567f69c8f4b12fc191015e59ee34e9afe00d/data/train-00000-of-00001.parquet": {
10
+ "num_bytes": 3008277,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/HuggingFaceH4/CodeAlpaca_20k@798c567f69c8f4b12fc191015e59ee34e9afe00d/data/test-00000-of-00001.parquet": {
14
+ "num_bytes": 336110,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 3344387,
19
+ "features": {
20
+ "prompt": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "completion": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 9416971,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 5463335,
36
+ "num_examples": 18019,
37
+ "dataset_name": "code_alpaca_20k"
38
+ },
39
+ "test": {
40
+ "name": "test",
41
+ "num_bytes": 609249,
42
+ "num_examples": 2003,
43
+ "dataset_name": "code_alpaca_20k"
44
+ }
45
+ },
46
+ "version": {
47
+ "version_str": "0.0.0",
48
+ "major": 0,
49
+ "minor": 0,
50
+ "patch": 0
51
+ }
52
+ }
data/raw/codealpaca/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "03390ebff9b9115e",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train[:500]"
13
+ }
data/raw/codealpaca_full/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d67c57ea8c5b07b0676548920dfed2b8c8cd78fc9bf9df10e8f3950bb72acc
3
+ size 5469328
data/raw/codealpaca_full/dataset_info.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "code_alpaca_20k",
6
+ "dataset_size": 6072584,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/HuggingFaceH4/CodeAlpaca_20k@798c567f69c8f4b12fc191015e59ee34e9afe00d/data/train-00000-of-00001.parquet": {
10
+ "num_bytes": 3008277,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/HuggingFaceH4/CodeAlpaca_20k@798c567f69c8f4b12fc191015e59ee34e9afe00d/data/test-00000-of-00001.parquet": {
14
+ "num_bytes": 336110,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 3344387,
19
+ "features": {
20
+ "prompt": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "completion": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 9416971,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 5463335,
36
+ "num_examples": 18019,
37
+ "dataset_name": "code_alpaca_20k"
38
+ },
39
+ "test": {
40
+ "name": "test",
41
+ "num_bytes": 609249,
42
+ "num_examples": 2003,
43
+ "dataset_name": "code_alpaca_20k"
44
+ }
45
+ },
46
+ "version": {
47
+ "version_str": "0.0.0",
48
+ "major": 0,
49
+ "minor": 0,
50
+ "patch": 0
51
+ }
52
+ }
data/raw/codealpaca_full/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "9a3f5b2c63ed1e38",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
data/raw/gsm8k/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b770fa2bb318b721f8c7213f0eeb2598a344a1dccc026b0a5538b87d58c93e
3
+ size 527760
data/raw/gsm8k/dataset_info.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "main",
5
+ "dataset_name": "gsm8k",
6
+ "dataset_size": 4676934,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/gsm8k@740312add88f781978c0658806c59bc2815b9866/main/train-00000-of-00001.parquet": {
10
+ "num_bytes": 2306545,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/gsm8k@740312add88f781978c0658806c59bc2815b9866/main/test-00000-of-00001.parquet": {
14
+ "num_bytes": 419088,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 2725633,
19
+ "features": {
20
+ "question": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "answer": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 7402567,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 3963202,
36
+ "num_examples": 7473,
37
+ "dataset_name": "gsm8k"
38
+ },
39
+ "test": {
40
+ "name": "test",
41
+ "num_bytes": 713732,
42
+ "num_examples": 1319,
43
+ "dataset_name": "gsm8k"
44
+ }
45
+ },
46
+ "version": {
47
+ "version_str": "0.0.0",
48
+ "major": 0,
49
+ "minor": 0,
50
+ "patch": 0
51
+ }
52
+ }
data/raw/gsm8k/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "2286e380143bffe2",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train[:1000]"
13
+ }
data/raw/gsm8k_full/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6146bca074f4c6c5e07de7e96cc968adab3833dbdafeda21830f27558eea3d9b
3
+ size 3965528
data/raw/gsm8k_full/dataset_info.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "main",
5
+ "dataset_name": "gsm8k",
6
+ "dataset_size": 4676934,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/gsm8k@740312add88f781978c0658806c59bc2815b9866/main/train-00000-of-00001.parquet": {
10
+ "num_bytes": 2306545,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/gsm8k@740312add88f781978c0658806c59bc2815b9866/main/test-00000-of-00001.parquet": {
14
+ "num_bytes": 419088,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 2725633,
19
+ "features": {
20
+ "question": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "answer": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 7402567,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 3963202,
36
+ "num_examples": 7473,
37
+ "dataset_name": "gsm8k"
38
+ },
39
+ "test": {
40
+ "name": "test",
41
+ "num_bytes": 713732,
42
+ "num_examples": 1319,
43
+ "dataset_name": "gsm8k"
44
+ }
45
+ },
46
+ "version": {
47
+ "version_str": "0.0.0",
48
+ "major": 0,
49
+ "minor": 0,
50
+ "patch": 0
51
+ }
52
+ }
data/raw/gsm8k_full/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "097157d8187d2b97",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
data/raw/mathinstruct_50k/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7290bfbf206127ee2eaed048b4ff91d001f331124a733b13338ca92c99c6de88
3
+ size 36119416
data/raw/mathinstruct_50k/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "json",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "math_instruct",
6
+ "dataset_size": 188742872,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/TIGER-Lab/MathInstruct@b4fdc323a7be1379c9c7c0b67b1de72dfee2111a/MathInstruct.json": {
10
+ "num_bytes": 212488891,
11
+ "checksum": null
12
+ }
13
+ },
14
+ "download_size": 212488891,
15
+ "features": {
16
+ "source": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "output": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "instruction": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 401231763,
32
+ "splits": {
33
+ "train": {
34
+ "name": "train",
35
+ "num_bytes": 188742872,
36
+ "num_examples": 262039,
37
+ "dataset_name": "math_instruct"
38
+ }
39
+ },
40
+ "version": {
41
+ "version_str": "0.0.0",
42
+ "major": 0,
43
+ "minor": 0,
44
+ "patch": 0
45
+ }
46
+ }
data/raw/mathinstruct_50k/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "5f2d23d7ec624632",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train[:50000]"
13
+ }
data/raw/openorca_50k/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc6d5cd1f0bd0bceb6d0a973544c42c83382bee7556616ef758cbbdf69690865
3
+ size 90351984
data/raw/openorca_50k/dataset_info.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "default",
5
+ "dataset_name": "open_orca",
6
+ "dataset_size": 7221299707,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/Open-Orca/OpenOrca@e9c87b4abb2609913751f9b26553fdb9c061796c/1M-GPT4-Augmented.parquet": {
10
+ "num_bytes": 1008442855,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/Open-Orca/OpenOrca@e9c87b4abb2609913751f9b26553fdb9c061796c/3_5M-GPT3_5-Augmented.parquet": {
14
+ "num_bytes": 3090560834,
15
+ "checksum": null
16
+ }
17
+ },
18
+ "download_size": 4099003689,
19
+ "features": {
20
+ "id": {
21
+ "dtype": "string",
22
+ "_type": "Value"
23
+ },
24
+ "system_prompt": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ },
28
+ "question": {
29
+ "dtype": "string",
30
+ "_type": "Value"
31
+ },
32
+ "response": {
33
+ "dtype": "string",
34
+ "_type": "Value"
35
+ }
36
+ },
37
+ "homepage": "",
38
+ "license": "",
39
+ "size_in_bytes": 11320303396,
40
+ "splits": {
41
+ "train": {
42
+ "name": "train",
43
+ "num_bytes": 7221299707,
44
+ "num_examples": 4233923,
45
+ "shard_lengths": [
46
+ 994896,
47
+ 1947133,
48
+ 1291894
49
+ ],
50
+ "dataset_name": "open_orca"
51
+ }
52
+ },
53
+ "version": {
54
+ "version_str": "0.0.0",
55
+ "major": 0,
56
+ "minor": 0,
57
+ "patch": 0
58
+ }
59
+ }
data/raw/openorca_50k/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "d893d5f4d0bf5ad0",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train[:50000]"
13
+ }
data/raw/simple_math.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"question": "What is 1 + 1?", "answer": "2"}, {"question": "What is 1 + 2?", "answer": "3"}, {"question": "What is 1 + 3?", "answer": "4"}, {"question": "What is 1 + 4?", "answer": "5"}, {"question": "What is 1 + 5?", "answer": "6"}, {"question": "What is 1 + 6?", "answer": "7"}, {"question": "What is 1 + 7?", "answer": "8"}, {"question": "What is 1 + 8?", "answer": "9"}, {"question": "What is 1 + 9?", "answer": "10"}, {"question": "What is 1 + 10?", "answer": "11"}, {"question": "What is 1 + 11?", "answer": "12"}, {"question": "What is 1 + 12?", "answer": "13"}, {"question": "What is 1 + 13?", "answer": "14"}, {"question": "What is 1 + 14?", "answer": "15"}, {"question": "What is 1 + 15?", "answer": "16"}, {"question": "What is 1 + 16?", "answer": "17"}, {"question": "What is 1 + 17?", "answer": "18"}, {"question": "What is 1 + 18?", "answer": "19"}, {"question": "What is 1 + 19?", "answer": "20"}, {"question": "What is 2 + 1?", "answer": "3"}, {"question": "What is 2 + 2?", "answer": "4"}, {"question": "What is 2 + 3?", "answer": "5"}, {"question": "What is 2 + 4?", "answer": "6"}, {"question": "What is 2 + 5?", "answer": "7"}, {"question": "What is 2 + 6?", "answer": "8"}, {"question": "What is 2 + 7?", "answer": "9"}, {"question": "What is 2 + 8?", "answer": "10"}, {"question": "What is 2 + 9?", "answer": "11"}, {"question": "What is 2 + 10?", "answer": "12"}, {"question": "What is 2 + 11?", "answer": "13"}, {"question": "What is 2 + 12?", "answer": "14"}, {"question": "What is 2 + 13?", "answer": "15"}, {"question": "What is 2 + 14?", "answer": "16"}, {"question": "What is 2 + 15?", "answer": "17"}, {"question": "What is 2 + 16?", "answer": "18"}, {"question": "What is 2 + 17?", "answer": "19"}, {"question": "What is 2 + 18?", "answer": "20"}, {"question": "What is 2 + 19?", "answer": "21"}, {"question": "What is 3 + 1?", "answer": "4"}, {"question": "What is 3 + 2?", "answer": "5"}, {"question": "What is 3 + 3?", "answer": "6"}, {"question": "What is 3 + 
4?", "answer": "7"}, {"question": "What is 3 + 5?", "answer": "8"}, {"question": "What is 3 + 6?", "answer": "9"}, {"question": "What is 3 + 7?", "answer": "10"}, {"question": "What is 3 + 8?", "answer": "11"}, {"question": "What is 3 + 9?", "answer": "12"}, {"question": "What is 3 + 10?", "answer": "13"}, {"question": "What is 3 + 11?", "answer": "14"}, {"question": "What is 3 + 12?", "answer": "15"}, {"question": "What is 3 + 13?", "answer": "16"}, {"question": "What is 3 + 14?", "answer": "17"}, {"question": "What is 3 + 15?", "answer": "18"}, {"question": "What is 3 + 16?", "answer": "19"}, {"question": "What is 3 + 17?", "answer": "20"}, {"question": "What is 3 + 18?", "answer": "21"}, {"question": "What is 3 + 19?", "answer": "22"}, {"question": "What is 4 + 1?", "answer": "5"}, {"question": "What is 4 + 2?", "answer": "6"}, {"question": "What is 4 + 3?", "answer": "7"}, {"question": "What is 4 + 4?", "answer": "8"}, {"question": "What is 4 + 5?", "answer": "9"}, {"question": "What is 4 + 6?", "answer": "10"}, {"question": "What is 4 + 7?", "answer": "11"}, {"question": "What is 4 + 8?", "answer": "12"}, {"question": "What is 4 + 9?", "answer": "13"}, {"question": "What is 4 + 10?", "answer": "14"}, {"question": "What is 4 + 11?", "answer": "15"}, {"question": "What is 4 + 12?", "answer": "16"}, {"question": "What is 4 + 13?", "answer": "17"}, {"question": "What is 4 + 14?", "answer": "18"}, {"question": "What is 4 + 15?", "answer": "19"}, {"question": "What is 4 + 16?", "answer": "20"}, {"question": "What is 4 + 17?", "answer": "21"}, {"question": "What is 4 + 18?", "answer": "22"}, {"question": "What is 4 + 19?", "answer": "23"}, {"question": "What is 5 + 1?", "answer": "6"}, {"question": "What is 5 + 2?", "answer": "7"}, {"question": "What is 5 + 3?", "answer": "8"}, {"question": "What is 5 + 4?", "answer": "9"}, {"question": "What is 5 + 5?", "answer": "10"}, {"question": "What is 5 + 6?", "answer": "11"}, {"question": "What is 5 + 7?", "answer": "12"}, 
{"question": "What is 5 + 8?", "answer": "13"}, {"question": "What is 5 + 9?", "answer": "14"}, {"question": "What is 5 + 10?", "answer": "15"}, {"question": "What is 5 + 11?", "answer": "16"}, {"question": "What is 5 + 12?", "answer": "17"}, {"question": "What is 5 + 13?", "answer": "18"}, {"question": "What is 5 + 14?", "answer": "19"}, {"question": "What is 5 + 15?", "answer": "20"}, {"question": "What is 5 + 16?", "answer": "21"}, {"question": "What is 5 + 17?", "answer": "22"}, {"question": "What is 5 + 18?", "answer": "23"}, {"question": "What is 5 + 19?", "answer": "24"}, {"question": "What is 6 + 1?", "answer": "7"}, {"question": "What is 6 + 2?", "answer": "8"}, {"question": "What is 6 + 3?", "answer": "9"}, {"question": "What is 6 + 4?", "answer": "10"}, {"question": "What is 6 + 5?", "answer": "11"}, {"question": "What is 6 + 6?", "answer": "12"}, {"question": "What is 6 + 7?", "answer": "13"}, {"question": "What is 6 + 8?", "answer": "14"}, {"question": "What is 6 + 9?", "answer": "15"}, {"question": "What is 6 + 10?", "answer": "16"}, {"question": "What is 6 + 11?", "answer": "17"}, {"question": "What is 6 + 12?", "answer": "18"}, {"question": "What is 6 + 13?", "answer": "19"}, {"question": "What is 6 + 14?", "answer": "20"}, {"question": "What is 6 + 15?", "answer": "21"}, {"question": "What is 6 + 16?", "answer": "22"}, {"question": "What is 6 + 17?", "answer": "23"}, {"question": "What is 6 + 18?", "answer": "24"}, {"question": "What is 6 + 19?", "answer": "25"}, {"question": "What is 7 + 1?", "answer": "8"}, {"question": "What is 7 + 2?", "answer": "9"}, {"question": "What is 7 + 3?", "answer": "10"}, {"question": "What is 7 + 4?", "answer": "11"}, {"question": "What is 7 + 5?", "answer": "12"}, {"question": "What is 7 + 6?", "answer": "13"}, {"question": "What is 7 + 7?", "answer": "14"}, {"question": "What is 7 + 8?", "answer": "15"}, {"question": "What is 7 + 9?", "answer": "16"}, {"question": "What is 7 + 10?", "answer": "17"}, {"question": 
"What is 7 + 11?", "answer": "18"}, {"question": "What is 7 + 12?", "answer": "19"}, {"question": "What is 7 + 13?", "answer": "20"}, {"question": "What is 7 + 14?", "answer": "21"}, {"question": "What is 7 + 15?", "answer": "22"}, {"question": "What is 7 + 16?", "answer": "23"}, {"question": "What is 7 + 17?", "answer": "24"}, {"question": "What is 7 + 18?", "answer": "25"}, {"question": "What is 7 + 19?", "answer": "26"}, {"question": "What is 8 + 1?", "answer": "9"}, {"question": "What is 8 + 2?", "answer": "10"}, {"question": "What is 8 + 3?", "answer": "11"}, {"question": "What is 8 + 4?", "answer": "12"}, {"question": "What is 8 + 5?", "answer": "13"}, {"question": "What is 8 + 6?", "answer": "14"}, {"question": "What is 8 + 7?", "answer": "15"}, {"question": "What is 8 + 8?", "answer": "16"}, {"question": "What is 8 + 9?", "answer": "17"}, {"question": "What is 8 + 10?", "answer": "18"}, {"question": "What is 8 + 11?", "answer": "19"}, {"question": "What is 8 + 12?", "answer": "20"}, {"question": "What is 8 + 13?", "answer": "21"}, {"question": "What is 8 + 14?", "answer": "22"}, {"question": "What is 8 + 15?", "answer": "23"}, {"question": "What is 8 + 16?", "answer": "24"}, {"question": "What is 8 + 17?", "answer": "25"}, {"question": "What is 8 + 18?", "answer": "26"}, {"question": "What is 8 + 19?", "answer": "27"}, {"question": "What is 9 + 1?", "answer": "10"}, {"question": "What is 9 + 2?", "answer": "11"}, {"question": "What is 9 + 3?", "answer": "12"}, {"question": "What is 9 + 4?", "answer": "13"}, {"question": "What is 9 + 5?", "answer": "14"}, {"question": "What is 9 + 6?", "answer": "15"}, {"question": "What is 9 + 7?", "answer": "16"}, {"question": "What is 9 + 8?", "answer": "17"}, {"question": "What is 9 + 9?", "answer": "18"}, {"question": "What is 9 + 10?", "answer": "19"}, {"question": "What is 9 + 11?", "answer": "20"}, {"question": "What is 9 + 12?", "answer": "21"}, {"question": "What is 9 + 13?", "answer": "22"}, {"question": "What is 
9 + 14?", "answer": "23"}, {"question": "What is 9 + 15?", "answer": "24"}, {"question": "What is 9 + 16?", "answer": "25"}, {"question": "What is 9 + 17?", "answer": "26"}, {"question": "What is 9 + 18?", "answer": "27"}, {"question": "What is 9 + 19?", "answer": "28"}, {"question": "What is 10 + 1?", "answer": "11"}, {"question": "What is 10 + 2?", "answer": "12"}, {"question": "What is 10 + 3?", "answer": "13"}, {"question": "What is 10 + 4?", "answer": "14"}, {"question": "What is 10 + 5?", "answer": "15"}, {"question": "What is 10 + 6?", "answer": "16"}, {"question": "What is 10 + 7?", "answer": "17"}, {"question": "What is 10 + 8?", "answer": "18"}, {"question": "What is 10 + 9?", "answer": "19"}, {"question": "What is 10 + 10?", "answer": "20"}, {"question": "What is 10 + 11?", "answer": "21"}, {"question": "What is 10 + 12?", "answer": "22"}, {"question": "What is 10 + 13?", "answer": "23"}, {"question": "What is 10 + 14?", "answer": "24"}, {"question": "What is 10 + 15?", "answer": "25"}, {"question": "What is 10 + 16?", "answer": "26"}, {"question": "What is 10 + 17?", "answer": "27"}, {"question": "What is 10 + 18?", "answer": "28"}, {"question": "What is 10 + 19?", "answer": "29"}, {"question": "What is 11 + 1?", "answer": "12"}, {"question": "What is 11 + 2?", "answer": "13"}, {"question": "What is 11 + 3?", "answer": "14"}, {"question": "What is 11 + 4?", "answer": "15"}, {"question": "What is 11 + 5?", "answer": "16"}, {"question": "What is 11 + 6?", "answer": "17"}, {"question": "What is 11 + 7?", "answer": "18"}, {"question": "What is 11 + 8?", "answer": "19"}, {"question": "What is 11 + 9?", "answer": "20"}, {"question": "What is 11 + 10?", "answer": "21"}, {"question": "What is 11 + 11?", "answer": "22"}, {"question": "What is 11 + 12?", "answer": "23"}, {"question": "What is 11 + 13?", "answer": "24"}, {"question": "What is 11 + 14?", "answer": "25"}, {"question": "What is 11 + 15?", "answer": "26"}, {"question": "What is 11 + 16?", "answer": 
"27"}, {"question": "What is 11 + 17?", "answer": "28"}, {"question": "What is 11 + 18?", "answer": "29"}, {"question": "What is 11 + 19?", "answer": "30"}, {"question": "What is 12 + 1?", "answer": "13"}, {"question": "What is 12 + 2?", "answer": "14"}, {"question": "What is 12 + 3?", "answer": "15"}, {"question": "What is 12 + 4?", "answer": "16"}, {"question": "What is 12 + 5?", "answer": "17"}, {"question": "What is 12 + 6?", "answer": "18"}, {"question": "What is 12 + 7?", "answer": "19"}, {"question": "What is 12 + 8?", "answer": "20"}, {"question": "What is 12 + 9?", "answer": "21"}, {"question": "What is 12 + 10?", "answer": "22"}, {"question": "What is 12 + 11?", "answer": "23"}, {"question": "What is 12 + 12?", "answer": "24"}, {"question": "What is 12 + 13?", "answer": "25"}, {"question": "What is 12 + 14?", "answer": "26"}, {"question": "What is 12 + 15?", "answer": "27"}, {"question": "What is 12 + 16?", "answer": "28"}, {"question": "What is 12 + 17?", "answer": "29"}, {"question": "What is 12 + 18?", "answer": "30"}, {"question": "What is 12 + 19?", "answer": "31"}, {"question": "What is 13 + 1?", "answer": "14"}, {"question": "What is 13 + 2?", "answer": "15"}, {"question": "What is 13 + 3?", "answer": "16"}, {"question": "What is 13 + 4?", "answer": "17"}, {"question": "What is 13 + 5?", "answer": "18"}, {"question": "What is 13 + 6?", "answer": "19"}, {"question": "What is 13 + 7?", "answer": "20"}, {"question": "What is 13 + 8?", "answer": "21"}, {"question": "What is 13 + 9?", "answer": "22"}, {"question": "What is 13 + 10?", "answer": "23"}, {"question": "What is 13 + 11?", "answer": "24"}, {"question": "What is 13 + 12?", "answer": "25"}, {"question": "What is 13 + 13?", "answer": "26"}, {"question": "What is 13 + 14?", "answer": "27"}, {"question": "What is 13 + 15?", "answer": "28"}, {"question": "What is 13 + 16?", "answer": "29"}, {"question": "What is 13 + 17?", "answer": "30"}, {"question": "What is 13 + 18?", "answer": "31"}, 
{"question": "What is 13 + 19?", "answer": "32"}, {"question": "What is 14 + 1?", "answer": "15"}, {"question": "What is 14 + 2?", "answer": "16"}, {"question": "What is 14 + 3?", "answer": "17"}, {"question": "What is 14 + 4?", "answer": "18"}, {"question": "What is 14 + 5?", "answer": "19"}, {"question": "What is 14 + 6?", "answer": "20"}, {"question": "What is 14 + 7?", "answer": "21"}, {"question": "What is 14 + 8?", "answer": "22"}, {"question": "What is 14 + 9?", "answer": "23"}, {"question": "What is 14 + 10?", "answer": "24"}, {"question": "What is 14 + 11?", "answer": "25"}, {"question": "What is 14 + 12?", "answer": "26"}, {"question": "What is 14 + 13?", "answer": "27"}, {"question": "What is 14 + 14?", "answer": "28"}, {"question": "What is 14 + 15?", "answer": "29"}, {"question": "What is 14 + 16?", "answer": "30"}, {"question": "What is 14 + 17?", "answer": "31"}, {"question": "What is 14 + 18?", "answer": "32"}, {"question": "What is 14 + 19?", "answer": "33"}, {"question": "What is 15 + 1?", "answer": "16"}, {"question": "What is 15 + 2?", "answer": "17"}, {"question": "What is 15 + 3?", "answer": "18"}, {"question": "What is 15 + 4?", "answer": "19"}, {"question": "What is 15 + 5?", "answer": "20"}, {"question": "What is 15 + 6?", "answer": "21"}, {"question": "What is 15 + 7?", "answer": "22"}, {"question": "What is 15 + 8?", "answer": "23"}, {"question": "What is 15 + 9?", "answer": "24"}, {"question": "What is 15 + 10?", "answer": "25"}, {"question": "What is 15 + 11?", "answer": "26"}, {"question": "What is 15 + 12?", "answer": "27"}, {"question": "What is 15 + 13?", "answer": "28"}, {"question": "What is 15 + 14?", "answer": "29"}, {"question": "What is 15 + 15?", "answer": "30"}, {"question": "What is 15 + 16?", "answer": "31"}, {"question": "What is 15 + 17?", "answer": "32"}, {"question": "What is 15 + 18?", "answer": "33"}, {"question": "What is 15 + 19?", "answer": "34"}, {"question": "What is 16 + 1?", "answer": "17"}, {"question": "What 
is 16 + 2?", "answer": "18"}, {"question": "What is 16 + 3?", "answer": "19"}, {"question": "What is 16 + 4?", "answer": "20"}, {"question": "What is 16 + 5?", "answer": "21"}, {"question": "What is 16 + 6?", "answer": "22"}, {"question": "What is 16 + 7?", "answer": "23"}, {"question": "What is 16 + 8?", "answer": "24"}, {"question": "What is 16 + 9?", "answer": "25"}, {"question": "What is 16 + 10?", "answer": "26"}, {"question": "What is 16 + 11?", "answer": "27"}, {"question": "What is 16 + 12?", "answer": "28"}, {"question": "What is 16 + 13?", "answer": "29"}, {"question": "What is 16 + 14?", "answer": "30"}, {"question": "What is 16 + 15?", "answer": "31"}, {"question": "What is 16 + 16?", "answer": "32"}, {"question": "What is 16 + 17?", "answer": "33"}, {"question": "What is 16 + 18?", "answer": "34"}, {"question": "What is 16 + 19?", "answer": "35"}, {"question": "What is 17 + 1?", "answer": "18"}, {"question": "What is 17 + 2?", "answer": "19"}, {"question": "What is 17 + 3?", "answer": "20"}, {"question": "What is 17 + 4?", "answer": "21"}, {"question": "What is 17 + 5?", "answer": "22"}, {"question": "What is 17 + 6?", "answer": "23"}, {"question": "What is 17 + 7?", "answer": "24"}, {"question": "What is 17 + 8?", "answer": "25"}, {"question": "What is 17 + 9?", "answer": "26"}, {"question": "What is 17 + 10?", "answer": "27"}, {"question": "What is 17 + 11?", "answer": "28"}, {"question": "What is 17 + 12?", "answer": "29"}, {"question": "What is 17 + 13?", "answer": "30"}, {"question": "What is 17 + 14?", "answer": "31"}, {"question": "What is 17 + 15?", "answer": "32"}, {"question": "What is 17 + 16?", "answer": "33"}, {"question": "What is 17 + 17?", "answer": "34"}, {"question": "What is 17 + 18?", "answer": "35"}, {"question": "What is 17 + 19?", "answer": "36"}, {"question": "What is 18 + 1?", "answer": "19"}, {"question": "What is 18 + 2?", "answer": "20"}, {"question": "What is 18 + 3?", "answer": "21"}, {"question": "What is 18 + 4?", 
"answer": "22"}, {"question": "What is 18 + 5?", "answer": "23"}, {"question": "What is 18 + 6?", "answer": "24"}, {"question": "What is 18 + 7?", "answer": "25"}, {"question": "What is 18 + 8?", "answer": "26"}, {"question": "What is 18 + 9?", "answer": "27"}, {"question": "What is 18 + 10?", "answer": "28"}, {"question": "What is 18 + 11?", "answer": "29"}, {"question": "What is 18 + 12?", "answer": "30"}, {"question": "What is 18 + 13?", "answer": "31"}, {"question": "What is 18 + 14?", "answer": "32"}, {"question": "What is 18 + 15?", "answer": "33"}, {"question": "What is 18 + 16?", "answer": "34"}, {"question": "What is 18 + 17?", "answer": "35"}, {"question": "What is 18 + 18?", "answer": "36"}, {"question": "What is 18 + 19?", "answer": "37"}, {"question": "What is 19 + 1?", "answer": "20"}, {"question": "What is 19 + 2?", "answer": "21"}, {"question": "What is 19 + 3?", "answer": "22"}, {"question": "What is 19 + 4?", "answer": "23"}, {"question": "What is 19 + 5?", "answer": "24"}, {"question": "What is 19 + 6?", "answer": "25"}, {"question": "What is 19 + 7?", "answer": "26"}, {"question": "What is 19 + 8?", "answer": "27"}, {"question": "What is 19 + 9?", "answer": "28"}, {"question": "What is 19 + 10?", "answer": "29"}, {"question": "What is 19 + 11?", "answer": "30"}, {"question": "What is 19 + 12?", "answer": "31"}, {"question": "What is 19 + 13?", "answer": "32"}, {"question": "What is 19 + 14?", "answer": "33"}, {"question": "What is 19 + 15?", "answer": "34"}, {"question": "What is 19 + 16?", "answer": "35"}, {"question": "What is 19 + 17?", "answer": "36"}, {"question": "What is 19 + 18?", "answer": "37"}, {"question": "What is 19 + 19?", "answer": "38"}]
data/raw/wikitext/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57947bc7b58df4b19662c0609cc30651bc84328dab5fd588860b752072911789
3
+ size 11068328
data/raw/wikitext/dataset_info.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "builder_name": "parquet",
3
+ "citation": "",
4
+ "config_name": "wikitext-2-raw-v1",
5
+ "dataset_name": "wikitext",
6
+ "dataset_size": 13526093,
7
+ "description": "",
8
+ "download_checksums": {
9
+ "hf://datasets/wikitext@b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-2-raw-v1/test-00000-of-00001.parquet": {
10
+ "num_bytes": 732610,
11
+ "checksum": null
12
+ },
13
+ "hf://datasets/wikitext@b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-2-raw-v1/train-00000-of-00001.parquet": {
14
+ "num_bytes": 6357543,
15
+ "checksum": null
16
+ },
17
+ "hf://datasets/wikitext@b08601e04326c79dfdd32d625aee71d232d685c3/wikitext-2-raw-v1/validation-00000-of-00001.parquet": {
18
+ "num_bytes": 657209,
19
+ "checksum": null
20
+ }
21
+ },
22
+ "download_size": 7747362,
23
+ "features": {
24
+ "text": {
25
+ "dtype": "string",
26
+ "_type": "Value"
27
+ }
28
+ },
29
+ "homepage": "",
30
+ "license": "",
31
+ "size_in_bytes": 21273455,
32
+ "splits": {
33
+ "test": {
34
+ "name": "test",
35
+ "num_bytes": 1305088,
36
+ "num_examples": 4358,
37
+ "dataset_name": "wikitext"
38
+ },
39
+ "train": {
40
+ "name": "train",
41
+ "num_bytes": 11061717,
42
+ "num_examples": 36718,
43
+ "dataset_name": "wikitext"
44
+ },
45
+ "validation": {
46
+ "name": "validation",
47
+ "num_bytes": 1159288,
48
+ "num_examples": 3760,
49
+ "dataset_name": "wikitext"
50
+ }
51
+ },
52
+ "version": {
53
+ "version_str": "0.0.0",
54
+ "major": 0,
55
+ "minor": 0,
56
+ "patch": 0
57
+ }
58
+ }
data/raw/wikitext/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "7c4dea6941cc4a0a",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": "train"
13
+ }
data/stem/stem_train.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9273a08acbc7c35740813694f1f2705703f33beb09be8aedb3faf7384adfc340
3
+ size 646826787
data/stem/stem_val.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3597f1551001dc80acdf2fb156d451b98c80055faecbd95273ddd4c8fc25930
3
+ size 39374834
docs/.gitkeep ADDED
File without changes
notebooks/.gitkeep ADDED
File without changes
outputs/checkpoints/epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:927816b86d7b8452b9da6c693e105a6ded8199376996ff336c388c3351ed3a38
3
+ size 4572887957
outputs/final_4bit/shorekeeper-4b.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3264cb52109436ec263792bdacf4b987bfad24977c4afd55668c39e4c885e926
3
+ size 735146232
outputs/grpo/shorekeeper-4b-grpo.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209e4e114f3b53b3537a076b6f90a573ea0ced763a6da19e9fe12010be4a44b9
3
+ size 1524313515
outputs/shorekeeper-4b-final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04471d1cab1d4fd82f30cfb2932acb936f557562c6e9ea2c14f3c971f16cf90
3
+ size 1524313585
outputs/shorekeeper_epoch_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bb6cc9d1795522a29a51e1a2d3d4cc76bdf86bca40a2fb9843457d4854c1078
3
+ size 1524313515
outputs/shorekeeper_epoch_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a42ee999d895fa0f3801fc71a4caaa76581f468b654bc8bc650fee05c73c165
3
+ size 1524313515
outputs/shorekeeper_epoch_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e86ed5d4983b66bc6d829848cac2ae7340f45761d6490df63fed488736bee8d
3
+ size 1524313515
outputs/shorekeeper_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abc6b966fed96baaec91e4fe51b01fc0f46a5777f4e530068248d147d4a9c66
3
+ size 1524313375