Add files using upload-large-folder tool
Browse files- delta_net-1.3B-100B/.gitattributes +35 -0
- delta_net-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_drop.jsonl +0 -0
- delta_net-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_drop.jsonl +0 -0
- delta_net-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_swde.jsonl +0 -0
- delta_net-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_swde.jsonl +0 -0
- delta_net-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_triviaqa.jsonl +0 -0
- delta_net-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_triviaqa.jsonl +0 -0
- gla-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_drop.jsonl +0 -0
- gla-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_drop.jsonl +0 -0
- gla-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_swde.jsonl +0 -0
- gla-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_swde.jsonl +0 -0
- gla-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_triviaqa.jsonl +0 -0
- hgrn2-1.3B-100B/.hfd/last_download_command +1 -0
- hgrn2-1.3B-100B/.hfd/repo_metadata.json +1 -0
- hgrn2-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_drop.jsonl +0 -0
- hgrn2-1.3B-100B/based_squad/results.json +55 -0
- hgrn2-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_swde.jsonl +0 -0
- hgrn2-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_triviaqa.jsonl +0 -0
- hgrn2-1.3B-100B/based_triviaqa/results.json +55 -0
- mamba-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
- mamba-1.3B-100B/.hfd/last_download_command +1 -0
- mamba-1.3B-100B/.hfd/repo_metadata.json +1 -0
- retnet-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
- retnet-1.3B-100B/.hfd/last_download_command +1 -0
- retnet-1.3B-100B/.hfd/repo_metadata.json +1 -0
- retnet-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_drop.jsonl +0 -0
- retnet-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__retnet-1.3B-100B_based_drop.jsonl +0 -0
- retnet-1.3B-100B/based_drop/results.json +51 -0
- retnet-1.3B-100B/based_fda/results.json +51 -0
- retnet-1.3B-100B/based_nq_2048/results.json +51 -0
- retnet-1.3B-100B/based_squad/results.json +55 -0
- retnet-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_swde.jsonl +0 -0
- retnet-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__retnet-1.3B-100B_based_swde.jsonl +0 -0
- retnet-1.3B-100B/based_swde/results.json +51 -0
- retnet-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_triviaqa.jsonl +0 -0
- retnet-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__retnet-1.3B-100B_based_triviaqa.jsonl +0 -0
- retnet-1.3B-100B/based_triviaqa/results.json +55 -0
- transformer-1.3B-100B/.hfd/aria2c_urls.txt +0 -0
- transformer-1.3B-100B/.hfd/last_download_command +1 -0
- transformer-1.3B-100B/.hfd/repo_metadata.json +1 -0
- transformer-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__transformer-1.3B-100B_based_drop.jsonl +0 -0
- transformer-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__transformer-1.3B-100B_based_drop.jsonl +0 -0
- transformer-1.3B-100B/based_drop/results.json +51 -0
- transformer-1.3B-100B/based_fda/results.json +51 -0
- transformer-1.3B-100B/based_nq_2048/results.json +51 -0
- transformer-1.3B-100B/based_squad/results.json +55 -0
- transformer-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__transformer-1.3B-100B_based_swde.jsonl +0 -0
- transformer-1.3B-100B/based_swde/results.json +51 -0
- transformer-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__transformer-1.3B-100B_based_triviaqa.jsonl +0 -0
- transformer-1.3B-100B/tokenizer.json +0 -0
delta_net-1.3B-100B/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
delta_net-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
delta_net-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
delta_net-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
delta_net-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
delta_net-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__delta_net-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
delta_net-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__delta_net-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gla-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gla-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gla-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gla-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__gla-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gla-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__gla-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hgrn2-1.3B-100B/.hfd/last_download_command
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
REPO_ID=fla-hub/hgrn2-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
|
hgrn2-1.3B-100B/.hfd/repo_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_id":"661e7a0396752f96298054a6","id":"fla-hub/hgrn2-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"fla","tags":["fla","safetensors","hgrn2","text-generation","en","dataset:cerebras/SlimPajama-627B","license:mit","region:us"],"downloads":17,"likes":0,"modelId":"fla-hub/hgrn2-1.3B-100B","author":"fla-hub","sha":"2f413dd9b63591b9b177bbf940942ea7eb70abfe","lastModified":"2025-02-09T15:21:37.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["HGRN2ForCausalLM"],"model_type":"hgrn2","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"language":["en"],"tags":["text-generation","hgrn2"],"license":"mit","datasets":["cerebras/SlimPajama-627B"],"library_name":"fla"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-04-16T13:15:47.000Z","safetensors":{"parameters":{"BF16":1364396032},"total":1364396032},"usedStorage":5886756059}
|
hgrn2-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hgrn2-1.3B-100B/based_squad/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_squad": {
|
| 4 |
+
"exact,none": 3.459858918374202,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 13.328299517219987,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.32885455156197513,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_squad"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_squad": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": 0
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_squad": 0
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_squad": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
hgrn2-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hgrn2-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__hgrn2-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hgrn2-1.3B-100B/based_triviaqa/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_triviaqa": {
|
| 4 |
+
"exact,none": 1.3033175355450237,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 7.426522831126713,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.5550947867298578,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_triviaqa"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_triviaqa": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": "default"
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_triviaqa": "default"
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_triviaqa": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
mamba-1.3B-100B/.hfd/aria2c_urls.txt
ADDED
|
File without changes
|
mamba-1.3B-100B/.hfd/last_download_command
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
REPO_ID=fla-hub/mamba-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
|
mamba-1.3B-100B/.hfd/repo_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_id":"661a218fb76bfe174c7e8c10","id":"fla-hub/mamba-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"transformers","tags":["transformers","safetensors","mamba","text-generation","text-generation-inference","endpoints_compatible","region:us"],"downloads":17,"likes":0,"modelId":"fla-hub/mamba-1.3B-100B","author":"fla-hub","sha":"49d177eaa9fedd6ff74aab256a02140299df5e99","lastModified":"2024-08-31T11:18:56.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"config":{"architectures":["MambaForCausalLM"],"model_type":"mamba","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false}},"transformersInfo":{"auto_model":"AutoModelForCausalLM","pipeline_tag":"text-generation","processor":"AutoTokenizer"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-04-13T06:09:19.000Z","safetensors":{"parameters":{"F32":3145728,"BF16":1331595264},"total":1334740992},"usedStorage":2676320059}
|
retnet-1.3B-100B/.hfd/aria2c_urls.txt
ADDED
|
File without changes
|
retnet-1.3B-100B/.hfd/last_download_command
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
REPO_ID=fla-hub/retnet-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
|
retnet-1.3B-100B/.hfd/repo_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_id":"660b10cc3ef451aa2b974d25","id":"fla-hub/retnet-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"fla","tags":["fla","safetensors","retnet","text-generation","en","dataset:cerebras/SlimPajama-627B","license:mit","region:us"],"downloads":91,"likes":1,"modelId":"fla-hub/retnet-1.3B-100B","author":"fla-hub","sha":"7fddefc4d5e196a8d1f076bb7612d54321b3effe","lastModified":"2025-02-09T14:48:04.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["RetNetForCausalLM"],"model_type":"retnet","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"language":["en"],"tags":["text-generation","retnet"],"license":"mit","datasets":["cerebras/SlimPajama-627B"],"library_name":"fla"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-04-01T19:53:48.000Z","safetensors":{"parameters":{"BF16":1351727104},"total":1351727104},"usedStorage":8110939899}
|
retnet-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retnet-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__retnet-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retnet-1.3B-100B/based_drop/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_drop": {
|
| 4 |
+
"contains,none": 0.19789171058936272,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_drop"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_drop": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_drop": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_drop": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/retnet-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
retnet-1.3B-100B/based_fda/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_fda": {
|
| 4 |
+
"contains,none": 0.20072661217075385,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_fda"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_fda": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_fda": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_fda": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/retnet-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
retnet-1.3B-100B/based_nq_2048/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_nq_2048": {
|
| 4 |
+
"contains,none": 0.164079822616408,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_nq_2048"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_nq_2048": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_nq_2048": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_nq_2048": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/retnet-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
retnet-1.3B-100B/based_squad/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_squad": {
|
| 4 |
+
"exact,none": 5.206583809203897,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 15.394859838317938,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.33456499832045683,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_squad"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_squad": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": 0
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_squad": 0
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_squad": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/retnet-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
retnet-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retnet-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__retnet-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retnet-1.3B-100B/based_swde/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_swde": {
|
| 4 |
+
"contains,none": 0.26991565135895035,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_swde"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_swde": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_swde": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_swde": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/retnet-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
retnet-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__retnet-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retnet-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__retnet-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
retnet-1.3B-100B/based_triviaqa/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_triviaqa": {
|
| 4 |
+
"exact,none": 1.3033175355450237,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 7.466834785986363,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.5313981042654028,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_triviaqa"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_triviaqa": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": "default"
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_triviaqa": "default"
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_triviaqa": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/retnet-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
transformer-1.3B-100B/.hfd/aria2c_urls.txt
ADDED
|
File without changes
|
transformer-1.3B-100B/.hfd/last_download_command
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
REPO_ID=fla-hub/transformer-1.3B-100B TOOL=aria2c INCLUDE_PATTERNS= EXCLUDE_PATTERNS= DATASET=0 HF_USERNAME= HF_TOKEN= HF_TOKEN=https://huggingface.co REVISION=main
|
transformer-1.3B-100B/.hfd/repo_metadata.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"_id":"660ae767f4ab651901e9aec1","id":"fla-hub/transformer-1.3B-100B","private":false,"pipeline_tag":"text-generation","library_name":"fla","tags":["fla","safetensors","transformer","text-generation","transformer++","en","dataset:cerebras/SlimPajama-627B","license:mit","region:us"],"downloads":311,"likes":0,"modelId":"fla-hub/transformer-1.3B-100B","author":"fla-hub","sha":"d6f66f4181fa669e5863327815b44533e3a395e7","lastModified":"2025-02-09T14:45:40.000Z","gated":false,"disabled":false,"widgetData":[{"text":"My name is Julien and I like to"},{"text":"I like traveling by train because"},{"text":"Paris is an amazing place to visit,"},{"text":"Once upon a time,"}],"model-index":null,"config":{"architectures":["TransformerForCausalLM"],"model_type":"transformer","tokenizer_config":{"bos_token":"<s>","eos_token":"</s>","pad_token":null,"unk_token":"<unk>","use_default_system_prompt":false}},"cardData":{"language":["en"],"tags":["text-generation","transformer++"],"license":"mit","datasets":["cerebras/SlimPajama-627B"],"library_name":"fla"},"siblings":[{"rfilename":".gitattributes"},{"rfilename":"README.md"},{"rfilename":"config.json"},{"rfilename":"generation_config.json"},{"rfilename":"model.safetensors"},{"rfilename":"special_tokens_map.json"},{"rfilename":"tokenizer.json"},{"rfilename":"tokenizer.model"},{"rfilename":"tokenizer_config.json"}],"spaces":[],"createdAt":"2024-04-01T16:57:11.000Z","safetensors":{"parameters":{"BF16":1364297728},"total":1364297728},"usedStorage":5457730171}
|
transformer-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__download_model__transformer-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
transformer-1.3B-100B/based_drop/ checkpoint_name____mnt__jfzn__msj__transformer-1.3B-100B_based_drop.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
transformer-1.3B-100B/based_drop/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_drop": {
|
| 4 |
+
"contains,none": 0.2103497843794921,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_drop"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_drop": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_drop": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_drop": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/transformer-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
transformer-1.3B-100B/based_fda/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_fda": {
|
| 4 |
+
"contains,none": 0.5467756584922797,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_fda"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_fda": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_fda": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_fda": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/transformer-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
transformer-1.3B-100B/based_nq_2048/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_nq_2048": {
|
| 4 |
+
"contains,none": 0.25340513145391197,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_nq_2048"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_nq_2048": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": "default"
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_nq_2048": "default"
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_nq_2048": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/transformer-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
transformer-1.3B-100B/based_squad/results.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_squad": {
|
| 4 |
+
"exact,none": 6.180718844474303,
|
| 5 |
+
"exact_stderr,none": "N/A",
|
| 6 |
+
"f1,none": 18.101832538083794,
|
| 7 |
+
"f1_stderr,none": "N/A",
|
| 8 |
+
"contains,none": 0.4316425932146456,
|
| 9 |
+
"contains_stderr,none": "N/A",
|
| 10 |
+
"alias": "based_squad"
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"configs": {
|
| 14 |
+
"based_squad": {
|
| 15 |
+
"description": "",
|
| 16 |
+
"target_delimiter": " ",
|
| 17 |
+
"fewshot_delimiter": "\n\n",
|
| 18 |
+
"num_fewshot": 0,
|
| 19 |
+
"output_type": "generate_until",
|
| 20 |
+
"generation_kwargs": {
|
| 21 |
+
"until": [
|
| 22 |
+
"\n\n"
|
| 23 |
+
],
|
| 24 |
+
"do_sample": false
|
| 25 |
+
},
|
| 26 |
+
"repeats": 1,
|
| 27 |
+
"should_decontaminate": false,
|
| 28 |
+
"metadata": {
|
| 29 |
+
"version": 0
|
| 30 |
+
}
|
| 31 |
+
}
|
| 32 |
+
},
|
| 33 |
+
"versions": {
|
| 34 |
+
"based_squad": 0
|
| 35 |
+
},
|
| 36 |
+
"n-shot": {
|
| 37 |
+
"based_squad": 0
|
| 38 |
+
},
|
| 39 |
+
"config": {
|
| 40 |
+
"model": "lm_eval_model",
|
| 41 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/transformer-1.3B-100B",
|
| 42 |
+
"batch_size": "8",
|
| 43 |
+
"batch_sizes": [],
|
| 44 |
+
"device": "cuda:0",
|
| 45 |
+
"use_cache": null,
|
| 46 |
+
"limit": null,
|
| 47 |
+
"bootstrap_iters": 100000,
|
| 48 |
+
"gen_kwargs": null
|
| 49 |
+
},
|
| 50 |
+
"git_hash": null,
|
| 51 |
+
"context_length": 2000,
|
| 52 |
+
"answer_length": 48,
|
| 53 |
+
"cutting_context": true,
|
| 54 |
+
"decode_mode": "default"
|
| 55 |
+
}
|
transformer-1.3B-100B/based_swde/ checkpoint_name____mnt__jfzn__msj__download_model__transformer-1.3B-100B_based_swde.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
transformer-1.3B-100B/based_swde/results.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"results": {
|
| 3 |
+
"based_swde": {
|
| 4 |
+
"contains,none": 0.4395501405810684,
|
| 5 |
+
"contains_stderr,none": "N/A",
|
| 6 |
+
"alias": "based_swde"
|
| 7 |
+
}
|
| 8 |
+
},
|
| 9 |
+
"configs": {
|
| 10 |
+
"based_swde": {
|
| 11 |
+
"description": "",
|
| 12 |
+
"target_delimiter": " ",
|
| 13 |
+
"fewshot_delimiter": "\n\n",
|
| 14 |
+
"num_fewshot": 0,
|
| 15 |
+
"output_type": "generate_until",
|
| 16 |
+
"generation_kwargs": {
|
| 17 |
+
"until": [
|
| 18 |
+
"\n\n"
|
| 19 |
+
],
|
| 20 |
+
"do_sample": false
|
| 21 |
+
},
|
| 22 |
+
"repeats": 1,
|
| 23 |
+
"should_decontaminate": false,
|
| 24 |
+
"metadata": {
|
| 25 |
+
"version": 0
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"versions": {
|
| 30 |
+
"based_swde": 0
|
| 31 |
+
},
|
| 32 |
+
"n-shot": {
|
| 33 |
+
"based_swde": 0
|
| 34 |
+
},
|
| 35 |
+
"config": {
|
| 36 |
+
"model": "lm_eval_model",
|
| 37 |
+
"model_args": " checkpoint_name=/mnt/jfzn/msj/download_model/transformer-1.3B-100B",
|
| 38 |
+
"batch_size": "8",
|
| 39 |
+
"batch_sizes": [],
|
| 40 |
+
"device": "cuda:0",
|
| 41 |
+
"use_cache": null,
|
| 42 |
+
"limit": null,
|
| 43 |
+
"bootstrap_iters": 100000,
|
| 44 |
+
"gen_kwargs": null
|
| 45 |
+
},
|
| 46 |
+
"git_hash": null,
|
| 47 |
+
"context_length": 2000,
|
| 48 |
+
"answer_length": 48,
|
| 49 |
+
"cutting_context": true,
|
| 50 |
+
"decode_mode": "default"
|
| 51 |
+
}
|
transformer-1.3B-100B/based_triviaqa/ checkpoint_name____mnt__jfzn__msj__download_model__transformer-1.3B-100B_based_triviaqa.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
transformer-1.3B-100B/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|