JRosenkranz committed on
Commit
0dae012
·
verified ·
1 Parent(s): 1575adf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -64,7 +64,7 @@ docker run --rm \
64
  -e TRANSFORMERS_CACHE=/models \
65
  $TGIS_IMAGE \
66
  text-generation-server download-weights \
67
- ibm/granite-7b-lab-accelerator \
68
  --token $HF_HUB_TOKEN
69
 
70
  # note: if the weights were downloaded separately (not with the above commands), please place them in the HF_HUB_CACHE directory and refer to them with /models/<model_name>
@@ -75,7 +75,7 @@ docker run -d --rm --gpus all \
75
  -e HF_HUB_CACHE=/models \
76
  -e TRANSFORMERS_CACHE=/models \
77
  -e MODEL_NAME=instructlab/granite-7b-lab \
78
- -e SPECULATOR_NAME=ibm/granite-7b-lab-accelerator \
79
  -e FLASH_ATTENTION=true \
80
  -e PAGED_ATTENTION=true \
81
  -e DTYPE=float16 \
@@ -124,7 +124,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
124
  --model_path=$MODEL_PATH \
125
  --model_source=hf \
126
  --tokenizer=$MODEL_PATH \
127
- --speculator_path=ibm/granite-7b-lab-accelerator \
128
  --speculator_source=hf \
129
  --speculator_variant=1_4b \
130
  --top_k_tokens_per_head=4,3,2,2,2 \
@@ -141,7 +141,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
141
  --model_path=$MODEL_PATH \
142
  --model_source=hf \
143
  --tokenizer=$MODEL_PATH \
144
- --speculator_path=ibm/granite-7b-lab-accelerator \
145
  --speculator_source=hf \
146
  --speculator_variant=1_4b \
147
  --top_k_tokens_per_head=4,3,2,2,2 \
@@ -157,7 +157,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
157
  --model_path=$MODEL_PATH \
158
  --model_source=hf \
159
  --tokenizer=$MODEL_PATH \
160
- --speculator_path=ibm/granite-7b-lab-accelerator \
161
  --speculator_source=hf \
162
  --speculator_variant=1_4b \
163
  --top_k_tokens_per_head=4,3,2,2,2 \
 
64
  -e TRANSFORMERS_CACHE=/models \
65
  $TGIS_IMAGE \
66
  text-generation-server download-weights \
67
+ ibm-granite/granite-7b-lab-accelerator \
68
  --token $HF_HUB_TOKEN
69
 
70
  # note: if the weights were downloaded separately (not with the above commands), please place them in the HF_HUB_CACHE directory and refer to them with /models/<model_name>
 
75
  -e HF_HUB_CACHE=/models \
76
  -e TRANSFORMERS_CACHE=/models \
77
  -e MODEL_NAME=instructlab/granite-7b-lab \
78
+ -e SPECULATOR_NAME=ibm-granite/granite-7b-lab-accelerator \
79
  -e FLASH_ATTENTION=true \
80
  -e PAGED_ATTENTION=true \
81
  -e DTYPE=float16 \
 
124
  --model_path=$MODEL_PATH \
125
  --model_source=hf \
126
  --tokenizer=$MODEL_PATH \
127
+ --speculator_path=ibm-granite/granite-7b-lab-accelerator \
128
  --speculator_source=hf \
129
  --speculator_variant=1_4b \
130
  --top_k_tokens_per_head=4,3,2,2,2 \
 
141
  --model_path=$MODEL_PATH \
142
  --model_source=hf \
143
  --tokenizer=$MODEL_PATH \
144
+ --speculator_path=ibm-granite/granite-7b-lab-accelerator \
145
  --speculator_source=hf \
146
  --speculator_variant=1_4b \
147
  --top_k_tokens_per_head=4,3,2,2,2 \
 
157
  --model_path=$MODEL_PATH \
158
  --model_source=hf \
159
  --tokenizer=$MODEL_PATH \
160
+ --speculator_path=ibm-granite/granite-7b-lab-accelerator \
161
  --speculator_source=hf \
162
  --speculator_variant=1_4b \
163
  --top_k_tokens_per_head=4,3,2,2,2 \