ibm-granite
/

granite-7b-instruct-accelerator

Model card | Files and versions

JRosenkranz committed on May 20, 2024

Commit

0dae012

·

verified ·

1 Parent(s): 1575adf

Update README.md

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -64,7 +64,7 @@ docker run --rm \
     -e TRANSFORMERS_CACHE=/models \
     $TGIS_IMAGE \
     text-generation-server download-weights \
-    ibm/granite-7b-lab-accelerator \
     --token $HF_HUB_TOKEN
 # note: if the weights were downloaded separately (not with the above commands), please place them in the HF_HUB_CACHE directory and refer to them with /models/<model_name>
@@ -75,7 +75,7 @@ docker run -d --rm --gpus all \
     -e HF_HUB_CACHE=/models \
     -e TRANSFORMERS_CACHE=/models \
     -e MODEL_NAME=instructlab/granite-7b-lab \
-    -e SPECULATOR_NAME=ibm/granite-7b-lab-accelerator \
     -e FLASH_ATTENTION=true \
     -e PAGED_ATTENTION=true \
     -e DTYPE=float16 \
@@ -124,7 +124,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
     --model_path=$MODEL_PATH \
     --model_source=hf \
     --tokenizer=$MODEL_PATH \
-    --speculator_path=ibm/granite-7b-lab-accelerator \
     --speculator_source=hf \
     --speculator_variant=1_4b \
     --top_k_tokens_per_head=4,3,2,2,2 \
@@ -141,7 +141,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
     --model_path=$MODEL_PATH \
     --model_source=hf \
     --tokenizer=$MODEL_PATH \
-    --speculator_path=ibm/granite-7b-lab-accelerator \
     --speculator_source=hf \
     --speculator_variant=1_4b \
     --top_k_tokens_per_head=4,3,2,2,2 \
@@ -157,7 +157,7 @@ python fms-extras/scripts/paged_speculative_inference.py \
     --model_path=$MODEL_PATH \
     --model_source=hf \
     --tokenizer=$MODEL_PATH \
-    --speculator_path=ibm/granite-7b-lab-accelerator \
     --speculator_source=hf \
     --speculator_variant=1_4b \
     --top_k_tokens_per_head=4,3,2,2,2 \

     -e TRANSFORMERS_CACHE=/models \
     $TGIS_IMAGE \
     text-generation-server download-weights \
+    ibm-granite/granite-7b-lab-accelerator \
     --token $HF_HUB_TOKEN
 # note: if the weights were downloaded separately (not with the above commands), please place them in the HF_HUB_CACHE directory and refer to them with /models/<model_name>
     -e HF_HUB_CACHE=/models \
     -e TRANSFORMERS_CACHE=/models \
     -e MODEL_NAME=instructlab/granite-7b-lab \
+    -e SPECULATOR_NAME=ibm-granite/granite-7b-lab-accelerator \
     -e FLASH_ATTENTION=true \
     -e PAGED_ATTENTION=true \
     -e DTYPE=float16 \
     --model_path=$MODEL_PATH \
     --model_source=hf \
     --tokenizer=$MODEL_PATH \
+    --speculator_path=ibm-granite/granite-7b-lab-accelerator \
     --speculator_source=hf \
     --speculator_variant=1_4b \
     --top_k_tokens_per_head=4,3,2,2,2 \
     --model_path=$MODEL_PATH \
     --model_source=hf \
     --tokenizer=$MODEL_PATH \
+    --speculator_path=ibm-granite/granite-7b-lab-accelerator \
     --speculator_source=hf \
     --speculator_variant=1_4b \
     --top_k_tokens_per_head=4,3,2,2,2 \
     --model_path=$MODEL_PATH \
     --model_source=hf \
     --tokenizer=$MODEL_PATH \
+    --speculator_path=ibm-granite/granite-7b-lab-accelerator \
     --speculator_source=hf \
     --speculator_variant=1_4b \
     --top_k_tokens_per_head=4,3,2,2,2 \