SpiridonSunRotator committed on
Commit
dcd41a6
·
verified ·
1 Parent(s): 72c64b3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -49,7 +49,7 @@ The results were obtained using the following commands:
49
 
50
  `OpenLLM v1`
51
  ```bash
52
- MODEL=daslab-testing/DeepSeek-R1-GPTQ-4b-128g-act_order-mse_scale
53
  MODEL_ARGS="pretrained=$MODEL,dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True"
54
 
55
  lm_eval \
@@ -63,7 +63,7 @@ For reasoning evals we adopted the protocol from the [open-r1 repository](https:
63
 
64
  `Reasoning tasks`
65
  ```bash
66
- MODEL=daslab-testing/DeepSeek-R1-GPTQ-4b-128g-act_order-mse_scale
67
  MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=38768,gpu_memory_utilization=0.8,tensor_parallel_size=8,add_special_tokens=false,generation_parameters={\"max_new_tokens\":32768,\"temperature\":0.6,\"top_p\":0.95,\"seed\":7686}"
68
 
69
  export VLLM_WORKER_MULTIPROC_METHOD=spawn
 
49
 
50
  `OpenLLM v1`
51
  ```bash
52
+ MODEL=ISTA-DASLab/DeepSeek-R1-GPTQ-4b-128g-act_order-mse_scale
53
  MODEL_ARGS="pretrained=$MODEL,dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=8,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True"
54
 
55
  lm_eval \
 
63
 
64
  `Reasoning tasks`
65
  ```bash
66
+ MODEL=ISTA-DASLab/DeepSeek-R1-GPTQ-4b-128g-act_order-mse_scale
67
  MODEL_ARGS="pretrained=$MODEL,dtype=bfloat16,max_model_length=38768,gpu_memory_utilization=0.8,tensor_parallel_size=8,add_special_tokens=false,generation_parameters={\"max_new_tokens\":32768,\"temperature\":0.6,\"top_p\":0.95,\"seed\":7686}"
68
 
69
  export VLLM_WORKER_MULTIPROC_METHOD=spawn