|
|
# Registry of the language models known to this project.
#
# Each key is a model identifier string; each value is a settings dict that
# always carries the same six fields:
#
#   "path"              - identifier used to locate the model; in every entry
#                         below it is identical to the registry key.
#   "size"              - coarse size bucket: "small", "medium" or "large".
#   "trust_remote_code" - boolean flag.
#                         NOTE(review): presumably forwarded to the Hugging
#                         Face `from_pretrained` loaders - confirm at the
#                         call site.
#   "half"              - boolean flag.
#                         NOTE(review): presumably requests half-precision
#                         weights when loading - confirm at the call site.
#   "r_spe_tokens"      - small integer (0, 1 or 2 in the entries below).
#                         NOTE(review): meaning is not visible from this
#                         file; looks like a per-tokenizer count of special
#                         tokens to account for - verify against the consumer.
#   "use_chat_tmplt"    - boolean flag.
#                         NOTE(review): presumably selects whether prompts go
#                         through the tokenizer's chat template - confirm.
#
# Entry order is kept as originally written, so code that iterates the dict
# sees the models in the same sequence.
model_dict = {
    # ----------------------------------------------------------- size: small
    "HuggingFaceTB/SmolLM2-135M-Instruct": {
        "path": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "size": "small",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "HuggingFaceTB/SmolLM2-360M-Instruct": {
        "path": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "size": "small",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "HuggingFaceTB/SmolLM2-1.7B-Instruct": {
        "path": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
        "size": "small",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "google/gemma-2-2b-it": {
        "path": "google/gemma-2-2b-it",
        "size": "small",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    # The only entry with the chat template disabled and r_spe_tokens == 0.
    "state-spaces/mamba-2.8b-hf": {
        "path": "state-spaces/mamba-2.8b-hf",
        "size": "small",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 0,
        "use_chat_tmplt": False,
    },
    # The only entry with trust_remote_code enabled.
    "internlm/internlm2-chat-1_8b": {
        "path": "internlm/internlm2-chat-1_8b",
        "size": "small",
        "trust_remote_code": True,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },

    # ---------------------------------------------------------- size: medium
    "microsoft/Phi-4-mini-instruct": {
        "path": "microsoft/Phi-4-mini-instruct",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "mistralai/Mistral-7B-Instruct-v0.2": {
        "path": "mistralai/Mistral-7B-Instruct-v0.2",
        "size": "medium",
        "trust_remote_code": False,
        "half": True,
        "r_spe_tokens": 1,
        "use_chat_tmplt": True,
    },
    "tiiuae/falcon-mamba-7b-instruct": {
        "path": "tiiuae/falcon-mamba-7b-instruct",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "Qwen/Qwen2.5-7B-Instruct": {
        "path": "Qwen/Qwen2.5-7B-Instruct",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "CohereForAI/aya-expanse-8b": {
        "path": "CohereForAI/aya-expanse-8b",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 1,
        "use_chat_tmplt": True,
    },
    "google/gemma-2-9b-it": {
        "path": "google/gemma-2-9b-it",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "meta-llama/Meta-Llama-3-8B-Instruct": {
        "path": "meta-llama/Meta-Llama-3-8B-Instruct",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 1,
        "use_chat_tmplt": True,
    },
    "microsoft/phi-4": {
        "path": "microsoft/phi-4",
        "size": "medium",
        "trust_remote_code": False,
        "half": False,
        "r_spe_tokens": 1,
        "use_chat_tmplt": True,
    },

    # ----------------------------------------------------------- size: large
    "CohereForAI/aya-expanse-32b": {
        "path": "CohereForAI/aya-expanse-32b",
        "size": "large",
        "trust_remote_code": False,
        "half": True,
        "r_spe_tokens": 1,
        "use_chat_tmplt": True,
    },
    "Qwen/QwQ-32B": {
        "path": "Qwen/QwQ-32B",
        "size": "large",
        "trust_remote_code": False,
        "half": True,
        "r_spe_tokens": 2,
        "use_chat_tmplt": True,
    },
    "CohereForAI/c4ai-command-r-08-2024": {
        "path": "CohereForAI/c4ai-command-r-08-2024",
        "size": "large",
        "trust_remote_code": False,
        "half": True,
        "r_spe_tokens": 1,
        "use_chat_tmplt": True,
    },
}