# Student0809's picture
# Add files using upload-large-folder tool
# cb2428f verified
# Copyright (c) Alibaba, Inc. and its affiliates.
# Classification criteria for model_type: same model architecture, tokenizer (get function), template.
from itertools import chain
from typing import List
class LLMModelType:
    """String constants naming the LLM model types.

    Every attribute is a plain ``str`` whose value is identical to the
    attribute name.  Per the module header, entries are split by model
    architecture, tokenizer (get function) and template.

    Definition order is significant: ``ModelType.get_model_name_list`` walks
    this class's ``__dict__`` and returns the values in that order.
    """

    qwen = 'qwen'
    qwen2 = 'qwen2'
    qwen2_5 = 'qwen2_5'
    qwen2_5_math = 'qwen2_5_math'
    qwen2_moe = 'qwen2_moe'
    qwq_preview = 'qwq_preview'
    qwq = 'qwq'
    qwen3 = 'qwen3'
    qwen3_moe = 'qwen3_moe'

    qwen2_gte = 'qwen2_gte'

    codefuse_qwen = 'codefuse_qwen'
    modelscope_agent = 'modelscope_agent'
    marco_o1 = 'marco_o1'

    llama = 'llama'
    llama3 = 'llama3'
    llama3_1 = 'llama3_1'
    llama3_2 = 'llama3_2'
    reflection = 'reflection'
    megrez = 'megrez'
    yi = 'yi'
    yi_coder = 'yi_coder'
    sus = 'sus'

    codefuse_codellama = 'codefuse_codellama'
    mengzi3 = 'mengzi3'
    ziya = 'ziya'
    numina = 'numina'
    atom = 'atom'

    chatglm2 = 'chatglm2'
    chatglm3 = 'chatglm3'
    glm4 = 'glm4'
    glm4_0414 = 'glm4_0414'
    glm4_z1_rumination = 'glm4_z1_rumination'

    glm_edge = 'glm_edge'
    codefuse_codegeex2 = 'codefuse_codegeex2'
    codegeex4 = 'codegeex4'
    longwriter_llama3_1 = 'longwriter_llama3_1'

    internlm = 'internlm'
    internlm2 = 'internlm2'
    internlm3 = 'internlm3'

    deepseek = 'deepseek'
    deepseek_moe = 'deepseek_moe'
    deepseek_v2 = 'deepseek_v2'
    deepseek_v2_5 = 'deepseek_v2_5'
    deepseek_r1 = 'deepseek_r1'
    deepseek_r1_distill = 'deepseek_r1_distill'

    openbuddy_llama = 'openbuddy_llama'
    openbuddy_llama3 = 'openbuddy_llama3'
    openbuddy_mistral = 'openbuddy_mistral'
    openbuddy_mixtral = 'openbuddy_mixtral'

    baichuan = 'baichuan'
    baichuan2 = 'baichuan2'
    baichuan_m1 = 'baichuan_m1'

    minicpm = 'minicpm'
    minicpm_chatml = 'minicpm_chatml'
    minicpm3 = 'minicpm3'
    minicpm_moe = 'minicpm_moe'

    telechat = 'telechat'
    telechat2 = 'telechat2'

    mistral = 'mistral'
    zephyr = 'zephyr'
    mixtral = 'mixtral'
    mistral_nemo = 'mistral_nemo'
    mistral_2501 = 'mistral_2501'
    wizardlm2 = 'wizardlm2'
    wizardlm2_moe = 'wizardlm2_moe'

    phi2 = 'phi2'
    phi3_small = 'phi3_small'
    phi3 = 'phi3'
    phi3_moe = 'phi3_moe'
    phi4 = 'phi4'

    minimax = 'minimax'

    gemma = 'gemma'
    gemma2 = 'gemma2'
    gemma3_text = 'gemma3_text'

    skywork = 'skywork'
    skywork_o1 = 'skywork_o1'

    ling = 'ling'
    yuan2 = 'yuan2'
    orion = 'orion'
    xverse = 'xverse'
    xverse_moe = 'xverse_moe'
    seggpt = 'seggpt'
    bluelm = 'bluelm'
    c4ai = 'c4ai'
    dbrx = 'dbrx'
    grok = 'grok'
    mamba = 'mamba'
    polylm = 'polylm'
    aya = 'aya'
    moonlight = 'moonlight'
    mimo = 'mimo'
class BertModelType:
    """String constants naming the BERT-category model types.

    Each attribute's value is identical to its name; definition order is the
    order in which the values are later enumerated from the class ``__dict__``.
    """

    modern_bert = 'modern_bert'
    modern_bert_gte = 'modern_bert_gte'
    bert = 'bert'
class RMModelType:
    """String constants naming the RM-category model types.

    NOTE(review): "RM" presumably stands for reward model, judging by the
    ``*_reward`` / ``*_prm`` names -- confirm against the registry code.

    Each attribute's value is identical to its name; definition order is the
    order in which the values are later enumerated from the class ``__dict__``.
    """

    internlm2_reward = 'internlm2_reward'
    qwen2_reward = 'qwen2_reward'
    qwen2_5_prm = 'qwen2_5_prm'
    qwen2_5_math_reward = 'qwen2_5_math_reward'
    llama3_2_reward = 'llama3_2_reward'
    gemma_reward = 'gemma_reward'
class MLLMModelType:
    """String constants naming the MLLM-category model types.

    NOTE(review): "MLLM" presumably means multimodal LLM (vision / audio /
    omni names below) -- confirm against the registry code.

    Every attribute is a plain ``str`` whose value is identical to the
    attribute name.  Definition order is significant because the values are
    later enumerated from the class ``__dict__`` in that order.
    """

    qwen_vl = 'qwen_vl'
    qwen_audio = 'qwen_audio'
    qwen2_vl = 'qwen2_vl'
    qwen2_5_vl = 'qwen2_5_vl'
    qwen2_5_omni = 'qwen2_5_omni'
    qwen2_audio = 'qwen2_audio'
    qvq = 'qvq'
    qwen2_gme = 'qwen2_gme'
    ovis1_6 = 'ovis1_6'
    ovis1_6_llama3 = 'ovis1_6_llama3'
    ovis2 = 'ovis2'

    glm4v = 'glm4v'
    glm_edge_v = 'glm_edge_v'
    cogvlm = 'cogvlm'
    cogagent_vqa = 'cogagent_vqa'
    cogagent_chat = 'cogagent_chat'
    cogvlm2 = 'cogvlm2'
    cogvlm2_video = 'cogvlm2_video'

    internvl = 'internvl'
    internvl_phi3 = 'internvl_phi3'
    internvl2 = 'internvl2'
    internvl2_phi3 = 'internvl2_phi3'
    internvl2_5 = 'internvl2_5'
    internvl3 = 'internvl3'
    xcomposer2 = 'xcomposer2'
    xcomposer2_4khd = 'xcomposer2_4khd'
    xcomposer2_5 = 'xcomposer2_5'
    xcomposer2_5_ol_audio = 'xcomposer2_5_ol_audio'

    llama3_2_vision = 'llama3_2_vision'
    llama4 = 'llama4'
    llama3_1_omni = 'llama3_1_omni'

    llava1_5_hf = 'llava1_5_hf'
    llava1_6_mistral_hf = 'llava1_6_mistral_hf'
    llava1_6_vicuna_hf = 'llava1_6_vicuna_hf'
    llava1_6_yi_hf = 'llava1_6_yi_hf'
    llama3_llava_next_hf = 'llama3_llava_next_hf'
    llava_next_qwen_hf = 'llava_next_qwen_hf'
    llava_next_video_hf = 'llava_next_video_hf'
    llava_next_video_yi_hf = 'llava_next_video_yi_hf'
    llava_onevision_hf = 'llava_onevision_hf'
    yi_vl = 'yi_vl'

    llava_llama3_1_hf = 'llava_llama3_1_hf'  # DaozeZhang
    llava_llama3_hf = 'llava_llama3_hf'  # xtuner

    llava1_6_mistral = 'llava1_6_mistral'
    llava1_6_yi = 'llava1_6_yi'
    llava_next_qwen = 'llava_next_qwen'
    llama3_llava_next = 'llama3_llava_next'

    deepseek_vl = 'deepseek_vl'
    deepseek_vl2 = 'deepseek_vl2'
    deepseek_janus = 'deepseek_janus'
    deepseek_janus_pro = 'deepseek_janus_pro'

    minicpmv = 'minicpmv'
    minicpmv2_5 = 'minicpmv2_5'
    minicpmv2_6 = 'minicpmv2_6'
    minicpmo2_6 = 'minicpmo2_6'

    minimax_vl = 'minimax_vl'

    mplug_owl2 = 'mplug_owl2'
    mplug_owl2_1 = 'mplug_owl2_1'
    mplug_owl3 = 'mplug_owl3'
    mplug_owl3_241101 = 'mplug_owl3_241101'
    doc_owl2 = 'doc_owl2'

    emu3_gen = 'emu3_gen'
    emu3_chat = 'emu3_chat'
    got_ocr2 = 'got_ocr2'
    got_ocr2_hf = 'got_ocr2_hf'
    step_audio = 'step_audio'
    kimi_vl = 'kimi_vl'

    phi3_vision = 'phi3_vision'
    phi4_multimodal = 'phi4_multimodal'
    florence = 'florence'
    idefics3 = 'idefics3'
    paligemma = 'paligemma'
    molmo = 'molmo'
    molmoe = 'molmoe'
    pixtral = 'pixtral'
    megrez_omni = 'megrez_omni'
    valley = 'valley'
    gemma3_vision = 'gemma3_vision'
    mistral_2503 = 'mistral_2503'
class ModelType(LLMModelType, MLLMModelType, BertModelType, RMModelType):
    """Union of all model-type constant classes.

    Inherits every constant from ``LLMModelType``, ``MLLMModelType``,
    ``BertModelType`` and ``RMModelType`` so any model type can be referenced
    as ``ModelType.<name>``.
    """

    @classmethod
    def get_model_name_list(cls) -> List[str]:
        """Return all model-type strings declared on the four category classes.

        The categories are visited in the fixed order LLM, MLLM, Bert, RM.
        Within each one, the values come back in class-definition order.
        Only attributes defined directly on a category class are considered;
        dunder attributes and non-string values are skipped.

        Returns:
            A new list of model-type strings.
        """

        def _own_string_values(holder) -> List[str]:
            # vars() is the class's own __dict__, so inherited names are not seen.
            public_keys = (key for key in vars(holder) if not key.startswith('__'))
            # Resolve through getattr (not the raw dict entry) so descriptors
            # are evaluated the same way normal attribute access would.
            resolved = (getattr(holder, key) for key in public_keys)
            return [item for item in resolved if isinstance(item, str)]

        categories = (LLMModelType, MLLMModelType, BertModelType, RMModelType)
        return list(chain.from_iterable(map(_own_string_values, categories)))