File size: 5,942 Bytes
9a0f889
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# ------------------------------
# Torch-Compatible Model Definitions with Adjusted Descriptions
# ------------------------------
# Registry of selectable models, keyed by a short display name.
# Each entry is a dict with exactly three fields:
#   repo_id     - "<owner>/<model>" identifier of the model repository
#                 (presumably a Hugging Face Hub repo id; verify against the loader)
#   description - free-form, human-readable summary of the model
#   params_b    - parameter count expressed in billions (e.g. 0.135 == 135M)
# Entries are grouped by size, largest first; each group is introduced by a
# size comment.
MODELS = {

    # 1.5B
    "Nemotron-Research-Reasoning-Qwen-1.5B": {
        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
        "description": "Nemotron-Research-Reasoning-Qwen-1.5B",
        "params_b": 1.5
    },
    "Falcon-H1-1.5B-Instruct": {
        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned",
        "params_b": 1.5
    },
    "Qwen2.5-Taiwan-1.5B-Instruct": {
        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
        "description": "Qwen2.5-Taiwan-1.5B-Instruct",
        "params_b": 1.5
    },

    # 1.2B
    "LFM2-1.2B": {
        "repo_id": "LiquidAI/LFM2-1.2B",
        "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks.",
        "params_b": 1.2
    },

    # 1.1B
    "Taiwan-ELM-1_1B-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
        "description": "Taiwan-ELM-1_1B-Instruct",
        "params_b": 1.1
    },

    # 1B
    "Llama-3.2-Taiwan-1B": {
        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
        "description": "Llama-3.2-Taiwan base model with 1 B parameters",
        "params_b": 1.0
    },

    # 700M
    "LFM2-700M": {
        "repo_id": "LiquidAI/LFM2-700M",
        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions.",
        "params_b": 0.7
    },

    # 600M
    "Qwen3-0.6B": {
        "repo_id": "Qwen/Qwen3-0.6B",
        "description": "Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities.",
        "params_b": 0.6
    },
    "Qwen3-0.6B-Taiwan": {
        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
        "description": "Qwen3-Taiwan model with 0.6 B parameters",
        "params_b": 0.6
    },

    # 500M
    "Qwen2.5-0.5B-Taiwan-Instruct": {
        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned",
        "params_b": 0.5
    },

    # 360M
    "SmolLM2-360M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "description": "Original SmolLM2‑360M Instruct",
        "params_b": 0.36
    },
    "SmolLM2-360M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.36
    },

    # 350M
    "LFM2-350M": {
        "repo_id": "LiquidAI/LFM2-350M",
        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3.",
        "params_b": 0.35
    },

    # 270M
    "parser_model_ner_gemma_v0.1": {
        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
        # Dangling citation markers ("[2][7]", "[1]") were removed: no
        # reference list accompanies this text, so they pointed nowhere.
        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form). It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments.",
        "params_b": 0.27
    },
    "Gemma-3-Taiwan-270M-it": {
        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
        "description": "google/gemma-3-270m-it fine-tuned on Taiwan Chinese dataset",
        "params_b": 0.27
    },
    "gemma-3-270m-it": {
        "repo_id": "google/gemma-3-270m-it",
        # The "-it" suffix in Gemma model names stands for "instruction-tuned",
        # not the Italian language code.
        "description": "Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter instruction‑tuned (IT) language model, offering fast and efficient on‑device text generation and comprehension.",
        "params_b": 0.27
    },
    "Taiwan-ELM-270M-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
        "description": "Taiwan-ELM-270M-Instruct",
        "params_b": 0.27
    },

    # 135M
    "SmolLM2-135M-multilingual-base": {
        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
        "description": "SmolLM2-135M-multilingual-base",
        "params_b": 0.135
    },
    "SmolLM-135M-Taiwan-Instruct-v1.0": {
        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks",
        "params_b": 0.135
    },
    "SmolLM2_135M_Grpo_Gsm8k": {
        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
        "description": "SmolLM2_135M_Grpo_Gsm8k",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "description": "Original SmolLM2‑135M Instruct",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.135
    },
}