Spaces:
Running
Running
File size: 5,942 Bytes
# ------------------------------
# Torch-Compatible Model Definitions with Adjusted Descriptions
# ------------------------------
# Registry of selectable models, grouped by size from largest to smallest.
#
# Each key is the model's short name (identical to the final path component of
# its ``repo_id``). Each value is a dict with exactly three fields:
#   repo_id     -- ``owner/name`` repository identifier of the checkpoint
#   description -- human-readable summary of the model
#   params_b    -- parameter count in billions (e.g. 0.135 == 135M)
#
# NOTE(review): how this mapping is consumed (UI listing, loader, ...) is not
# visible here -- confirm against the callers before renaming any key.
MODELS = {
    # 1.5B
    "Nemotron-Research-Reasoning-Qwen-1.5B": {
        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
        "description": "Nemotron-Research-Reasoning-Qwen-1.5B",
        "params_b": 1.5
    },
    "Falcon-H1-1.5B-Instruct": {
        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned",
        "params_b": 1.5
    },
    "Qwen2.5-Taiwan-1.5B-Instruct": {
        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
        "description": "Qwen2.5-Taiwan-1.5B-Instruct",
        "params_b": 1.5
    },
    # 1.2B
    "LFM2-1.2B": {
        "repo_id": "LiquidAI/LFM2-1.2B",
        "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks.",
        "params_b": 1.2
    },
    # 1.1B
    "Taiwan-ELM-1_1B-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
        "description": "Taiwan-ELM-1_1B-Instruct",
        "params_b": 1.1
    },
    # 1B
    "Llama-3.2-Taiwan-1B": {
        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
        "description": "Llama-3.2-Taiwan base model with 1 B parameters",
        "params_b": 1.0
    },
    # 700M
    "LFM2-700M": {
        "repo_id": "LiquidAI/LFM2-700M",
        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions.",
        "params_b": 0.7
    },
    # 600M
    "Qwen3-0.6B": {
        "repo_id": "Qwen/Qwen3-0.6B",
        "description": "Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities.",
        "params_b": 0.6
    },
    "Qwen3-0.6B-Taiwan": {
        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
        "description": "Qwen3-Taiwan model with 0.6 B parameters",
        "params_b": 0.6
    },
    # 500M
    "Qwen2.5-0.5B-Taiwan-Instruct": {
        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned",
        "params_b": 0.5
    },
    # 360M
    "SmolLM2-360M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "description": "Original SmolLM2‑360M Instruct",
        "params_b": 0.36
    },
    "SmolLM2-360M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.36
    },
    # 350M
    "LFM2-350M": {
        "repo_id": "LiquidAI/LFM2-350M",
        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3.",
        "params_b": 0.35
    },
    # 270M
    "parser_model_ner_gemma_v0.1": {
        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
        # Dangling footnote markers ("[2][7]", "[1]") were removed: the string
        # has no reference list for them to point at.
        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form). It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments.",
        "params_b": 0.27
    },
    "Gemma-3-Taiwan-270M-it": {
        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
        "description": "google/gemma-3-270m-it fine-tuned on Taiwan Chinese dataset",
        "params_b": 0.27
    },
    "gemma-3-270m-it": {
        "repo_id": "google/gemma-3-270m-it",
        # The "-it" suffix denotes the instruction-tuned variant, not Italian.
        "description": "Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter instruction‑tuned language model, offering fast and efficient on‑device text generation and comprehension.",
        "params_b": 0.27
    },
    "Taiwan-ELM-270M-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
        "description": "Taiwan-ELM-270M-Instruct",
        "params_b": 0.27
    },
    # 135M
    "SmolLM2-135M-multilingual-base": {
        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
        "description": "SmolLM2-135M-multilingual-base",
        "params_b": 0.135
    },
    "SmolLM-135M-Taiwan-Instruct-v1.0": {
        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks",
        "params_b": 0.135
    },
    "SmolLM2_135M_Grpo_Gsm8k": {
        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
        "description": "SmolLM2_135M_Grpo_Gsm8k",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "description": "Original SmolLM2‑135M Instruct",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.135
    },
}