# ------------------------------
# Torch-Compatible Model Definitions with Adjusted Descriptions
# ------------------------------
#
# MODELS maps a display name to a small metadata record:
#   repo_id     - "<owner>/<name>" repository identifier
#                 (presumably a Hugging Face Hub repo id; confirm against the loader)
#   description - human-readable summary of the model
#   params_b    - parameter count expressed in billions (e.g. 0.135 == 135M)
#
# Entries are grouped by size, largest first; keep new entries in the
# matching size group so the ordering stays predictable.
MODELS = {
    # 1.5B
    "Nemotron-Research-Reasoning-Qwen-1.5B": {
        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
        "description": "Nemotron-Research-Reasoning-Qwen-1.5B",
        "params_b": 1.5,
    },
    "Falcon-H1-1.5B-Instruct": {
        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned",
        "params_b": 1.5,
    },
    "Qwen2.5-Taiwan-1.5B-Instruct": {
        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
        "description": "Qwen2.5-Taiwan-1.5B-Instruct",
        "params_b": 1.5,
    },

    # 1.2B
    "LFM2-1.2B": {
        "repo_id": "LiquidAI/LFM2-1.2B",
        "description": (
            "A 1.2B parameter hybrid language model from Liquid AI, designed for "
            "efficient on-device and edge AI deployment, outperforming larger "
            "models like Llama-2-7b-hf in specific tasks."
        ),
        "params_b": 1.2,
    },

    # 1.1B
    "Taiwan-ELM-1_1B-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
        "description": "Taiwan-ELM-1_1B-Instruct",
        "params_b": 1.1,
    },

    # 1B
    "Llama-3.2-Taiwan-1B": {
        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
        "description": "Llama-3.2-Taiwan base model with 1 B parameters",
        "params_b": 1.0,
    },

    # 700M
    "LFM2-700M": {
        "repo_id": "LiquidAI/LFM2-700M",
        "description": (
            "A 700M parameter model from the LFM2 family, designed for high "
            "efficiency on edge devices with a hybrid architecture of "
            "multiplicative gates and short convolutions."
        ),
        "params_b": 0.7,
    },

    # 600M
    "Qwen3-0.6B": {
        "repo_id": "Qwen/Qwen3-0.6B",
        "description": (
            "Dense causal language model with 0.6 B total parameters "
            "(0.44 B non-embedding), 28 transformer layers, 16 query heads & "
            "8 KV heads, native 32 768-token context window, dual-mode "
            "generation, full multilingual & agentic capabilities."
        ),
        "params_b": 0.6,
    },
    "Qwen3-0.6B-Taiwan": {
        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
        "description": "Qwen3-Taiwan model with 0.6 B parameters",
        "params_b": 0.6,
    },

    # 500M
    "Qwen2.5-0.5B-Taiwan-Instruct": {
        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned",
        "params_b": 0.5,
    },

    # 360M
    "SmolLM2-360M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "description": "Original SmolLM2‑360M Instruct",
        "params_b": 0.36,
    },
    "SmolLM2-360M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.36,
    },

    # 350M
    "LFM2-350M": {
        "repo_id": "LiquidAI/LFM2-350M",
        "description": (
            "A compact 350M parameter hybrid model optimized for edge and "
            "on-device applications, offering significantly faster training and "
            "inference speeds compared to models like Qwen3."
        ),
        "params_b": 0.35,
    },

    # 270M
    "parser_model_ner_gemma_v0.1": {
        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
        # NOTE(review): this text carries dangling citation markers ([1], [2][7])
        # and markdown emphasis; left untouched here, but worth cleaning up once
        # the claims have been checked against the model card.
        "description": (
            "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s "
            "**Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, "
            "hyper‑efficient LLM designed for on‑device inference, supporting "
            ">140 languages, a 128 k‑token context window, and "
            "instruction‑following capabilities [2][7]. This variant is further "
            "trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to "
            "extract PERSON, ORG, LOC, and MISC entities with high precision while "
            "keeping the memory footprint low (≈240 MB VRAM in BF16 quantized "
            "form) [1]. It is released under the Apache‑2.0 license and can be "
            "used for fast, cost‑effective entity extraction in low‑resource "
            "environments."
        ),
        "params_b": 0.27,
    },
    "Gemma-3-Taiwan-270M-it": {
        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
        "description": "google/gemma-3-270m-it fine-tuned on Taiwan Chinese dataset",
        "params_b": 0.27,
    },
    "gemma-3-270m-it": {
        "repo_id": "google/gemma-3-270m-it",
        # The "-it" suffix denotes the instruction-tuned variant, not Italian.
        "description": (
            "Gemma-3-270M-IT is a compact, 270-million-parameter "
            "instruction-tuned language model from Google, offering fast and "
            "efficient on-device text generation and comprehension."
        ),
        "params_b": 0.27,
    },
    "Taiwan-ELM-270M-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
        "description": "Taiwan-ELM-270M-Instruct",
        "params_b": 0.27,
    },

    # 135M
    "SmolLM2-135M-multilingual-base": {
        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
        "description": "SmolLM2-135M-multilingual-base",
        "params_b": 0.135,
    },
    "SmolLM-135M-Taiwan-Instruct-v1.0": {
        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
        "description": (
            "135-million-parameter F32 safetensors instruction-finetuned variant "
            "of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan "
            "dataset for Traditional Chinese conversational and "
            "instruction-following tasks"
        ),
        "params_b": 0.135,
    },
    "SmolLM2_135M_Grpo_Gsm8k": {
        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
        "description": "SmolLM2_135M_Grpo_Gsm8k",
        "params_b": 0.135,
    },
    "SmolLM2-135M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "description": "Original SmolLM2‑135M Instruct",
        "params_b": 0.135,
    },
    "SmolLM2-135M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.135,
    },
}