Spaces:
Running
Running
# ------------------------------
# Torch-Compatible Model Definitions with Adjusted Descriptions
# ------------------------------
# Registry of selectable models, keyed by display name.
#
# Each entry is a dict with three fields:
#   "repo_id"     - "<owner>/<model>" identifier (presumably a Hugging Face Hub
#                   repository id; verify against the loader that consumes it).
#   "description" - human-readable summary of the model.
#   "params_b"    - parameter count expressed in billions (e.g. 0.7 for 700M).
#
# Entries are grouped by size, largest first; the "# <size>" comments mark
# each group.
MODELS = {
    # 1.5B
    "Nemotron-Research-Reasoning-Qwen-1.5B": {
        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
        "description": "Nemotron-Research-Reasoning-Qwen-1.5B",
        "params_b": 1.5
    },
    "Falcon-H1-1.5B-Instruct": {
        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned",
        "params_b": 1.5
    },
    "Qwen2.5-Taiwan-1.5B-Instruct": {
        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
        "description": "Qwen2.5-Taiwan-1.5B-Instruct",
        "params_b": 1.5
    },
    # 1.2B
    "LFM2-1.2B": {
        "repo_id": "LiquidAI/LFM2-1.2B",
        "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks.",
        "params_b": 1.2
    },
    # 1.1B
    "Taiwan-ELM-1_1B-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
        "description": "Taiwan-ELM-1_1B-Instruct",
        "params_b": 1.1
    },
    # 1B
    "Llama-3.2-Taiwan-1B": {
        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
        "description": "Llama-3.2-Taiwan base model with 1 B parameters",
        "params_b": 1.0
    },
    # 700M
    "LFM2-700M": {
        "repo_id": "LiquidAI/LFM2-700M",
        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions.",
        "params_b": 0.7
    },
    # 600M
    "Qwen3-0.6B": {
        "repo_id": "Qwen/Qwen3-0.6B",
        "description": "Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities.",
        "params_b": 0.6
    },
    "Qwen3-0.6B-Taiwan": {
        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
        "description": "Qwen3-Taiwan model with 0.6 B parameters",
        "params_b": 0.6
    },
    # 500M
    "Qwen2.5-0.5B-Taiwan-Instruct": {
        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned",
        "params_b": 0.5
    },
    # 360M
    "SmolLM2-360M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "description": "Original SmolLM2‑360M Instruct",
        "params_b": 0.36
    },
    "SmolLM2-360M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.36
    },
    # 350M
    "LFM2-350M": {
        "repo_id": "LiquidAI/LFM2-350M",
        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3.",
        "params_b": 0.35
    },
    # 270M
    "parser_model_ner_gemma_v0.1": {
        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
        # NOTE(review): this description carries citation markers ([1], [2][7])
        # with no accompanying reference list, and its training-data claims
        # could not be checked from this file - confirm against the model card.
        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities [2][7]. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form) [1]. It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments.",
        "params_b": 0.27
    },
    "Gemma-3-Taiwan-270M-it": {
        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
        "description": "google/gemma-3-270m-it fine-tuned on Taiwan Chinese dataset",
        "params_b": 0.27
    },
    "gemma-3-270m-it": {
        "repo_id": "google/gemma-3-270m-it",
        # The "-it" suffix denotes the instruction-tuned variant, not Italian.
        "description": "Gemma‑3‑270M‑IT is a compact, 270‑million‑parameter, instruction‑tuned language model, offering fast and efficient on‑device text generation and comprehension.",
        "params_b": 0.27
    },
    "Taiwan-ELM-270M-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
        "description": "Taiwan-ELM-270M-Instruct",
        "params_b": 0.27
    },
    # 135M
    "SmolLM2-135M-multilingual-base": {
        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
        "description": "SmolLM2-135M-multilingual-base",
        "params_b": 0.135
    },
    "SmolLM-135M-Taiwan-Instruct-v1.0": {
        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks",
        "params_b": 0.135
    },
    "SmolLM2_135M_Grpo_Gsm8k": {
        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
        "description": "SmolLM2_135M_Grpo_Gsm8k",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "description": "Original SmolLM2‑135M Instruct",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.135
    },
}