CPU-LLM-Inference / config.py
R-Kentaren's picture
Rename app.py to config.py
9a0f889 verified
Raw
History Blame Contribute Delete
5.94 kB
# ------------------------------
# Torch-Compatible Model Definitions with Adjusted Descriptions
# ------------------------------
# Registry of selectable models, grouped by size from largest to smallest.
#
# Each key is the model's display name; each value is a dict with:
#   "repo_id"     - "<owner>/<name>" repository identifier
#                   (presumably a Hugging Face Hub repo; verify against the loader).
#   "description" - human-readable summary of the model.
#   "params_b"    - parameter count expressed in billions (e.g. 0.135 == 135M).
MODELS = {
    # 1.5B
    "Nemotron-Research-Reasoning-Qwen-1.5B": {
        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
        "description": "Nemotron-Research-Reasoning-Qwen-1.5B",
        "params_b": 1.5
    },
    "Falcon-H1-1.5B-Instruct": {
        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned",
        "params_b": 1.5
    },
    "Qwen2.5-Taiwan-1.5B-Instruct": {
        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
        "description": "Qwen2.5-Taiwan-1.5B-Instruct",
        "params_b": 1.5
    },
    # 1.2B
    "LFM2-1.2B": {
        "repo_id": "LiquidAI/LFM2-1.2B",
        "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks.",
        "params_b": 1.2
    },
    # 1.1B
    "Taiwan-ELM-1_1B-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
        "description": "Taiwan-ELM-1_1B-Instruct",
        "params_b": 1.1
    },
    # 1B
    "Llama-3.2-Taiwan-1B": {
        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
        "description": "Llama-3.2-Taiwan base model with 1 B parameters",
        "params_b": 1.0
    },
    # 700M
    "LFM2-700M": {
        "repo_id": "LiquidAI/LFM2-700M",
        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions.",
        "params_b": 0.7
    },
    # 600M
    "Qwen3-0.6B": {
        "repo_id": "Qwen/Qwen3-0.6B",
        "description": "Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities.",
        "params_b": 0.6
    },
    "Qwen3-0.6B-Taiwan": {
        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
        "description": "Qwen3-Taiwan model with 0.6 B parameters",
        "params_b": 0.6
    },
    # 500M
    "Qwen2.5-0.5B-Taiwan-Instruct": {
        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned",
        "params_b": 0.5
    },
    # 360M
    "SmolLM2-360M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "description": "Original SmolLM2‑360M Instruct",
        "params_b": 0.36
    },
    "SmolLM2-360M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.36
    },
    # 350M
    "LFM2-350M": {
        "repo_id": "LiquidAI/LFM2-350M",
        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3.",
        "params_b": 0.35
    },
    # 270M
    "parser_model_ner_gemma_v0.1": {
        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s **Gemma‑3‑270M** model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities [2][7]. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form) [1]. It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments.",
        "params_b": 0.27
    },
    "Gemma-3-Taiwan-270M-it": {
        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
        "description": "google/gemma-3-270m-it fine-tuned on Taiwan Chinese dataset",
        "params_b": 0.27
    },
    "gemma-3-270m-it": {
        "repo_id": "google/gemma-3-270m-it",
        # The "-it" suffix means "instruction-tuned", not Italian.
        "description": "Gemma-3-270M-IT is a compact, 270-million-parameter instruction-tuned language model from Google, offering fast and efficient on-device text generation and comprehension.",
        "params_b": 0.27
    },
    "Taiwan-ELM-270M-Instruct": {
        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
        "description": "Taiwan-ELM-270M-Instruct",
        "params_b": 0.27
    },
    # 135M
    "SmolLM2-135M-multilingual-base": {
        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
        "description": "SmolLM2-135M-multilingual-base",
        "params_b": 0.135
    },
    "SmolLM-135M-Taiwan-Instruct-v1.0": {
        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks",
        "params_b": 0.135
    },
    "SmolLM2_135M_Grpo_Gsm8k": {
        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
        "description": "SmolLM2_135M_Grpo_Gsm8k",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct": {
        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
        "description": "Original SmolLM2‑135M Instruct",
        "params_b": 0.135
    },
    "SmolLM2-135M-Instruct-TaiwanChat": {
        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat",
        "params_b": 0.135
    },
}