CPU-LLM-Inference

Running

App Files Files Community

CPU-LLM-Inference / config.py

R-Kentaren

Rename app.py to config.py

9a0f889 verified 1 day ago

Raw

History Blame Contribute Delete

5.94 kB

	# ------------------------------
	# Torch-Compatible Model Definitions with Adjusted Descriptions
	# ------------------------------
	# MODELS maps a model's display name to a small metadata record:
	#   "repo_id"     - "<owner>/<model>" repository identifier
	#                   (presumably a Hugging Face Hub repo; confirm against the loader).
	#   "description" - free-form, human-readable summary of the model.
	#   "params_b"    - approximate parameter count in billions (e.g. 0.7 == 700M).
	# Entries are grouped by size, largest first. Dict insertion order is
	# preserved, so keep new entries in the matching size group.
	MODELS = {

	    # 1.5B
	    "Nemotron-Research-Reasoning-Qwen-1.5B": {
	        "repo_id": "nvidia/Nemotron-Research-Reasoning-Qwen-1.5B",
	        "description": "Nemotron-Research-Reasoning-Qwen-1.5B",
	        "params_b": 1.5,
	    },
	    "Falcon-H1-1.5B-Instruct": {
	        "repo_id": "tiiuae/Falcon-H1-1.5B-Instruct",
	        "description": "Falcon‑H1 model with 1.5 B parameters, instruction‑tuned",
	        "params_b": 1.5,
	    },
	    "Qwen2.5-Taiwan-1.5B-Instruct": {
	        "repo_id": "benchang1110/Qwen2.5-Taiwan-1.5B-Instruct",
	        "description": "Qwen2.5-Taiwan-1.5B-Instruct",
	        "params_b": 1.5,
	    },

	    # 1.2B
	    "LFM2-1.2B": {
	        "repo_id": "LiquidAI/LFM2-1.2B",
	        "description": "A 1.2B parameter hybrid language model from Liquid AI, designed for efficient on-device and edge AI deployment, outperforming larger models like Llama-2-7b-hf in specific tasks.",
	        "params_b": 1.2,
	    },

	    # 1.1B
	    "Taiwan-ELM-1_1B-Instruct": {
	        "repo_id": "liswei/Taiwan-ELM-1_1B-Instruct",
	        "description": "Taiwan-ELM-1_1B-Instruct",
	        "params_b": 1.1,
	    },

	    # 1B
	    "Llama-3.2-Taiwan-1B": {
	        "repo_id": "lianghsun/Llama-3.2-Taiwan-1B",
	        "description": "Llama-3.2-Taiwan base model with 1 B parameters",
	        "params_b": 1.0,
	    },

	    # 700M
	    "LFM2-700M": {
	        "repo_id": "LiquidAI/LFM2-700M",
	        "description": "A 700M parameter model from the LFM2 family, designed for high efficiency on edge devices with a hybrid architecture of multiplicative gates and short convolutions.",
	        "params_b": 0.7,
	    },

	    # 600M
	    "Qwen3-0.6B": {
	        "repo_id": "Qwen/Qwen3-0.6B",
	        "description": "Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities.",
	        "params_b": 0.6,
	    },
	    "Qwen3-0.6B-Taiwan": {
	        "repo_id": "ShengweiPeng/Qwen3-0.6B-Taiwan",
	        "description": "Qwen3-Taiwan model with 0.6 B parameters",
	        "params_b": 0.6,
	    },

	    # 500M
	    "Qwen2.5-0.5B-Taiwan-Instruct": {
	        "repo_id": "ShengweiPeng/Qwen2.5-0.5B-Taiwan-Instruct",
	        "description": "Qwen2.5-Taiwan model with 0.5 B parameters, instruction-tuned",
	        "params_b": 0.5,
	    },

	    # 360M
	    "SmolLM2-360M-Instruct": {
	        "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct",
	        "description": "Original SmolLM2‑360M Instruct",
	        "params_b": 0.36,
	    },
	    "SmolLM2-360M-Instruct-TaiwanChat": {
	        "repo_id": "Luigi/SmolLM2-360M-Instruct-TaiwanChat",
	        "description": "SmolLM2‑360M Instruct fine-tuned on TaiwanChat",
	        "params_b": 0.36,
	    },

	    # 350M
	    "LFM2-350M": {
	        "repo_id": "LiquidAI/LFM2-350M",
	        "description": "A compact 350M parameter hybrid model optimized for edge and on-device applications, offering significantly faster training and inference speeds compared to models like Qwen3.",
	        "params_b": 0.35,
	    },

	    # 270M
	    "parser_model_ner_gemma_v0.1": {
	        "repo_id": "myfi/parser_model_ner_gemma_v0.1",
	        # Dangling citation markers ("[1]", "[2][7]") were removed: the
	        # bibliography they pointed to does not exist in this file.
	        "description": "A lightweight named‑entity‑like (NER) parser fine‑tuned from Google’s Gemma‑3‑270M model. The base Gemma‑3‑270M is a 270 M‑parameter, hyper‑efficient LLM designed for on‑device inference, supporting >140 languages, a 128 k‑token context window, and instruction‑following capabilities. This variant is further trained on standard NER corpora (e.g., CoNLL‑2003, OntoNotes) to extract PERSON, ORG, LOC, and MISC entities with high precision while keeping the memory footprint low (≈240 MB VRAM in BF16 quantized form). It is released under the Apache‑2.0 license and can be used for fast, cost‑effective entity extraction in low‑resource environments.",
	        "params_b": 0.27,
	    },
	    "Gemma-3-Taiwan-270M-it": {
	        "repo_id": "lianghsun/Gemma-3-Taiwan-270M-it",
	        "description": "google/gemma-3-270m-it fine-tuned on Taiwan Chinese dataset",
	        "params_b": 0.27,
	    },
	    "gemma-3-270m-it": {
	        "repo_id": "google/gemma-3-270m-it",
	        # The "-it" suffix on Gemma checkpoints means "instruction-tuned",
	        # not Italian.
	        "description": "Gemma-3-270M-IT is the instruction-tuned (IT) variant of Google's compact 270-million-parameter Gemma 3 model, offering fast and efficient on-device text generation and comprehension.",
	        "params_b": 0.27,
	    },
	    "Taiwan-ELM-270M-Instruct": {
	        "repo_id": "liswei/Taiwan-ELM-270M-Instruct",
	        "description": "Taiwan-ELM-270M-Instruct",
	        "params_b": 0.27,
	    },

	    # 135M
	    "SmolLM2-135M-multilingual-base": {
	        "repo_id": "agentlans/SmolLM2-135M-multilingual-base",
	        "description": "SmolLM2-135M-multilingual-base",
	        "params_b": 0.135,
	    },
	    "SmolLM-135M-Taiwan-Instruct-v1.0": {
	        "repo_id": "benchang1110/SmolLM-135M-Taiwan-Instruct-v1.0",
	        "description": "135-million-parameter F32 safetensors instruction-finetuned variant of SmolLM-135M-Taiwan, trained on the 416 k-example ChatTaiwan dataset for Traditional Chinese conversational and instruction-following tasks",
	        "params_b": 0.135,
	    },
	    "SmolLM2_135M_Grpo_Gsm8k": {
	        "repo_id": "prithivMLmods/SmolLM2_135M_Grpo_Gsm8k",
	        "description": "SmolLM2_135M_Grpo_Gsm8k",
	        "params_b": 0.135,
	    },
	    "SmolLM2-135M-Instruct": {
	        "repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
	        "description": "Original SmolLM2‑135M Instruct",
	        "params_b": 0.135,
	    },
	    "SmolLM2-135M-Instruct-TaiwanChat": {
	        "repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat",
	        "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat",
	        "params_b": 0.135,
	    },
	}