FitCheck / catalogue.json
cn0303's picture
Real catalogue: 83 verified models, buy-advice mode, live model lookup, license-aware cards
e34beb2 verified
{
"version": 1,
"generated_at": "2026-06-09T22:12:37+00:00",
"count": 83,
"entries": [
{
"key": "qwen3-0.6b",
"family": "llm",
"name": "Qwen3 0.6B",
"repo_id": "Qwen/Qwen3-0.6B",
"params_b": 0.75,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-0.6B-GGUF",
"ollama_tag": "qwen3:0.6b",
"good_for": "Quick simple chat and text tidy-up. Runs on almost anything.",
"aliases": [
"Qwen/Qwen3-0.6B-Base"
],
"stale": false,
"gated": false,
"downloads_30d": 21649861,
"arch": {
"n_layers": 28,
"hidden": 1024,
"n_heads": 16,
"n_kv_heads": 8
},
"context_len": 40960,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 0.64,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 0.5,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 0.44,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 0.4,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 0.37,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 0.35,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 0.3,
"source_repo": "unsloth/Qwen3-0.6B-GGUF",
"filename": "Qwen3-0.6B-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-0.6B",
"gguf": "https://huggingface.co/unsloth/Qwen3-0.6B-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:0.6b",
"llamacpp": "llama-server -hf unsloth/Qwen3-0.6B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-1.7b",
"family": "llm",
"name": "Qwen3 1.7B",
"repo_id": "Qwen/Qwen3-1.7B",
"params_b": 2.03,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-1.7B-GGUF",
"ollama_tag": "qwen3:1.7b",
"good_for": "Light everyday chat and summarising on weak hardware.",
"aliases": [
"Qwen/Qwen3-1.7B-Base"
],
"stale": false,
"gated": false,
"downloads_30d": 4452758,
"arch": {
"n_layers": 28,
"hidden": 2048,
"n_heads": 16,
"n_kv_heads": 8
},
"context_len": 40960,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 1.83,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 1.42,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 1.26,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 1.11,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 1.01,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 0.94,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 0.78,
"source_repo": "unsloth/Qwen3-1.7B-GGUF",
"filename": "Qwen3-1.7B-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-1.7B",
"gguf": "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:1.7b",
"llamacpp": "llama-server -hf unsloth/Qwen3-1.7B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-4b",
"family": "llm",
"name": "Qwen3 4B Instruct (2507)",
"repo_id": "Qwen/Qwen3-4B-Instruct-2507",
"params_b": 4.02,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"ollama_tag": "qwen3:4b",
"good_for": "Surprisingly capable everyday assistant: chat, summarising, light coding.",
"aliases": [
"Qwen/Qwen3-4B",
"Qwen/Qwen3-4B-Base",
"Qwen/Qwen3-4B-Thinking-2507"
],
"stale": false,
"gated": false,
"downloads_30d": 4349929,
"arch": {
"n_layers": 36,
"hidden": 2560,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 4.28,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 3.31,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 2.89,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 2.5,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 2.27,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 2.08,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 1.67,
"source_repo": "unsloth/Qwen3-4B-Instruct-2507-GGUF",
"filename": "Qwen3-4B-Instruct-2507-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-4B-Instruct-2507",
"gguf": "https://huggingface.co/unsloth/Qwen3-4B-Instruct-2507-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:4b",
"llamacpp": "llama-server -hf unsloth/Qwen3-4B-Instruct-2507-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-8b",
"family": "llm",
"name": "Qwen3 8B",
"repo_id": "Qwen/Qwen3-8B",
"params_b": 8.19,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-8B-GGUF",
"ollama_tag": "qwen3:8b",
"good_for": "A solid all-rounder: good chat, real coding help, decent reasoning.",
"aliases": [
"Qwen/Qwen3-8B-Base"
],
"stale": false,
"gated": false,
"downloads_30d": 10950977,
"arch": {
"n_layers": 36,
"hidden": 4096,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 40960,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 8.71,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 6.73,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 5.85,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 5.03,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 4.58,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 4.12,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 3.28,
"source_repo": "unsloth/Qwen3-8B-GGUF",
"filename": "Qwen3-8B-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-8B",
"gguf": "https://huggingface.co/unsloth/Qwen3-8B-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:8b",
"llamacpp": "llama-server -hf unsloth/Qwen3-8B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-14b",
"family": "llm",
"name": "Qwen3 14B",
"repo_id": "Qwen/Qwen3-14B",
"params_b": 14.77,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-14B-GGUF",
"ollama_tag": "qwen3:14b",
"good_for": "Noticeably smarter and more reliable. Wants a real graphics card.",
"aliases": [
"Qwen/Qwen3-14B-Base"
],
"stale": false,
"gated": false,
"downloads_30d": 1640580,
"arch": {
"n_layers": 40,
"hidden": 5120,
"n_heads": 40,
"n_kv_heads": 8
},
"context_len": 40960,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 15.7,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 12.12,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 10.51,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 9.0,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 8.14,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 7.32,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 5.75,
"source_repo": "unsloth/Qwen3-14B-GGUF",
"filename": "Qwen3-14B-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-14B",
"gguf": "https://huggingface.co/unsloth/Qwen3-14B-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:14b",
"llamacpp": "llama-server -hf unsloth/Qwen3-14B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-32b",
"family": "llm",
"name": "Qwen3 32B",
"repo_id": "Qwen/Qwen3-32B",
"params_b": 32.76,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-32B-GGUF",
"ollama_tag": "qwen3:32b",
"good_for": "Near-premium quality. Needs a strong GPU or a lot of memory.",
"stale": false,
"gated": false,
"downloads_30d": 3167218,
"arch": {
"n_layers": 64,
"hidden": 5120,
"n_heads": 64,
"n_kv_heads": 8
},
"context_len": 40960,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 34.82,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 26.88,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 23.21,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 19.76,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 17.71,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 15.97,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 12.34,
"source_repo": "unsloth/Qwen3-32B-GGUF",
"filename": "Qwen3-32B-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-32B",
"gguf": "https://huggingface.co/unsloth/Qwen3-32B-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:32b",
"llamacpp": "llama-server -hf unsloth/Qwen3-32B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-30b-a3b",
"family": "llm",
"name": "Qwen3 30B-A3B (2507, MoE)",
"repo_id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
"params_b": 30.53,
"active_params_b": 3.0,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"ollama_tag": "qwen3:30b",
"good_for": "Big-model quality that runs fast: only 3B of its 30B work per word, so it flies even part-offloaded to RAM.",
"stale": false,
"gated": false,
"downloads_30d": 821913,
"arch": {
"n_layers": 48,
"hidden": 2048,
"n_heads": 32,
"n_kv_heads": 4
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 32.48,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 25.09,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 21.73,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 18.56,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 16.38,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 14.71,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 11.26,
"source_repo": "unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"filename": "Qwen3-30B-A3B-Instruct-2507-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507",
"gguf": "https://huggingface.co/unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF",
"ollama": "https://ollama.com/library/qwen3"
},
"run": {
"ollama": "ollama run qwen3:30b",
"llamacpp": "llama-server -hf unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-coder-30b",
"family": "llm",
"name": "Qwen3 Coder 30B-A3B (MoE)",
"repo_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
"params_b": 30.53,
"active_params_b": 3.0,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"ollama_tag": "qwen3-coder:30b",
"good_for": "The local coding specialist. Fast despite its size (3B active).",
"stale": false,
"gated": false,
"downloads_30d": 1961682,
"arch": {
"n_layers": 48,
"hidden": 2048,
"n_heads": 32,
"n_kv_heads": 4
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 32.48,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 25.09,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 21.73,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 18.56,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 16.38,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 14.71,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 11.26,
"source_repo": "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-Coder-30B-A3B-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct",
"gguf": "https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
"ollama": "https://ollama.com/library/qwen3-coder"
},
"run": {
"ollama": "ollama run qwen3-coder:30b",
"llamacpp": "llama-server -hf unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "llama-3.2-1b",
"family": "llm",
"name": "Llama 3.2 1B",
"repo_id": "meta-llama/Llama-3.2-1B-Instruct",
"params_b": 1.24,
"license": "llama3.2",
"gguf_repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"ollama_tag": "llama3.2:1b",
"good_for": "Quick simple chat from the Llama family. Runs on almost anything.",
"stale": false,
"gated": true,
"downloads_30d": 7481230,
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 1.32,
"source_repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"filename": "Llama-3.2-1B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 1.02,
"source_repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"filename": "Llama-3.2-1B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 0.91,
"source_repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"filename": "Llama-3.2-1B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 0.81,
"source_repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"filename": "Llama-3.2-1B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 0.74,
"source_repo": "bartowski/Llama-3.2-1B-Instruct-GGUF",
"filename": "Llama-3.2-1B-Instruct-IQ4_XS.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct",
"gguf": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF",
"ollama": "https://ollama.com/library/llama3.2"
},
"run": {
"ollama": "ollama run llama3.2:1b",
"llamacpp": "llama-server -hf bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "llama-3.2-3b",
"family": "llm",
"name": "Llama 3.2 3B",
"repo_id": "meta-llama/Llama-3.2-3B-Instruct",
"params_b": 3.21,
"license": "llama3.2",
"gguf_repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
"ollama_tag": "llama3.2:3b",
"good_for": "Capable small assistant with the huge Llama ecosystem behind it.",
"stale": false,
"gated": true,
"downloads_30d": 1509782,
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 3.42,
"source_repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
"filename": "Llama-3.2-3B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 2.64,
"source_repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
"filename": "Llama-3.2-3B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 2.32,
"source_repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
"filename": "Llama-3.2-3B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 2.02,
"source_repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
"filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 1.83,
"source_repo": "bartowski/Llama-3.2-3B-Instruct-GGUF",
"filename": "Llama-3.2-3B-Instruct-IQ4_XS.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct",
"gguf": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF",
"ollama": "https://ollama.com/library/llama3.2"
},
"run": {
"ollama": "ollama run llama3.2:3b",
"llamacpp": "llama-server -hf bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "llama-3.1-8b",
"family": "llm",
"name": "Llama 3.1 8B",
"repo_id": "meta-llama/Llama-3.1-8B-Instruct",
"params_b": 8.03,
"license": "llama3.1",
"gguf_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"ollama_tag": "llama3.1:8b",
"good_for": "The classic dependable 8B: chat, coding help, tool use.",
"stale": false,
"gated": true,
"downloads_30d": 10031112,
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 8.54,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 6.6,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 5.73,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 4.92,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 4.45,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 4.02,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 3.18,
"source_repo": "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"filename": "Meta-Llama-3.1-8B-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
"gguf": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"ollama": "https://ollama.com/library/llama3.1"
},
"run": {
"ollama": "ollama run llama3.1:8b",
"llamacpp": "llama-server -hf bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "llama-4-scout",
"family": "llm",
"name": "Llama 4 Scout (109B MoE)",
"repo_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
"params_b": 108.64,
"active_params_b": 17.0,
"license": "other",
"gguf_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"ollama_tag": "llama4:scout",
"good_for": "Frontier-class open model. Workstation territory: it is honest to say most PCs cannot run this.",
"stale": false,
"gated": true,
"downloads_30d": 421808,
"context_len": 10485760,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 114.53,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 88.43,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 76.55,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "Q5_K_M/Llama-4-Scout-17B-16E-Instruct-Q5_K_M-00001-of-00002.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 65.36,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "Q4_K_M/Llama-4-Scout-17B-16E-Instruct-Q4_K_M-00001-of-00002.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 57.65,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "IQ4_XS/Llama-4-Scout-17B-16E-Instruct-IQ4_XS-00001-of-00002.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 51.76,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "Q3_K_M/Llama-4-Scout-17B-16E-Instruct-Q3_K_M-00001-of-00002.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 39.56,
"source_repo": "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"filename": "Llama-4-Scout-17B-16E-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
"gguf": "https://huggingface.co/unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF",
"ollama": "https://ollama.com/library/llama4"
},
"run": {
"ollama": "ollama run llama4:scout",
"llamacpp": "llama-server -hf unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3-270m",
"family": "llm",
"name": "Gemma 3 270M",
"repo_id": "google/gemma-3-270m-it",
"params_b": 0.27,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3-270m-it-GGUF",
"ollama_tag": "gemma3:270m",
"good_for": "Tiny and instant. Fine for simple completions, not real conversation.",
"stale": false,
"gated": true,
"downloads_30d": 127163,
"context_len": 32768,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 0.29,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 0.28,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 0.26,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 0.25,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 0.24,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 0.24,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 0.24,
"source_repo": "unsloth/gemma-3-270m-it-GGUF",
"filename": "gemma-3-270m-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3-270m-it",
"gguf": "https://huggingface.co/unsloth/gemma-3-270m-it-GGUF",
"ollama": "https://ollama.com/library/gemma3"
},
"run": {
"ollama": "ollama run gemma3:270m",
"llamacpp": "llama-server -hf unsloth/gemma-3-270m-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3-1b",
"family": "llm",
"name": "Gemma 3 1B",
"repo_id": "google/gemma-3-1b-it",
"params_b": 1.0,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3-1b-it-GGUF",
"ollama_tag": "gemma3:1b",
"good_for": "Google's small chat model. Light and friendly on weak hardware.",
"stale": false,
"gated": true,
"downloads_30d": 1658957,
"context_len": 32768,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 1.07,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 1.01,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 0.85,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 0.81,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 0.71,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 0.72,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 0.69,
"source_repo": "unsloth/gemma-3-1b-it-GGUF",
"filename": "gemma-3-1b-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3-1b-it",
"gguf": "https://huggingface.co/unsloth/gemma-3-1b-it-GGUF",
"ollama": "https://ollama.com/library/gemma3"
},
"run": {
"ollama": "ollama run gemma3:1b",
"llamacpp": "llama-server -hf unsloth/gemma-3-1b-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3-4b",
"family": "llm",
"name": "Gemma 3 4B",
"repo_id": "google/gemma-3-4b-it",
"params_b": 4.3,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3-4b-it-GGUF",
"ollama_tag": "gemma3:4b",
"good_for": "Warm-toned everyday assistant; can also look at images.",
"stale": false,
"gated": true,
"downloads_30d": 1601809,
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 4.13,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 3.19,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 2.83,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 2.49,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 2.26,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 2.1,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 1.73,
"source_repo": "unsloth/gemma-3-4b-it-GGUF",
"filename": "gemma-3-4b-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3-4b-it",
"gguf": "https://huggingface.co/unsloth/gemma-3-4b-it-GGUF",
"ollama": "https://ollama.com/library/gemma3"
},
"run": {
"ollama": "ollama run gemma3:4b",
"llamacpp": "llama-server -hf unsloth/gemma-3-4b-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3-12b",
"family": "llm",
"name": "Gemma 3 12B",
"repo_id": "google/gemma-3-12b-it",
"params_b": 12.19,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3-12b-it-GGUF",
"ollama_tag": "gemma3:12b",
"good_for": "Strong mid-size model with vision support.",
"stale": false,
"gated": true,
"downloads_30d": 2810935,
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 12.51,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 9.66,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 8.45,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 7.3,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 6.55,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 6.01,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 4.77,
"source_repo": "unsloth/gemma-3-12b-it-GGUF",
"filename": "gemma-3-12b-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3-12b-it",
"gguf": "https://huggingface.co/unsloth/gemma-3-12b-it-GGUF",
"ollama": "https://ollama.com/library/gemma3"
},
"run": {
"ollama": "ollama run gemma3:12b",
"llamacpp": "llama-server -hf unsloth/gemma-3-12b-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3-27b",
"family": "llm",
"name": "Gemma 3 27B",
"repo_id": "google/gemma-3-27b-it",
"params_b": 27.43,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3-27b-it-GGUF",
"ollama_tag": "gemma3:27b",
"good_for": "Google's big open model. Needs a serious GPU or lots of memory.",
"stale": false,
"gated": true,
"downloads_30d": 1418920,
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 28.71,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 22.17,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 19.27,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 16.55,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 14.77,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 13.44,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 10.5,
"source_repo": "unsloth/gemma-3-27b-it-GGUF",
"filename": "gemma-3-27b-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3-27b-it",
"gguf": "https://huggingface.co/unsloth/gemma-3-27b-it-GGUF",
"ollama": "https://ollama.com/library/gemma3"
},
"run": {
"ollama": "ollama run gemma3:27b",
"llamacpp": "llama-server -hf unsloth/gemma-3-27b-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3n-e2b",
"family": "llm",
"name": "Gemma 3n E2B",
"repo_id": "google/gemma-3n-E2B-it",
"params_b": 5.44,
"active_params_b": 2.0,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"ollama_tag": "gemma3n:e2b",
"good_for": "Phone-class design: feels like a 2B while knowing more.",
"stale": false,
"gated": true,
"downloads_30d": 372825,
"context_len": 32768,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 4.79,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 4.21,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 3.29,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 3.03,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 2.91,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 2.48,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 2.22,
"source_repo": "unsloth/gemma-3n-E2B-it-GGUF",
"filename": "gemma-3n-E2B-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3n-E2B-it",
"gguf": "https://huggingface.co/unsloth/gemma-3n-E2B-it-GGUF",
"ollama": "https://ollama.com/library/gemma3n"
},
"run": {
"ollama": "ollama run gemma3n:e2b",
"llamacpp": "llama-server -hf unsloth/gemma-3n-E2B-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "gemma-3n-e4b",
"family": "llm",
"name": "Gemma 3n E4B",
"repo_id": "google/gemma-3n-E4B-it",
"params_b": 7.85,
"active_params_b": 4.0,
"license": "gemma",
"gguf_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"ollama_tag": "gemma3n:e4b",
"good_for": "Efficient on-device design with 4B-class speed.",
"stale": false,
"gated": true,
"downloads_30d": 17622,
"context_len": 32768,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 7.35,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 6.27,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 5.02,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 4.54,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 4.27,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 3.69,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 3.19,
"source_repo": "unsloth/gemma-3n-E4B-it-GGUF",
"filename": "gemma-3n-E4B-it-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/google/gemma-3n-E4B-it",
"gguf": "https://huggingface.co/unsloth/gemma-3n-E4B-it-GGUF",
"ollama": "https://ollama.com/library/gemma3n"
},
"run": {
"ollama": "ollama run gemma3n:e4b",
"llamacpp": "llama-server -hf unsloth/gemma-3n-E4B-it-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "mistral-7b",
"family": "llm",
"name": "Mistral 7B (v0.3)",
"repo_id": "mistralai/Mistral-7B-Instruct-v0.3",
"params_b": 7.25,
"license": "apache-2.0",
"gguf_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"ollama_tag": "mistral:7b",
"good_for": "The classic open 7B. Still a solid, fast all-rounder.",
"stale": false,
"gated": false,
"downloads_30d": 3392572,
"arch": {
"n_layers": 32,
"hidden": 4096,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 32768,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 7.7,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 5.95,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 5.14,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 4.37,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 3.91,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 3.52,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 2.72,
"source_repo": "bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"filename": "Mistral-7B-Instruct-v0.3-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
"gguf": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF",
"ollama": "https://ollama.com/library/mistral"
},
"run": {
"ollama": "ollama run mistral:7b",
"llamacpp": "llama-server -hf bartowski/Mistral-7B-Instruct-v0.3-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "mistral-small-3.2",
"family": "llm",
"name": "Mistral Small 3.2 (24B)",
"repo_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506",
"params_b": 24.01,
"license": "apache-2.0",
"gguf_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"ollama_tag": "mistral-small3.2",
"good_for": "Excellent quality-for-size; a favourite on 24 GB cards.",
"stale": false,
"gated": false,
"downloads_30d": 537956,
"arch": {
"n_layers": 40,
"hidden": 5120,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 25.05,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 19.35,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 16.76,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 14.33,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 12.78,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 11.47,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 8.89,
"source_repo": "unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"filename": "Mistral-Small-3.2-24B-Instruct-2506-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506",
"gguf": "https://huggingface.co/unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF",
"ollama": "https://ollama.com/library/mistral-small3.2"
},
"run": {
"ollama": "ollama run mistral-small3.2",
"llamacpp": "llama-server -hf unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "devstral-small",
"family": "llm",
"name": "Devstral Small (24B, coding)",
"repo_id": "mistralai/Devstral-Small-2507",
"params_b": 23.57,
"license": "apache-2.0",
"gguf_repo": "unsloth/Devstral-Small-2507-GGUF",
"ollama_tag": "devstral",
"good_for": "Built for coding agents and repo-level work.",
"stale": false,
"gated": false,
"downloads_30d": 24427,
"arch": {
"n_layers": 40,
"hidden": 5120,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 25.05,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 19.35,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 16.76,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 14.33,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 12.78,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 11.47,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 8.89,
"source_repo": "unsloth/Devstral-Small-2507-GGUF",
"filename": "Devstral-Small-2507-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/mistralai/Devstral-Small-2507",
"gguf": "https://huggingface.co/unsloth/Devstral-Small-2507-GGUF",
"ollama": "https://ollama.com/library/devstral"
},
"run": {
"ollama": "ollama run devstral",
"llamacpp": "llama-server -hf unsloth/Devstral-Small-2507-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "phi-4",
"family": "llm",
"name": "Phi-4 (14B)",
"repo_id": "microsoft/phi-4",
"params_b": 14.66,
"license": "mit",
"gguf_repo": "unsloth/phi-4-GGUF",
"ollama_tag": "phi4",
"good_for": "Microsoft's strong 14B, great at reasoning and maths.",
"stale": false,
"gated": false,
"downloads_30d": 809973,
"arch": {
"n_layers": 40,
"hidden": 5120,
"n_heads": 40,
"n_kv_heads": 10
},
"context_len": 16384,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 15.58,
"source_repo": "unsloth/phi-4-GGUF",
"filename": "phi-4-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 12.03,
"source_repo": "unsloth/phi-4-GGUF",
"filename": "phi-4-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 10.41,
"source_repo": "unsloth/phi-4-GGUF",
"filename": "phi-4-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 8.89,
"source_repo": "unsloth/phi-4-GGUF",
"filename": "phi-4-Q4_K_M.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 7.19,
"source_repo": "unsloth/phi-4-GGUF",
"filename": "phi-4-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 5.61,
"source_repo": "unsloth/phi-4-GGUF",
"filename": "phi-4-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/microsoft/phi-4",
"gguf": "https://huggingface.co/unsloth/phi-4-GGUF",
"ollama": "https://ollama.com/library/phi4"
},
"run": {
"ollama": "ollama run phi4",
"llamacpp": "llama-server -hf unsloth/phi-4-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "phi-4-mini",
"family": "llm",
"name": "Phi-4 Mini (3.8B)",
"repo_id": "microsoft/Phi-4-mini-instruct",
"params_b": 3.84,
"license": "mit",
"gguf_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"ollama_tag": "phi4-mini",
"good_for": "Small, MIT-licensed, punchy for its size.",
"stale": false,
"gated": false,
"downloads_30d": 1221436,
"arch": {
"n_layers": 32,
"hidden": 3072,
"n_heads": 24,
"n_kv_heads": 8
},
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 4.08,
"source_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"filename": "Phi-4-mini-instruct.Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 3.16,
"source_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"filename": "Phi-4-mini-instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 2.85,
"source_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"filename": "Phi-4-mini-instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 2.49,
"source_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"filename": "Phi-4-mini-instruct-Q4_K_M.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 2.12,
"source_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"filename": "Phi-4-mini-instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 1.68,
"source_repo": "unsloth/Phi-4-mini-instruct-GGUF",
"filename": "Phi-4-mini-instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/microsoft/Phi-4-mini-instruct",
"gguf": "https://huggingface.co/unsloth/Phi-4-mini-instruct-GGUF",
"ollama": "https://ollama.com/library/phi4-mini"
},
"run": {
"ollama": "ollama run phi4-mini",
"llamacpp": "llama-server -hf unsloth/Phi-4-mini-instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "granite-4.0-1b",
"family": "llm",
"name": "Granite 4.0 1B",
"repo_id": "ibm-granite/granite-4.0-1b",
"params_b": 1.63,
"license": "apache-2.0",
"gguf_repo": "ibm-granite/granite-4.0-1b-GGUF",
"good_for": "IBM's tiny enterprise-grade model; official GGUFs.",
"stale": false,
"gated": false,
"downloads_30d": 4653,
"arch": {
"n_layers": 40,
"hidden": 2048,
"n_heads": 16,
"n_kv_heads": 4
},
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 1.74,
"source_repo": "ibm-granite/granite-4.0-1b-GGUF",
"filename": "granite-4.0-1b-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 1.34,
"source_repo": "ibm-granite/granite-4.0-1b-GGUF",
"filename": "granite-4.0-1b-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 1.18,
"source_repo": "ibm-granite/granite-4.0-1b-GGUF",
"filename": "granite-4.0-1b-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 1.02,
"source_repo": "ibm-granite/granite-4.0-1b-GGUF",
"filename": "granite-4.0-1b-Q4_K_M.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 0.86,
"source_repo": "ibm-granite/granite-4.0-1b-GGUF",
"filename": "granite-4.0-1b-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 0.7,
"source_repo": "ibm-granite/granite-4.0-1b-GGUF",
"filename": "granite-4.0-1b-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/ibm-granite/granite-4.0-1b",
"gguf": "https://huggingface.co/ibm-granite/granite-4.0-1b-GGUF"
},
"run": {
"ollama": "ollama run hf.co/ibm-granite/granite-4.0-1b-GGUF:Q4_K_M",
"llamacpp": "llama-server -hf ibm-granite/granite-4.0-1b-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "granite-4.0-micro",
"family": "llm",
"name": "Granite 4.0 Micro (3B)",
"repo_id": "ibm-granite/granite-4.0-micro",
"params_b": 3.4,
"license": "apache-2.0",
"gguf_repo": "ibm-granite/granite-4.0-micro-GGUF",
"ollama_tag": "granite4:micro",
"good_for": "Grounded, RAG-friendly small model from IBM.",
"stale": false,
"gated": false,
"downloads_30d": 129738,
"arch": {
"n_layers": 40,
"hidden": 2560,
"n_heads": 40,
"n_kv_heads": 8
},
"context_len": 131072,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 3.62,
"source_repo": "ibm-granite/granite-4.0-micro-GGUF",
"filename": "granite-4.0-micro-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 2.8,
"source_repo": "ibm-granite/granite-4.0-micro-GGUF",
"filename": "granite-4.0-micro-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 2.44,
"source_repo": "ibm-granite/granite-4.0-micro-GGUF",
"filename": "granite-4.0-micro-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 2.1,
"source_repo": "ibm-granite/granite-4.0-micro-GGUF",
"filename": "granite-4.0-micro-Q4_K_M.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 1.73,
"source_repo": "ibm-granite/granite-4.0-micro-GGUF",
"filename": "granite-4.0-micro-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 1.37,
"source_repo": "ibm-granite/granite-4.0-micro-GGUF",
"filename": "granite-4.0-micro-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/ibm-granite/granite-4.0-micro",
"gguf": "https://huggingface.co/ibm-granite/granite-4.0-micro-GGUF",
"ollama": "https://ollama.com/library/granite4"
},
"run": {
"ollama": "ollama run granite4:micro",
"llamacpp": "llama-server -hf ibm-granite/granite-4.0-micro-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "granite-4.0-h-tiny",
"family": "llm",
"name": "Granite 4.0 H-Tiny (7B-A1B MoE)",
"repo_id": "ibm-granite/granite-4.0-h-tiny",
"params_b": 6.94,
"active_params_b": 1.0,
"license": "apache-2.0",
"gguf_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"ollama_tag": "granite4:tiny-h",
"good_for": "Hybrid design: 7B knowledge, 1B-speed replies.",
"stale": false,
"gated": false,
"downloads_30d": 93730,
"arch": {
"n_layers": 40,
"hidden": 1536,
"n_heads": 12,
"n_kv_heads": 4
},
"context_len": 1048576,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 7.39,
"source_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"filename": "granite-4.0-h-tiny-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 5.71,
"source_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"filename": "granite-4.0-h-tiny-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 4.95,
"source_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"filename": "granite-4.0-h-tiny-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 4.23,
"source_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"filename": "granite-4.0-h-tiny-Q4_K_M.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 3.35,
"source_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"filename": "granite-4.0-h-tiny-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 2.59,
"source_repo": "ibm-granite/granite-4.0-h-tiny-GGUF",
"filename": "granite-4.0-h-tiny-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/ibm-granite/granite-4.0-h-tiny",
"gguf": "https://huggingface.co/ibm-granite/granite-4.0-h-tiny-GGUF",
"ollama": "https://ollama.com/library/granite4"
},
"run": {
"ollama": "ollama run granite4:tiny-h",
"llamacpp": "llama-server -hf ibm-granite/granite-4.0-h-tiny-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "granite-4.0-h-small",
"family": "llm",
"name": "Granite 4.0 H-Small (32B-A9B MoE)",
"repo_id": "ibm-granite/granite-4.0-h-small",
"params_b": 32.21,
"active_params_b": 9.0,
"license": "apache-2.0",
"gguf_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"ollama_tag": "granite4:small-h",
"good_for": "Big hybrid that stays responsive thanks to 9B active.",
"stale": false,
"gated": false,
"downloads_30d": 569199,
"arch": {
"n_layers": 40,
"hidden": 4096,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 1048576,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 34.26,
"source_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"filename": "granite-4.0-h-small-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 26.47,
"source_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"filename": "granite-4.0-h-small-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 22.87,
"source_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"filename": "granite-4.0-h-small-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 19.48,
"source_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"filename": "granite-4.0-h-small-Q4_K_M.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 15.36,
"source_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"filename": "granite-4.0-h-small-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 11.78,
"source_repo": "ibm-granite/granite-4.0-h-small-GGUF",
"filename": "granite-4.0-h-small-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/ibm-granite/granite-4.0-h-small",
"gguf": "https://huggingface.co/ibm-granite/granite-4.0-h-small-GGUF",
"ollama": "https://ollama.com/library/granite4"
},
"run": {
"ollama": "ollama run granite4:small-h",
"llamacpp": "llama-server -hf ibm-granite/granite-4.0-h-small-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "smollm3-3b",
"family": "llm",
"name": "SmolLM3 3B",
"repo_id": "HuggingFaceTB/SmolLM3-3B",
"params_b": 3.08,
"license": "apache-2.0",
"gguf_repo": "ggml-org/SmolLM3-3B-GGUF",
"good_for": "Hugging Face's own fully-open small model, trained in public.",
"stale": false,
"gated": false,
"downloads_30d": 519434,
"arch": {
"n_layers": 36,
"hidden": 2048,
"n_heads": 16,
"n_kv_heads": 4
},
"context_len": 65536,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 3.28,
"source_repo": "ggml-org/SmolLM3-3B-GGUF",
"filename": "SmolLM3-Q8_0.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 1.92,
"source_repo": "ggml-org/SmolLM3-3B-GGUF",
"filename": "SmolLM3-Q4_K_M.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/HuggingFaceTB/SmolLM3-3B",
"gguf": "https://huggingface.co/ggml-org/SmolLM3-3B-GGUF"
},
"run": {
"ollama": "ollama run hf.co/ggml-org/SmolLM3-3B-GGUF:Q4_K_M",
"llamacpp": "llama-server -hf ggml-org/SmolLM3-3B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "minicpm-4.1-8b",
"family": "llm",
"name": "MiniCPM 4.1 8B",
"repo_id": "openbmb/MiniCPM4.1-8B",
"params_b": 8.19,
"license": "apache-2.0",
"gguf_repo": "openbmb/MiniCPM4.1-8B-GGUF",
"good_for": "Tuned hard for running on ordinary devices; official GGUFs.",
"stale": false,
"gated": false,
"downloads_30d": 50634,
"arch": {
"n_layers": 32,
"hidden": 4096,
"n_heads": 32,
"n_kv_heads": 2
},
"context_len": 65536,
"quants": [
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 4.97,
"source_repo": "openbmb/MiniCPM4.1-8B-GGUF",
"filename": "MiniCPM4.1-8B-Q4_K_M.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/openbmb/MiniCPM4.1-8B",
"gguf": "https://huggingface.co/openbmb/MiniCPM4.1-8B-GGUF"
},
"run": {
"ollama": "ollama run hf.co/openbmb/MiniCPM4.1-8B-GGUF:Q4_K_M",
"llamacpp": "llama-server -hf openbmb/MiniCPM4.1-8B-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "nemotron-nano-9b",
"family": "llm",
"name": "NVIDIA Nemotron Nano 9B v2",
"repo_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2",
"params_b": 8.89,
"license": "other",
"gguf_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"good_for": "Hybrid Mamba design: strong reasoning with fast, steady output.",
"stale": false,
"gated": false,
"downloads_30d": 545070,
"arch": {
"n_layers": 56,
"hidden": 4480,
"n_heads": 40,
"n_kv_heads": 8
},
"context_len": 1048576,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 9.46,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 9.14,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 7.07,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 6.53,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 5.27,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 5.38,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 5.01,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-9B-v2-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2",
"gguf": "https://huggingface.co/bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF"
},
"run": {
"ollama": "ollama run hf.co/bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF:Q4_K_M",
"llamacpp": "llama-server -hf bartowski/nvidia_NVIDIA-Nemotron-Nano-9B-v2-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "nemotron-nano-12b",
"family": "llm",
"name": "NVIDIA Nemotron Nano 12B v2",
"repo_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2",
"params_b": 12.31,
"license": "other",
"gguf_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"good_for": "The bigger Nemotron Nano; reasoning-first hybrid.",
"stale": false,
"gated": false,
"downloads_30d": 15998,
"arch": {
"n_layers": 62,
"hidden": 5120,
"n_heads": 40,
"n_kv_heads": 8
},
"context_len": 1048576,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 13.09,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 10.11,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 8.76,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 7.49,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 6.75,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 6.02,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 4.7,
"source_repo": "bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF",
"filename": "nvidia_NVIDIA-Nemotron-Nano-12B-v2-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-12B-v2",
"gguf": "https://huggingface.co/bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF"
},
"run": {
"ollama": "ollama run hf.co/bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF:Q4_K_M",
"llamacpp": "llama-server -hf bartowski/nvidia_NVIDIA-Nemotron-Nano-12B-v2-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-vl-2b",
"family": "vlm",
"name": "Qwen3-VL 2B",
"repo_id": "Qwen/Qwen3-VL-2B-Instruct",
"params_b": 2.13,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"ollama_tag": "qwen3-vl:2b",
"good_for": "Chat about images on very light hardware.",
"mem_note": "Vision models load an extra image encoder (~0.5 GB on top of the figures shown).",
"stale": false,
"gated": false,
"downloads_30d": 1944930,
"arch": {
"n_layers": 28,
"hidden": 2048,
"n_heads": 16,
"n_kv_heads": 8
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 1.83,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 1.42,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 1.26,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 1.11,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 1.01,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 0.94,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 0.78,
"source_repo": "unsloth/Qwen3-VL-2B-Instruct-GGUF",
"filename": "Qwen3-VL-2B-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-VL-2B-Instruct",
"gguf": "https://huggingface.co/unsloth/Qwen3-VL-2B-Instruct-GGUF",
"ollama": "https://ollama.com/library/qwen3-vl"
},
"run": {
"ollama": "ollama run qwen3-vl:2b",
"llamacpp": "llama-server -hf unsloth/Qwen3-VL-2B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-vl-4b",
"family": "vlm",
"name": "Qwen3-VL 4B",
"repo_id": "Qwen/Qwen3-VL-4B-Instruct",
"params_b": 4.44,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"ollama_tag": "qwen3-vl:4b",
"good_for": "The sweet spot for asking questions about images and screenshots.",
"mem_note": "Vision models load an extra image encoder (~0.5 GB on top of the figures shown).",
"stale": false,
"gated": false,
"downloads_30d": 3769866,
"arch": {
"n_layers": 36,
"hidden": 2560,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 4.28,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 3.31,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 2.89,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 2.5,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 2.27,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 2.08,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 1.67,
"source_repo": "unsloth/Qwen3-VL-4B-Instruct-GGUF",
"filename": "Qwen3-VL-4B-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-VL-4B-Instruct",
"gguf": "https://huggingface.co/unsloth/Qwen3-VL-4B-Instruct-GGUF",
"ollama": "https://ollama.com/library/qwen3-vl"
},
"run": {
"ollama": "ollama run qwen3-vl:4b",
"llamacpp": "llama-server -hf unsloth/Qwen3-VL-4B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-vl-8b",
"family": "vlm",
"name": "Qwen3-VL 8B",
"repo_id": "Qwen/Qwen3-VL-8B-Instruct",
"params_b": 8.77,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"ollama_tag": "qwen3-vl:8b",
"good_for": "Strong image understanding: documents, charts, photos.",
"mem_note": "Vision models load an extra image encoder (~0.5 GB on top of the figures shown).",
"stale": false,
"gated": false,
"downloads_30d": 7690985,
"arch": {
"n_layers": 36,
"hidden": 4096,
"n_heads": 32,
"n_kv_heads": 8
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 8.71,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 6.73,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 5.85,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 5.03,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 4.58,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 4.12,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 3.28,
"source_repo": "unsloth/Qwen3-VL-8B-Instruct-GGUF",
"filename": "Qwen3-VL-8B-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-VL-8B-Instruct",
"gguf": "https://huggingface.co/unsloth/Qwen3-VL-8B-Instruct-GGUF",
"ollama": "https://ollama.com/library/qwen3-vl"
},
"run": {
"ollama": "ollama run qwen3-vl:8b",
"llamacpp": "llama-server -hf unsloth/Qwen3-VL-8B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-vl-30b-a3b",
"family": "vlm",
"name": "Qwen3-VL 30B-A3B (MoE)",
"repo_id": "Qwen/Qwen3-VL-30B-A3B-Instruct",
"params_b": 31.07,
"active_params_b": 3.0,
"license": "apache-2.0",
"gguf_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"ollama_tag": "qwen3-vl:30b",
"good_for": "Top-tier local vision-language quality, fast thanks to 3B active.",
"mem_note": "Vision models load an extra image encoder (~0.5 GB on top of the figures shown).",
"stale": false,
"gated": false,
"downloads_30d": 783446,
"arch": {
"n_layers": 48,
"hidden": 2048,
"n_heads": 32,
"n_kv_heads": 4
},
"context_len": 262144,
"quants": [
{
"key": "Q8_0",
"plain": "Near-full (8-bit)",
"file_gb": 32.48,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-Q8_0.gguf"
},
{
"key": "Q6_K",
"plain": "High (6-bit)",
"file_gb": 25.09,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-Q6_K.gguf"
},
{
"key": "Q5_K_M",
"plain": "Balanced+ (5-bit)",
"file_gb": 21.73,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-Q5_K_M.gguf"
},
{
"key": "Q4_K_M",
"plain": "Balanced (4-bit)",
"file_gb": 18.56,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-Q4_K_M.gguf"
},
{
"key": "IQ4_XS",
"plain": "Compact (4-bit)",
"file_gb": 16.38,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-IQ4_XS.gguf"
},
{
"key": "Q3_K_M",
"plain": "Compact (3-bit)",
"file_gb": 14.71,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-Q3_K_M.gguf"
},
{
"key": "Q2_K",
"plain": "Tiny (2-bit)",
"file_gb": 11.26,
"source_repo": "unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"filename": "Qwen3-VL-30B-A3B-Instruct-Q2_K.gguf"
}
],
"provenance": "filesize",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-VL-30B-A3B-Instruct",
"gguf": "https://huggingface.co/unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF",
"ollama": "https://ollama.com/library/qwen3-vl"
},
"run": {
"ollama": "ollama run qwen3-vl:30b",
"llamacpp": "llama-server -hf unsloth/Qwen3-VL-30B-A3B-Instruct-GGUF:Q4_K_M"
},
"last_verified": "2026-06-09"
},
{
"key": "smolvlm2-2.2b",
"family": "vlm",
"name": "SmolVLM2 2.2B",
"repo_id": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"params_b": 2.25,
"license": "apache-2.0",
"mem_gb": 6.0,
"mem_provenance": "estimated",
"run_pip": "pip install transformers torch",
"good_for": "Small open video+image understanding; also a common base for robot-policy finetunes.",
"stale": false,
"gated": false,
"downloads_30d": 272382,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/HuggingFaceTB/SmolVLM2-2.2B-Instruct"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "moondream2",
"family": "vlm",
"name": "Moondream2",
"repo_id": "vikhyatk/moondream2",
"params_b": 1.93,
"license": "apache-2.0",
"ollama_tag": "moondream",
"mem_gb": 4.5,
"mem_provenance": "estimated",
"run_pip": "pip install moondream",
"good_for": "Tiny image-question model that runs almost anywhere.",
"stale": false,
"gated": false,
"downloads_30d": 2074069,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/vikhyatk/moondream2",
"ollama": "https://ollama.com/library/moondream"
},
"run": {
"ollama": "ollama run moondream",
"pip": "pip install moondream"
},
"last_verified": "2026-06-09"
},
{
"key": "yolo26-n",
"family": "vision",
"use_cases": [
"detect",
"classify",
"pose",
"train-vision"
],
"name": "YOLO26 Nano",
"repo_id": "Ultralytics/YOLO26",
"params_b": 0.0026,
"license": "agpl-3.0",
"license_note": "AGPL-3.0: free for open-source use; commercial products need an Ultralytics license.",
"mem_gb": 1.0,
"mem_provenance": "estimated",
"run_pip": "pip install ultralytics",
"good_for": "Real-time detection even on weak hardware or a webcam.",
"docs": "https://docs.ultralytics.com/models/yolo26",
"stale": false,
"gated": false,
"downloads_30d": 7012,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Ultralytics/YOLO26",
"docs": "https://docs.ultralytics.com/models/yolo26"
},
"run": {
"pip": "pip install ultralytics"
},
"last_verified": "2026-06-09"
},
{
"key": "yolo26-s",
"family": "vision",
"use_cases": [
"detect",
"classify",
"pose",
"train-vision"
],
"name": "YOLO26 Small",
"repo_id": "Ultralytics/YOLO26",
"params_b": 0.0096,
"license": "agpl-3.0",
"license_note": "AGPL-3.0: free for open-source use; commercial products need an Ultralytics license.",
"mem_gb": 1.2,
"mem_provenance": "estimated",
"run_pip": "pip install ultralytics",
"good_for": "Fast and clearly more accurate than Nano.",
"docs": "https://docs.ultralytics.com/models/yolo26",
"stale": false,
"gated": false,
"downloads_30d": 7012,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Ultralytics/YOLO26",
"docs": "https://docs.ultralytics.com/models/yolo26"
},
"run": {
"pip": "pip install ultralytics"
},
"last_verified": "2026-06-09"
},
{
"key": "yolo26-m",
"family": "vision",
"use_cases": [
"detect",
"classify",
"pose",
"train-vision"
],
"name": "YOLO26 Medium",
"repo_id": "Ultralytics/YOLO26",
"params_b": 0.0202,
"license": "agpl-3.0",
"license_note": "AGPL-3.0: free for open-source use; commercial products need an Ultralytics license.",
"mem_gb": 1.6,
"mem_provenance": "estimated",
"run_pip": "pip install ultralytics",
"good_for": "Balanced accuracy and speed for real work.",
"docs": "https://docs.ultralytics.com/models/yolo26",
"stale": false,
"gated": false,
"downloads_30d": 7012,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Ultralytics/YOLO26",
"docs": "https://docs.ultralytics.com/models/yolo26"
},
"run": {
"pip": "pip install ultralytics"
},
"last_verified": "2026-06-09"
},
{
"key": "yolo26-l",
"family": "vision",
"use_cases": [
"detect",
"classify",
"pose",
"train-vision"
],
"name": "YOLO26 Large",
"repo_id": "Ultralytics/YOLO26",
"params_b": 0.0337,
"license": "agpl-3.0",
"license_note": "AGPL-3.0: free for open-source use; commercial products need an Ultralytics license.",
"mem_gb": 2.0,
"mem_provenance": "estimated",
"run_pip": "pip install ultralytics",
"good_for": "High accuracy when you have a real GPU.",
"docs": "https://docs.ultralytics.com/models/yolo26",
"stale": false,
"gated": false,
"downloads_30d": 7012,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Ultralytics/YOLO26",
"docs": "https://docs.ultralytics.com/models/yolo26"
},
"run": {
"pip": "pip install ultralytics"
},
"last_verified": "2026-06-09"
},
{
"key": "yolo26-x",
"family": "vision",
"use_cases": [
"detect",
"classify",
"pose",
"train-vision"
],
"name": "YOLO26 X-Large",
"repo_id": "Ultralytics/YOLO26",
"params_b": 0.0569,
"license": "agpl-3.0",
"license_note": "AGPL-3.0: free for open-source use; commercial products need an Ultralytics license.",
"mem_gb": 2.5,
"mem_provenance": "estimated",
"run_pip": "pip install ultralytics",
"good_for": "Highest accuracy in the family, slowest.",
"docs": "https://docs.ultralytics.com/models/yolo26",
"stale": false,
"gated": false,
"downloads_30d": 7012,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Ultralytics/YOLO26",
"docs": "https://docs.ultralytics.com/models/yolo26"
},
"run": {
"pip": "pip install ultralytics"
},
"last_verified": "2026-06-09"
},
{
"key": "sam-2.1-tiny",
"family": "vision",
"use_cases": [
"segment"
],
"name": "SAM 2.1 Tiny",
"repo_id": "facebook/sam2.1-hiera-tiny",
"params_b": 0.04,
"license": "apache-2.0",
"mem_gb": 1.5,
"mem_provenance": "community",
"run_pip": "pip install sam2",
"good_for": "Click-to-segment anything in images; light and quick.",
"stale": false,
"gated": false,
"downloads_30d": 30268,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/facebook/sam2.1-hiera-tiny"
},
"run": {
"pip": "pip install sam2"
},
"last_verified": "2026-06-09"
},
{
"key": "sam-2.1-small",
"family": "vision",
"use_cases": [
"segment"
],
"name": "SAM 2.1 Small",
"repo_id": "facebook/sam2.1-hiera-small",
"params_b": 0.05,
"license": "apache-2.0",
"mem_gb": 1.8,
"mem_provenance": "community",
"run_pip": "pip install sam2",
"good_for": "Solid segmentation quality, still light.",
"stale": false,
"gated": false,
"downloads_30d": 12700,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/facebook/sam2.1-hiera-small"
},
"run": {
"pip": "pip install sam2"
},
"last_verified": "2026-06-09"
},
{
"key": "sam-2.1-large",
"family": "vision",
"use_cases": [
"segment"
],
"name": "SAM 2.1 Large",
"repo_id": "facebook/sam2.1-hiera-large",
"params_b": 0.22,
"license": "apache-2.0",
"mem_gb": 3.0,
"mem_provenance": "community",
"mem_note": "Image segmentation is light; tracking through video needs much more (community reports: 12 GB+).",
"run_pip": "pip install sam2",
"good_for": "Best SAM quality for images and video object tracking.",
"stale": false,
"gated": false,
"downloads_30d": 70422,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/facebook/sam2.1-hiera-large"
},
"run": {
"pip": "pip install sam2"
},
"last_verified": "2026-06-09"
},
{
"key": "sam-3",
"family": "vision",
"use_cases": [
"segment"
],
"name": "SAM 3",
"repo_id": "facebook/sam3",
"params_b": 0.86,
"license": "other",
"mem_gb": 4.0,
"mem_provenance": "community",
"run_pip": "pip install transformers torch",
"good_for": "Segment things by describing them in words, not just clicks.",
"stale": false,
"gated": true,
"downloads_30d": 1879279,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/facebook/sam3"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "da3-small",
"family": "vision",
"use_cases": [
"depth"
],
"name": "Depth Anything 3 Small",
"repo_id": "depth-anything/DA3-SMALL",
"params_b": 0.03,
"license": "apache-2.0",
"mem_gb": 1.2,
"mem_provenance": "estimated",
"run_pip": "pip install transformers torch",
"good_for": "Depth maps from any photo, fast.",
"stale": false,
"gated": false,
"downloads_30d": 23161,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/depth-anything/DA3-SMALL"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "da3-base",
"family": "vision",
"use_cases": [
"depth"
],
"name": "Depth Anything 3 Base",
"repo_id": "depth-anything/DA3-BASE",
"params_b": 0.14,
"license": "apache-2.0",
"mem_gb": 1.8,
"mem_provenance": "estimated",
"run_pip": "pip install transformers torch",
"good_for": "Sharper depth, still light.",
"stale": false,
"gated": false,
"downloads_30d": 50537,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/depth-anything/DA3-BASE"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "da3-large",
"family": "vision",
"use_cases": [
"depth"
],
"name": "Depth Anything 3 Large",
"repo_id": "depth-anything/DA3-LARGE",
"params_b": 0.41,
"license": "cc-by-nc-4.0",
"license_note": "Non-commercial licence.",
"mem_gb": 3.0,
"mem_provenance": "estimated",
"run_pip": "pip install transformers torch",
"good_for": "Best depth quality; research / personal use only.",
"stale": false,
"gated": false,
"downloads_30d": 135024,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/depth-anything/DA3-LARGE"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "foundationpose",
"family": "vision",
"use_cases": [
"pose"
],
"name": "FoundationPose (6-DoF)",
"repo_id": "NVlabs/FoundationPose",
"repo_kind": "github",
"params_b": 0.3,
"license": "nvidia-source-code",
"license_note": "Research-only licence; weights are downloaded from links in the GitHub README, not Hugging Face.",
"mem_gb": 8.0,
"mem_provenance": "community",
"mem_note": "Community reports it running on a 12 GB RTX 3060. Needs CUDA and a custom build: advanced setup.",
"docs": "https://github.com/NVlabs/FoundationPose",
"good_for": "Full 6-DoF object pose (position + rotation) for robotics. The serious option.",
"stale": false,
"links": {
"home": "https://github.com/NVlabs/FoundationPose",
"docs": "https://github.com/NVlabs/FoundationPose"
},
"provenance": "community",
"last_verified": "2026-06-09"
},
{
"key": "paddleocr",
"family": "vision",
"use_cases": [
"ocr"
],
"name": "PaddleOCR (PP-OCRv5)",
"repo_id": "PaddlePaddle/PP-OCRv5_server_rec",
"params_b": 0.05,
"license": "apache-2.0",
"mem_gb": 1.0,
"mem_provenance": "community",
"run_pip": "pip install paddleocr",
"good_for": "The classic text-from-images toolkit. Works fine on CPU.",
"stale": false,
"gated": false,
"downloads_30d": 196930,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/PaddlePaddle/PP-OCRv5_server_rec"
},
"run": {
"pip": "pip install paddleocr"
},
"last_verified": "2026-06-09"
},
{
"key": "deepseek-ocr",
"family": "vision",
"use_cases": [
"ocr"
],
"name": "DeepSeek-OCR",
"repo_id": "deepseek-ai/DeepSeek-OCR",
"params_b": 3.34,
"license": "mit",
"mem_gb": 8.0,
"mem_provenance": "estimated",
"ollama_tag": "deepseek-ocr",
"run_pip": "pip install transformers torch",
"good_for": "Turns whole documents into clean text/markdown, layout and all.",
"stale": false,
"gated": false,
"downloads_30d": 2404442,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/deepseek-ai/DeepSeek-OCR",
"ollama": "https://ollama.com/library/deepseek-ocr"
},
"run": {
"ollama": "ollama run deepseek-ocr",
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "sdxl",
"family": "imagegen",
"use_cases": [
"imagegen",
"inpaint"
],
"name": "Stable Diffusion XL",
"repo_id": "stabilityai/stable-diffusion-xl-base-1.0",
"params_b": 3.5,
"license": "openrail++",
"mem_gb": 8.0,
"mem_provenance": "community",
"run_pip": "pip install diffusers torch",
"good_for": "Excellent 1024px images with a huge community of styles and add-ons.",
"stale": false,
"gated": false,
"downloads_30d": 1436868,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "sd-3.5-medium",
"family": "imagegen",
"use_cases": [
"imagegen",
"inpaint"
],
"name": "Stable Diffusion 3.5 Medium",
"repo_id": "stabilityai/stable-diffusion-3.5-medium",
"params_b": 2.5,
"license": "other",
"license_note": "Free for individuals and businesses under $1M revenue.",
"mem_gb": 9.9,
"mem_provenance": "vendor",
"mem_note": "9.9 GB is Stability's own published figure (excluding text encoders).",
"run_pip": "pip install diffusers torch",
"good_for": "Modern image quality tuned to run on consumer cards.",
"stale": false,
"gated": true,
"downloads_30d": 126738,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-medium"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "sd-3.5-large",
"family": "imagegen",
"use_cases": [
"imagegen",
"inpaint"
],
"name": "Stable Diffusion 3.5 Large",
"repo_id": "stabilityai/stable-diffusion-3.5-large",
"params_b": 8.0,
"license": "other",
"license_note": "Free for individuals and businesses under $1M revenue.",
"mem_gb": 24.0,
"mem_provenance": "vendor",
"mem_note": "About 24 GB at full quality (vendor figure); quantized builds run in roughly half that.",
"run_pip": "pip install diffusers torch",
"good_for": "Stability's top open image model.",
"stale": false,
"gated": true,
"downloads_30d": 22257,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/stabilityai/stable-diffusion-3.5-large"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "flux1-schnell",
"family": "imagegen",
"use_cases": [
"imagegen"
],
"name": "FLUX.1 Schnell",
"repo_id": "black-forest-labs/FLUX.1-schnell",
"params_b": 12.0,
"license": "apache-2.0",
"mem_gb": 16.0,
"mem_provenance": "community",
"mem_note": "Roughly 16 GB at 8-bit, 24 GB at full quality. Community-reported figures.",
"run_pip": "pip install diffusers torch",
"good_for": "Near-top image quality in few steps, and Apache-licensed.",
"stale": false,
"gated": true,
"downloads_30d": 337454,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/black-forest-labs/FLUX.1-schnell"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "flux1-dev",
"family": "imagegen",
"use_cases": [
"imagegen",
"inpaint"
],
"name": "FLUX.1 Dev",
"repo_id": "black-forest-labs/FLUX.1-dev",
"params_b": 12.0,
"license": "other",
"license_note": "Non-commercial licence; gated (accept terms on Hugging Face).",
"mem_gb": 16.0,
"mem_provenance": "community",
"mem_note": "Roughly 16 GB at 8-bit, 24 GB at full quality. Community-reported figures.",
"run_pip": "pip install diffusers torch",
"good_for": "State-of-the-art open image quality (for non-commercial work).",
"stale": false,
"gated": true,
"downloads_30d": 984011,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/black-forest-labs/FLUX.1-dev"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "flux2-dev",
"family": "imagegen",
"use_cases": [
"imagegen"
],
"name": "FLUX.2 Dev (32B)",
"repo_id": "black-forest-labs/FLUX.2-dev",
"params_b": 32.0,
"license": "other",
"license_note": "Non-commercial licence; gated.",
"mem_gb": 90.0,
"mem_provenance": "community",
"mem_note": "Around 90 GB at full quality; 4-bit with offloading is reported to fit a 24 GB card, slowly.",
"run_pip": "pip install diffusers torch",
"good_for": "The frontier of open image generation. Honest answer: workstation hardware.",
"stale": false,
"gated": true,
"downloads_30d": 309301,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/black-forest-labs/FLUX.2-dev"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "real-esrgan",
"family": "imagegen",
"use_cases": [
"upscale"
],
"name": "Real-ESRGAN",
"repo_id": "ai-forever/Real-ESRGAN",
"params_b": 0.017,
"license": "bsd-3-clause",
"mem_gb": 2.0,
"mem_provenance": "community",
"run_pip": "pip install realesrgan",
"good_for": "The standard for upscaling and restoring photos.",
"stale": false,
"gated": false,
"downloads_30d": 0,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/ai-forever/Real-ESRGAN"
},
"run": {
"pip": "pip install realesrgan"
},
"last_verified": "2026-06-09"
},
{
"key": "birefnet",
"family": "imagegen",
"use_cases": [
"bgremove"
],
"name": "BiRefNet",
"repo_id": "ZhengPeng7/BiRefNet",
"params_b": 0.22,
"license": "mit",
"mem_gb": 3.0,
"mem_provenance": "community",
"run_pip": "pip install transformers torch",
"good_for": "Crisp background removal, MIT-licensed.",
"stale": false,
"gated": false,
"downloads_30d": 721429,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/ZhengPeng7/BiRefNet"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "rmbg-2.0",
"family": "imagegen",
"use_cases": [
"bgremove"
],
"name": "RMBG 2.0",
"repo_id": "briaai/RMBG-2.0",
"params_b": 0.22,
"license": "other",
"license_note": "Free for non-commercial use only.",
"mem_gb": 3.0,
"mem_provenance": "community",
"run_pip": "pip install transformers torch",
"good_for": "Very strong background removal (check the licence for products).",
"stale": false,
"gated": true,
"downloads_30d": 620472,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/briaai/RMBG-2.0"
},
"run": {
"pip": "pip install transformers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "ltx-video",
"family": "imagegen",
"use_cases": [
"videogen"
],
"name": "LTX-Video",
"repo_id": "Lightricks/LTX-Video",
"params_b": 2.0,
"license": "other",
"mem_gb": 12.0,
"mem_provenance": "community",
"run_pip": "pip install diffusers torch",
"good_for": "Real-time-class local video generation on consumer cards.",
"stale": false,
"gated": false,
"downloads_30d": 446216,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/Lightricks/LTX-Video"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "wan-2.2-ti2v-5b",
"family": "imagegen",
"use_cases": [
"videogen"
],
"name": "Wan 2.2 TI2V 5B",
"repo_id": "Wan-AI/Wan2.2-TI2V-5B",
"params_b": 5.0,
"license": "apache-2.0",
"mem_gb": 16.0,
"mem_provenance": "community",
"run_pip": "pip install diffusers torch",
"good_for": "Strong open text/image-to-video that fits prosumer cards.",
"stale": false,
"gated": false,
"downloads_30d": 9456,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/Wan-AI/Wan2.2-TI2V-5B"
},
"run": {
"pip": "pip install diffusers torch"
},
"last_verified": "2026-06-09"
},
{
"key": "whisper-base",
"family": "audio",
"use_cases": [
"stt"
],
"name": "Whisper Base",
"repo_id": "openai/whisper-base",
"params_b": 0.07,
"license": "apache-2.0",
"mem_gb": 1.0,
"mem_provenance": "vendor",
"mem_note": "OpenAI's own published VRAM figure.",
"run_pip": "pip install faster-whisper",
"good_for": "Quick rough transcription on any machine.",
"stale": false,
"gated": false,
"downloads_30d": 4308029,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/openai/whisper-base"
},
"run": {
"pip": "pip install faster-whisper"
},
"last_verified": "2026-06-09"
},
{
"key": "whisper-small",
"family": "audio",
"use_cases": [
"stt"
],
"name": "Whisper Small",
"repo_id": "openai/whisper-small",
"params_b": 0.24,
"license": "apache-2.0",
"mem_gb": 2.0,
"mem_provenance": "vendor",
"mem_note": "OpenAI's own published VRAM figure.",
"run_pip": "pip install faster-whisper",
"good_for": "Good accuracy on clear audio, still fast.",
"stale": false,
"gated": false,
"downloads_30d": 2315618,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/openai/whisper-small"
},
"run": {
"pip": "pip install faster-whisper"
},
"last_verified": "2026-06-09"
},
{
"key": "whisper-large-v3-turbo",
"family": "audio",
"use_cases": [
"stt"
],
"name": "Whisper Large v3 Turbo",
"repo_id": "openai/whisper-large-v3-turbo",
"params_b": 0.81,
"license": "mit",
"mem_gb": 6.0,
"mem_provenance": "vendor",
"mem_note": "OpenAI's own published VRAM figure. faster-whisper in 8-bit roughly halves it.",
"run_pip": "pip install faster-whisper",
"good_for": "Near-best accuracy at several times the speed of Large.",
"stale": false,
"gated": false,
"downloads_30d": 7948390,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/openai/whisper-large-v3-turbo"
},
"run": {
"pip": "pip install faster-whisper"
},
"last_verified": "2026-06-09"
},
{
"key": "whisper-large-v3",
"family": "audio",
"use_cases": [
"stt"
],
"name": "Whisper Large v3",
"repo_id": "openai/whisper-large-v3",
"params_b": 1.54,
"license": "apache-2.0",
"mem_gb": 10.0,
"mem_provenance": "vendor",
"mem_note": "OpenAI's own published VRAM figure. faster-whisper in 8-bit roughly halves it.",
"run_pip": "pip install faster-whisper",
"good_for": "The best open transcription accuracy, any language.",
"stale": false,
"gated": false,
"downloads_30d": 5054098,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/openai/whisper-large-v3"
},
"run": {
"pip": "pip install faster-whisper"
},
"last_verified": "2026-06-09"
},
{
"key": "kokoro-82m",
"family": "audio",
"use_cases": [
"tts"
],
"name": "Kokoro TTS (82M)",
"repo_id": "hexgrad/Kokoro-82M",
"params_b": 0.082,
"license": "apache-2.0",
"mem_gb": 1.5,
"mem_provenance": "community",
"run_pip": "pip install kokoro",
"good_for": "Shockingly good small voice model; runs even on CPU.",
"stale": false,
"gated": false,
"downloads_30d": 13198222,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/hexgrad/Kokoro-82M"
},
"run": {
"pip": "pip install kokoro"
},
"last_verified": "2026-06-09"
},
{
"key": "chatterbox",
"family": "audio",
"use_cases": [
"tts"
],
"name": "Chatterbox TTS",
"repo_id": "ResembleAI/chatterbox",
"params_b": 0.5,
"license": "mit",
"mem_gb": 4.0,
"mem_provenance": "community",
"run_pip": "pip install chatterbox-tts",
"good_for": "Production-grade voice cloning, MIT-licensed.",
"stale": false,
"gated": false,
"downloads_30d": 1694198,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/ResembleAI/chatterbox"
},
"run": {
"pip": "pip install chatterbox-tts"
},
"last_verified": "2026-06-09"
},
{
"key": "f5-tts",
"family": "audio",
"use_cases": [
"tts"
],
"name": "F5-TTS",
"repo_id": "SWivid/F5-TTS",
"params_b": 0.3,
"license": "cc-by-nc-4.0",
"license_note": "Non-commercial licence.",
"mem_gb": 8.0,
"mem_provenance": "community",
"run_pip": "pip install f5-tts",
"good_for": "High-quality voice cloning for personal projects.",
"stale": false,
"gated": false,
"downloads_30d": 646717,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/SWivid/F5-TTS"
},
"run": {
"pip": "pip install f5-tts"
},
"last_verified": "2026-06-09"
},
{
"key": "ace-step",
"family": "audio",
"use_cases": [
"music"
],
"name": "ACE-Step (3.5B)",
"repo_id": "ACE-Step/ACE-Step-v1-3.5B",
"params_b": 3.5,
"license": "apache-2.0",
"mem_gb": 10.0,
"mem_provenance": "community",
"run_pip": "pip install acestep",
"good_for": "The best permissively-licensed local music generator.",
"stale": false,
"gated": false,
"downloads_30d": 0,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B"
},
"run": {
"pip": "pip install acestep"
},
"last_verified": "2026-06-09"
},
{
"key": "musicgen-small",
"family": "audio",
"use_cases": [
"music"
],
"name": "MusicGen Small",
"repo_id": "facebook/musicgen-small",
"params_b": 0.59,
"license": "cc-by-nc-4.0",
"license_note": "Non-commercial licence.",
"mem_gb": 6.0,
"mem_provenance": "community",
"run_pip": "pip install audiocraft",
"good_for": "Quick music sketches on modest hardware.",
"stale": false,
"gated": false,
"downloads_30d": 186078,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/facebook/musicgen-small"
},
"run": {
"pip": "pip install audiocraft"
},
"last_verified": "2026-06-09"
},
{
"key": "musicgen-medium",
"family": "audio",
"use_cases": [
"music"
],
"name": "MusicGen Medium",
"repo_id": "facebook/musicgen-medium",
"params_b": 1.5,
"license": "cc-by-nc-4.0",
"license_note": "Non-commercial licence.",
"mem_gb": 16.0,
"mem_provenance": "vendor",
"mem_note": "Meta's docs say a GPU with at least 16 GB for this size.",
"run_pip": "pip install audiocraft",
"good_for": "Meta's well-known music model (non-commercial).",
"stale": false,
"gated": false,
"downloads_30d": 1443963,
"provenance": "vendor",
"links": {
"hf": "https://huggingface.co/facebook/musicgen-medium"
},
"run": {
"pip": "pip install audiocraft"
},
"last_verified": "2026-06-09"
},
{
"key": "all-minilm-l6-v2",
"family": "embed",
"use_cases": [
"embed"
],
"name": "all-MiniLM-L6-v2",
"repo_id": "sentence-transformers/all-MiniLM-L6-v2",
"params_b": 0.02,
"license": "apache-2.0",
"mem_gb": 0.5,
"mem_provenance": "estimated",
"ollama_tag": "all-minilm",
"run_pip": "pip install sentence-transformers",
"good_for": "The classic tiny embedder. Search your notes on any machine.",
"stale": false,
"gated": false,
"downloads_30d": 230826064,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
"ollama": "https://ollama.com/library/all-minilm"
},
"run": {
"ollama": "ollama run all-minilm",
"pip": "pip install sentence-transformers"
},
"last_verified": "2026-06-09"
},
{
"key": "nomic-embed-text-v1.5",
"family": "embed",
"use_cases": [
"embed"
],
"name": "Nomic Embed Text v1.5",
"repo_id": "nomic-ai/nomic-embed-text-v1.5",
"params_b": 0.14,
"license": "apache-2.0",
"mem_gb": 1.0,
"mem_provenance": "estimated",
"ollama_tag": "nomic-embed-text",
"run_pip": "pip install sentence-transformers",
"good_for": "Strong long-document search with adjustable size.",
"stale": false,
"gated": false,
"downloads_30d": 17630308,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
"ollama": "https://ollama.com/library/nomic-embed-text"
},
"run": {
"ollama": "ollama run nomic-embed-text",
"pip": "pip install sentence-transformers"
},
"last_verified": "2026-06-09"
},
{
"key": "embeddinggemma-300m",
"family": "embed",
"use_cases": [
"embed"
],
"name": "EmbeddingGemma 300M",
"repo_id": "google/embeddinggemma-300m",
"params_b": 0.3,
"license": "gemma",
"mem_gb": 1.2,
"mem_provenance": "estimated",
"ollama_tag": "embeddinggemma",
"run_pip": "pip install sentence-transformers",
"good_for": "Google's on-device embedder, great multilingual search.",
"stale": false,
"gated": true,
"downloads_30d": 1701766,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/google/embeddinggemma-300m",
"ollama": "https://ollama.com/library/embeddinggemma"
},
"run": {
"ollama": "ollama run embeddinggemma",
"pip": "pip install sentence-transformers"
},
"last_verified": "2026-06-09"
},
{
"key": "bge-m3",
"family": "embed",
"use_cases": [
"embed"
],
"name": "BGE-M3",
"repo_id": "BAAI/bge-m3",
"params_b": 0.57,
"license": "mit",
"mem_gb": 2.5,
"mem_provenance": "estimated",
"ollama_tag": "bge-m3",
"run_pip": "pip install sentence-transformers",
"good_for": "Heavyweight multilingual search quality.",
"stale": false,
"gated": false,
"downloads_30d": 29227796,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/BAAI/bge-m3",
"ollama": "https://ollama.com/library/bge-m3"
},
"run": {
"ollama": "ollama run bge-m3",
"pip": "pip install sentence-transformers"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-embedding-0.6b",
"family": "embed",
"use_cases": [
"embed"
],
"name": "Qwen3 Embedding 0.6B",
"repo_id": "Qwen/Qwen3-Embedding-0.6B",
"params_b": 0.6,
"license": "apache-2.0",
"mem_gb": 1.5,
"mem_provenance": "estimated",
"ollama_tag": "qwen3-embedding",
"run_pip": "pip install sentence-transformers",
"good_for": "Modern top-ranked small embedder.",
"stale": false,
"gated": false,
"downloads_30d": 8593433,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B",
"ollama": "https://ollama.com/library/qwen3-embedding"
},
"run": {
"ollama": "ollama run qwen3-embedding",
"pip": "pip install sentence-transformers"
},
"last_verified": "2026-06-09"
},
{
"key": "qwen3-embedding-4b",
"family": "embed",
"use_cases": [
"embed"
],
"name": "Qwen3 Embedding 4B",
"repo_id": "Qwen/Qwen3-Embedding-4B",
"params_b": 4.02,
"license": "apache-2.0",
"mem_gb": 6.0,
"mem_provenance": "estimated",
"run_pip": "pip install sentence-transformers",
"good_for": "Best-in-class search relevance if you have the memory.",
"stale": false,
"gated": false,
"downloads_30d": 2360699,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/Qwen/Qwen3-Embedding-4B"
},
"run": {
"pip": "pip install sentence-transformers"
},
"last_verified": "2026-06-09"
},
{
"key": "chronos-bolt-small",
"family": "data",
"use_cases": [
"forecast"
],
"name": "Chronos-Bolt Small",
"repo_id": "amazon/chronos-bolt-small",
"params_b": 0.05,
"license": "apache-2.0",
"mem_gb": 0.5,
"mem_provenance": "estimated",
"run_pip": "pip install chronos-forecasting",
"good_for": "Zero-shot time-series forecasting; runs on CPU.",
"stale": false,
"gated": false,
"downloads_30d": 1461124,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/amazon/chronos-bolt-small"
},
"run": {
"pip": "pip install chronos-forecasting"
},
"last_verified": "2026-06-09"
},
{
"key": "chronos-bolt-base",
"family": "data",
"use_cases": [
"forecast"
],
"name": "Chronos-Bolt Base",
"repo_id": "amazon/chronos-bolt-base",
"params_b": 0.21,
"license": "apache-2.0",
"mem_gb": 1.0,
"mem_provenance": "estimated",
"run_pip": "pip install chronos-forecasting",
"good_for": "Stronger forecasts, still laptop-friendly.",
"stale": false,
"gated": false,
"downloads_30d": 1253619,
"provenance": "estimated",
"links": {
"hf": "https://huggingface.co/amazon/chronos-bolt-base"
},
"run": {
"pip": "pip install chronos-forecasting"
},
"last_verified": "2026-06-09"
},
{
"key": "tabpfn-v2",
"family": "data",
"use_cases": [
"tabular"
],
"name": "TabPFN v2",
"repo_id": "Prior-Labs/TabPFN-v2-clf",
"params_b": 0.011,
"license": "other",
"mem_gb": 2.0,
"mem_provenance": "community",
"run_pip": "pip install tabpfn",
"good_for": "Spreadsheet predictions (classification) that beat tuned baselines on small data.",
"stale": false,
"gated": false,
"downloads_30d": 34302,
"provenance": "community",
"links": {
"hf": "https://huggingface.co/Prior-Labs/TabPFN-v2-clf"
},
"run": {
"pip": "pip install tabpfn"
},
"last_verified": "2026-06-09"
}
]
}