Spaces:
Running on Zero
Running on Zero
File size: 3,596 Bytes
from __future__ import annotations
from typing import Any
# Models that make up the documented stack, one dict per role.
# "params_b" is the model size (presumably in billions of parameters, matching
# the "4B" wording used in the badge evidence); prize_ledger() sums these
# values and picks the largest entry.
MODEL_STACK = [
    # Language model; the only entry that also names a LoRA adapter repo.
    {
        "role": "LLM brain",
        "model": "openbmb/MiniCPM5-1B",
        "adapter_repo": "build-small-hackathon/hackathon-advisor-minicpm5-lora",
        "params_b": 1.08,
        "status": "deployed adapter target",
        "runtime": "ZeroGPU + transformers + PEFT",
    },
    # Embedding model used for retrieval.
    {
        "role": "Embedding retriever",
        "model": "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF",
        "params_b": 0.30,
        "status": "deployed",
        "runtime": "Modal-built llama.cpp GGUF index + runtime llama.cpp query embeddings",
    },
    # Speech recognition model for voice input.
    {
        "role": "Voice input",
        "model": "nvidia/nemotron-speech-streaming-en-0.6b",
        "params_b": 0.60,
        "status": "deployed",
        "runtime": "ZeroGPU + NVIDIA NeMo ASR",
    },
]
# Static badge checklist: each entry pairs a badge name with a status string
# and a one-sentence evidence note. prize_ledger() returns this list unchanged
# under the "badges" key.
BADGE_LEDGER = [
    {
        "name": "Off the Grid",
        "status": "ready",
        "evidence": "Runtime uses checked-in project vectors and local llama.cpp query embeddings; no proprietary inference API.",
    },
    {
        "name": "Off-Brand",
        "status": "ready",
        "evidence": "Custom gr.Server frontend renders the agent as The Unwritten Almanac.",
    },
    {
        "name": "Sharing is Caring",
        "status": "ready",
        "evidence": "Real Codex session logs are published as a redacted Hugging Face dataset with source hashes and a reusable publisher script.",
    },
    {
        "name": "Field Notes",
        "status": "ready",
        "evidence": "Field Notes markdown export is generated from exact session state.",
    },
    # The only badge whose status is "eligible" rather than "ready"; this
    # string is static and is not derived from the computed eligibility flag.
    {
        "name": "Tiny Titan",
        "status": "eligible",
        "evidence": "Documented stack stays under 4B parameters; largest model is MiniCPM5-1B.",
    },
    {
        "name": "Well-Tuned",
        "status": "ready",
        "evidence": "MiniCPM5 LoRA adapter target is published to the Hub and loaded by the ZeroGPU Transformers runtime.",
    },
    {
        "name": "Llama Champion",
        "status": "ready",
        "evidence": "Retrieval uses an EmbeddingGemma GGUF index built by llama.cpp on Modal and query embeddings computed through llama.cpp at runtime.",
    },
]
# Training-related artifacts exposed by the app. prize_ledger() returns this
# list unchanged under the "training_artifacts" key.
# NOTE(review): the two "endpoint" values have different shapes (a bare name
# vs. a URL path) — confirm against the code that serves them.
TRAINING_ARTIFACTS = [
    # Supervised fine-tuning data export.
    {
        "name": "MiniCPM5 LoRA SFT dataset",
        "status": "export-ready",
        "endpoint": "lora_dataset",
        "format": "chat-jsonl",
        "base_model": "openbmb/MiniCPM5-1B",
    },
    # Downloadable training recipe; also names the target adapter repo.
    {
        "name": "MiniCPM5 LoRA training kit",
        "status": "published-recipe",
        "endpoint": "/api/lora-training-kit.zip",
        "format": "zip",
        "base_model": "openbmb/MiniCPM5-1B",
        "adapter_repo": "build-small-hackathon/hackathon-advisor-minicpm5-lora",
    },
]
def prize_ledger(
    runtime: dict[str, Any],
    index_metadata: dict[str, Any] | None = None,
    voice_metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the prize-ledger payload for the given runtime description.

    The result combines the caller-supplied metadata with the module-level
    ``MODEL_STACK``, ``BADGE_LEDGER`` and ``TRAINING_ARTIFACTS`` tables and a
    few figures derived from the model stack.

    Args:
        runtime: Runtime description; passed through untouched under
            ``"runtime"``.
        index_metadata: Retrieval-index metadata. A falsy value (``None`` or
            an empty dict) is reported as ``{}``.
        voice_metadata: Voice metadata. A falsy value is reported as ``{}``.

    Returns:
        A dict with the keys ``runtime``, ``retrieval_index``, ``voice``,
        ``model_stack``, ``total_params_b``, ``largest_model``,
        ``tiny_titan_limit_b``, ``tiny_titan_eligible``, ``badges`` and
        ``training_artifacts``. The three module-level tables are returned by
        reference, not copied.

    Raises:
        ValueError: If ``MODEL_STACK`` is empty (raised by ``max``).
    """
    # One name for the threshold that is both reported and compared against.
    limit_b = 4.0

    def _size(entry: dict[str, Any]) -> float:
        """Return the entry's ``params_b`` value as a float."""
        return float(entry["params_b"])

    # Rounded to two decimals so float summation noise does not leak out.
    stack_total = round(sum(_size(entry) for entry in MODEL_STACK), 2)
    biggest = max(MODEL_STACK, key=_size)
    within_limit = stack_total <= limit_b and _size(biggest) <= limit_b

    ledger: dict[str, Any] = {"runtime": runtime}
    ledger["retrieval_index"] = index_metadata if index_metadata else {}
    ledger["voice"] = voice_metadata if voice_metadata else {}
    ledger["model_stack"] = MODEL_STACK
    ledger["total_params_b"] = stack_total
    # Only the identifying fields of the largest entry are exposed.
    ledger["largest_model"] = {field: biggest[field] for field in ("model", "params_b")}
    ledger["tiny_titan_limit_b"] = limit_b
    ledger["tiny_titan_eligible"] = within_limit
    ledger["badges"] = BADGE_LEDGER
    ledger["training_artifacts"] = TRAINING_ARTIFACTS
    return ledger
|