# JacobLinCool's picture
# deploy: sync GitHub main de5dbf9
# 13fe947 verified
from __future__ import annotations
from typing import Any
# Models that make up the documented stack, one dict per model.
#
# Fields:
#   role         - what the model is used for.
#   model        - model repository id.
#   adapter_repo - repository of the adapter (only present on the LLM entry).
#   params_b     - parameter count; the "_b" suffix and the 4.0 limit it is
#                  compared against in prize_ledger suggest billions.
#   status       - deployment state label.
#   runtime      - free-text description of how the model is served.
#
# prize_ledger reads "params_b" and "model" from every entry, so both keys
# must be present on each one.
MODEL_STACK = [
    # Text generation model, loaded together with its adapter.
    {
        "role": "LLM brain",
        "model": "openbmb/MiniCPM5-1B",
        "adapter_repo": "build-small-hackathon/hackathon-advisor-minicpm5-lora",
        "params_b": 1.08,
        "status": "deployed adapter target",
        "runtime": "ZeroGPU + transformers + PEFT",
    },
    # Embedding model used for retrieval.
    {
        "role": "Embedding retriever",
        "model": "ggml-org/embeddinggemma-300m-qat-q8_0-GGUF",
        "params_b": 0.30,
        "status": "deployed",
        "runtime": "Modal-built llama.cpp GGUF index + runtime llama.cpp query embeddings",
    },
    # Speech recognition model for voice input.
    {
        "role": "Voice input",
        "model": "nvidia/nemotron-speech-streaming-en-0.6b",
        "params_b": 0.60,
        "status": "deployed",
        "runtime": "ZeroGPU + NVIDIA NeMo ASR",
    },
]
# Badge claims reported by prize_ledger under the "badges" key.
#
# Every entry carries exactly three keys, so the list is built from
# (name, status, evidence) rows; the resulting dicts keep the key order
# "name", "status", "evidence".
BADGE_LEDGER = [
    {"name": badge_name, "status": badge_status, "evidence": badge_evidence}
    for badge_name, badge_status, badge_evidence in (
        (
            "Off the Grid",
            "ready",
            "Runtime uses checked-in project vectors and local llama.cpp query embeddings; no proprietary inference API.",
        ),
        (
            "Off-Brand",
            "ready",
            "Custom gr.Server frontend renders the agent as The Unwritten Almanac.",
        ),
        (
            "Sharing is Caring",
            "ready",
            "Real Codex session logs are published as a redacted Hugging Face dataset with source hashes and a reusable publisher script.",
        ),
        (
            "Field Notes",
            "ready",
            "Field Notes markdown export is generated from exact session state.",
        ),
        (
            "Tiny Titan",
            "eligible",
            "Documented stack stays under 4B parameters; largest model is MiniCPM5-1B.",
        ),
        (
            "Well-Tuned",
            "ready",
            "MiniCPM5 LoRA adapter target is published to the Hub and loaded by the ZeroGPU Transformers runtime.",
        ),
        (
            "Llama Champion",
            "ready",
            "Retrieval uses an EmbeddingGemma GGUF index built by llama.cpp on Modal and query embeddings computed through llama.cpp at runtime.",
        ),
    )
]
# Training-related artifacts reported by prize_ledger under the
# "training_artifacts" key. Both entries target the same base model.
#
# NOTE(review): the first "endpoint" is a bare name while the second is a
# URL path -- presumably a named API endpoint versus an HTTP route; confirm
# against whatever consumes this list.
TRAINING_ARTIFACTS = [
    # Dataset export in chat-style JSONL.
    {
        "name": "MiniCPM5 LoRA SFT dataset",
        "status": "export-ready",
        "endpoint": "lora_dataset",
        "format": "chat-jsonl",
        "base_model": "openbmb/MiniCPM5-1B",
    },
    # Zipped training kit; the only entry that names an adapter repository.
    {
        "name": "MiniCPM5 LoRA training kit",
        "status": "published-recipe",
        "endpoint": "/api/lora-training-kit.zip",
        "format": "zip",
        "base_model": "openbmb/MiniCPM5-1B",
        "adapter_repo": "build-small-hackathon/hackathon-advisor-minicpm5-lora",
    },
]
def prize_ledger(
    runtime: dict[str, Any],
    index_metadata: dict[str, Any] | None = None,
    voice_metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble the prize/badge ledger payload for the documented stack.

    Args:
        runtime: Runtime description; returned as-is under ``"runtime"``.
        index_metadata: Optional retrieval-index metadata. ``None`` (or any
            other falsy value) is reported as an empty dict.
        voice_metadata: Optional voice metadata. ``None`` (or any other
            falsy value) is reported as an empty dict.

    Returns:
        A dict with the keys ``runtime``, ``retrieval_index``, ``voice``,
        ``model_stack``, ``total_params_b``, ``largest_model``,
        ``tiny_titan_limit_b``, ``tiny_titan_eligible``, ``badges`` and
        ``training_artifacts``. The three list values are fresh lists of
        copied entries, so mutating the result never alters ``MODEL_STACK``,
        ``BADGE_LEDGER`` or ``TRAINING_ARTIFACTS``.

    Raises:
        ValueError: If ``MODEL_STACK`` is empty (``max`` has no candidate).
    """
    # Bound once so the reported limit and the eligibility check cannot drift.
    tiny_titan_limit_b = 4.0

    total_params = round(
        sum(float(entry["params_b"]) for entry in MODEL_STACK), 2
    )
    largest = max(MODEL_STACK, key=lambda entry: float(entry["params_b"]))

    # Both the summed stack and the single largest model must fit the limit.
    eligible = (
        total_params <= tiny_titan_limit_b
        and float(largest["params_b"]) <= tiny_titan_limit_b
    )

    return {
        "runtime": runtime,
        "retrieval_index": index_metadata or {},
        "voice": voice_metadata or {},
        # Per-entry copies: the module-level tables are shared across calls,
        # and handing out the originals would let one caller's edits leak
        # into every later ledger.
        "model_stack": [dict(entry) for entry in MODEL_STACK],
        "total_params_b": total_params,
        "largest_model": {
            "model": largest["model"],
            "params_b": largest["params_b"],
        },
        "tiny_titan_limit_b": tiny_titan_limit_b,
        "tiny_titan_eligible": eligible,
        "badges": [dict(badge) for badge in BADGE_LEDGER],
        "training_artifacts": [dict(artifact) for artifact in TRAINING_ARTIFACTS],
    }