"""Plain-English risk narrative via MiniCPM5-1B on llama.cpp. Earns the Llama Champion badge, keeps the app fully offline (Off the Grid), and uses OpenBMB's model (aligns with the OpenBMB special-category prize). GGUF artifacts: repo openbmb/MiniCPM5-1B-GGUF files MiniCPM5-1B-Q4_K_M.gguf (657 MB, recommended) | -Q8_0 (1.1 GB) | -F16 (2.1 GB) For offline use, download once then pass `gguf_path` to load from disk and bundle it. Sampling (no-think): temperature 0.7, top_p 0.95. See skill: minicpm5-deploy-llama-cpp. """ from __future__ import annotations PROMPT_TEMPLATE = """You are a project controls analyst writing for a client report. Given the forecast below, write a concise, factual 3-4 sentence risk commentary. Do not invent numbers. Be direct about schedule and cost risk. Forecast: - Percent complete: {pct_complete:.0%} - Cost performance index (CPI): {cpi:.2f}; Schedule performance index (SPI): {spi:.2f} - Projected finish: period {finish:.0f} (baseline {planned}) -> slippage {slippage:+.0f} periods - Projected final cost (EAC): {eac:,.0f} vs budget {bac:,.0f} ({overrun:+.0%}) - Probability of cost overrun above 10%: {p_overrun:.0%} Risk commentary:""" GGUF_REPO = "openbmb/MiniCPM5-1B-GGUF" GGUF_FILE = "MiniCPM5-1B-Q4_K_M.gguf" # 657 MB; swap for -Q8_0 / -F16 for fidelity _NUM_KEYS = ("pct_complete", "cpi", "spi", "finish", "slippage", "eac", "bac", "overrun", "p_overrun", "planned") def _coerce(summary: dict) -> dict: """Numeric fields can arrive as strings over the JSON/API boundary; float them so the f-string format specs (`:.2f`, `:%`) and the `<` comparisons below don't crash.""" out = dict(summary) for k in _NUM_KEYS: if k in out: try: out[k] = float(out[k]) except (TypeError, ValueError): pass return out def fallback_narrative(s: dict) -> str: """Deterministic, no-LLM narrative from the forecast summary (used when llama.cpp / the MiniCPM5 GGUF is unavailable, e.g. on a fresh checkout).""" s = _coerce(s) sched = "behind" if s["spi"] < 0.97 else "ahead of" if s["spi"] > 1.03 else "on" cost = "over" if s["cpi"] < 0.97 else "under" if s["cpi"] > 1.03 else "on" slip = s["slippage"] return ( f"At {s['pct_complete']:.0%} complete, the project is {sched} schedule " f"(SPI {s['spi']:.2f}) and {cost} budget (CPI {s['cpi']:.2f}). The forecast projects " f"completion around period {s['finish']:.0f} versus a baseline of {s['planned']} " f"({slip:+.0f} periods), and a final cost near {s['eac']:,.0f} against a " f"{s['bac']:,.0f} budget ({s['overrun']:+.0%}). Estimated chance of a cost overrun " f"beyond 10%: {s['p_overrun']:.0%}." ) _llm = None def load_llm(gguf_path: str | None = None, repo: str = GGUF_REPO, filename: str = GGUF_FILE, n_ctx: int = 8192): """Load (and cache) MiniCPM5-1B. Order of preference, all offline-friendly: 1. an explicit `gguf_path`; 2. the **distilled** GGUF under ./models - reuses the single Llama instance the agent already loaded (no second download, no extra memory); 3. otherwise download the base MiniCPM5-1B GGUF from the Hub once and cache it.""" global _llm if _llm is None: if gguf_path is None: # reuse the local distilled agent model if present try: from . import local_llm if local_llm.is_available(): g, t = local_llm.ensure_local() # download the distilled GGUF if needed _llm, _ = local_llm.load(g, t, n_ctx=n_ctx) return _llm except Exception: pass from llama_cpp import Llama if gguf_path: _llm = Llama(model_path=gguf_path, n_ctx=n_ctx, verbose=False) else: _llm = Llama.from_pretrained(repo_id=repo, filename=filename, n_ctx=n_ctx, verbose=False) return _llm def generate_narrative(summary: dict, max_tokens: int = 220, temperature: float = 0.7, top_p: float = 0.95, **load_kw) -> str: """`summary` must contain the keys referenced in PROMPT_TEMPLATE. Uses MiniCPM5 no-think sampling defaults (temp 0.7, top_p 0.95).""" llm = load_llm(**load_kw) prompt = PROMPT_TEMPLATE.format(**_coerce(summary)) out = llm.create_chat_completion( messages=[{"role": "user", "content": prompt}], max_tokens=max_tokens, temperature=temperature, top_p=top_p, ) return out["choices"][0]["message"]["content"].strip()