Spaces:
Running
Running
| """Plain-English risk narrative via MiniCPM5-1B on llama.cpp. | |
| Earns the Llama Champion badge, keeps the app fully offline (Off the Grid), and uses | |
| OpenBMB's model (aligns with the OpenBMB special-category prize). GGUF artifacts: | |
| repo openbmb/MiniCPM5-1B-GGUF | |
| files MiniCPM5-1B-Q4_K_M.gguf (657 MB, recommended) | -Q8_0 (1.1 GB) | -F16 (2.1 GB) | |
| For offline use, download once then pass `gguf_path` to load from disk and bundle it. | |
| Sampling (no-think): temperature 0.7, top_p 0.95. See skill: minicpm5-deploy-llama-cpp. | |
| """ | |
| from __future__ import annotations | |
# Prompt text sent to the model as the single user message. It is filled in with
# `str.format` from the forecast summary dict. Every placeholder except `{planned}`
# carries a numeric format spec (`.0%`, `.2f`, `+.0f`, `,.0f`), so those fields must
# already be numbers when the template is formatted; `{planned}` is rendered with the
# value's default string form.
PROMPT_TEMPLATE = """You are a project controls analyst writing for a client report.
Given the forecast below, write a concise, factual 3-4 sentence risk commentary.
Do not invent numbers. Be direct about schedule and cost risk.
Forecast:
- Percent complete: {pct_complete:.0%}
- Cost performance index (CPI): {cpi:.2f}; Schedule performance index (SPI): {spi:.2f}
- Projected finish: period {finish:.0f} (baseline {planned}) -> slippage {slippage:+.0f} periods
- Projected final cost (EAC): {eac:,.0f} vs budget {bac:,.0f} ({overrun:+.0%})
- Probability of cost overrun above 10%: {p_overrun:.0%}
Risk commentary:"""

# Default Hugging Face Hub repository and file name of the GGUF weights; used as the
# default `repo` / `filename` arguments of `load_llm`.
GGUF_REPO = "openbmb/MiniCPM5-1B-GGUF"
GGUF_FILE = "MiniCPM5-1B-Q4_K_M.gguf"  # 657 MB; swap for -Q8_0 / -F16 for fidelity
# Summary fields that are formatted numerically or compared with `<` / `>` downstream.
_NUM_KEYS = ("pct_complete", "cpi", "spi", "finish", "slippage", "eac", "bac", "overrun", "p_overrun", "planned")

# Fields that are rendered WITHOUT a format spec (`{planned}` in the prompt and in the
# fallback narrative). A whole-number value is kept as an int for these so it prints
# as "12" instead of "12.0".
_WHOLE_NUMBER_KEYS = frozenset({"planned"})


def _coerce(summary: dict) -> dict:
    """Return a shallow copy of `summary` with its numeric fields turned into numbers.

    Numeric fields can arrive as strings over the JSON/API boundary; converting them
    keeps the format specs (`:.2f`, `:%`) and the `<` comparisons used by the callers
    from crashing.

    Args:
        summary: Forecast summary. Only keys listed in `_NUM_KEYS` are touched; keys
            that are absent are simply skipped and all other keys are copied as-is.

    Returns:
        A new dict (the input is never mutated). Each present `_NUM_KEYS` value is a
        `float`, except that whole-number values of the fields in
        `_WHOLE_NUMBER_KEYS` are returned as `int`. Values that `float()` rejects
        (e.g. `None` or `"n/a"`) are deliberately left unchanged so the caller sees
        the original value.
    """
    out = dict(summary)
    for key in _NUM_KEYS:
        if key not in out:
            continue
        try:
            number = float(out[key])
        except (TypeError, ValueError):
            # Best effort: keep the original value rather than failing here.
            continue
        if key in _WHOLE_NUMBER_KEYS and number.is_integer():
            # `is_integer()` is False for inf/nan, so int() cannot raise here.
            out[key] = int(number)
        else:
            out[key] = number
    return out
def fallback_narrative(s: dict) -> str:
    """Build the risk commentary straight from the forecast summary, with no model call.

    This is the deterministic stand-in for the LLM narrative, meant for situations
    where llama.cpp or the MiniCPM5 GGUF is unavailable (e.g. a fresh checkout).
    `s` must contain the keys read below; its values go through `_coerce` first, so
    numeric strings are accepted.
    """
    data = _coerce(s)

    # An index within +/-0.03 of 1.0 counts as "on" plan.
    spi = data["spi"]
    if spi < 0.97:
        schedule_status = "behind"
    elif spi > 1.03:
        schedule_status = "ahead of"
    else:
        schedule_status = "on"

    cpi = data["cpi"]
    if cpi < 0.97:
        budget_status = "over"
    elif cpi > 1.03:
        budget_status = "under"
    else:
        budget_status = "on"

    slippage = data["slippage"]

    # Fragments are concatenated without separators; each carries its own spacing.
    fragments = [
        f"At {data['pct_complete']:.0%} complete, the project is {schedule_status} schedule ",
        f"(SPI {data['spi']:.2f}) and {budget_status} budget (CPI {data['cpi']:.2f}). The forecast projects ",
        f"completion around period {data['finish']:.0f} versus a baseline of {data['planned']} ",
        f"({slippage:+.0f} periods), and a final cost near {data['eac']:,.0f} against a ",
        f"{data['bac']:,.0f} budget ({data['overrun']:+.0%}). Estimated chance of a cost overrun ",
        f"beyond 10%: {data['p_overrun']:.0%}.",
    ]
    return "".join(fragments)
# Process-wide cache: the one model instance handed to every caller (None until loaded).
_llm = None


def load_llm(gguf_path: str | None = None, repo: str = GGUF_REPO,
             filename: str = GGUF_FILE, n_ctx: int = 8192):
    """Load MiniCPM5-1B once and return the cached instance on every later call.

    Order of preference, all offline-friendly:
      1. an explicit `gguf_path` (a GGUF file on disk);
      2. the distilled GGUF provided by the sibling `local_llm` module, when
         `local_llm.is_available()` is true (per the original author's note this
         reuses the instance the agent already loaded, avoiding a second download);
      3. otherwise the base GGUF `filename` from Hub repo `repo`, fetched through
         `Llama.from_pretrained`.

    Args:
        gguf_path: Path to a local GGUF file. `None` or an empty string means
            "no explicit path" and enables steps 2 and 3.
        repo: Hub repository id used by step 3.
        filename: GGUF file name inside `repo` used by step 3.
        n_ctx: Context window size passed to whichever loader is used.

    Returns:
        The cached model object. NOTE: the arguments only take effect on the first
        successful load; once a model is cached it is returned unchanged even if a
        later call passes a different path, repo or `n_ctx`.

    Raises:
        ImportError: if `llama_cpp` is not installed and step 1 or 3 is needed.
        Any exception raised by `llama_cpp` while loading in step 1 or 3. Failures
        in step 2 are logged and fall through to step 3 instead of propagating.
    """
    global _llm
    if _llm is not None:
        return _llm

    # Treat "" the same as None so both checks below agree on "no path given".
    gguf_path = gguf_path or None

    if gguf_path is None:  # reuse the local distilled agent model if present
        try:
            from . import local_llm
            if local_llm.is_available():
                g, t = local_llm.ensure_local()  # download the distilled GGUF if needed
                _llm, _ = local_llm.load(g, t, n_ctx=n_ctx)
                return _llm
        except Exception:
            # Deliberately best-effort: fall back to the base model, but leave a
            # trace of why the distilled path was skipped instead of hiding it.
            import logging
            logging.getLogger(__name__).warning(
                "Distilled local model unavailable; falling back to %s/%s",
                repo, filename, exc_info=True,
            )

    # Function-scope import: llama_cpp is only required once a model is really loaded.
    from llama_cpp import Llama

    if gguf_path is not None:
        _llm = Llama(model_path=gguf_path, n_ctx=n_ctx, verbose=False)
    else:
        _llm = Llama.from_pretrained(repo_id=repo, filename=filename, n_ctx=n_ctx, verbose=False)
    return _llm
def generate_narrative(summary: dict, max_tokens: int = 220, temperature: float = 0.7,
                       top_p: float = 0.95, **load_kw) -> str:
    """Generate the plain-English risk commentary for a forecast summary with the LLM.

    Args:
        summary: Forecast summary; must contain every key referenced in
            `PROMPT_TEMPLATE`. Values are passed through `_coerce` first, so numeric
            strings are accepted.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature (default 0.7, the MiniCPM5 no-think setting).
        top_p: Nucleus sampling threshold (default 0.95, the MiniCPM5 no-think setting).
        **load_kw: Forwarded unchanged to `load_llm` (e.g. `gguf_path`, `n_ctx`).

    Returns:
        The model's reply with surrounding whitespace removed; an empty string if the
        completion carries no text content.

    Raises:
        KeyError: if `summary` lacks a key used by the prompt template.
        ValueError / TypeError: if a field cannot be rendered with its numeric
            format spec (e.g. a non-numeric string or `None`).
        Whatever `load_llm` or the model call raises.
    """
    # Build the prompt first: a malformed summary fails here, before paying for the
    # model load (which may download several hundred MB on first use).
    prompt = PROMPT_TEMPLATE.format(**_coerce(summary))
    llm = load_llm(**load_kw)
    out = llm.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    # `content` is optional in the chat-completion schema; guard against None.
    content = out["choices"][0]["message"]["content"]
    return (content or "").strip()