"""Cohere chat/embedding helpers with retry and an optional local-model fallback.

Every public helper here is best-effort: failures are logged and reported to
the caller as ``None`` / ``[]`` (or a fixed apology string for the narrative)
instead of raising.
"""
import logging
import time
from typing import Callable, List, Optional, TypeVar

import cohere

from settings import (
    COHERE_API_KEY,
    COHERE_API_URL,
    COHERE_MODEL_PRIMARY,
    COHERE_EMBED_MODEL,
    MODEL_SETTINGS,
    USE_OPEN_FALLBACKS,
)

# Optional open-model fallback (only used if USE_OPEN_FALLBACKS=True)
try:
    from local_llm import LocalLLM
    _HAS_LOCAL = True
except Exception:
    # Deliberately broad: any failure while importing the local model
    # simply disables the fallback.
    _HAS_LOCAL = False

_log = logging.getLogger(__name__)

_T = TypeVar("_T")

# Lazily created, module-wide Cohere client (see _co_client).
_client: Optional[cohere.Client] = None


def _co_client() -> Optional[cohere.Client]:
    """Return the shared Cohere client, creating it on first use.

    Returns:
        The cached ``cohere.Client``, or ``None`` when ``COHERE_API_KEY`` is
        empty, in which case no client is created.
    """
    global _client
    if _client is not None:
        return _client
    if not COHERE_API_KEY:
        return None

    client_kwargs = {
        "api_key": COHERE_API_KEY,
        "timeout": MODEL_SETTINGS.get("timeout_s", 45),
    }
    # NOTE: The Cohere Python SDK auto-selects API base; you can pass a custom base if provided.
    if COHERE_API_URL:
        client_kwargs["base_url"] = COHERE_API_URL

    _client = cohere.Client(**client_kwargs)
    return _client


def _retry(fn: Callable[[], _T], attempts: int = 3, backoff: float = 0.8) -> _T:
    """Call ``fn`` until it succeeds or ``attempts`` calls have failed.

    Args:
        fn: Zero-argument callable to invoke.
        attempts: Maximum number of calls to make.
        backoff: Base delay in seconds; the wait after failed attempt ``i``
            (0-based) is ``backoff * 2 ** i``.

    Returns:
        Whatever ``fn`` returns on its first successful call.

    Raises:
        Exception: The last exception raised by ``fn`` once all attempts fail.
        RuntimeError: If ``fn`` was never called (``attempts`` < 1).
    """
    last: Optional[Exception] = None
    for i in range(attempts):
        try:
            return fn()
        except Exception as e:
            last = e
            # Back off only when another attempt follows; sleeping after the
            # final failure would just delay reporting the error.
            if i < attempts - 1:
                time.sleep(backoff * (2 ** i))
    if last is not None:
        raise last
    raise RuntimeError("Unknown error")


def cohere_chat(prompt: str) -> Optional[str]:
    """Send ``prompt`` to the primary Cohere chat model and return its text.

    Args:
        prompt: The full message to send.

    Returns:
        The response text, or ``None`` when no client is available, the
        response carries no text, or every attempt fails (the failure is
        logged as a warning).
    """
    cli = _co_client()
    if not cli:
        return None

    def _call() -> Optional[str]:
        resp = cli.chat(
            model=COHERE_MODEL_PRIMARY,
            message=prompt,
            temperature=MODEL_SETTINGS["temperature"],
            max_tokens=MODEL_SETTINGS["max_new_tokens"],
        )
        # SDK shape may provide .text, .reply, or generations
        for attr in ("text", "reply"):
            value = getattr(resp, attr, None)
            if value:
                return value
        generations = getattr(resp, "generations", None)
        if generations:
            return generations[0].text
        return None

    try:
        return _retry(_call, attempts=3)
    except Exception as exc:
        # Best-effort by design: callers treat None as "no answer".
        _log.warning("Cohere chat request failed after retries: %s", exc)
        return None


def open_fallback_chat(prompt: str) -> Optional[str]:
    """Answer ``prompt`` with the local open model, if enabled and importable.

    Args:
        prompt: The full message to send.

    Returns:
        The result of ``LocalLLM().chat(prompt)``, or ``None`` when
        ``USE_OPEN_FALLBACKS`` is off, ``local_llm`` could not be imported,
        or the local model raises (the failure is logged as a warning).
    """
    if not USE_OPEN_FALLBACKS or not _HAS_LOCAL:
        return None
    try:
        return LocalLLM().chat(prompt)
    except Exception as exc:
        _log.warning("Local fallback chat failed: %s", exc)
        return None


def cohere_embed(texts: List[str], *, input_type: Optional[str] = None) -> List[List[float]]:
    """Embed ``texts`` with the configured Cohere embedding model.

    Args:
        texts: Strings to embed.
        input_type: Optional value forwarded to the SDK's ``input_type``
            argument. It is omitted from the request when not given, so
            existing callers are unaffected.

    Returns:
        The embeddings reported by the SDK, or ``[]`` when no client is
        available, ``texts`` is empty, or every attempt fails (the failure is
        logged as a warning).
    """
    cli = _co_client()
    if not cli or not texts:
        return []

    def _call() -> List[List[float]]:
        embed_kwargs = {"texts": texts, "model": COHERE_EMBED_MODEL}
        if input_type:
            embed_kwargs["input_type"] = input_type
        resp = cli.embed(**embed_kwargs)
        # Newer SDK: resp.embeddings; older: resp.data
        return getattr(resp, "embeddings", None) or getattr(resp, "data", []) or []

    try:
        return _retry(_call, attempts=3)
    except Exception as exc:
        _log.warning("Cohere embed request failed after retries: %s", exc)
        return []


def generate_narrative(scenario_text: str, structured_sections_md: str, rag_snippets: List[str]) -> str:
    """Build the report prompt and generate the narrative text.

    Cohere chat is tried first, then the local fallback.

    Args:
        scenario_text: Scenario description inserted under "# Scenario".
        structured_sections_md: Text inserted under "# Deterministic Results".
        rag_snippets: Grounding snippets; each is labelled ``[RAG n]`` in the
            prompt. ``None`` or an empty list yields an empty grounding section.

    Returns:
        The generated narrative, or the fixed string
        "Unable to generate narrative at this time." when neither backend
        produces output.
    """
    grounding = "\n\n".join([f"[RAG {i+1}]\n{t}" for i, t in enumerate(rag_snippets or [])])
    # NOTE(review): the line breaks inside this prompt were reconstructed from
    # a whitespace-collapsed copy of the file - confirm the intended layout.
    prompt = f"""You are a Canadian healthcare operations copilot.
Follow the scenario's requested deliverables exactly. Use the structured computations provided (already calculated deterministically)
and the RAG snippets for grounding.

# Scenario
{scenario_text}

# Deterministic Results (already computed)
{structured_sections_md}

# Grounding (Canadian sources, snippets)
{grounding}

Write a concise, decision-ready report tailored to provincial operations leaders.
Do not invent numbers. If data are missing, say so clearly.
"""
    out = cohere_chat(prompt)
    if out:
        return out
    out = open_fallback_chat(prompt)
    if out:
        return out
    return "Unable to generate narrative at this time."