| | """MedGenesis – **Gemini** (Google Generative AI) async helper. |
| | |
| | Key behaviours |
| | ~~~~~~~~~~~~~~ |
| | * Tries the fast **`gemini-1.5-flash`** model first → falls back to |
| | **`gemini-pro`** when flash unavailable or quota‑exceeded. |
| | * Exponential back‑off retry (2×, 4×) for transient 5xx/429. |
| | * Singleton model cache to avoid re‑instantiation cost. |
| | * Returns **empty string** on irrecoverable errors so orchestrator can |
| | gracefully pivot to OpenAI. |
| | """ |
from __future__ import annotations

import asyncio
import functools
import os

import google.generativeai as genai
from google.api_core import exceptions as gexc

_API_KEY = os.getenv("GEMINI_KEY")
if not _API_KEY:
    raise RuntimeError("GEMINI_KEY env variable missing – set it in HF Secrets")

genai.configure(api_key=_API_KEY)


@functools.lru_cache(maxsize=4)
def _get_model(name: str) -> genai.GenerativeModel:
    """Return a cached ``GenerativeModel`` instance (one per model name)."""
    return genai.GenerativeModel(name)
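
# Thanks to the lru_cache the model object is a per-name singleton – illustrative:
#   _get_model("gemini-pro") is _get_model("gemini-pro")  # -> True, same instance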


async def _generate(prompt: str, model_name: str, *, temperature: float = 0.3, retries: int = 3) -> str:
    """Run generation in a thread (`asyncio.to_thread`) – the Gemini SDK is blocking.

    Transient quota/5xx errors are retried with exponential back-off
    (2 s, then 4 s); irrecoverable errors yield an empty string.
    """
    delay = 2
    for _ in range(retries):
        try:
            resp = await asyncio.to_thread(
                _get_model(model_name).generate_content,
                prompt,
                generation_config={"temperature": temperature},
            )
            # ``resp.text`` raises ValueError when the reply carries no text
            # part (e.g. blocked by safety filters) – treat that as empty.
            try:
                return resp.text.strip()
            except ValueError:
                return ""
        except (gexc.ResourceExhausted, gexc.ServiceUnavailable):
            await asyncio.sleep(delay)  # transient – back off and retry
            delay *= 2
        except (gexc.NotFound, gexc.PermissionDenied):
            return ""  # model or key unusable – no point retrying
    return ""


async def gemini_summarize(text: str, *, words: int = 150) -> str:
    """Summarise *text* in at most *words* words (flash first, then pro)."""
    prompt = f"Summarize in ≤{words} words:\n\n{text[:12000]}"
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:  # flash unavailable or exhausted – fall back to pro
        out = await _generate(prompt, "gemini-pro")
    return out


async def gemini_qa(question: str, *, context: str = "") -> str:
    """Answer *question* using *context* (flash first, then pro)."""
    prompt = (
        "You are an advanced biomedical research agent. Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out or "Gemini could not answer (model/key unavailable)."
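

if __name__ == "__main__":
    # Minimal manual smoke test – a sketch, not part of the helper API.
    # Assumes GEMINI_KEY is exported and the key can access the models above.
    async def _demo() -> None:
        print(await gemini_summarize("Aspirin irreversibly acetylates COX-1, reducing thromboxane A2 synthesis."))
        print(await gemini_qa("Which enzyme does aspirin inhibit?"))

    asyncio.run(_demo())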