| | """MedGenesis – **Gemini** (Google Generative AI) async helper. |
| | |
| | Key behaviours |
| | ~~~~~~~~~~~~~~ |
| | * Tries the fast **`gemini-1.5-flash`** model first → falls back to |
| | **`gemini-pro`** when flash unavailable or quota‑exceeded. |
| | * Exponential back‑off retry (2×, 4×) for transient 5xx/429. |
| | * Singleton model cache to avoid re‑instantiation cost. |
| | * Returns **empty string** on irrecoverable errors so orchestrator can |
| | gracefully pivot to OpenAI. |
| | """ |
from __future__ import annotations

import asyncio
import functools
import os

import google.generativeai as genai
from google.api_core import exceptions as gexc

_API_KEY = os.getenv("GEMINI_KEY")
if not _API_KEY:
    raise RuntimeError("GEMINI_KEY env variable missing – set it in HF Secrets")

genai.configure(api_key=_API_KEY)


@functools.lru_cache(maxsize=4)
def _get_model(name: str) -> genai.GenerativeModel:
    """Return a cached ``GenerativeModel`` instance (one per model name)."""
    return genai.GenerativeModel(name)
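
# Thanks to the lru_cache the model object is a per-name singleton – illustrative:
#   _get_model("gemini-pro") is _get_model("gemini-pro")  # -> True, same instance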


async def _generate(prompt: str, model_name: str, *, temperature: float = 0.3, retries: int = 3) -> str:
    """Run generation in a thread (`asyncio.to_thread`) – the Gemini SDK is blocking.

    Transient quota/5xx errors are retried with exponential back-off
    (2 s, then 4 s); irrecoverable errors yield an empty string.
    """
    delay = 2
    for _ in range(retries):
        try:
            resp = await asyncio.to_thread(
                _get_model(model_name).generate_content,
                prompt,
                generation_config={"temperature": temperature},
            )
            # ``resp.text`` raises ValueError when the reply carries no text
            # part (e.g. blocked by safety filters) – treat that as empty.
            try:
                return resp.text.strip()
            except ValueError:
                return ""
        except (gexc.ResourceExhausted, gexc.ServiceUnavailable):
            await asyncio.sleep(delay)  # transient – back off and retry
            delay *= 2
        except (gexc.NotFound, gexc.PermissionDenied):
            return ""  # model or key unusable – no point retrying
    return ""


async def gemini_summarize(text: str, *, words: int = 150) -> str:
    """Summarise *text* in at most *words* words (flash first, then pro)."""
    prompt = f"Summarize in ≤{words} words:\n\n{text[:12000]}"
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:  # flash unavailable or exhausted – fall back to pro
        out = await _generate(prompt, "gemini-pro")
    return out


async def gemini_qa(question: str, *, context: str = "") -> str:
    """Answer *question* using *context* (flash first, then pro)."""
    prompt = (
        "You are an advanced biomedical research agent. Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    out = await _generate(prompt, "gemini-1.5-flash")
    if not out:
        out = await _generate(prompt, "gemini-pro")
    return out or "Gemini could not answer (model/key unavailable)."
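

if __name__ == "__main__":
    # Minimal manual smoke test – a sketch, not part of the helper API.
    # Assumes GEMINI_KEY is exported and the key can access the models above.
    async def _demo() -> None:
        print(await gemini_summarize("Aspirin irreversibly acetylates COX-1, reducing thromboxane A2 synthesis."))
        print(await gemini_qa("Which enzyme does aspirin inhibit?"))

    asyncio.run(_demo())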