Spaces:

remyxai
/

Feature-Finder

Paused

App Files Files Community

Feature-Finder / llm_api.py

salma-remyx

Replace with MHPD paper recommender (v1.2 backend) to reuse Zero-GPU allocation

f4145b9 verified 13 days ago

raw

history blame contribute delete

3.88 kB

	"""
	llm_api.py — Unified Gemini client shared by team_context and PR-body
	generation paths.

	Used in two places:
	- team_context.extract_team_context: corpus-format summarization of
	GitHub commits. The 2B base hallucinates on this task (tested
	2026-05-25), so Gemini is the floor for trustworthy extraction.
	- PR body generation when MHPD_GENERATION_BACKEND=gemini (alternate
	to the local 2B path). Tradeoff is faster UX + more Zero-GPU quota
	left free, at the cost of ~$0.001 per request.

	Configured by env vars:
	GEMINI_API_KEY required
	GEMINI_MODEL default gemini-3.1-flash-lite
	"""
	from __future__ import annotations

	import os
	from typing import Optional

	import google.generativeai as genai

	# gemini-3.1-flash-lite chosen empirically (2026-05-25 testing) over
	# gemini-3.5-flash and gemini-2.0-flash-exp:
	# - 3.5-flash: outputs truncated by thinking-mode token consumption;
	# all 800 max_output_tokens spent on internal reasoning, ~0 emitted.
	# - 2.0-flash-exp: deprecated, returns 404.
	# - 3.1-flash-lite: non-thinking variant, fast (~3s for our 800-token
	# prompts), produces well-structured corpus-format output grounded
	# in the provided commits. Quality validated against 4 corpus repos.
	# Resolved once, when this module is imported; the env var overrides the
	# empirically chosen default.
	GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-3.1-flash-lite")

	# Lazy-initialisation state, filled in by _ensure_configured() on first use:
	# - _configured becomes True once genai.configure() has received the API key.
	# - _model_cache holds the single GenerativeModel built for GEMINI_MODEL.
	_configured = False
	_model_cache: Optional[genai.GenerativeModel] = None


	def _ensure_configured() -> genai.GenerativeModel:
	    """Return the shared Gemini model handle, initialising it on first use.

	    On the first call the API key is read from the GEMINI_API_KEY
	    environment variable and passed to genai.configure(). The
	    GenerativeModel for GEMINI_MODEL is then built once and reused by
	    every later call.

	    Returns:
	        The module-wide cached GenerativeModel instance.

	    Raises:
	        RuntimeError: if GEMINI_API_KEY is unset or empty when the SDK
	            has not been configured yet.
	    """
	    global _configured, _model_cache

	    if not _configured:
	        key = os.environ.get("GEMINI_API_KEY")
	        if key:
	            genai.configure(api_key=key)
	            # Remember success so the key is only read and applied once.
	            _configured = True
	        else:
	            raise RuntimeError(
	                "GEMINI_API_KEY is required. Set it as a Space secret or "
	                "export it locally. Alternatively, set MHPD_GENERATION_BACKEND=local "
	                "and ensure team_context's heuristic fallback is acceptable."
	            )

	    # Fast path: the model object was already built by an earlier call.
	    if _model_cache is not None:
	        return _model_cache

	    _model_cache = genai.GenerativeModel(GEMINI_MODEL)
	    return _model_cache


	def gemini_call(prompt: str, *, max_output_tokens: int = 800,
	                temperature: float = 0.2) -> str:
	    """Run one single-turn generation against the configured Gemini model.

	    Args:
	        prompt: Text sent to the model as-is.
	        max_output_tokens: Forwarded in the generation config as the
	            output-token cap.
	        temperature: Forwarded in the generation config as the sampling
	            temperature.

	    Returns:
	        The response text with surrounding whitespace stripped, or an
	        empty string when the response text is empty or None.

	    Raises:
	        Nothing is caught here: configuration errors from
	        _ensure_configured() and any error raised by the SDK call
	        propagate, so callers decide whether to retry or fall back.
	    """
	    client = _ensure_configured()
	    settings = {
	        "temperature": temperature,
	        "max_output_tokens": max_output_tokens,
	    }
	    response = client.generate_content(prompt, generation_config=settings)

	    # NOTE(review): reading .text may itself raise if the response carries
	    # no text parts (e.g. a blocked or empty candidate) — confirm against
	    # the SDK; such an error would propagate like any other API failure.
	    text = response.text
	    if not text:
	        return ""
	    return text.strip()


	def gemini_generate_pr_body(team_ctx: str, paper_text: str) -> str:
	    """Generate a pull-request body via Gemini instead of the local model.

	    Counterpart of model_io._generate_pr_body_local, used when
	    MHPD_GENERATION_BACKEND=gemini. The author's notes put this path at
	    roughly $0.0001-$0.001 and ~2-3s per call, versus ~3-6s of GPU time
	    for the 2B base model.

	    Args:
	        team_ctx: Team context text; only the first 1500 characters are
	            placed in the prompt.
	        paper_text: Paper text; only the first 1800 characters are placed
	            in the prompt.

	    Returns:
	        Whatever gemini_call() returns for the assembled prompt, requested
	        with max_output_tokens=700 and temperature=0.3.
	    """
	    # Each entry is one prompt section; sections are separated by a blank
	    # line when joined below.
	    sections = (
	        "You are a software engineer integrating a research paper into your "
	        "team's codebase. Write a concise pull request body proposing the "
	        "integration. Be specific and technical — your output will be handed "
	        "directly to a coding agent for implementation.",
	        f"Team context:\n{team_ctx[:1500]}",
	        f"Paper:\n{paper_text[:1800]}",
	        "Use this structure exactly:",
	        "## Summary\n(1-2 sentences: what does this PR do?)",
	        "## Motivation\n(why this paper fits the team's direction — reference "
	        "specific commits or themes from the team context)",
	        "## Implementation plan\n(3-5 concrete bullet points a coding agent "
	        "would follow; include file paths or module names where you can infer them)",
	        "## Open questions\n(1-2 items genuinely worth clarifying before merge)",
	    )
	    request_text = "\n\n".join(sections)
	    return gemini_call(request_text, max_output_tokens=700, temperature=0.3)