gimjungwook Claude Opus 4.5 committed on
Commit
e886743
·
1 Parent(s): 4dc70fb

fix: Gemini 3 호환 - thinking_budget → thinking_level 마이그레이션

Browse files

- Gemini 2.5 모델 전부 제거, gemini-3-flash-preview만 남김
- MODELS_WITH_THINKING → GEMINI3_MODELS 상수 변경
- _supports_thinking() → _is_gemini3() 메서드 변경
- ThinkingConfig: thinking_budget 제거, include_thoughts=True만 (기본값 HIGH)
- fallback 에러 키워드에 "level" 추가
- google-genai>=1.0.0 → >=1.51.0 (Gemini 3 SDK 지원)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

requirements.txt CHANGED
@@ -6,7 +6,7 @@ uvicorn[standard]>=0.27.0
6
  anthropic>=0.40.0
7
  openai>=1.50.0
8
  litellm>=1.50.0
9
- google-genai>=1.0.0
10
 
11
  # Validation and serialization
12
  pydantic>=2.6.0
 
6
  anthropic>=0.40.0
7
  openai>=1.50.0
8
  litellm>=1.50.0
9
+ google-genai>=1.51.0
10
 
11
  # Validation and serialization
12
  pydantic>=2.6.0
src/opencode_api/provider/gemini.py CHANGED
@@ -7,82 +7,41 @@ from .provider import BaseProvider, ModelInfo, Message, StreamChunk, ToolCall
7
  logger = logging.getLogger(__name__)
8
 
9
 
10
- MODELS_WITH_THINKING = {
11
- "gemini-2.5-pro",
12
- "gemini-2.5-flash",
13
- "gemini-2.5-flash-lite",
14
- }
15
-
16
- THINKING_BUDGET_MIN = {
17
- "gemini-2.5-pro": 128,
18
- "gemini-2.5-flash": 1,
19
- "gemini-2.5-flash-lite": 1,
20
  }
21
 
22
 
23
  class GeminiProvider(BaseProvider):
24
-
25
  def __init__(self, api_key: Optional[str] = None):
26
  self._api_key = api_key or os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
27
  self._client = None
28
-
29
  @property
30
  def id(self) -> str:
31
  return "gemini"
32
-
33
  @property
34
  def name(self) -> str:
35
  return "Google Gemini"
36
-
37
  @property
38
  def models(self) -> Dict[str, ModelInfo]:
39
  return {
40
- "gemini-2.5-pro": ModelInfo(
41
- id="gemini-2.5-pro",
42
- name="Gemini 2.5 Pro",
43
- provider_id="gemini",
44
- context_limit=1048576,
45
- output_limit=65536,
46
- supports_tools=True,
47
- supports_streaming=True,
48
- cost_input=1.25,
49
- cost_output=10.0,
50
- ),
51
- "gemini-2.5-flash": ModelInfo(
52
- id="gemini-2.5-flash",
53
- name="Gemini 2.5 Flash",
54
  provider_id="gemini",
55
  context_limit=1048576,
56
  output_limit=65536,
57
  supports_tools=True,
58
  supports_streaming=True,
59
- cost_input=0.15,
60
- cost_output=0.6,
61
- ),
62
- "gemini-2.5-flash-lite": ModelInfo(
63
- id="gemini-2.5-flash-lite",
64
- name="Gemini 2.5 Flash Lite",
65
- provider_id="gemini",
66
- context_limit=1048576,
67
- output_limit=65536,
68
- supports_tools=True,
69
- supports_streaming=True,
70
- cost_input=0.075,
71
- cost_output=0.3,
72
- ),
73
- "gemini-2.0-flash": ModelInfo(
74
- id="gemini-2.0-flash",
75
- name="Gemini 2.0 Flash",
76
- provider_id="gemini",
77
- context_limit=1048576,
78
- output_limit=8192,
79
- supports_tools=True,
80
- supports_streaming=True,
81
- cost_input=0.075,
82
- cost_output=0.3,
83
  ),
84
  }
85
-
86
  def _get_client(self):
87
  if self._client is None:
88
  try:
@@ -91,14 +50,10 @@ class GeminiProvider(BaseProvider):
91
  except ImportError:
92
  raise ImportError("google-genai package is required. Install with: pip install google-genai")
93
  return self._client
94
-
95
- def _supports_thinking(self, model_id: str) -> bool:
96
- return model_id in MODELS_WITH_THINKING
97
-
98
- def _get_thinking_budget(self, model_id: str) -> int:
99
- min_budget = THINKING_BUDGET_MIN.get(model_id, 128)
100
- return max(min_budget, 1024)
101
-
102
  async def stream(
103
  self,
104
  model_id: str,
@@ -109,16 +64,16 @@ class GeminiProvider(BaseProvider):
109
  max_tokens: Optional[int] = None,
110
  ) -> AsyncGenerator[StreamChunk, None]:
111
  from google.genai import types
112
-
113
  client = self._get_client()
114
-
115
  contents = []
116
  print(f"[Gemini DEBUG] Building contents from {len(messages)} messages", flush=True)
117
  for msg in messages:
118
  role = "user" if msg.role == "user" else "model"
119
  content = msg.content
120
  print(f"[Gemini DEBUG] msg.role={msg.role}, content type={type(content)}, content={repr(content)[:100]}", flush=True)
121
-
122
  if isinstance(content, str) and content:
123
  contents.append(types.Content(
124
  role=role,
@@ -128,26 +83,26 @@ class GeminiProvider(BaseProvider):
128
  parts = [types.Part(text=c.text) for c in content if c.text]
129
  if parts:
130
  contents.append(types.Content(role=role, parts=parts))
131
-
132
  print(f"[Gemini DEBUG] Built {len(contents)} contents", flush=True)
133
-
134
  config_kwargs: Dict[str, Any] = {}
135
-
136
  if system:
137
  config_kwargs["system_instruction"] = system
138
-
139
  if temperature is not None:
140
  config_kwargs["temperature"] = temperature
141
-
142
  if max_tokens is not None:
143
  config_kwargs["max_output_tokens"] = max_tokens
144
-
145
- if self._supports_thinking(model_id):
146
  config_kwargs["thinking_config"] = types.ThinkingConfig(
147
- thinking_budget=self._get_thinking_budget(model_id),
148
- include_thoughts=True # Include thinking content in response
149
  )
150
-
 
151
  if tools:
152
  gemini_tools = []
153
  for t in tools:
@@ -158,14 +113,14 @@ class GeminiProvider(BaseProvider):
158
  )
159
  gemini_tools.append(types.Tool(function_declarations=[func_decl]))
160
  config_kwargs["tools"] = gemini_tools
161
-
162
  config = types.GenerateContentConfig(**config_kwargs)
163
-
164
  async for chunk in self._stream_with_fallback(
165
  client, model_id, contents, config, config_kwargs, types
166
  ):
167
  yield chunk
168
-
169
  async def _stream_with_fallback(
170
  self, client, model_id: str, contents, config, config_kwargs: Dict[str, Any], types
171
  ):
@@ -175,33 +130,33 @@ class GeminiProvider(BaseProvider):
175
  except Exception as e:
176
  error_str = str(e).lower()
177
  has_thinking = "thinking_config" in config_kwargs
178
-
179
- if has_thinking and ("thinking" in error_str or "budget" in error_str or "unsupported" in error_str):
180
  logger.warning(f"Thinking not supported for {model_id}, retrying without thinking config")
181
  del config_kwargs["thinking_config"]
182
  fallback_config = types.GenerateContentConfig(**config_kwargs)
183
-
184
  async for chunk in self._do_stream(client, model_id, contents, fallback_config):
185
  yield chunk
186
  else:
187
  logger.error(f"Gemini stream error: {e}")
188
  yield StreamChunk(type="error", error=str(e))
189
-
190
  async def _do_stream(self, client, model_id: str, contents, config):
191
  response_stream = await client.aio.models.generate_content_stream(
192
  model=model_id,
193
  contents=contents,
194
  config=config,
195
  )
196
-
197
  pending_tool_calls = []
198
-
199
  async for chunk in response_stream:
200
  if not chunk.candidates:
201
  continue
202
-
203
  candidate = chunk.candidates[0]
204
-
205
  if candidate.content and candidate.content.parts:
206
  for part in candidate.content.parts:
207
  if hasattr(part, 'thought') and part.thought:
@@ -217,13 +172,13 @@ class GeminiProvider(BaseProvider):
217
  pending_tool_calls.append(tool_call)
218
  elif part.text:
219
  yield StreamChunk(type="text", text=part.text)
220
-
221
  finish_reason = getattr(candidate, 'finish_reason', None)
222
  if finish_reason:
223
  print(f"[Gemini] finish_reason: {finish_reason}, pending_tool_calls: {len(pending_tool_calls)}", flush=True)
224
  for tc in pending_tool_calls:
225
  yield StreamChunk(type="tool_call", tool_call=tc)
226
-
227
  # IMPORTANT: If there are pending tool calls, ALWAYS return "tool_calls"
228
  # regardless of Gemini's finish_reason (which is often STOP even with tool calls)
229
  if pending_tool_calls:
@@ -231,7 +186,7 @@ class GeminiProvider(BaseProvider):
231
  else:
232
  stop_reason = self._map_stop_reason(finish_reason)
233
  print(f"[Gemini] Mapped stop_reason: {stop_reason}", flush=True)
234
-
235
  usage = None
236
  if hasattr(chunk, 'usage_metadata') and chunk.usage_metadata:
237
  usage = {
@@ -240,15 +195,15 @@ class GeminiProvider(BaseProvider):
240
  }
241
  if hasattr(chunk.usage_metadata, 'thoughts_token_count'):
242
  usage["thinking_tokens"] = chunk.usage_metadata.thoughts_token_count
243
-
244
  yield StreamChunk(type="done", usage=usage, stop_reason=stop_reason)
245
  return
246
-
247
  yield StreamChunk(type="done", stop_reason="end_turn")
248
-
249
  def _map_stop_reason(self, gemini_finish_reason) -> str:
250
  reason_name = str(gemini_finish_reason).lower() if gemini_finish_reason else ""
251
-
252
  if "stop" in reason_name or "end" in reason_name:
253
  return "end_turn"
254
  elif "tool" in reason_name or "function" in reason_name:
 
7
  logger = logging.getLogger(__name__)
8
 
9
 
10
+ GEMINI3_MODELS = {
11
+ "gemini-3-flash-preview",
 
 
 
 
 
 
 
 
12
  }
13
 
14
 
15
  class GeminiProvider(BaseProvider):
16
+
17
  def __init__(self, api_key: Optional[str] = None):
18
  self._api_key = api_key or os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
19
  self._client = None
20
+
21
  @property
22
  def id(self) -> str:
23
  return "gemini"
24
+
25
  @property
26
  def name(self) -> str:
27
  return "Google Gemini"
28
+
29
  @property
30
  def models(self) -> Dict[str, ModelInfo]:
31
  return {
32
+ "gemini-3-flash-preview": ModelInfo(
33
+ id="gemini-3-flash-preview",
34
+ name="Gemini 3.0 Flash",
 
 
 
 
 
 
 
 
 
 
 
35
  provider_id="gemini",
36
  context_limit=1048576,
37
  output_limit=65536,
38
  supports_tools=True,
39
  supports_streaming=True,
40
+ cost_input=0.5,
41
+ cost_output=3.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  ),
43
  }
44
+
45
  def _get_client(self):
46
  if self._client is None:
47
  try:
 
50
  except ImportError:
51
  raise ImportError("google-genai package is required. Install with: pip install google-genai")
52
  return self._client
53
+
54
+ def _is_gemini3(self, model_id: str) -> bool:
55
+ return model_id in GEMINI3_MODELS
56
+
 
 
 
 
57
  async def stream(
58
  self,
59
  model_id: str,
 
64
  max_tokens: Optional[int] = None,
65
  ) -> AsyncGenerator[StreamChunk, None]:
66
  from google.genai import types
67
+
68
  client = self._get_client()
69
+
70
  contents = []
71
  print(f"[Gemini DEBUG] Building contents from {len(messages)} messages", flush=True)
72
  for msg in messages:
73
  role = "user" if msg.role == "user" else "model"
74
  content = msg.content
75
  print(f"[Gemini DEBUG] msg.role={msg.role}, content type={type(content)}, content={repr(content)[:100]}", flush=True)
76
+
77
  if isinstance(content, str) and content:
78
  contents.append(types.Content(
79
  role=role,
 
83
  parts = [types.Part(text=c.text) for c in content if c.text]
84
  if parts:
85
  contents.append(types.Content(role=role, parts=parts))
86
+
87
  print(f"[Gemini DEBUG] Built {len(contents)} contents", flush=True)
88
+
89
  config_kwargs: Dict[str, Any] = {}
90
+
91
  if system:
92
  config_kwargs["system_instruction"] = system
93
+
94
  if temperature is not None:
95
  config_kwargs["temperature"] = temperature
96
+
97
  if max_tokens is not None:
98
  config_kwargs["max_output_tokens"] = max_tokens
99
+
100
+ if self._is_gemini3(model_id):
101
  config_kwargs["thinking_config"] = types.ThinkingConfig(
102
+ include_thoughts=True
 
103
  )
104
+ # thinking_level 미설정 → 기본값 "high" (동적 reasoning)
105
+
106
  if tools:
107
  gemini_tools = []
108
  for t in tools:
 
113
  )
114
  gemini_tools.append(types.Tool(function_declarations=[func_decl]))
115
  config_kwargs["tools"] = gemini_tools
116
+
117
  config = types.GenerateContentConfig(**config_kwargs)
118
+
119
  async for chunk in self._stream_with_fallback(
120
  client, model_id, contents, config, config_kwargs, types
121
  ):
122
  yield chunk
123
+
124
  async def _stream_with_fallback(
125
  self, client, model_id: str, contents, config, config_kwargs: Dict[str, Any], types
126
  ):
 
130
  except Exception as e:
131
  error_str = str(e).lower()
132
  has_thinking = "thinking_config" in config_kwargs
133
+
134
+ if has_thinking and ("thinking" in error_str or "budget" in error_str or "level" in error_str or "unsupported" in error_str):
135
  logger.warning(f"Thinking not supported for {model_id}, retrying without thinking config")
136
  del config_kwargs["thinking_config"]
137
  fallback_config = types.GenerateContentConfig(**config_kwargs)
138
+
139
  async for chunk in self._do_stream(client, model_id, contents, fallback_config):
140
  yield chunk
141
  else:
142
  logger.error(f"Gemini stream error: {e}")
143
  yield StreamChunk(type="error", error=str(e))
144
+
145
  async def _do_stream(self, client, model_id: str, contents, config):
146
  response_stream = await client.aio.models.generate_content_stream(
147
  model=model_id,
148
  contents=contents,
149
  config=config,
150
  )
151
+
152
  pending_tool_calls = []
153
+
154
  async for chunk in response_stream:
155
  if not chunk.candidates:
156
  continue
157
+
158
  candidate = chunk.candidates[0]
159
+
160
  if candidate.content and candidate.content.parts:
161
  for part in candidate.content.parts:
162
  if hasattr(part, 'thought') and part.thought:
 
172
  pending_tool_calls.append(tool_call)
173
  elif part.text:
174
  yield StreamChunk(type="text", text=part.text)
175
+
176
  finish_reason = getattr(candidate, 'finish_reason', None)
177
  if finish_reason:
178
  print(f"[Gemini] finish_reason: {finish_reason}, pending_tool_calls: {len(pending_tool_calls)}", flush=True)
179
  for tc in pending_tool_calls:
180
  yield StreamChunk(type="tool_call", tool_call=tc)
181
+
182
  # IMPORTANT: If there are pending tool calls, ALWAYS return "tool_calls"
183
  # regardless of Gemini's finish_reason (which is often STOP even with tool calls)
184
  if pending_tool_calls:
 
186
  else:
187
  stop_reason = self._map_stop_reason(finish_reason)
188
  print(f"[Gemini] Mapped stop_reason: {stop_reason}", flush=True)
189
+
190
  usage = None
191
  if hasattr(chunk, 'usage_metadata') and chunk.usage_metadata:
192
  usage = {
 
195
  }
196
  if hasattr(chunk.usage_metadata, 'thoughts_token_count'):
197
  usage["thinking_tokens"] = chunk.usage_metadata.thoughts_token_count
198
+
199
  yield StreamChunk(type="done", usage=usage, stop_reason=stop_reason)
200
  return
201
+
202
  yield StreamChunk(type="done", stop_reason="end_turn")
203
+
204
  def _map_stop_reason(self, gemini_finish_reason) -> str:
205
  reason_name = str(gemini_finish_reason).lower() if gemini_finish_reason else ""
206
+
207
  if "stop" in reason_name or "end" in reason_name:
208
  return "end_turn"
209
  elif "tool" in reason_name or "function" in reason_name: