gimjungwook Claude Opus 4.5 committed on
Commit
e886743
·
1 Parent(s): 4dc70fb

fix: Gemini 3 호환 - thinking_budget → thinking_level 마이그레이션

Browse files

- Gemini 2.5 모델 전부 제거, gemini-3-flash-preview만 남김
- MODELS_WITH_THINKING → GEMINI3_MODELS 상수 변경
- _supports_thinking() → _is_gemini3() 메서드 변경
- ThinkingConfig: thinking_budget 제거, include_thoughts=True만 (기본값 HIGH)
- fallback 에러 키워드에 "level" 추가
- google-genai>=1.0.0 → >=1.51.0 (Gemini 3 SDK 지원)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

requirements.txt CHANGED
@@ -6,7 +6,7 @@ uvicorn[standard]>=0.27.0
6
  anthropic>=0.40.0
7
  openai>=1.50.0
8
  litellm>=1.50.0
9
- google-genai>=1.0.0
10
 
11
  # Validation and serialization
12
  pydantic>=2.6.0
 
6
  anthropic>=0.40.0
7
  openai>=1.50.0
8
  litellm>=1.50.0
9
+ google-genai>=1.51.0
10
 
11
  # Validation and serialization
12
  pydantic>=2.6.0
src/opencode_api/provider/gemini.py CHANGED
@@ -7,82 +7,41 @@ from .provider import BaseProvider, ModelInfo, Message, StreamChunk, ToolCall
7
  logger = logging.getLogger(__name__)
8
 
9
 
10
- MODELS_WITH_THINKING = {
11
- "gemini-2.5-pro",
12
- "gemini-2.5-flash",
13
- "gemini-2.5-flash-lite",
14
- }
15
-
16
- THINKING_BUDGET_MIN = {
17
- "gemini-2.5-pro": 128,
18
- "gemini-2.5-flash": 1,
19
- "gemini-2.5-flash-lite": 1,
20
  }
21
 
22
 
23
  class GeminiProvider(BaseProvider):
24
-
25
  def __init__(self, api_key: Optional[str] = None):
26
  self._api_key = api_key or os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
27
  self._client = None
28
-
29
  @property
30
  def id(self) -> str:
31
  return "gemini"
32
-
33
  @property
34
  def name(self) -> str:
35
  return "Google Gemini"
36
-
37
  @property
38
  def models(self) -> Dict[str, ModelInfo]:
39
  return {
40
- "gemini-2.5-pro": ModelInfo(
41
- id="gemini-2.5-pro",
42
- name="Gemini 2.5 Pro",
43
- provider_id="gemini",
44
- context_limit=1048576,
45
- output_limit=65536,
46
- supports_tools=True,
47
- supports_streaming=True,
48
- cost_input=1.25,
49
- cost_output=10.0,
50
- ),
51
- "gemini-2.5-flash": ModelInfo(
52
- id="gemini-2.5-flash",
53
- name="Gemini 2.5 Flash",
54
  provider_id="gemini",
55
  context_limit=1048576,
56
  output_limit=65536,
57
  supports_tools=True,
58
  supports_streaming=True,
59
- cost_input=0.15,
60
- cost_output=0.6,
61
- ),
62
- "gemini-2.5-flash-lite": ModelInfo(
63
- id="gemini-2.5-flash-lite",
64
- name="Gemini 2.5 Flash Lite",
65
- provider_id="gemini",
66
- context_limit=1048576,
67
- output_limit=65536,
68
- supports_tools=True,
69
- supports_streaming=True,
70
- cost_input=0.075,
71
- cost_output=0.3,
72
- ),
73
- "gemini-2.0-flash": ModelInfo(
74
- id="gemini-2.0-flash",
75
- name="Gemini 2.0 Flash",
76
- provider_id="gemini",
77
- context_limit=1048576,
78
- output_limit=8192,
79
- supports_tools=True,
80
- supports_streaming=True,
81
- cost_input=0.075,
82
- cost_output=0.3,
83
  ),
84
  }
85
-
86
  def _get_client(self):
87
  if self._client is None:
88
  try:
@@ -91,14 +50,10 @@ class GeminiProvider(BaseProvider):
91
  except ImportError:
92
  raise ImportError("google-genai package is required. Install with: pip install google-genai")
93
  return self._client
94
-
95
- def _supports_thinking(self, model_id: str) -> bool:
96
- return model_id in MODELS_WITH_THINKING
97
-
98
- def _get_thinking_budget(self, model_id: str) -> int:
99
- min_budget = THINKING_BUDGET_MIN.get(model_id, 128)
100
- return max(min_budget, 1024)
101
-
102
  async def stream(
103
  self,
104
  model_id: str,
@@ -109,16 +64,16 @@ class GeminiProvider(BaseProvider):
109
  max_tokens: Optional[int] = None,
110
  ) -> AsyncGenerator[StreamChunk, None]:
111
  from google.genai import types
112
-
113
  client = self._get_client()
114
-
115
  contents = []
116
  print(f"[Gemini DEBUG] Building contents from {len(messages)} messages", flush=True)
117
  for msg in messages:
118
  role = "user" if msg.role == "user" else "model"
119
  content = msg.content
120
  print(f"[Gemini DEBUG] msg.role={msg.role}, content type={type(content)}, content={repr(content)[:100]}", flush=True)
121
-
122
  if isinstance(content, str) and content:
123
  contents.append(types.Content(
124
  role=role,
@@ -128,26 +83,26 @@ class GeminiProvider(BaseProvider):
128
  parts = [types.Part(text=c.text) for c in content if c.text]
129
  if parts:
130
  contents.append(types.Content(role=role, parts=parts))
131
-
132
  print(f"[Gemini DEBUG] Built {len(contents)} contents", flush=True)
133
-
134
  config_kwargs: Dict[str, Any] = {}
135
-
136
  if system:
137
  config_kwargs["system_instruction"] = system
138
-
139
  if temperature is not None:
140
  config_kwargs["temperature"] = temperature
141
-
142
  if max_tokens is not None:
143
  config_kwargs["max_output_tokens"] = max_tokens
144
-
145
- if self._supports_thinking(model_id):
146
  config_kwargs["thinking_config"] = types.ThinkingConfig(
147
- thinking_budget=self._get_thinking_budget(model_id),
148
- include_thoughts=True # Include thinking content in response
149
  )
150
-
 
151
  if tools:
152
  gemini_tools = []
153
  for t in tools:
@@ -158,14 +113,14 @@ class GeminiProvider(BaseProvider):
158
  )
159
  gemini_tools.append(types.Tool(function_declarations=[func_decl]))
160
  config_kwargs["tools"] = gemini_tools
161
-
162
  config = types.GenerateContentConfig(**config_kwargs)
163
-
164
  async for chunk in self._stream_with_fallback(
165
  client, model_id, contents, config, config_kwargs, types
166
  ):
167
  yield chunk
168
-
169
  async def _stream_with_fallback(
170
  self, client, model_id: str, contents, config, config_kwargs: Dict[str, Any], types
171
  ):
@@ -175,33 +130,33 @@ class GeminiProvider(BaseProvider):
175
  except Exception as e:
176
  error_str = str(e).lower()
177
  has_thinking = "thinking_config" in config_kwargs
178
-
179
- if has_thinking and ("thinking" in error_str or "budget" in error_str or "unsupported" in error_str):
180
  logger.warning(f"Thinking not supported for {model_id}, retrying without thinking config")
181
  del config_kwargs["thinking_config"]
182
  fallback_config = types.GenerateContentConfig(**config_kwargs)
183
-
184
  async for chunk in self._do_stream(client, model_id, contents, fallback_config):
185
  yield chunk
186
  else:
187
  logger.error(f"Gemini stream error: {e}")
188
  yield StreamChunk(type="error", error=str(e))
189
-
190
  async def _do_stream(self, client, model_id: str, contents, config):
191
  response_stream = await client.aio.models.generate_content_stream(
192
  model=model_id,
193
  contents=contents,
194
  config=config,
195
  )
196
-
197
  pending_tool_calls = []
198
-
199
  async for chunk in response_stream:
200
  if not chunk.candidates:
201
  continue
202
-
203
  candidate = chunk.candidates[0]
204
-
205
  if candidate.content and candidate.content.parts:
206
  for part in candidate.content.parts:
207
  if hasattr(part, 'thought') and part.thought:
@@ -217,13 +172,13 @@ class GeminiProvider(BaseProvider):
217
  pending_tool_calls.append(tool_call)
218
  elif part.text:
219
  yield StreamChunk(type="text", text=part.text)
220
-
221
  finish_reason = getattr(candidate, 'finish_reason', None)
222
  if finish_reason:
223
  print(f"[Gemini] finish_reason: {finish_reason}, pending_tool_calls: {len(pending_tool_calls)}", flush=True)
224
  for tc in pending_tool_calls:
225
  yield StreamChunk(type="tool_call", tool_call=tc)
226
-
227
  # IMPORTANT: If there are pending tool calls, ALWAYS return "tool_calls"
228
  # regardless of Gemini's finish_reason (which is often STOP even with tool calls)
229
  if pending_tool_calls:
@@ -231,7 +186,7 @@ class GeminiProvider(BaseProvider):
231
  else:
232
  stop_reason = self._map_stop_reason(finish_reason)
233
  print(f"[Gemini] Mapped stop_reason: {stop_reason}", flush=True)
234
-
235
  usage = None
236
  if hasattr(chunk, 'usage_metadata') and chunk.usage_metadata:
237
  usage = {
@@ -240,15 +195,15 @@ class GeminiProvider(BaseProvider):
240
  }
241
  if hasattr(chunk.usage_metadata, 'thoughts_token_count'):
242
  usage["thinking_tokens"] = chunk.usage_metadata.thoughts_token_count
243
-
244
  yield StreamChunk(type="done", usage=usage, stop_reason=stop_reason)
245
  return
246
-
247
  yield StreamChunk(type="done", stop_reason="end_turn")
248
-
249
  def _map_stop_reason(self, gemini_finish_reason) -> str:
250
  reason_name = str(gemini_finish_reason).lower() if gemini_finish_reason else ""
251
-
252
  if "stop" in reason_name or "end" in reason_name:
253
  return "end_turn"
254
  elif "tool" in reason_name or "function" in reason_name:
 
7
  logger = logging.getLogger(__name__)
8
 
9
 
10
+ GEMINI3_MODELS = {
11
+ "gemini-3-flash-preview",
 
 
 
 
 
 
 
 
12
  }
13
 
14
 
15
  class GeminiProvider(BaseProvider):
16
+
17
  def __init__(self, api_key: Optional[str] = None):
18
  self._api_key = api_key or os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
19
  self._client = None
20
+
21
  @property
22
  def id(self) -> str:
23
  return "gemini"
24
+
25
  @property
26
  def name(self) -> str:
27
  return "Google Gemini"
28
+
29
  @property
30
  def models(self) -> Dict[str, ModelInfo]:
31
  return {
32
+ "gemini-3-flash-preview": ModelInfo(
33
+ id="gemini-3-flash-preview",
34
+ name="Gemini 3.0 Flash",
 
 
 
 
 
 
 
 
 
 
 
35
  provider_id="gemini",
36
  context_limit=1048576,
37
  output_limit=65536,
38
  supports_tools=True,
39
  supports_streaming=True,
40
+ cost_input=0.5,
41
+ cost_output=3.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  ),
43
  }
44
+
45
  def _get_client(self):
46
  if self._client is None:
47
  try:
 
50
  except ImportError:
51
  raise ImportError("google-genai package is required. Install with: pip install google-genai")
52
  return self._client
53
+
54
+ def _is_gemini3(self, model_id: str) -> bool:
55
+ return model_id in GEMINI3_MODELS
56
+
 
 
 
 
57
  async def stream(
58
  self,
59
  model_id: str,
 
64
  max_tokens: Optional[int] = None,
65
  ) -> AsyncGenerator[StreamChunk, None]:
66
  from google.genai import types
67
+
68
  client = self._get_client()
69
+
70
  contents = []
71
  print(f"[Gemini DEBUG] Building contents from {len(messages)} messages", flush=True)
72
  for msg in messages:
73
  role = "user" if msg.role == "user" else "model"
74
  content = msg.content
75
  print(f"[Gemini DEBUG] msg.role={msg.role}, content type={type(content)}, content={repr(content)[:100]}", flush=True)
76
+
77
  if isinstance(content, str) and content:
78
  contents.append(types.Content(
79
  role=role,
 
83
  parts = [types.Part(text=c.text) for c in content if c.text]
84
  if parts:
85
  contents.append(types.Content(role=role, parts=parts))
86
+
87
  print(f"[Gemini DEBUG] Built {len(contents)} contents", flush=True)
88
+
89
  config_kwargs: Dict[str, Any] = {}
90
+
91
  if system:
92
  config_kwargs["system_instruction"] = system
93
+
94
  if temperature is not None:
95
  config_kwargs["temperature"] = temperature
96
+
97
  if max_tokens is not None:
98
  config_kwargs["max_output_tokens"] = max_tokens
99
+
100
+ if self._is_gemini3(model_id):
101
  config_kwargs["thinking_config"] = types.ThinkingConfig(
102
+ include_thoughts=True
 
103
  )
104
+ # thinking_level 미설정 → 기본값 "high" (동적 reasoning)
105
+
106
  if tools:
107
  gemini_tools = []
108
  for t in tools:
 
113
  )
114
  gemini_tools.append(types.Tool(function_declarations=[func_decl]))
115
  config_kwargs["tools"] = gemini_tools
116
+
117
  config = types.GenerateContentConfig(**config_kwargs)
118
+
119
  async for chunk in self._stream_with_fallback(
120
  client, model_id, contents, config, config_kwargs, types
121
  ):
122
  yield chunk
123
+
124
  async def _stream_with_fallback(
125
  self, client, model_id: str, contents, config, config_kwargs: Dict[str, Any], types
126
  ):
 
130
  except Exception as e:
131
  error_str = str(e).lower()
132
  has_thinking = "thinking_config" in config_kwargs
133
+
134
+ if has_thinking and ("thinking" in error_str or "budget" in error_str or "level" in error_str or "unsupported" in error_str):
135
  logger.warning(f"Thinking not supported for {model_id}, retrying without thinking config")
136
  del config_kwargs["thinking_config"]
137
  fallback_config = types.GenerateContentConfig(**config_kwargs)
138
+
139
  async for chunk in self._do_stream(client, model_id, contents, fallback_config):
140
  yield chunk
141
  else:
142
  logger.error(f"Gemini stream error: {e}")
143
  yield StreamChunk(type="error", error=str(e))
144
+
145
  async def _do_stream(self, client, model_id: str, contents, config):
146
  response_stream = await client.aio.models.generate_content_stream(
147
  model=model_id,
148
  contents=contents,
149
  config=config,
150
  )
151
+
152
  pending_tool_calls = []
153
+
154
  async for chunk in response_stream:
155
  if not chunk.candidates:
156
  continue
157
+
158
  candidate = chunk.candidates[0]
159
+
160
  if candidate.content and candidate.content.parts:
161
  for part in candidate.content.parts:
162
  if hasattr(part, 'thought') and part.thought:
 
172
  pending_tool_calls.append(tool_call)
173
  elif part.text:
174
  yield StreamChunk(type="text", text=part.text)
175
+
176
  finish_reason = getattr(candidate, 'finish_reason', None)
177
  if finish_reason:
178
  print(f"[Gemini] finish_reason: {finish_reason}, pending_tool_calls: {len(pending_tool_calls)}", flush=True)
179
  for tc in pending_tool_calls:
180
  yield StreamChunk(type="tool_call", tool_call=tc)
181
+
182
  # IMPORTANT: If there are pending tool calls, ALWAYS return "tool_calls"
183
  # regardless of Gemini's finish_reason (which is often STOP even with tool calls)
184
  if pending_tool_calls:
 
186
  else:
187
  stop_reason = self._map_stop_reason(finish_reason)
188
  print(f"[Gemini] Mapped stop_reason: {stop_reason}", flush=True)
189
+
190
  usage = None
191
  if hasattr(chunk, 'usage_metadata') and chunk.usage_metadata:
192
  usage = {
 
195
  }
196
  if hasattr(chunk.usage_metadata, 'thoughts_token_count'):
197
  usage["thinking_tokens"] = chunk.usage_metadata.thoughts_token_count
198
+
199
  yield StreamChunk(type="done", usage=usage, stop_reason=stop_reason)
200
  return
201
+
202
  yield StreamChunk(type="done", stop_reason="end_turn")
203
+
204
  def _map_stop_reason(self, gemini_finish_reason) -> str:
205
  reason_name = str(gemini_finish_reason).lower() if gemini_finish_reason else ""
206
+
207
  if "stop" in reason_name or "end" in reason_name:
208
  return "end_turn"
209
  elif "tool" in reason_name or "function" in reason_name: