getitdone-api / test_gemini_tools.py
gimjungwook's picture
feat: Initial commit - OpenCode API (getitdone-api)
4dc70fb
#!/usr/bin/env python3
"""
Test script for Gemini Extended Thinking + Tool Calling integration.
Tests that thinking and tool calls work together correctly.
"""
import asyncio
import os
import sys
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from dotenv import load_dotenv
load_dotenv()
from opencode_api.provider.gemini import GeminiProvider
from opencode_api.provider.provider import Message
# Tool declarations offered to the model in the tool-calling scenarios.
# Every tool takes exactly one required string argument, so the schemas are
# generated from a compact (tool name, tool description, argument name,
# argument description) table.
TEST_TOOLS = [
    {
        "name": tool_name,
        "description": tool_description,
        "parameters": {
            "type": "object",
            "properties": {
                arg_name: {
                    "type": "string",
                    "description": arg_description,
                },
            },
            "required": [arg_name],
        },
    }
    for tool_name, tool_description, arg_name, arg_description in (
        (
            "web_search",
            "Search the web for current information. Use this when you need to find up-to-date information about any topic.",
            "query",
            "The search query",
        ),
        (
            "calculator",
            "Perform mathematical calculations. Use this for any math operations.",
            "expression",
            "The mathematical expression to evaluate",
        ),
    )
]
async def _run_stream_case(title, model_id, prompt, system, tools=None):
    """Stream one user prompt through GeminiProvider and report what came back.

    Prints a banner, echoes every streamed chunk as it arrives, then prints a
    summary (chunk counts, each tool call, and token usage when reported).

    Args:
        title: Banner text printed before the request is made.
        model_id: Model identifier passed to ``provider.stream``.
        prompt: Content of the single user message that is sent.
        system: System prompt passed to ``provider.stream``.
        tools: Tool schema list passed to ``provider.stream``; ``None``
            sends the request without tools.

    Returns:
        dict: ``"reasoning"``, ``"text"``, ``"tool_calls"`` and ``"errors"``
        map to lists of the collected chunk payloads; ``"usage"`` is the
        usage object of the last ``done`` chunk, or ``None`` if none arrived.
    """
    print("\n" + "="*60)
    print(title)
    print("="*60)

    provider = GeminiProvider(api_key=os.getenv("GOOGLE_API_KEY"))
    messages = [Message(role="user", content=prompt)]

    reasoning_chunks = []
    text_chunks = []
    tool_calls = []
    errors = []
    usage = None

    async for chunk in provider.stream(
        model_id=model_id,
        messages=messages,
        tools=tools,
        system=system,
    ):
        if chunk.type == "reasoning":
            reasoning_chunks.append(chunk.text)
            # Keep the console readable: long reasoning is cut to 100 chars.
            if len(chunk.text) > 100:
                print(f"[REASONING] {chunk.text[:100]}...")
            else:
                print(f"[REASONING] {chunk.text}")
        elif chunk.type == "text":
            text_chunks.append(chunk.text)
            print(f"[TEXT] {chunk.text}", end="", flush=True)
        elif chunk.type == "tool_call":
            tool_calls.append(chunk.tool_call)
            print(f"\n[TOOL_CALL] {chunk.tool_call.name}({chunk.tool_call.arguments})")
        elif chunk.type == "done":
            usage = chunk.usage
            print(f"\n[DONE] stop_reason={chunk.stop_reason}")
        elif chunk.type == "error":
            # Remember the error so the caller can fail the scenario instead
            # of reporting PASS for a stream that broke.
            errors.append(chunk.error)
            print(f"\n[ERROR] {chunk.error}")

    print("\n--- Summary ---")
    print(f"Reasoning chunks: {len(reasoning_chunks)}")
    print(f"Text chunks: {len(text_chunks)}")
    print(f"Tool calls: {len(tool_calls)}")
    for tc in tool_calls:
        print(f" - {tc.name}: {tc.arguments}")
    if usage:
        print(f"Usage: input={usage.get('input_tokens', 0)}, output={usage.get('output_tokens', 0)}, thinking={usage.get('thinking_tokens', 0)}")

    return {
        "reasoning": reasoning_chunks,
        "text": text_chunks,
        "tool_calls": tool_calls,
        "errors": errors,
        "usage": usage,
    }


async def test_thinking_only():
    """Test that thinking works without tools.

    Returns:
        bool: True when at least one reasoning chunk was streamed and no
        error chunk was received.
    """
    outcome = await _run_stream_case(
        title="TEST 1: Thinking Only (No Tools)",
        model_id="gemini-2.5-pro",
        # Korean logic puzzle: three boxes hold gold, silver and a stone, each
        # box carries a statement, exactly one statement is true - where is
        # the gold?
        prompt="3개의 상자가 있어. 하나에는 금, 하나에는 은, 하나에는 돌이 있어. 상자1: '여기에 금이 없다', 상자2: '여기에 금이 있다', 상자3: '금은 상자1에 있다'. 정확히 하나만 진실이야. 금은 어디?",
        system="You are a helpful assistant. Think step by step.",
        tools=None,  # No tools
    )
    return not outcome["errors"] and len(outcome["reasoning"]) > 0


async def test_tool_calling():
    """Test that tool calling works.

    Returns:
        bool: True when at least one tool call was streamed and no error
        chunk was received.
    """
    outcome = await _run_stream_case(
        title="TEST 2: Tool Calling (Should trigger web_search)",
        model_id="gemini-2.5-pro",
        # Korean: "Who won the 2024 Nobel Prize in Physics? Search the web
        # and tell me."
        prompt="2024년 노벨 물리학상 수상자가 누구야? 웹 검색해서 알려줘.",
        system="You are a helpful assistant. Use tools when needed.",
        tools=TEST_TOOLS,
    )
    return not outcome["errors"] and len(outcome["tool_calls"]) > 0


async def test_thinking_with_tools():
    """Test that thinking AND tool calling work together.

    Returns:
        bool: True when at least one tool call was streamed and no error
        chunk was received. Reasoning chunks are not required because the
        model might not always think.
    """
    outcome = await _run_stream_case(
        title="TEST 3: Thinking + Tool Calling Together",
        model_id="gemini-2.5-pro",
        # Complex question that requires both thinking and tool use.
        # Korean: "Here is a complicated math problem: compute
        # (17 * 23) + (45 / 9) - 12^2. Use the calculator tool to get the
        # exact answer."
        prompt="복잡한 수학 문제야: (17 * 23) + (45 / 9) - 12^2 를 계산해줘. 계산기 도구를 사용해서 정확한 답을 구해줘.",
        system="You are a helpful assistant. Use the calculator tool for mathematical operations. Think through the problem step by step.",
        tools=TEST_TOOLS,
    )
    return not outcome["errors"] and len(outcome["tool_calls"]) > 0


async def test_flash_model():
    """Test that Flash model also works with thinking + tools.

    Returns:
        bool: True when the stream finished without an error chunk. Tool
        calls are not required because Flash might not always use tools.
    """
    outcome = await _run_stream_case(
        title="TEST 4: Gemini 2.5 Flash with Thinking + Tools",
        model_id="gemini-2.5-flash",
        # Korean: "How is the weather in Seoul today? Search the web."
        prompt="오늘 서울 날씨 어때? 웹에서 검색해줘.",
        system="You are a helpful assistant. Use tools when needed.",
        tools=TEST_TOOLS,
    )
    return not outcome["errors"]
async def main():
    """Run every Gemini integration scenario in order and print a PASS/FAIL report.

    A scenario counts as failed when it returns a falsy value or raises; a
    failure does not stop the remaining scenarios from running.

    Returns:
        int: Process exit code - 0 when every scenario passed, 1 when any
        scenario failed or GOOGLE_API_KEY is not set.
    """
    print("="*60)
    print("Gemini Extended Thinking + Tool Calling Integration Test")
    print("="*60)

    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        print("ERROR: GOOGLE_API_KEY not set in environment")
        return 1
    # Show only a short tail of the secret so console output and CI logs do
    # not expose a usable part of the key; very short values are hidden fully.
    key_hint = f"...{api_key[-4:]}" if len(api_key) > 8 else "(hidden)"
    print(f"API Key: {key_hint}")

    # Run tests
    scenarios = [
        ("thinking_only", test_thinking_only),
        ("tool_calling", test_tool_calling),
        ("thinking_with_tools", test_thinking_with_tools),
        ("flash_model", test_flash_model),
    ]
    results = {}
    for number, (label, scenario) in enumerate(scenarios, start=1):
        try:
            results[label] = await scenario()
        except Exception as e:
            # Top-level boundary: record the failure and keep going so one
            # broken scenario does not hide the results of the others.
            print(f"TEST {number} FAILED: {e}")
            results[label] = False

    # Final summary
    print("\n" + "="*60)
    print("FINAL RESULTS")
    print("="*60)
    for test_name, passed in results.items():
        status = "PASS" if passed else "FAIL"
        print(f" {test_name}: {status}")
    all_passed = all(results.values())
    print(f"\nOverall: {'ALL TESTS PASSED' if all_passed else 'SOME TESTS FAILED'}")
    return 0 if all_passed else 1


if __name__ == "__main__":
    # Propagate the outcome to the shell so CI can detect failed runs.
    sys.exit(asyncio.run(main()))