Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Test Kiro LLM API | |
| Usage: | |
| python autoreg/llm/test_api.py | |
| """ | |
| import requests | |
| import json | |
# Base URL of the API server under test; the endpoint URLs requested below
# are all built by appending a path to this value.
API_URL = "http://localhost:8421"
def test_health():
    """Check that ``GET /health`` answers with HTTP 200.

    Prints the status code and the response body (pretty-printed when it is
    JSON, otherwise the first 200 characters of the raw text).

    Returns:
        bool: True when the endpoint replied with status 200; False for any
        other status or when the request could not be completed.
    """
    print("\n[1] Testing /health...")
    try:
        # A timeout keeps the suite from hanging on an unresponsive server.
        resp = requests.get(f"{API_URL}/health", timeout=10)
    except requests.RequestException as exc:
        # Connection refused, DNS failure, timeout, ... -> report as a failure
        # instead of aborting the whole run with a traceback.
        print(f"Error: {exc}")
        return False

    print(f"Status: {resp.status_code}")
    try:
        print(json.dumps(resp.json(), indent=2))
    except ValueError:
        # Body is not valid JSON (e.g. an HTML error page); show it raw.
        print(resp.text[:200])
    return resp.status_code == 200
def test_models():
    """Check that ``GET /v1/models`` answers with HTTP 200 and list the models.

    Prints the status code and the ``id`` of every entry found under the
    ``data`` key of the JSON body.

    Returns:
        bool: True when the endpoint replied with status 200 and a JSON
        object; False otherwise or when the request could not be completed.
    """
    print("\n[2] Testing /v1/models...")
    try:
        # A timeout keeps the suite from hanging on an unresponsive server.
        resp = requests.get(f"{API_URL}/v1/models", timeout=10)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return False

    print(f"Status: {resp.status_code}")
    try:
        data = resp.json()
    except ValueError:
        # Non-JSON body: nothing to list, and the endpoint is not usable.
        print(f"Error: {resp.text[:200]}")
        return False
    if not isinstance(data, dict):
        print(f"Error: unexpected response body: {resp.text[:200]}")
        return False

    # Use .get() so one malformed entry cannot raise KeyError mid-report.
    model_ids = [
        entry.get("id")
        for entry in data.get("data") or []
        if isinstance(entry, dict)
    ]
    print(f"Models: {model_ids}")
    return resp.status_code == 200
def test_pool_status():
    """Check that ``GET /pool/status`` answers with HTTP 200.

    Prints the status code and the ``total``, ``available`` and ``banned``
    counters from the JSON body (each shown as 0 when the key is absent).

    Returns:
        bool: True when the endpoint replied with status 200 and a JSON
        object; False otherwise or when the request could not be completed.
    """
    print("\n[3] Testing /pool/status...")
    try:
        # A timeout keeps the suite from hanging on an unresponsive server.
        resp = requests.get(f"{API_URL}/pool/status", timeout=10)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return False

    print(f"Status: {resp.status_code}")
    try:
        data = resp.json()
    except ValueError:
        # Non-JSON body (e.g. an HTML error page) cannot carry the counters.
        print(f"Error: {resp.text[:200]}")
        return False
    if not isinstance(data, dict):
        print(f"Error: unexpected response body: {resp.text[:200]}")
        return False

    print(f"Total tokens: {data.get('total', 0)}")
    print(f"Available: {data.get('available', 0)}")
    print(f"Banned: {data.get('banned', 0)}")
    return resp.status_code == 200
def test_chat_completion():
    """Send one non-streaming request to ``POST /v1/chat/completions``.

    Prints the status code and, on success, the first 200 characters of the
    first choice's message content; on failure, the first 200 characters of
    the response body.

    Returns:
        bool: True when the endpoint replied with status 200 and a JSON
        object; False otherwise or when the request could not be completed.
    """
    print("\n[4] Testing /v1/chat/completions (non-streaming)...")
    payload = {
        "model": "claude-sonnet-4-20250514",
        "messages": [
            {"role": "user", "content": "Say 'Hello from Kiro LLM API!' in exactly those words."}
        ],
        "stream": False,
        "max_tokens": 50,
    }
    try:
        # Generation can be slow, so allow more time than for the status
        # endpoints, but never wait forever.
        resp = requests.post(
            f"{API_URL}/v1/chat/completions", json=payload, timeout=60
        )
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return False

    print(f"Status: {resp.status_code}")
    if resp.status_code != 200:
        print(f"Error: {resp.text[:200]}")
        return False

    try:
        data = resp.json()
    except ValueError:
        print(f"Error: {resp.text[:200]}")
        return False
    if not isinstance(data, dict):
        print(f"Error: unexpected response body: {resp.text[:200]}")
        return False

    # `or` fallbacks cover a missing key as well as an explicit empty list /
    # null value, which the plain .get(key, default) form would let through
    # and then fail on with IndexError / TypeError.
    first_choice = (data.get("choices") or [{}])[0] or {}
    content = (first_choice.get("message") or {}).get("content") or ""
    print(f"Response: {content[:200]}")
    return True
def test_chat_streaming():
    """Send one streaming request to ``POST /v1/chat/completions``.

    Reads the response line by line, treating lines that start with ``data:``
    as JSON chunks, and echoes every ``choices[0].delta.content`` fragment as
    it arrives. Reading stops at the ``[DONE]`` marker or at end of stream.

    Returns:
        bool: True when status was 200 and at least one non-empty content
        fragment was received; False otherwise, including when the request or
        the stream itself fails.
    """
    print("\n[5] Testing /v1/chat/completions (streaming)...")
    payload = {
        "model": "claude-sonnet-4-20250514",
        "messages": [
            {"role": "user", "content": "Count from 1 to 5, one number per line."}
        ],
        "stream": True,
        "max_tokens": 100,
    }
    try:
        # (connect, read) timeouts: fail fast when the server is down, but
        # tolerate pauses between streamed chunks.
        resp = requests.post(
            f"{API_URL}/v1/chat/completions",
            json=payload,
            stream=True,
            timeout=(10, 60),
        )
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return False

    # With stream=True the connection stays checked out until the body is
    # fully consumed or the response is closed; the context manager closes it
    # on every exit path, including the early break on [DONE].
    with resp:
        print(f"Status: {resp.status_code}")
        if resp.status_code != 200:
            print(f"Error: {resp.text[:200]}")
            return False

        print("Streaming response:")
        fragments = []
        try:
            for raw_line in resp.iter_lines():
                if not raw_line:
                    continue  # blank separator lines between events
                line = raw_line.decode("utf-8")
                if not line.startswith("data:"):
                    continue
                # The space after "data:" is optional, so strip rather than
                # slicing a fixed 6 characters.
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    continue  # skip malformed chunks
                if not isinstance(chunk, dict):
                    continue
                # A chunk may carry an empty `choices` list or a null delta;
                # fall back instead of raising IndexError / AttributeError.
                first_choice = (chunk.get("choices") or [{}])[0] or {}
                content = (first_choice.get("delta") or {}).get("content") or ""
                if content:
                    print(content, end="", flush=True)
                    fragments.append(content)
        except requests.RequestException as exc:
            # Connection dropped or read timed out mid-stream.
            print(f"\nError: {exc}")
            return False

        print()
        return bool(fragments)
def test_openai_client():
    """Exercise the API through the ``openai`` Python package.

    Asks a single arithmetic question and checks the reply for the digit 4.

    Returns:
        True when the reply contains "4", False when it does not or when any
        error occurs, and None when the ``openai`` package cannot be imported.
    """
    print("\n[6] Testing with OpenAI client...")
    try:
        # Imported lazily so the rest of the suite works without the package.
        from openai import OpenAI

        question = {
            "role": "user",
            "content": "What is 2+2? Answer with just the number.",
        }
        api = OpenAI(base_url=f"{API_URL}/v1", api_key="not-needed")
        reply = api.chat.completions.create(
            model="claude-sonnet-4-20250514",
            messages=[question],
            max_tokens=10,
        )
        answer = reply.choices[0].message.content
        print(f"Response: {answer}")
        return "4" in answer
    except ImportError:
        print("OpenAI client not installed. Run: pip install openai")
        return None
    except Exception as err:
        print(f"Error: {err}")
        return False
def _run_check(check):
    """Call one test function; report an unexpected exception as a failure."""
    try:
        return check()
    except Exception as exc:
        # Boundary handler: one crashing check must not hide the results of
        # the others. KeyboardInterrupt / SystemExit still propagate.
        print(f"Error: {exc}")
        return False


def main():
    """Run every API check in order and print a PASS/FAIL/SKIP summary.

    The health, models and pool-status checks always run. The three chat
    checks run only when ``/pool/status`` replies with 200 and reports an
    ``available`` count greater than zero; otherwise a notice explains why
    they were skipped. A check that returns None is listed as SKIP, a truthy
    result as PASS and anything else as FAIL.
    """
    print("=" * 60)
    print("Kiro LLM API Test Suite")
    print("=" * 60)

    results = {}

    # Basic tests
    results["health"] = _run_check(test_health)
    results["models"] = _run_check(test_models)
    results["pool_status"] = _run_check(test_pool_status)

    # Chat tests (only if pool has tokens)
    try:
        pool_resp = requests.get(f"{API_URL}/pool/status", timeout=10)
        pool_ok = pool_resp.status_code == 200
        available = pool_resp.json().get("available", 0) if pool_ok else 0
    except (requests.RequestException, ValueError, AttributeError) as exc:
        # Request failed, body was not JSON, or JSON was not an object.
        print(f"\n[!] Could not read pool status - skipping chat tests: {exc}")
    else:
        if not pool_ok:
            print(
                f"\n[!] Pool status returned {pool_resp.status_code}"
                " - skipping chat tests"
            )
        elif isinstance(available, (int, float)) and available > 0:
            # Each check is guarded separately so a crash in one is recorded
            # as FAIL instead of silently dropping all chat results.
            results["chat"] = _run_check(test_chat_completion)
            results["streaming"] = _run_check(test_chat_streaming)
            results["openai_client"] = _run_check(test_openai_client)
        else:
            print("\n[!] No available tokens - skipping chat tests")

    # Summary
    print("\n" + "=" * 60)
    print("Results:")
    print("=" * 60)
    for test, result in results.items():
        if result is None:
            status = "SKIP"
        elif result:
            status = "PASS"
        else:
            status = "FAIL"
        print(f" {test}: {status}")
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()