| | import requests |
| | import json |
| | import time |
| | import sys |
| |
|
# Root of the API under test; each test appends its endpoint path to this.
BASE_URL = "http://localhost:8000/v1"
# Model identifier sent as the "model" field of every chat-completion payload.
MODEL_NAME = "RWKV-GLM-4.7-Flash-Preview-v0.1"
| |
|
| | |
| | |
| | |
def print_section(title, width=60):
    """Print ``title`` framed above and below by a rule of ``=`` characters.

    Args:
        title: Heading to display; passed straight to ``print``.
        width: Number of ``=`` characters in each rule. Defaults to 60.
    """
    rule = "=" * width
    # The leading newline separates the banner from any preceding output.
    print("\n" + rule)
    print(title)
    print(rule)
| |
|
| |
|
def safe_json(resp):
    """Decode the JSON body of ``resp``, aborting the script if it is invalid.

    Args:
        resp: Response-like object exposing ``json()`` and ``text``.

    Returns:
        Whatever ``resp.json()`` returns.

    Raises:
        SystemExit: With exit code 1 when the body cannot be decoded as JSON.
    """
    try:
        return resp.json()
    except ValueError:
        # JSON decode errors derive from ValueError. Catching only that,
        # rather than using a bare ``except``, lets KeyboardInterrupt and
        # unrelated failures propagate instead of being reported as bad JSON.
        print("❌ JSON decode failed")
        print(resp.text)
        sys.exit(1)
| |
|
| |
|
| | |
| | |
| | |
def test_models():
    """Check that ``GET {BASE_URL}/models`` returns a non-empty model list.

    Raises:
        AssertionError: If the status is not 200, the ``data`` key is
            missing, or the list under ``data`` is empty.
    """
    print_section("TEST: /v1/models")

    # requests applies no timeout by default; bound the wait so a hung server
    # fails the run (with a requests timeout error) instead of blocking it.
    resp = requests.get(f"{BASE_URL}/models", timeout=30)
    assert resp.status_code == 200, (
        f"Models API failed (HTTP {resp.status_code})"
    )

    data = safe_json(resp)

    assert "data" in data, "No model list returned"
    assert len(data["data"]) > 0, "Empty model list"

    print("✅ Models endpoint OK")
    print("Available models:", [m["id"] for m in data["data"]])
| |
|
| |
|
| | |
| | |
| | |
def test_basic_completion():
    """Send one non-streaming chat completion and check the response shape.

    Raises:
        AssertionError: If the status is not 200, ``choices`` is missing or
            empty, or ``usage`` is missing.
    """
    print_section("TEST: Basic Non-Streaming Completion")

    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Say hello."}],
        "max_tokens": 30,
        "stream": False,
    }

    # requests applies no timeout by default; the limit is generous because
    # generation can be slow, but a hung server must not block the run.
    resp = requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload),
        timeout=120,
    )

    assert resp.status_code == 200, (
        f"Completion failed (HTTP {resp.status_code})"
    )

    data = safe_json(resp)

    assert "choices" in data, "No choices returned"
    # Guard the [0] lookup below so an empty list fails with a clear message.
    assert data["choices"], "Empty choices list returned"
    assert "usage" in data, "No usage returned"

    print("Assistant:", data["choices"][0]["message"]["content"])
    print("Usage:", data["usage"])
    print("✅ Basic completion OK")
| |
|
| |
|
| | |
| | |
| | |
def _iter_stream_text(lines):
    """Yield each non-empty text fragment from ``data: ``-prefixed JSON lines.

    Args:
        lines: Iterable of raw byte lines, as produced by
            ``Response.iter_lines()``.

    Yields:
        The ``choices[0].delta.content`` string of every chunk that has one.
        Iteration stops at the ``[DONE]`` sentinel.
    """
    prefix = "data: "
    for raw in lines:
        if not raw:
            continue  # blank separator line between events
        decoded = raw.decode("utf-8")
        if not decoded.startswith(prefix):
            continue
        content = decoded[len(prefix):]
        if content == "[DONE]":
            return

        chunk = json.loads(content)
        choices = chunk.get("choices")
        if not choices:
            # A chunk with no choices carries no text to collect.
            continue
        delta = choices[0].get("delta") or {}
        text = delta.get("content")
        # ``content`` may be absent, null or empty; none of those add output.
        if text:
            yield text


def test_streaming():
    """Stream one chat completion and check that some text arrives.

    Raises:
        AssertionError: If the status is not 200 or no text was streamed.
    """
    print_section("TEST: Streaming Completion")

    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Count from 1 to 5."}],
        "max_tokens": 50,
        "stream": True,
    }

    pieces = []

    # requests applies no timeout by default; bound the wait so a stalled
    # stream fails the run instead of blocking it forever.
    with requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload),
        stream=True,
        timeout=120,
    ) as resp:
        assert resp.status_code == 200, (
            f"Streaming failed (HTTP {resp.status_code})"
        )

        for text in _iter_stream_text(resp.iter_lines()):
            print(text, end="", flush=True)
            pieces.append(text)

    full_text = "".join(pieces)

    # Verify before announcing success, so a failed run never prints the
    # success line.
    assert len(full_text) > 0, "Streaming returned empty"
    print("\n\n✅ Streaming OK")
| |
|
| |
|
| | |
| | |
| | |
def test_sampling_variations():
    """Run the same prompt once per sampling parameter and print each output.

    Each request adds exactly one sampling option to the base payload.

    Raises:
        AssertionError: If any request returns a non-200 status or an empty
            ``choices`` list.
    """
    print_section("TEST: Sampling Variations")

    base_payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "user", "content": "Write a creative sentence about AI."}
        ],
        "max_tokens": 50,
        "stream": False,
    }

    configs = [
        {"temperature": 0.0},
        {"temperature": 0.7},
        {"top_p": 0.8},
        {"top_k": 20},
        {"repetition_penalty": 1.2},
        {"presence_penalty": 0.5},
        {"frequency_penalty": 0.5},
    ]

    for cfg in configs:
        # Merge into a fresh dict so one config never leaks into the next.
        payload = {**base_payload, **cfg}

        # requests applies no timeout by default; bound the wait so a hung
        # server fails the run instead of blocking it forever.
        resp = requests.post(
            f"{BASE_URL}/chat/completions",
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            timeout=120,
        )

        assert resp.status_code == 200, f"Sampling failed: {cfg}"

        data = safe_json(resp)
        # Guard the [0] lookup so an empty list fails with a clear message.
        assert data.get("choices"), f"No choices returned: {cfg}"

        text = data["choices"][0]["message"]["content"]

        print(f"\nConfig: {cfg}")
        print("Output:", text[:120], "...")

    print("\n✅ Sampling parameter variations OK")
| |
|
| |
|
| | |
| | |
| | |
def test_deterministic():
    """Send the same ``temperature=0`` request twice and compare the outputs.

    Raises:
        AssertionError: If either request returns a non-200 status or the two
            outputs differ.
    """
    print_section("TEST: Deterministic Mode (temperature=0)")

    payload = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "user", "content": "Define gravity in one sentence."}
        ],
        "temperature": 0.0,
        "max_tokens": 50,
        "stream": False,
    }

    outputs = []
    for run in (1, 2):
        # requests applies no timeout by default; bound the wait so a hung
        # server fails the run instead of blocking it forever.
        resp = requests.post(
            f"{BASE_URL}/chat/completions",
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            timeout=120,
        )
        # Check the status like the other tests do, so an error response is
        # reported as such rather than as a KeyError on "choices".
        assert resp.status_code == 200, (
            f"Deterministic run {run} failed (HTTP {resp.status_code})"
        )
        outputs.append(safe_json(resp)["choices"][0]["message"]["content"])

    out1, out2 = outputs

    print("Run1:", out1)
    print("Run2:", out2)

    assert out1 == out2, "❌ Deterministic mode not deterministic"
    print("✅ Deterministic check OK")
| |
|
| |
|
| | |
| | |
| | |
def test_error_handling():
    """Send a malformed request and report how the server responds.

    This check only prints its verdict; it never fails the run on the
    server's status code.
    """
    print_section("TEST: Error Handling")

    # Deliberately incomplete: it omits the "messages" field that every other
    # request in this file sends.
    payload = {
        "model": MODEL_NAME,
    }

    # requests applies no timeout by default; bound the wait so a hung server
    # fails the run instead of blocking it forever.
    resp = requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=json.dumps(payload),
        timeout=30,
    )

    status = resp.status_code
    if 400 <= status < 500:
        # Only a client-error status means the request was actually rejected.
        print("✅ Server correctly handled bad request")
    elif status == 200:
        print("⚠️ Warning: server did not reject bad request")
    else:
        # A 5xx, for example, means the server failed rather than validated.
        print(f"⚠️ Warning: unexpected HTTP {status} for bad request")
| |
|
| |
|
| | |
| | |
| | |
if __name__ == "__main__":
    # Run every check in order; an uncaught failure stops the run before the
    # summary below is printed.
    started_at = time.time()

    for check in (
        test_models,
        test_basic_completion,
        test_streaming,
        test_sampling_variations,
        test_deterministic,
        test_error_handling,
    ):
        check()

    print_section("ALL TESTS PASSED")
    elapsed = round(time.time() - started_at, 2)
    print(f"Total time: {elapsed} sec")