# RWKV-GLM-4.7-Flash-Preview-v0.1 / test_client_api.py
# (uploaded by OpenMOSE via huggingface_hub, revision e3bb7ae)
# Smoke tests for an OpenAI-compatible /v1 chat-completions server.
import requests
import json
import time
import sys
BASE_URL = "http://localhost:8000/v1"
MODEL_NAME = "RWKV-GLM-4.7-Flash-Preview-v0.1"
# ==========================================================
# Utility
# ==========================================================
def print_section(title):
    """Print *title* framed above and below by a 60-character rule."""
    rule = "=" * 60
    print(f"\n{rule}\n{title}\n{rule}")
def safe_json(resp):
    """Decode *resp*'s body as JSON, aborting the test run on failure.

    Parameters
    ----------
    resp : requests.Response
        A response object whose body is expected to contain JSON.

    Returns
    -------
    The decoded JSON payload (dict/list/scalar).

    On decode failure the raw body is printed and the process exits
    with status 1 so the remaining tests are skipped.
    """
    try:
        return resp.json()
    # json.JSONDecodeError (and requests' variant) subclass ValueError.
    # A bare `except:` here would also swallow KeyboardInterrupt and
    # SystemExit, masking Ctrl-C during a hung run — catch narrowly.
    except ValueError:
        print("❌ JSON decode failed")
        print(resp.text)
        sys.exit(1)
# ==========================================================
# 1️⃣ Models API
# ==========================================================
def test_models():
    """Verify GET /v1/models returns 200 and a non-empty model list."""
    print_section("TEST: /v1/models")
    response = requests.get(f"{BASE_URL}/models")
    assert response.status_code == 200, "Models API failed"
    body = safe_json(response)
    assert "data" in body, "No model list returned"
    assert len(body["data"]) > 0, "Empty model list"
    print("✅ Models endpoint OK")
    model_ids = [entry["id"] for entry in body["data"]]
    print("Available models:", model_ids)
# ==========================================================
# 2️⃣ Non-stream basic
# ==========================================================
def test_basic_completion():
    """Run one non-streaming chat completion and check choices/usage."""
    print_section("TEST: Basic Non-Streaming Completion")
    request_body = json.dumps({
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Say hello."}],
        "max_tokens": 30,
        "stream": False,
    })
    response = requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=request_body,
    )
    assert response.status_code == 200, "Completion failed"
    body = safe_json(response)
    assert "choices" in body, "No choices returned"
    assert "usage" in body, "No usage returned"
    print("Assistant:", body["choices"][0]["message"]["content"])
    print("Usage:", body["usage"])
    print("✅ Basic completion OK")
# ==========================================================
# 3️⃣ Streaming
# ==========================================================
def test_streaming():
    """Stream a completion over SSE and assert non-empty output."""
    print_section("TEST: Streaming Completion")
    request_body = json.dumps({
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Count from 1 to 5."}],
        "max_tokens": 50,
        "stream": True,
    })
    pieces = []
    sse_prefix = "data: "
    with requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=request_body,
        stream=True,
    ) as response:
        assert response.status_code == 200, "Streaming failed"
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if not line.startswith(sse_prefix):
                continue
            event_body = line[len(sse_prefix):]
            # The stream ends with a literal "[DONE]" sentinel event.
            if event_body == "[DONE]":
                break
            delta = json.loads(event_body)["choices"][0]["delta"]
            if "content" in delta:
                piece = delta["content"]
                print(piece, end="", flush=True)
                pieces.append(piece)
    print("\n\n✅ Streaming OK")
    assert len("".join(pieces)) > 0, "Streaming returned empty"
# ==========================================================
# 4️⃣ Sampling Variations
# ==========================================================
def test_sampling_variations():
    """Run one completion per sampling-parameter override and print it."""
    print_section("TEST: Sampling Variations")
    common = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Write a creative sentence about AI."}],
        "max_tokens": 50,
        "stream": False,
    }
    # Each entry overrides exactly one sampling knob on top of `common`.
    overrides = (
        {"temperature": 0.0},
        {"temperature": 0.7},
        {"top_p": 0.8},
        {"top_k": 20},
        {"repetition_penalty": 1.2},
        {"presence_penalty": 0.5},
        {"frequency_penalty": 0.5},
    )
    for override in overrides:
        response = requests.post(
            f"{BASE_URL}/chat/completions",
            headers={"Content-Type": "application/json"},
            data=json.dumps({**common, **override}),
        )
        assert response.status_code == 200, f"Sampling failed: {override}"
        reply = safe_json(response)["choices"][0]["message"]["content"]
        print(f"\nConfig: {override}")
        print("Output:", reply[:120], "...")
    print("\n✅ Sampling parameter variations OK")
# ==========================================================
# 5️⃣ Deterministic Check (temperature=0)
# ==========================================================
def test_deterministic():
    """Two identical temperature=0 requests must yield identical text."""
    print_section("TEST: Deterministic Mode (temperature=0)")
    request_body = json.dumps({
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Define gravity in one sentence."}],
        "temperature": 0.0,
        "max_tokens": 50,
        "stream": False,
    })

    def fetch_reply():
        # Fire the same request and extract the assistant's text.
        response = requests.post(
            f"{BASE_URL}/chat/completions",
            headers={"Content-Type": "application/json"},
            data=request_body,
        )
        return safe_json(response)["choices"][0]["message"]["content"]

    first = fetch_reply()
    second = fetch_reply()
    print("Run1:", first)
    print("Run2:", second)
    assert first == second, "❌ Deterministic mode not deterministic"
    print("✅ Deterministic check OK")
# ==========================================================
# 6️⃣ Error Handling
# ==========================================================
def test_error_handling():
    """Send a request missing "messages"; the server should reject it."""
    print_section("TEST: Error Handling")
    # "messages" is deliberately omitted to provoke a validation error.
    malformed = {"model": MODEL_NAME}
    response = requests.post(
        f"{BASE_URL}/chat/completions",
        headers={"Content-Type": "application/json"},
        data=json.dumps(malformed),
    )
    if response.status_code == 200:
        print("⚠️ Warning: server did not reject bad request")
    else:
        print("✅ Server correctly handled bad request")
# ==========================================================
# Main
# ==========================================================
if __name__ == "__main__":
    started_at = time.time()
    # Run the suite in order; any test raises AssertionError (or exits
    # via safe_json) on failure, stopping the run before the summary.
    for test in (
        test_models,
        test_basic_completion,
        test_streaming,
        test_sampling_variations,
        test_deterministic,
        test_error_handling,
    ):
        test()
    print_section("ALL TESTS PASSED")
    print(f"Total time: {round(time.time() - started_at, 2)} sec")