Manojb's picture
Upload folder using huggingface_hub
221ca5c verified
#!/usr/bin/env python3
"""Benchmark Qwen uncensored Q4_K_M speed at different contexts."""
import json, urllib.request, time
# Chat-completions endpoint that every benchmark request is POSTed to.
URL = "http://localhost:8081/v1/chat/completions"

# JSON-schema description of the arguments accepted by the single tool below.
# Key order is kept stable so the serialized request body does not change.
_BROWSER_USE_PARAMETERS = {
    "type": "object",
    "properties": {
        "action": {"type": "string"},
        "url": {"type": "string"},
    },
    "required": ["action"],
}

# Tool list attached to requests that should exercise tool calling.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "browser_use",
            "description": "Browser",
            "parameters": _BROWSER_USE_PARAMETERS,
        },
    },
]
def test(name, messages, tools=True):
    """Send one chat-completion request and print a one-line speed report.

    Args:
        name: Label printed in the first (30-character wide) column.
        messages: Chat message list sent as the request's "messages" field.
        tools: When truthy, the module-level TOOLS list is attached to the
            request as its "tools" field.

    The row shows prompt and generation rates, whether the reply contained
    tool calls, and the prompt/generated counts, all read from the
    response's "timings" object (missing fields print as 0).  Any failure
    while sending the request or reading the reply is printed as an ERROR
    row instead of being raised.
    """
    data = {"model": "test", "stream": False, "messages": messages}
    if tools:
        data["tools"] = TOOLS
    req = urllib.request.Request(
        URL,
        data=json.dumps(data).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        # Use the response as a context manager so the connection is closed
        # as soon as the body has been read, even if parsing fails.
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        timings = result.get("timings", {})
        message = result["choices"][0]["message"]
        used_tool = bool(message.get("tool_calls"))
        print(
            f"{name:30s} {timings.get('prompt_per_second', 0):7.1f} p/s "
            f"{timings.get('predicted_per_second', 0):6.1f} g/s tc={used_tool} "
            f"in={timings.get('prompt_n', 0)} out={timings.get('predicted_n', 0)}"
        )
    except Exception as e:
        # Deliberately broad: one failing case should be reported and must
        # not abort the remaining benchmark cases.
        print(f"{name:30s} ERROR: {e}")
# Column header and separator for the report rows printed by test().
print(f"{'Test':30s} {'Prompt':>7s} {'Gen':>6s} {'Tool':>4s} {'In':>4s} {'Out':>4s}")
print("-" * 80)

# Repeated sentence used to inflate the system prompt for the longer cases.
FILLER = "This is padding text for benchmarking context length impact. " * 100

# (label, messages, attach tools?) for each benchmark case, in run order.
_CASES = [
    (
        "Short + tools",
        [{"role": "user", "content": "Navigate to google.com"}],
        True,
    ),
    (
        "Medium + tools",
        [
            {"role": "system", "content": "You are a browser agent. Use tools."},
            {"role": "user", "content": "Go to duckduckgo and search weather Dubai"},
        ],
        True,
    ),
    (
        "Long ~2K + tools",
        [
            # Only the first 2000 characters of the filler are used here.
            {"role": "system", "content": "Agent. " + FILLER[:2000]},
            {"role": "user", "content": "Navigate to google.com"},
        ],
        True,
    ),
    (
        "Very long ~4K + tools",
        [
            {"role": "system", "content": "Agent. " + FILLER},
            {"role": "user", "content": "Navigate to google.com"},
        ],
        True,
    ),
    (
        "Short no tools",
        [{"role": "user", "content": "What is 2+2? Answer briefly."}],
        False,
    ),
]

for _label, _messages, _with_tools in _CASES:
    test(_label, _messages, tools=_with_tools)