#!/usr/bin/env python3
"""Benchmark Qwen uncensored Q4_K_M speed at different contexts.

Posts a fixed set of chat-completion requests of increasing prompt size to
the endpoint at ``URL`` and prints one summary row per request, built from
the ``timings`` object and the first choice of the JSON response.
"""
import json
import time  # not used below; kept so the module's import surface is unchanged
import urllib.request

# Chat-completions endpoint every benchmark request is posted to.
URL = "http://localhost:8081/v1/chat/completions"

# Seconds to wait for a single response before the request is reported as an
# error.
TIMEOUT_SECONDS = 120

# Single function-tool definition attached to requests when tools are enabled.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "browser_use",
            "description": "Browser",
            "parameters": {
                "type": "object",
                "properties": {
                    "action": {"type": "string"},
                    "url": {"type": "string"},
                },
                "required": ["action"],
            },
        },
    }
]

# Padding used to inflate the system prompt. The "~2K"/"~4K" labels in main()
# are nominal: the slices below are taken by character count, not tokens.
FILLER = "This is padding text for benchmarking context length impact. " * 100


def test(name: str, messages: list, tools: bool = True) -> None:
    """Run one benchmark request and print a one-line result row.

    Args:
        name: Label printed in the first column of the row.
        messages: Chat messages sent as the ``messages`` field of the request.
        tools: When true, ``TOOLS`` is attached to the request as ``tools``.

    Returns:
        None. The result (or an ``ERROR: ...`` line) is written to stdout.

    Any ``Exception`` raised while sending the request or reading the reply
    is caught and reported on the row, so one failing case does not abort
    the remaining benchmarks.
    """
    payload = {"model": "test", "stream": False, "messages": messages}
    if tools:
        payload["tools"] = TOOLS
    req = urllib.request.Request(
        URL,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        # The context manager closes the HTTP response even if reading or
        # JSON decoding fails.
        with urllib.request.urlopen(req, timeout=TIMEOUT_SECONDS) as resp:
            result = json.loads(resp.read())
        # `or` fallbacks: a missing or null value is reported as 0 instead of
        # failing the whole row.
        # NOTE(review): the "timings" field names look like llama.cpp server
        # output - confirm against the server actually being benchmarked.
        timings = result.get("timings") or {}
        prompt_rate = timings.get("prompt_per_second") or 0
        gen_rate = timings.get("predicted_per_second") or 0
        prompt_n = timings.get("prompt_n") or 0
        predicted_n = timings.get("predicted_n") or 0
        message = result["choices"][0]["message"]
        tc = bool(message.get("tool_calls"))
        print(f"{name:30s} {prompt_rate:7.1f} p/s {gen_rate:6.1f} g/s tc={tc} in={prompt_n} out={predicted_n}")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and move on.
        print(f"{name:30s} ERROR: {e}")


def main() -> None:
    """Print the table header and run every benchmark case in order."""
    print(f"{'Test':30s} {'Prompt':>7s} {'Gen':>6s} {'Tool':>4s} {'In':>4s} {'Out':>4s}")
    print("-" * 80)

    test("Short + tools", [{"role": "user", "content": "Navigate to google.com"}])
    test(
        "Medium + tools",
        [
            {"role": "system", "content": "You are a browser agent. Use tools."},
            {"role": "user", "content": "Go to duckduckgo and search weather Dubai"},
        ],
    )
    test(
        "Long ~2K + tools",
        [
            {"role": "system", "content": "Agent. " + FILLER[:2000]},
            {"role": "user", "content": "Navigate to google.com"},
        ],
    )
    test(
        "Very long ~4K + tools",
        [
            {"role": "system", "content": "Agent. " + FILLER},
            {"role": "user", "content": "Navigate to google.com"},
        ],
    )
    test(
        "Short no tools",
        [{"role": "user", "content": "What is 2+2? Answer briefly."}],
        tools=False,
    )


# Guarded so importing this module does not fire benchmark requests.
if __name__ == "__main__":
    main()