| |
| """Benchmark Qwen uncensored Q4_K_M speed at different contexts.""" |
| import json, urllib.request, time |
|
|
# Chat-completions endpoint that every benchmark request is POSTed to.
URL = "http://localhost:8081/v1/chat/completions"

# Tool list attached to requests that exercise tool calling: a single
# function tool, "browser_use", taking a required "action" string and an
# optional "url" string.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "browser_use",
            "description": "Browser",
            "parameters": {
                "type": "object",
                "properties": {
                    "action": {"type": "string"},
                    "url": {"type": "string"},
                },
                "required": ["action"],
            },
        },
    },
]
|
|
def test(name: str, messages: list, tools: bool = True) -> None:
    """Send one chat-completion request and print a one-line speed report.

    Args:
        name: Label printed in the first column of the report line.
        messages: Chat messages placed in the request's ``messages`` field.
        tools: When true, the module-level ``TOOLS`` list is attached to the
            request under the ``tools`` key.

    The report line shows the prompt and generation rates and the token
    counts read from the response's ``timings`` object (each printed as 0
    when the key is absent), plus whether the first choice's message carries
    any tool calls.  Any failure (connection, HTTP, JSON, missing keys) is
    printed as an ``ERROR`` line instead of being raised.
    """
    payload = {"model": "test", "stream": False, "messages": messages}
    if tools:
        payload["tools"] = TOOLS
    req = urllib.request.Request(
        URL,
        data=json.dumps(payload).encode(),
        headers={"Content-Type": "application/json"},
    )
    try:
        # The context manager closes the HTTP response even when reading or
        # JSON decoding fails, so no connection is left open between runs.
        with urllib.request.urlopen(req, timeout=120) as resp:
            result = json.loads(resp.read())
        t = result.get("timings", {})
        m = result["choices"][0]["message"]
        tc = bool(m.get("tool_calls"))
        print(f"{name:30s} {t.get('prompt_per_second',0):7.1f} p/s {t.get('predicted_per_second',0):6.1f} g/s tc={tc} in={t.get('prompt_n',0)} out={t.get('predicted_n',0)}")
    except Exception as e:
        # Deliberately broad: a failing case is reported on its own line and
        # control returns to the caller rather than propagating the error.
        print(f"{name:30s} ERROR: {e}")
|
|
# Report header: column titles followed by a separator rule.
print(f"{'Test':30s} {'Prompt':>7s} {'Gen':>6s} {'Tool':>4s} {'In':>4s} {'Out':>4s}")
print("-" * 80)

# Padding used to inflate the system prompt: a 61-character sentence repeated
# 100 times, i.e. 6100 characters in total.
# NOTE(review): the "~2K" case sends the first 2000 characters and the "~4K"
# case sends all 6100 characters; confirm the labels reflect the intended sizes.
FILLER = "This is padding text for benchmarking context length impact. " * 100

# (label, messages, attach tools?) for each benchmark case, in run order.
_CASES = [
    (
        "Short + tools",
        [{"role": "user", "content": "Navigate to google.com"}],
        True,
    ),
    (
        "Medium + tools",
        [
            {"role": "system", "content": "You are a browser agent. Use tools."},
            {"role": "user", "content": "Go to duckduckgo and search weather Dubai"},
        ],
        True,
    ),
    (
        "Long ~2K + tools",
        [
            {"role": "system", "content": "Agent. " + FILLER[:2000]},
            {"role": "user", "content": "Navigate to google.com"},
        ],
        True,
    ),
    (
        "Very long ~4K + tools",
        [
            {"role": "system", "content": "Agent. " + FILLER},
            {"role": "user", "content": "Navigate to google.com"},
        ],
        True,
    ),
    (
        "Short no tools",
        [{"role": "user", "content": "What is 2+2? Answer briefly."}],
        False,
    ),
]

for _label, _messages, _with_tools in _CASES:
    test(_label, _messages, tools=_with_tools)
|
|