Manojb
/

CUA_benchmark_local_small_models

Model card Files Files and versions

CUA_benchmark_local_small_models / bench /bench_quant.py

Manojb's picture

Upload folder using huggingface_hub

221ca5c verified about 1 month ago

history blame contribute delete

2 kB

	#!/usr/bin/env python3
	"""Benchmark Qwen uncensored Q4_K_M speed at different contexts."""
	import json, urllib.request, time

	# Chat-completions endpoint that every benchmark request is POSTed to.
	URL = "http://localhost:8081/v1/chat/completions"

	# The single function-tool definition attached to requests that run with
	# tools enabled: a "browser_use" function taking a required "action"
	# string and an optional "url" string.
	TOOLS = [
	    {
	        "type": "function",
	        "function": {
	            "name": "browser_use",
	            "description": "Browser",
	            "parameters": {
	                "type": "object",
	                "properties": {
	                    "action": {"type": "string"},
	                    "url": {"type": "string"},
	                },
	                "required": ["action"],
	            },
	        },
	    },
	]

	def test(name, messages, tools=True, *, timeout=120):
	    """Send one chat-completion request and print a one-line timing summary.

	    The request is a non-streaming POST to ``URL``. On success the printed
	    line shows the prompt and generation rates, whether the reply contained
	    tool calls, and the prompt/generated counts, all read from the
	    response's ``timings`` object (zeros are printed for missing values).

	    Args:
	        name: Label printed in the first column, padded to 30 characters.
	        messages: Chat messages sent as the request's ``messages`` field.
	        tools: When true, ``TOOLS`` is attached to the request as ``tools``.
	        timeout: Seconds to wait on the HTTP request (default 120).

	    Returns:
	        None. The summary, or an ``ERROR`` line on any failure, is printed.
	    """
	    payload = {"model": "test", "stream": False, "messages": messages}
	    if tools:
	        payload["tools"] = TOOLS
	    req = urllib.request.Request(
	        URL,
	        data=json.dumps(payload).encode(),
	        headers={"Content-Type": "application/json"},
	    )
	    try:
	        # The context manager closes the HTTP response even when reading or
	        # JSON decoding fails, so connections are not leaked between cases.
	        with urllib.request.urlopen(req, timeout=timeout) as resp:
	            result = json.loads(resp.read())
	        # "timings" may be absent or null; treat both as "no data" so the
	        # numeric format specs below never receive None.
	        timings = result.get("timings") or {}
	        message = result["choices"][0]["message"]
	        tc = bool(message.get("tool_calls"))
	        prompt_rate = timings.get("prompt_per_second") or 0
	        gen_rate = timings.get("predicted_per_second") or 0
	        prompt_n = timings.get("prompt_n") or 0
	        predicted_n = timings.get("predicted_n") or 0
	        print(f"{name:30s} {prompt_rate:7.1f} p/s {gen_rate:6.1f} g/s tc={tc} in={prompt_n} out={predicted_n}")
	    except Exception as e:
	        # Kept broad so that a failing case (connection refused, timeout,
	        # malformed response) is reported and later calls still run.
	        print(f"{name:30s} ERROR: {e}")

	# Benchmark driver: prints a header and a rule, then runs five cases in a
	# fixed order, each through test().
	# NOTE(review): the "~2K" / "~4K" labels look approximate. FILLER is about
	# 6,100 characters and the 2K case slices 2,000 characters of it; the
	# in= column printed by test() is the count reported by the server.
	FILLER = "This is padding text for benchmarking context length impact. " * 100

	_NAVIGATE = {"role": "user", "content": "Navigate to google.com"}

	# (label, messages, attach tools?) in execution order.
	_CASES = (
	    ("Short + tools", [_NAVIGATE], True),
	    (
	        "Medium + tools",
	        [
	            {"role": "system", "content": "You are a browser agent. Use tools."},
	            {"role": "user", "content": "Go to duckduckgo and search weather Dubai"},
	        ],
	        True,
	    ),
	    (
	        "Long ~2K + tools",
	        [{"role": "system", "content": "Agent. " + FILLER[:2000]}, _NAVIGATE],
	        True,
	    ),
	    (
	        "Very long ~4K + tools",
	        [{"role": "system", "content": "Agent. " + FILLER}, _NAVIGATE],
	        True,
	    ),
	    (
	        "Short no tools",
	        [{"role": "user", "content": "What is 2+2? Answer briefly."}],
	        False,
	    ),
	)

	print(f"{'Test':30s} {'Prompt':>7s} {'Gen':>6s} {'Tool':>4s} {'In':>4s} {'Out':>4s}")
	print("-" * 80)

	for _label, _messages, _with_tools in _CASES:
	    test(_label, _messages, tools=_with_tools)