Spaces:

build-small-hackathon
/

chalchitra

Sleeping

App Files Files Community

chalchitra / scripts /ab_prompt.py

ajit3259

test: add prompt/model A/B comparison script

b862a8b 22 days ago

Raw

History Blame Contribute Delete

1.97 kB

	"""Compare prompt variants and models on one evocative fixture.

	Runs (model x baseline/refined) combos and prints each interpretation + films
	so we can judge whether the refined prompt earns its place as default, and
	whether 12B is worth the latency over E4B.

	Usage: .venv/bin/python -m scripts.ab_prompt
	"""

	import os
	import time

	from dotenv import load_dotenv

	load_dotenv()

	from scripts.smoke_test import moody_jpeg # noqa: E402

	# Shared fixture for every combo: one layered scene (warm dusk window)
	# plus a personal line of text.
	# NOTE(review): the tuples are presumably colour values consumed by
	# scripts.smoke_test.moody_jpeg — confirm their meaning there.
	IMAGES = [
	    # First image: the only one that passes the optional ``blob`` argument.
	    moody_jpeg(
	        (22, 26, 44),
	        (96, 72, 58),
	        blob=(255, 198, 120, 64),
	    ),
	    # Second image: two tuples only, no blob.
	    moody_jpeg(
	        (12, 14, 18),
	        (44, 52, 70),
	    ),
	]

	# Text fragment sent alongside IMAGES in every run.
	FRAGMENT = "couldn't sleep again, the streetlight through the curtain"

	# Every (model id, use refined prompt?) pair to compare. Ordered model by
	# model, with the baseline prompt (False) before the refined one (True).
	COMBOS = [
	    (model_id, use_refined)
	    for model_id in ("google/gemma-4-e4b", "google/gemma-4-12b")
	    for use_refined in (False, True)
	]


	def run_combo(model: str, refined: bool) -> None:
	    """Run one (model, prompt-variant) combination and print the outcome.

	    Exports ``model`` through the ``CHALCHITRA_MODEL`` environment variable,
	    builds the request content from the module-level ``IMAGES`` and
	    ``FRAGMENT`` fixture, makes a single provider call, and prints the
	    interpretation, the films (title and year) and the elapsed time.

	    Any exception raised by the provider call or by JSON extraction /
	    validation is printed as an ``ERROR`` line and the function returns,
	    so one failing combination does not abort the remaining ones.

	    Args:
	        model: Model identifier such as ``"google/gemma-4-e4b"``; only the
	            part after the last ``/`` appears in the printed label.
	        refined: Forwarded to ``system_prompt(refined=...)``; also selects
	            the ``refined`` / ``baseline`` word in the label.
	    """
	    os.environ["CHALCHITRA_MODEL"] = model
	    # One direct call per combo (no auto-retry) so baseline stays baseline.
	    # The backend imports are deferred until after the environment variable
	    # is set. NOTE(review): presumably the backend reads CHALCHITRA_MODEL
	    # when get_provider() is called rather than at import time (modules are
	    # only imported once) — confirm, otherwise later combos reuse the
	    # first model.
	    from backend.oracle import MAX_TOKENS, _build_content, _extract_json, _validate
	    from backend.prompt import system_prompt
	    from backend.providers import get_provider

	    label = f"{model.split('/')[-1]} | {'refined' if refined else 'baseline'}"
	    print(f"\n{'=' * 74}\n{label}\n{'-' * 74}")
	    content = _build_content(IMAGES, FRAGMENT, [])
	    # perf_counter is monotonic, so the interval cannot be skewed by
	    # system clock adjustments during a slow model call.
	    t0 = time.perf_counter()
	    try:
	        raw = get_provider().complete(system_prompt(refined=refined), content, MAX_TOKENS)
	        r = _validate(_extract_json(raw))
	    except Exception as e:  # noqa: BLE001
	        # Deliberately broad: report the failure and move on to the next combo.
	        print(f" ERROR: {e}")
	        return
	    dt = time.perf_counter() - t0
	    print(r["interpretation"])
	    print("\nfilms: " + " · ".join(f"{f['title']} ({f['year']})" for f in r["films"]))
	    print(f"[{dt:.1f}s]")


	def main() -> None:
	    """Print the fixture fragment, then run every entry of ``COMBOS`` in order."""
	    print(f"fixture fragment: {FRAGMENT!r}")
	    for model, refined in COMBOS:
	        run_combo(model, refined)


	# Run the comparison only when executed as a script/module, not on import.
	if __name__ == "__main__":
	    main()