chalchitra / scripts /ab_live.py
ajit3259's picture
test: add live-Space A/B harness for baseline vs refined prompt
91467c7
Raw
History Blame Contribute Delete
1.92 kB
"""A/B the baseline vs refined prompt against the LIVE Space (Qwen2.5-VL).
Encodes local image files and POSTs each set to /api/interpret twice — once with
the baseline prompt, once refined — using the per-request `refined` override, so
both variants see the exact same image on the same model. Prints them to compare.
Usage:
.venv/bin/python -m scripts.ab_live IMG [IMG ...] [--fragment "a line"]
Up to 4 images make one "moment".
"""
import argparse
import base64
import mimetypes
import sys
import time
import httpx
# Live /api/interpret endpoint of the deployed Space; run() POSTs every request here.
SPACE = "https://build-small-hackathon-chalchitra.hf.space/api/interpret"
def to_data_url(path: str) -> str:
    """Return the file at *path* encoded as a base64 ``data:`` URL.

    The media type is guessed from the file name; when the guess fails the
    URL is labelled ``image/jpeg``. The file is read in binary mode in full.
    """
    guessed_type, _encoding = mimetypes.guess_type(path)
    media_type = guessed_type or "image/jpeg"
    with open(path, "rb") as handle:
        encoded = base64.b64encode(handle.read())
    return f"data:{media_type};base64," + encoded.decode()
def run(images, fragment, refined):
    """POST one interpret request to SPACE and report the outcome.

    Args:
        images: list of image data URLs sent as the ``images`` field.
        fragment: text sent as the ``fragment`` field.
        refined: value sent as the per-request ``refined`` override.

    Returns:
        A ``(data, dt, err)`` triple. On success ``data`` is the decoded JSON
        body and ``err`` is ``None``. On any failure (transport error,
        non-200 status, undecodable body) ``data`` is ``None`` and ``err`` is
        a short description. ``dt`` is the elapsed request time in seconds.
    """
    body = {"images": images, "fragment": fragment, "refined": refined}
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # a wall-clock adjustment during a long request.
    t0 = time.perf_counter()
    try:
        r = httpx.post(SPACE, json=body, timeout=600)
    except httpx.HTTPError as exc:
        # Timeouts / connection failures become an error result so the caller
        # can still run and print the other variant instead of crashing.
        return None, time.perf_counter() - t0, f"{type(exc).__name__}: {exc}"
    dt = time.perf_counter() - t0
    if r.status_code != 200:
        return None, dt, f"HTTP {r.status_code}: {r.text[:200]}"
    try:
        return r.json(), dt, None
    except ValueError as exc:
        # A 200 response whose body is not JSON (JSONDecodeError is a ValueError).
        return None, dt, f"invalid JSON in response: {exc}"
def show(label, data, dt, err):
    """Print one variant's result under a banner carrying *label* and *dt*.

    When *err* is truthy only the error line is printed; otherwise the
    ``interpretation`` text is printed, followed by the ``films`` entries
    rendered as ``title (year)`` and joined on one line.
    """
    print(f"\n{'='*74}\n{label} [{dt:.1f}s]\n{'-'*74}")
    if not err:
        print(data["interpretation"])
        film_labels = [f"{film['title']} ({film['year']})" for film in data["films"]]
        print("\nfilms: " + " · ".join(film_labels))
    else:
        print(f" ERROR: {err}")
def main():
    """Parse the command line and run the baseline/refined comparison.

    Encodes up to four of the given image paths as data URLs, then calls
    run() once with ``refined=False`` and once with ``refined=True`` on the
    same images and fragment, printing each result with show().
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("images", nargs="+")
    ap.add_argument("--fragment", default="")
    args = ap.parse_args()

    # Only the first four images are sent; say so rather than dropping the
    # rest silently.
    max_images = 4
    selected = args.images[:max_images]
    dropped = args.images[max_images:]
    if dropped:
        print(
            f"warning: only the first {max_images} images are sent; "
            f"ignoring {len(dropped)} extra: {', '.join(dropped)}",
            file=sys.stderr,
        )

    try:
        images = [to_data_url(p) for p in selected]
    except OSError as exc:
        # Missing or unreadable file: stop before any request is made.
        sys.exit(f"cannot read image: {exc}")

    print(f"{len(images)} image(s); fragment={args.fragment!r}\nlive: {SPACE}")
    for refined in (False, True):
        data, dt, err = run(images, args.fragment, refined)
        show("REFINED" if refined else "BASELINE", data, dt, err)


if __name__ == "__main__":
    main()