Spaces:
Sleeping
Sleeping
| """Compare prompt variants and models on one evocative fixture. | |
| Runs (model x baseline/refined) combos and prints each interpretation + films | |
| so we can judge whether the refined prompt earns its place as default, and | |
| whether 12B is worth the latency over E4B. | |
| .venv/bin/python -m scripts.ab_prompt | |
| """ | |
| import os | |
| import time | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from scripts.smoke_test import moody_jpeg # noqa: E402 | |
# A single layered fixture: a warm dusk window paired with a personal line.
IMAGES = [
    moody_jpeg((22, 26, 44), (96, 72, 58), blob=(255, 198, 120, 64)),
    moody_jpeg((12, 14, 18), (44, 52, 70)),
]
FRAGMENT = "couldn't sleep again, the streetlight through the curtain"

# (model id, refined flag) pairs, in run order: for each model the baseline
# prompt (False) comes first, then the refined prompt (True).
COMBOS = [
    (model_id, use_refined)
    for model_id in ("google/gemma-4-e4b", "google/gemma-4-12b")
    for use_refined in (False, True)
]
def run_combo(model: str, refined: bool) -> None:
    """Run one (model, prompt-variant) combination and print its result.

    Sets the ``CHALCHITRA_MODEL`` environment variable to *model*, sends the
    shared fixture (``IMAGES`` + ``FRAGMENT``) to the provider exactly once
    with the baseline or refined system prompt, and prints the
    interpretation, the film list and the elapsed time. The environment
    variable is left set after the call.

    Any exception raised by the provider call, JSON extraction or validation
    is reported as an ``ERROR`` line and the function returns, so the
    remaining combos can still run.

    Args:
        model: Model identifier such as ``"google/gemma-4-e4b"``; only the
            part after the last ``/`` appears in the printed header.
        refined: Forwarded to ``system_prompt(refined=...)``; also selects
            the ``refined`` / ``baseline`` word in the printed header.
    """
    os.environ["CHALCHITRA_MODEL"] = model

    # One direct call per combo (no auto-retry) so baseline stays baseline.
    # The backend imports are deliberately placed after the env var is set.
    # NOTE(review): Python caches imported modules, so from the second combo
    # on these imports do not re-execute backend code. This presumably relies
    # on get_provider() reading CHALCHITRA_MODEL at call time -- confirm.
    from backend.oracle import MAX_TOKENS, _build_content, _extract_json, _validate
    from backend.prompt import system_prompt
    from backend.providers import get_provider

    variant = "refined" if refined else "baseline"
    label = f"{model.split('/')[-1]} | {variant}"
    print(f"\n{'=' * 74}\n{label}\n{'-' * 74}")

    content = _build_content(IMAGES, FRAGMENT, [])

    # perf_counter() is monotonic, so the reported latency cannot be skewed
    # by system clock adjustments the way time.time() can.
    started = time.perf_counter()
    try:
        raw = get_provider().complete(system_prompt(refined=refined), content, MAX_TOKENS)
        result = _validate(_extract_json(raw))
    except Exception as e:  # noqa: BLE001
        # Include the exception type: str(e) alone can be cryptic or empty
        # (e.g. a KeyError prints only the missing key).
        print(f" ERROR: {type(e).__name__}: {e}")
        return
    elapsed = time.perf_counter() - started

    print(result["interpretation"])
    films = " · ".join(f"{f['title']} ({f['year']})" for f in result["films"])
    print("\nfilms: " + films)
    print(f"[{elapsed:.1f}s]")
def main() -> None:
    """Print the fixture fragment, then run every entry of ``COMBOS`` in order."""
    print(f"fixture fragment: {FRAGMENT!r}")
    # Each entry is a (model, refined) pair, unpacked straight into run_combo.
    for combo in COMBOS:
        run_combo(*combo)


if __name__ == "__main__":
    main()