from backend import run_llm, generate_variants
from evaluator import final_score


def optimize_prompt(base_prompt: str, reference_answer: str = ""):
    """Generate variants of a prompt, run each through the LLM, and rank them.

    Parameters
    ----------
    base_prompt : str
        Starting prompt from which candidate variants are generated.
    reference_answer : str, optional
        Reference text forwarded to the scorer; empty string by default.

    Returns
    -------
    list[dict]
        One entry per variant with keys ``"prompt"``, ``"output"`` and
        ``"score"``, sorted by ``"score"`` in descending order (best first).
    """
    results = []
    for prompt in generate_variants(base_prompt):
        # Run the model once per variant and score the single output.
        output = run_llm(prompt)
        results.append({
            "prompt": prompt,
            "output": output,
            "score": final_score(output, reference_answer),
        })
    # Highest-scoring variant first; sorted() is stable, so ties keep
    # the order in which generate_variants produced them.
    return sorted(results, key=lambda entry: entry["score"], reverse=True)