"""Gradio app for comparing a Dify endpoint with reference models side by side.

A user sends one prompt to both sides, grades and comments on each response,
saves the evaluation through ``db.save_evaluation``, and can download every
stored evaluation as an .xlsx report.
"""

import re
import tempfile
from typing import Optional

import gradio as gr

from db import init_db, save_evaluation, export_to_excel
from providers import (
    MODEL_NAMES,
    call_model,
    call_custom_endpoint,
    MODEL_REGISTRY,
    get_model_defaults,
)

# ---------------------------------------------------------------------------
# Initialise database on import
# ---------------------------------------------------------------------------
init_db()

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
URL_RE = re.compile(r"^https?://\S+$")


def _clean(text: Optional[str]) -> str:
    """Return *text* stripped of surrounding whitespace; ``None`` becomes ``""``."""
    return (text or "").strip()


def _sanitize_nickname(nick: str) -> str:
    """Strip surrounding whitespace from *nick* and cap it at 50 characters."""
    return _clean(nick)[:50]


def _validate_url(url: str) -> bool:
    """Return True when *url* (after stripping) is an http:// or https:// URL
    containing no whitespace."""
    return bool(URL_RE.match(_clean(url)))


def on_model_select(display_name: str):
    """When user picks a model from dropdown, populate base_url and model_id."""
    base_url, model_id = get_model_defaults(display_name)
    return base_url, model_id


# ---------------------------------------------------------------------------
# Event handlers
# ---------------------------------------------------------------------------
def send_to_both(
    prompt: str,
    left_url: str,
    left_model: str,
    left_key: str,
    right_name: str,
    right_base_url: str,
    right_model_id: str,
    right_key: str,
):
    """Call both models and return their responses.

    Returns a ``(left, right)`` pair. Each element is the model's response,
    or a "⚠️ ..." message when that side failed. The left side is skipped
    (empty response) when no URL was entered.

    Raises:
        gr.Error: if *prompt* is empty or whitespace-only.
    """
    if not _clean(prompt):
        raise gr.Error("Please enter a prompt.")

    # Left — Dify endpoint
    left_response = ""
    left_err = ""
    left_endpoint = _clean(left_url)
    if left_endpoint:
        if not _validate_url(left_endpoint):
            left_err = "⚠️ Invalid URL format. Use http:// or https://."
        else:
            # Broad catch is deliberate: this is the UI boundary, and a
            # failure on one side must not prevent showing the other side.
            try:
                left_response = call_custom_endpoint(
                    left_endpoint, _clean(left_model) or "default", prompt, left_key
                )
            except Exception as e:
                left_err = f"⚠️ Left model error: {e}"

    # Right — registry model (with optional user overrides)
    right_response = ""
    right_err = ""
    try:
        right_response = call_model(
            right_name, prompt, right_key, right_base_url, right_model_id
        )
    except Exception as e:
        right_err = f"⚠️ Right model error: {e}"

    return (
        left_response if not left_err else left_err,
        right_response if not right_err else right_err,
    )


def submit_evaluation(
    nickname: str,
    prompt: str,
    left_url: str,
    left_model: str,
    left_response: str,
    left_comment: str,
    left_grade: int,
    right_name: str,
    right_model_id: str,
    right_response: str,
    right_comment: str,
    right_grade: int,
) -> None:
    """Validate and persist an evaluation.

    Raises:
        gr.Error: if the nickname or prompt is empty, if both responses are
            empty, or if either grade is outside 1..10.
    """
    nickname = _sanitize_nickname(nickname)
    if not nickname:
        raise gr.Error("Nickname is required.")
    if not _clean(prompt):
        raise gr.Error("Prompt is empty — send a prompt first.")
    # Response boxes are only filled by send_to_both, so they may still be
    # unset; treat None the same as an empty string.
    if not _clean(left_response) and not _clean(right_response):
        raise gr.Error("No responses to evaluate — send a prompt first.")
    if left_grade < 1 or left_grade > 10:
        raise gr.Error("Left grade must be between 1 and 10.")
    if right_grade < 1 or right_grade > 10:
        raise gr.Error("Right grade must be between 1 and 10.")

    # Unknown registry names fall back to the "unknown" provider label.
    entry = MODEL_REGISTRY.get(right_name, {})
    right_provider = entry.get("provider", "unknown")

    save_evaluation(
        nickname=nickname,
        prompt=prompt,
        left_model_name=_clean(left_model) or "custom",
        left_model_endpoint=_clean(left_url),
        left_response=left_response,
        left_comment=left_comment,
        left_grade=int(left_grade),
        right_model_name=_clean(right_model_id) or right_name,
        right_provider=right_provider,
        right_response=right_response,
        right_comment=right_comment,
        right_grade=int(right_grade),
    )
    gr.Info("✅ Evaluation saved!")


def download_report() -> str:
    """Export all evaluations to a temp .xlsx and return as a downloadable file.

    Returns the path of the generated file. The file is created with
    ``delete=False`` so it still exists when Gradio serves it.
    """
    # Only the unique path is needed here. Close the handle before exporting
    # so it is not leaked and the exporter can reopen the path on platforms
    # that forbid a second open of a still-open temporary file.
    with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
        report_path = tmp.name
    export_to_excel(report_path)
    return report_path


# ---------------------------------------------------------------------------
# Gradio Blocks UI
# ---------------------------------------------------------------------------

# Pre-compute initial defaults for first model
_init_base_url, _init_model_id = get_model_defaults(MODEL_NAMES[0])

with gr.Blocks(title="LLM Compare") as demo:
    gr.Markdown("# 🔍 LLM Compare\nSide-by-side comparison: your Dify app vs reference models.")

    # ---- Top bar: nickname ---------------------------------------------------
    with gr.Row():
        nickname = gr.Textbox(
            label="Your Nickname",
            placeholder="Enter a nickname (required)",
            scale=2,
        )

    # ---- Prompt area ---------------------------------------------------------
    with gr.Row():
        prompt = gr.Textbox(
            label="Prompt",
            placeholder="Type your prompt here…",
            lines=4,
            scale=4,
        )
        send_btn = gr.Button("🚀 Send to Both", variant="primary", scale=1)

    # ---- Two-column layout ---------------------------------------------------
    with gr.Row(equal_height=True):
        # ---- LEFT: Dify model ------------------------------------------------
        with gr.Column():
            gr.Markdown("### 🧪 Your Model (Dify Endpoint)")
            left_url = gr.Textbox(
                label="Dify API Base URL",
                placeholder="https://api.dify.ai/v1",
            )
            left_model = gr.Textbox(
                label="App Name (for display only)",
                placeholder="e.g. my-dify-app",
            )
            left_key = gr.Textbox(
                label="Dify Secret Key",
                placeholder="app-xxxxxxxxxxxx",
                type="password",
            )
            left_response = gr.Textbox(
                label="Response",
                lines=12,
                interactive=False,
            )
            left_comment = gr.Textbox(
                label="Comment",
                placeholder="Your thoughts on this response…",
                lines=2,
            )
            left_grade = gr.Slider(
                minimum=1,
                maximum=10,
                step=1,
                value=5,
                label="Grade (1–10)",
            )

        # ---- RIGHT: reference model ------------------------------------------
        with gr.Column():
            gr.Markdown("### 📚 Reference Model")
            right_name = gr.Dropdown(
                choices=MODEL_NAMES,
                value=MODEL_NAMES[0],
                label="Select Model",
            )
            right_base_url = gr.Textbox(
                label="Base URL (auto-filled, editable)",
                value=_init_base_url,
                placeholder="e.g. https://api.openai.com/v1",
            )
            right_model_id = gr.Textbox(
                label="Model ID (auto-filled, editable)",
                value=_init_model_id,
                placeholder="e.g. gpt-4o",
            )
            right_key = gr.Textbox(
                label="API Key (optional — uses env default)",
                placeholder="Leave blank to use default key",
                type="password",
            )
            right_response = gr.Textbox(
                label="Response",
                lines=12,
                interactive=False,
            )
            right_comment = gr.Textbox(
                label="Comment",
                placeholder="Your thoughts on this response…",
                lines=2,
            )
            right_grade = gr.Slider(
                minimum=1,
                maximum=10,
                step=1,
                value=5,
                label="Grade (1–10)",
            )

    # ---- Action buttons ------------------------------------------------------
    with gr.Row():
        submit_btn = gr.Button("💾 Submit Evaluation", variant="primary")
        download_btn = gr.Button("📥 Download Report (.xlsx)")
    report_file = gr.File(label="Report", visible=False)

    # ---- Wiring --------------------------------------------------------------

    # Auto-fill base_url and model_id when dropdown changes
    right_name.change(
        fn=on_model_select,
        inputs=[right_name],
        outputs=[right_base_url, right_model_id],
    )

    send_btn.click(
        fn=send_to_both,
        inputs=[
            prompt,
            left_url,
            left_model,
            left_key,
            right_name,
            right_base_url,
            right_model_id,
            right_key,
        ],
        outputs=[left_response, right_response],
    )

    submit_btn.click(
        fn=submit_evaluation,
        inputs=[
            nickname,
            prompt,
            left_url,
            left_model,
            left_response,
            left_comment,
            left_grade,
            right_name,
            right_model_id,
            right_response,
            right_comment,
            right_grade,
        ],
        outputs=[],
    )

    # The file component starts hidden; reveal it once a report was generated.
    download_btn.click(
        fn=download_report,
        inputs=[],
        outputs=[report_file],
    ).then(lambda: gr.update(visible=True), outputs=[report_file])


if __name__ == "__main__":
    # NOTE(review): passing `theme` to launch() depends on the installed Gradio
    # version (older releases take it on gr.Blocks instead) — confirm against
    # the pinned version.
    demo.launch(theme=gr.themes.Soft())