File size: 2,220 Bytes
5333591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
from src.evaluation import evaluate_with_judges

def create_app():
    """Build and return the Gradio Blocks UI for the Care-Lock evaluator.

    Loads the Care-Lock judge prompt template from ``prompts/carelock.txt``
    at construction time and wires a single "Generate Evaluation" action
    that fans the conversation out to the selected evaluator models.

    Returns:
        gr.Blocks: the assembled (but not yet launched) Gradio app.

    Raises:
        FileNotFoundError: if ``prompts/carelock.txt`` is missing.
    """
    # Load the prompt template once, at app-construction time, so every
    # evaluation reuses the same template text.
    try:
        with open("prompts/carelock.txt", "r", encoding="utf-8") as f:
            PROMPT_TEMPLATE = f.read()
    except FileNotFoundError as err:
        # BUGFIX: the old message said the file was expected in the
        # repository root, contradicting the actual path opened above.
        raise FileNotFoundError(
            "prompts/carelock.txt not found. Please upload it."
        ) from err

    # Imported here (not at module top) so the backend registry is only
    # required when the UI is actually built.
    from src.api_clients import BACKENDS

    with gr.Blocks(title="Therapist LLM Evaluator – Care-Lock") as app:
        gr.Markdown("## 🧠 Therapist LLM Evaluator – Care-Lock Variant")

        # --- Inputs -------------------------------------------------------
        convo = gr.Textbox(lines=12, label="Paste Full Conversation")
        # All registered backends are selectable and enabled by default.
        models = gr.CheckboxGroup(
            list(BACKENDS.keys()),
            value=list(BACKENDS.keys()),
            label="Evaluator Models"
        )
        # Only one variant is offered in this build.
        variant = gr.Radio(["Care-Lock"], value="Care-Lock", label="Variant")
        temp = gr.Slider(0.0, 1.5, step=0.1, value=0.0, label="Temperature")

        # Per-metric weight sliders; defaults sum to 1.0.
        weight_labels = [
            "Empathy", "Emotional Relevance", "Tone", "Boundary Awareness",
            "Supportiveness", "Ethical Safety", "Clarity", "Consistency",
            "Self-Awareness", "Adaptability"
        ]
        default_weights = [0.2, 0.15, 0.10, 0.10, 0.10,
                           0.10, 0.05, 0.05, 0.05, 0.10]
        sliders = [
            gr.Slider(0, 1, step=0.01, value=weight, label=label)
            for label, weight in zip(weight_labels, default_weights)
        ]

        # --- Outputs ------------------------------------------------------
        generate = gr.Button("🔍 Generate Evaluation")
        metrics_table = gr.DataFrame(label="Metrics by Model (with Total)")
        comments_json = gr.JSON(label="Parsed JSON per Model")
        tokens_json = gr.JSON(label="Tokens Used per Model")
        pros_json = gr.JSON(label="Pros per Model")
        cons_json = gr.JSON(label="Cons per Model")
        summary_json = gr.JSON(label="Summary per Model")
        file_out = gr.File(label="Download Combined JSON")

        # The lambda closes over PROMPT_TEMPLATE so the evaluator always
        # receives the template loaded above; slider values are splatted
        # positionally between `variant` and `temp` to match
        # evaluate_with_judges' expected argument order.
        generate.click(
            fn=lambda *args: evaluate_with_judges(*args, prompt_template=PROMPT_TEMPLATE),
            inputs=[convo, models, variant, *sliders, temp],
            outputs=[
                metrics_table, comments_json, tokens_json,
                pros_json, cons_json, summary_json, file_out
            ]
        )
    return app