Spaces:

BrainDrive
/

Therapy-Model-Evaluator

Sleeping

App Files Files Community

navaneethkrishnan committed on Jul 29, 2025

Commit

5333591

verified ·

1 Parent(s): 95a0abc

Upload 3 files

Browse files

Files changed (3) hide show

app.py +7 -0
requirements.txt +4 -0
ui.py +50 -0

app.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import gradio as gr
+from ui import create_app
+# HF Spaces entry point: when this file is executed as a script (not when it
+# is imported), build the Gradio UI via create_app() and start serving it.
+if __name__ == "__main__":
+    demo = create_app()
+    # show_error=True asks Gradio to surface handler errors in the UI
+    # (see Gradio's launch() documentation).
+    demo.launch(show_error=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio==4.44.0
+openai>=1.30.0
+anthropic>=0.34.0
+pandas==2.2.3

ui.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import gradio as gr
+from src.evaluation import evaluate_with_judges
+def create_app():
+    # Load prompt template
+    try:
+        with open("prompts/carelock.txt", "r", encoding="utf-8") as f:
+            PROMPT_TEMPLATE = f.read()
+    except FileNotFoundError:
+        raise FileNotFoundError("carelock.txt not found in repository root. Please upload it.")
+    from src.api_clients import BACKENDS
+    with gr.Blocks(title="Therapist LLM Evaluator – Care-Lock") as app:
+        gr.Markdown("## 🧠 Therapist LLM Evaluator – Care-Lock Variant")
+        convo = gr.Textbox(lines=12, label="Paste Full Conversation")
+        models = gr.CheckboxGroup(
+            list(BACKENDS.keys()),
+            value=list(BACKENDS.keys()),
+            label="Evaluator Models"
+        )
+        variant = gr.Radio(["Care-Lock"], value="Care-Lock", label="Variant")
+        temp = gr.Slider(0.0, 1.5, step=0.1, value=0.0, label="Temperature")
+        weight_labels = [
+            "Empathy", "Emotional Relevance", "Tone", "Boundary Awareness",
+            "Supportiveness", "Ethical Safety", "Clarity", "Consistency",
+            "Self-Awareness", "Adaptability"
+        ]
+        sliders = [
+            gr.Slider(0, 1, step=0.01, value=d, label=l)
+            for l, d in zip(weight_labels,
+                            [0.2, 0.15, 0.10, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.10])
+        ]
+        generate = gr.Button("🔍 Generate Evaluation")
+        metrics_table = gr.DataFrame(label="Metrics by Model (with Total)")
+        comments_json = gr.JSON(label="Parsed JSON per Model")
+        tokens_json = gr.JSON(label="Tokens Used per Model")
+        pros_json = gr.JSON(label="Pros per Model")
+        cons_json = gr.JSON(label="Cons per Model")
+        summary_json = gr.JSON(label="Summary per Model")
+        file_out = gr.File(label="Download Combined JSON")
+        generate.click(
+            fn=lambda *args: evaluate_with_judges(*args, prompt_template=PROMPT_TEMPLATE),
+            inputs=[convo, models, variant, *sliders, temp],
+            outputs=[
+                metrics_table, comments_json, tokens_json,
+                pros_json, cons_json, summary_json, file_out
+            ]
+        )
+    return app