navaneethkrishnan committed on
Commit
5333591
·
verified ·
1 Parent(s): 95a0abc

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +7 -0
  2. requirements.txt +4 -0
  3. ui.py +50 -0
app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from ui import create_app
3
+
4
+ # HF Spaces entry point
5
+ if __name__ == "__main__":
6
+ demo = create_app()
7
+ demo.launch(show_error=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==4.44.0
2
+ openai>=1.30.0
3
+ anthropic>=0.34.0
4
+ pandas==2.2.3
ui.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from src.evaluation import evaluate_with_judges
3
+
4
+ def create_app():
5
+ # Load prompt template
6
+ try:
7
+ with open("prompts/carelock.txt", "r", encoding="utf-8") as f:
8
+ PROMPT_TEMPLATE = f.read()
9
+ except FileNotFoundError:
10
+ raise FileNotFoundError("carelock.txt not found in repository root. Please upload it.")
11
+
12
+ from src.api_clients import BACKENDS
13
+ with gr.Blocks(title="Therapist LLM Evaluator – Care-Lock") as app:
14
+ gr.Markdown("## 🧠 Therapist LLM Evaluator – Care-Lock Variant")
15
+ convo = gr.Textbox(lines=12, label="Paste Full Conversation")
16
+ models = gr.CheckboxGroup(
17
+ list(BACKENDS.keys()),
18
+ value=list(BACKENDS.keys()),
19
+ label="Evaluator Models"
20
+ )
21
+ variant = gr.Radio(["Care-Lock"], value="Care-Lock", label="Variant")
22
+ temp = gr.Slider(0.0, 1.5, step=0.1, value=0.0, label="Temperature")
23
+ weight_labels = [
24
+ "Empathy", "Emotional Relevance", "Tone", "Boundary Awareness",
25
+ "Supportiveness", "Ethical Safety", "Clarity", "Consistency",
26
+ "Self-Awareness", "Adaptability"
27
+ ]
28
+ sliders = [
29
+ gr.Slider(0, 1, step=0.01, value=d, label=l)
30
+ for l, d in zip(weight_labels,
31
+ [0.2, 0.15, 0.10, 0.10, 0.10, 0.10, 0.05, 0.05, 0.05, 0.10])
32
+ ]
33
+ generate = gr.Button("🔍 Generate Evaluation")
34
+ metrics_table = gr.DataFrame(label="Metrics by Model (with Total)")
35
+ comments_json = gr.JSON(label="Parsed JSON per Model")
36
+ tokens_json = gr.JSON(label="Tokens Used per Model")
37
+ pros_json = gr.JSON(label="Pros per Model")
38
+ cons_json = gr.JSON(label="Cons per Model")
39
+ summary_json = gr.JSON(label="Summary per Model")
40
+ file_out = gr.File(label="Download Combined JSON")
41
+
42
+ generate.click(
43
+ fn=lambda *args: evaluate_with_judges(*args, prompt_template=PROMPT_TEMPLATE),
44
+ inputs=[convo, models, variant, *sliders, temp],
45
+ outputs=[
46
+ metrics_table, comments_json, tokens_json,
47
+ pros_json, cons_json, summary_json, file_out
48
+ ]
49
+ )
50
+ return app