navaneethkrishnan committed on
Commit
b2171fc
·
verified ·
1 Parent(s): ca7e448

Upload 3 files

Browse files
Files changed (3) hide show
  1. src/api_clients.py +117 -0
  2. src/conversation.py +20 -0
  3. src/evaluation.py +82 -0
src/api_clients.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import functools
import os

import anthropic
import openai
from anthropic import Anthropic
from openai import OpenAI
6
+
7
# Debug output: record SDK versions and the proxy environment we inherited,
# so HF Spaces logs show the state before the cleanup below.
for _label, _version in (("Anthropic", anthropic.__version__),
                         ("OpenAI", openai.__version__)):
    print(f"{_label} version: {_version}")
_proxy_state = ", ".join(
    f"{_var}={os.getenv(_var)}" for _var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY")
)
print(f"Proxy env vars before cleanup: {_proxy_state}")
11
+
12
# Forcefully disable proxies for HF Spaces: blank the proxy URLs and
# exempt every host so the SDKs' HTTP stacks never route through a proxy.
os.environ.update({
    "HTTP_PROXY": "",
    "HTTPS_PROXY": "",
    "NO_PROXY": "*",
})
16
+
17
# Patch client constructors to ignore a 'proxies' kwarg injected by Spaces.
def safe_init(client_cls):
    """Wrap ``client_cls.__init__`` to strip a 'proxies' kwarg injected by Spaces.

    HF Spaces injects a 'proxies' keyword that newer SDK constructors reject;
    removing it before delegating keeps client construction working. The class
    is patched in place.

    Args:
        client_cls: class whose ``__init__`` is replaced with the wrapper.
    """
    orig_init = client_cls.__init__

    # functools.wraps preserves __name__/__doc__/__module__ of the original
    # __init__ so introspection and help() still look normal after patching.
    @functools.wraps(orig_init)
    def wrapped_init(self, *args, **kwargs):
        if "proxies" in kwargs:
            print(f"[Patch] Stripped unexpected 'proxies' from {client_cls.__name__}")
            kwargs.pop("proxies", None)
        return orig_init(self, *args, **kwargs)

    client_cls.__init__ = wrapped_init
27
+
28
# Install the kwarg-stripping patch on both SDK client classes.
for _client_cls in (OpenAI, Anthropic):
    safe_init(_client_cls)
31
+
32
def init_clients():
    """Initialize the OpenAI, Anthropic, and DeepSeek API clients.

    Reads ``OPENAI_API_KEY``, ``ANTHROPIC_API_KEY``, and ``DEEPSEEK_API_KEY``
    from the environment (HF Spaces Repository Secrets).

    Returns:
        tuple: ``(openai_client, anthropic_client, deepseek_client)``.

    Raises:
        Exception: if any key is missing or a client fails to construct;
            the original error is chained as the cause.
    """
    try:
        openai_key = os.getenv("OPENAI_API_KEY")
        anthropic_key = os.getenv("ANTHROPIC_API_KEY")
        deepseek_key = os.getenv("DEEPSEEK_API_KEY")

        if not all([openai_key, anthropic_key, deepseek_key]):
            raise ValueError("Missing one or more API keys in HF Spaces Repository Secrets.")

        openai_client = OpenAI(api_key=openai_key)
        anthropic_client = Anthropic(api_key=anthropic_key)

        # DeepSeek exposes an OpenAI-compatible endpoint, so the OpenAI SDK
        # is reused with only the base URL swapped.
        deepseek_client = OpenAI(
            api_key=deepseek_key,
            base_url="https://api.deepseek.com/v1"
        )

        return openai_client, anthropic_client, deepseek_client

    except Exception as e:
        # Chain the cause so the real failure stays visible in the traceback.
        raise Exception(f"Failed to initialize API clients: {str(e)}") from e
58
+
59
+
60
def gpt4_mini_backend(system_msg, user_prompt, temperature):
    """Call the GPT-4o Mini chat-completions API.

    Args:
        system_msg: system prompt text.
        user_prompt: user message text.
        temperature: sampling temperature passed through to the API.

    Returns:
        tuple: ``(response_text, total_tokens)``.

    Raises:
        Exception: wraps any client/API failure, chained to the cause.
    """
    # NOTE(review): clients are rebuilt on every call; consider caching
    # init_clients() results if call volume grows.
    openai_client, _, _ = init_clients()
    try:
        r = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature
        )
        return r.choices[0].message.content, r.usage.total_tokens
    except Exception as e:
        # Chain the cause instead of discarding the original traceback.
        raise Exception(f"GPT-4o-mini error: {str(e)}") from e
75
+
76
+
77
def anthropic_backend(system_msg, user_prompt, temperature):
    """Call the Anthropic Claude Messages API.

    Args:
        system_msg: system prompt text (Anthropic takes it as a top-level
            ``system`` field, not a message).
        user_prompt: user message text.
        temperature: sampling temperature passed through to the API.

    Returns:
        tuple: ``(response_text, total_tokens)`` where total_tokens is
        input plus output tokens (Anthropic reports them separately).

    Raises:
        Exception: wraps any client/API failure, chained to the cause.
    """
    _, anthropic_client, _ = init_clients()
    try:
        r = anthropic_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            system=system_msg,
            messages=[{"role": "user", "content": user_prompt}],
            max_tokens=2000,
            temperature=temperature
        )
        text = r.content[0].text.strip()
        toks = r.usage.input_tokens + r.usage.output_tokens
        return text, toks
    except Exception as e:
        # Chain the cause instead of discarding the original traceback.
        raise Exception(f"Anthropic error: {str(e)}") from e
93
+
94
+
95
def deepseek_backend(system_msg, user_prompt, temperature):
    """Call the DeepSeek chat API via its OpenAI-compatible interface.

    Args:
        system_msg: system prompt text.
        user_prompt: user message text.
        temperature: sampling temperature passed through to the API.

    Returns:
        tuple: ``(response_text, total_tokens)``.

    Raises:
        Exception: wraps any client/API failure, chained to the cause.
    """
    _, _, deepseek_client = init_clients()
    try:
        r = deepseek_client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature
        )
        return r.choices[0].message.content, r.usage.total_tokens
    except Exception as e:
        # Chain the cause instead of discarding the original traceback.
        raise Exception(f"DeepSeek error: {str(e)}") from e
110
+
111
+
112
# Registry mapping UI model labels to backend call functions. Each backend
# takes (system_msg, user_prompt, temperature) and returns
# (response_text, total_tokens).
BACKENDS = {
    "GPT-4o Mini": gpt4_mini_backend,
    "Claude 3.5 Sonnet": anthropic_backend,
    "DeepSeek Chat": deepseek_backend
}
src/conversation.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.api_clients import init_clients
2
+
3
def structure_conversation(raw_text):
    """Normalize a raw dialogue into a 'HUMAN:'/'AI:' turn-by-turn transcript.

    Uses Claude 3.5 Sonnet at temperature 0.0 as a deterministic formatter.

    Args:
        raw_text: unstructured conversation text.

    Returns:
        str: the formatted transcript, stripped of surrounding whitespace.

    Raises:
        Exception: wraps any client/API failure, chained to the cause.
    """
    _, anthropic_client, _ = init_clients()
    formatter_prompt = (
        "Convert this dialogue into a turn-by-turn transcript where each line "
        "starts with 'HUMAN:' or 'AI:'. Do not add any other commentary.\n\n"
        + raw_text
    )
    try:
        resp = anthropic_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            system="You are a conversation formatter.",
            messages=[{"role": "user", "content": formatter_prompt}],
            max_tokens=1000,
            temperature=0.0
        )
        return resp.content[0].text.strip()
    except Exception as e:
        # Chain the cause instead of discarding the original traceback.
        raise Exception(f"Error in structuring conversation: {str(e)}") from e
src/evaluation.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import datetime
4
+ import pandas as pd
5
+ from src.api_clients import BACKENDS
6
+
7
def split_json_objects(s):
    """Extract every top-level balanced ``{...}`` substring from *s*.

    Tracks JSON string literals (including escaped quotes) while counting
    braces, so a '{' or '}' inside a string value no longer corrupts the
    depth count; stray '}' characters outside any object are ignored.
    Quotes are only interpreted once inside an object, so prose around the
    JSON cannot desynchronize the scanner.

    Args:
        s: text that may contain zero or more JSON objects.

    Returns:
        list[str]: the top-level object substrings, in order of appearance.
    """
    objs = []
    depth = 0
    start = None
    in_string = False   # currently inside a "..." literal within an object
    escape = False      # previous char was a backslash inside a string
    for i, ch in enumerate(s):
        if depth == 0:
            # Outside any object: only an opening brace matters.
            if ch == "{":
                start = i
                depth = 1
                in_string = False
                escape = False
            continue
        if in_string:
            if escape:
                escape = False
            elif ch == "\\":
                escape = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                objs.append(s[start:i + 1])
    return objs
19
+
20
def evaluate_with_judges(conversation, selected_models, variant, *weights_and_temp, prompt_template):
    """Score an AI-therapist dialogue with the selected judge models.

    Structures the raw conversation, sends it to every selected judge backend,
    parses each judge's JSON verdict, computes a weighted total per model, and
    dumps the full bundle to a timestamped file under /tmp.

    Args:
        conversation: raw dialogue text to evaluate.
        selected_models: names of BACKENDS entries to run as judges.
        variant: unused in this body (kept for the caller's signature).
        *weights_and_temp: per-metric weights, with the sampling temperature
            as the final positional value.
        prompt_template: evaluation prompt containing a "{CONVERSATION}"
            placeholder.

    Returns:
        tuple: (metrics DataFrame, parsed-JSON map, tokens map, pros map,
        cons map, summary map, path of the saved JSON dump).

    Raises:
        ValueError: empty conversation, or a judge returned no/invalid JSON.
        IndexError: if weights_and_temp is empty (no temperature supplied).
    """
    # The last vararg is the temperature; everything before it is a weight.
    weights, temperature = list(weights_and_temp[:-1]), weights_and_temp[-1]
    if not conversation.strip():
        raise ValueError("Conversation input is empty.")
    # NOTE(review): local import — presumably avoids an import cycle with
    # src.conversation; confirm.
    from src.conversation import structure_conversation
    structured = structure_conversation(conversation)
    # Shared system prompt pinning the judge persona and its output contract.
    system_msg = (
        "You are Judge-Care-Lock, a rigorous evaluator of AI-therapist dialogues.\n"
        "1. Use ONLY the transcript—quote it for every decision.\n"
        "2. Apply the multi-layer rubric exactly; do NOT invent scales.\n"
        "3. Return valid JSON matching the schema; no extra text."
    )
    user_prompt = prompt_template.replace("{CONVERSATION}", structured)

    metrics_rows = []   # one row of per-metric scores per model
    comments_map = {}   # full parsed JSON verdict per model
    tokens_map = {}     # token usage per model
    pros_map = {}       # "positive" list per model
    cons_map = {}       # "negative" list per model
    summary_map = {}    # "summary" string per model

    for model_name in selected_models:
        fn = BACKENDS[model_name]
        raw, toks = fn(system_msg, user_prompt, temperature)
        tokens_map[model_name] = toks

        # Strip Markdown code fences (``` / ```json) that models often emit.
        clean = re.sub(r"^```(?:json)?\s*|\s*```$", "", raw, flags=re.MULTILINE).strip()
        objs = split_json_objects(clean)
        if not objs:
            raise ValueError(f"No valid JSON from {model_name}:\n{clean}")
        try:
            # Only the first extracted object is parsed; extras are ignored.
            parsed = json.loads(objs[0])
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON from {model_name}: {str(e)}")

        row = {"Model": model_name}
        total_score = 0.0
        # NOTE(review): pairs weights with metrics by dict insertion order —
        # assumes the judge returns metrics in the same order as the weights
        # and that len(weights) covers every metric; confirm against schema.
        for idx, (m, data) in enumerate(parsed["metrics"].items()):
            score = data.get("score", 0.0)
            row[m] = score
            total_score += score * weights[idx]
        row["Total"] = round(total_score, 2)
        metrics_rows.append(row)
        comments_map[model_name] = parsed
        pros_map[model_name] = parsed.get("positive", [])
        cons_map[model_name] = parsed.get("negative", [])
        summary_map[model_name] = parsed.get("summary", "")

    # Persist the full evaluation bundle for download/debugging.
    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"/tmp/carelock_eval_{ts}.json"
    combined = {
        "metrics_table": metrics_rows,
        "parsed_per_model": comments_map,
        "tokens_per_model": tokens_map,
        "pros_per_model": pros_map,
        "cons_per_model": cons_map,
        "summary_per_model": summary_map
    }
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(combined, f, indent=2)

    return (pd.DataFrame(metrics_rows), comments_map, tokens_map,
            pros_map, cons_map, summary_map, filename)