| import os |
| import json |
| import re |
| import concurrent.futures |
| from openai import OpenAI |
|
|
class MedicalClaimVerifier:
    """LLM-based judge that checks whether generated text supports medical sub-claims.

    Each (context, claim) pair is sent to an OpenAI chat model that must answer
    'supported' or 'not_supported'; the binary verdicts are aggregated into
    comprehension ("comp") and coverage ("cov") scores per literacy level.
    """

    def __init__(self, api_file="/home/mshahidul/api_new.json", model_name="gpt-5-nano"):
        """Load the API key and configure the OpenAI client.

        Args:
            api_file: Path to a JSON file of the form {"openai": "<key>"}.
                Defaults to the original hard-coded path for backward
                compatibility.
            model_name: Chat model used as the verification judge.
        """
        with open(api_file, "r") as f:
            api_keys = json.load(f)
        self.api_key = api_keys["openai"]

        self.model_name = model_name
        self.client = OpenAI(api_key=self.api_key)

        # Per-literacy-level reference thresholds for completeness ("comp")
        # and coverage ("cov"); rewards are computed relative to these.
        self.thresholds = {
            "low": {"comp": 1.0, "cov": 0.3226},
            "intermediate": {"comp": 1.0, "cov": 0.4091},
            "proficient": {"comp": 1.0, "cov": 0.9347},
        }

    def get_prompt(self, context, claim):
        """Build the one-word supported/not_supported verification prompt."""
        prompt = f"""
CONTEXT:
{context}

CLAIM TO VERIFY:
{claim}

INSTRUCTION:
Does the CONTEXT above provide enough evidence to support the CLAIM?
- Answer 'supported' if the claim is explicitly stated or logically followable.
- Answer 'not_supported' if the claim is missing, contradicts the text, or requires outside info.

Output only one word: 'supported' or 'not_supported'.
"""
        return prompt

    def check_support_api(self, prompt):
        """Ask the judge model whether a claim is supported.

        Returns:
            1.0 for a 'supported' verdict; 0.0 for 'not_supported' or on any
            API/parsing failure (best-effort: a failed call counts as
            unsupported rather than raising).
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
            )
            verdict = response.choices[0].message.content.strip().lower()
            # Normalize the spaced variant so a "not supported" reply is not
            # misread as a positive verdict ("supported" is a substring of it).
            verdict = verdict.replace("not supported", "not_supported")
            return 1.0 if "supported" in verdict and "not_supported" not in verdict else 0.0
        except Exception:
            # Deliberate best-effort: any failure scores the claim as unsupported.
            return 0.0

    def evaluate_level(self, gen_text, gold_subs, full_subs):
        """Score one literacy level's generated text against two claim sets.

        Args:
            gen_text: Generated summary used as the verification CONTEXT.
            gold_subs: Sub-claims from the gold summary (comprehension set).
            full_subs: Sub-claims from the full text (coverage set).

        Returns:
            (comp_score, cov_score): fraction of each claim set judged
            supported; (0.0, 0.0) when any input is empty.
        """
        if not gen_text or not gold_subs or not full_subs:
            return 0.0, 0.0

        # One API call per claim, fanned out over threads (I/O-bound work).
        all_claims = gold_subs + full_subs
        prompts = [self.get_prompt(gen_text, claim) for claim in all_claims]
        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
            results = list(executor.map(self.check_support_api, prompts))

        # Results come back in submission order: gold claims first.
        comp_results = results[:len(gold_subs)]
        cov_results = results[len(gold_subs):]

        comp_score = sum(comp_results) / len(gold_subs)
        cov_score = sum(cov_results) / len(full_subs)
        return comp_score, cov_score
|
|
| verifier = MedicalClaimVerifier() |
|
|
def compute_score(data_source, solution_str, ground_truth, extra_info=None):
    """Compute the RL reward for a multi-literacy-level summary generation.

    Args:
        data_source: Unused; kept for reward-function interface compatibility.
        solution_str: Model output, expected to be a JSON object (optionally
            inside a ``` or ```json fenced block) with one
            "<level>_health_literacy" string per level.
        ground_truth: Dict with 'summary_subclaims' and 'fulltext_subclaims'
            reference claim lists.
        extra_info: Unused; kept for interface compatibility.

    Returns:
        0.0 when reference sub-claims are missing; -5.0 when the output cannot
        be parsed into a JSON object; otherwise the sum over levels of
        (comp - comp_threshold) + (cov - cov_threshold), with -1.0 per missing
        level and -2.0 if coverage does not rise monotonically with literacy.
    """
    gold_subs = ground_truth.get('summary_subclaims', [])
    full_subs = ground_truth.get('fulltext_subclaims', [])

    if not gold_subs or not full_subs:
        return 0.0

    # Strip an optional markdown code fence, then parse the JSON payload.
    try:
        cleaned_str = solution_str.strip()
        if "```json" in cleaned_str:
            cleaned_str = cleaned_str.split("```json")[1].split("```")[0].strip()
        elif "```" in cleaned_str:
            cleaned_str = cleaned_str.split("```")[1].split("```")[0].strip()
        data = json.loads(cleaned_str)
        # A bare JSON list/number parses fine but has no .get(); treat it as
        # malformed output instead of crashing below with AttributeError.
        if not isinstance(data, dict):
            return -5.0
    except Exception:
        return -5.0

    levels = ["low", "intermediate", "proficient"]
    scores = {}

    for lvl in levels:
        gen_text = data.get(f"{lvl}_health_literacy", "")
        if not gen_text:
            scores[lvl] = {"comp": 0.0, "cov": 0.0, "missing": True}
        else:
            comp, cov = verifier.evaluate_level(gen_text, gold_subs, full_subs)
            scores[lvl] = {"comp": comp, "cov": cov, "missing": False}

    total_reward = 0.0

    # Coverage should not decrease as the target literacy level rises;
    # penalize any violation of low <= intermediate <= proficient.
    low_cov = scores["low"]["cov"]
    int_cov = scores["intermediate"]["cov"]
    pro_cov = scores["proficient"]["cov"]
    hierarchy_penalty = 0.0
    if not (low_cov <= int_cov <= pro_cov):
        hierarchy_penalty = -2.0

    for lvl in levels:
        if scores[lvl]["missing"]:
            total_reward -= 1.0  # flat penalty for an absent level
            continue

        # Reward is the margin relative to the per-level thresholds
        # (comp threshold is 1.0, so this term is always a penalty or zero).
        thresh = verifier.thresholds[lvl]
        total_reward += (scores[lvl]["comp"] - thresh["comp"])
        total_reward += (scores[lvl]["cov"] - thresh["cov"])

    return total_reward + hierarchy_penalty