# Viewer header: File size: 4,819 Bytes | 1db7196
import os, json
def return_prompts(reference_summary: str, generated_summary: str, subclaims_json: str, difficulty_level: str) -> str:
    """Build the evaluator prompt for one (summary, readability level) pair.

    The four arguments are interpolated verbatim into a fixed instruction
    template that asks the model to judge, per subclaim, whether its
    inclusion or omission in the generated summary is reasonable.

    Args:
        reference_summary: Text placed under "Reference Summary:".
        generated_summary: Text placed under "Generated Summary:".
        subclaims_json: Text placed under "Subclaims with Support Results:".
        difficulty_level: Text placed after "Readability Level:".

    Returns:
        The complete prompt string.
    """
    # The template lines stay at column 0 on purpose: anything inside the
    # triple-quoted literal (including leading spaces) becomes part of the
    # prompt. Doubled braces are f-string escapes for literal '{' / '}'.
    prompt = f'''
You are a **medical summarization quality evaluator**.
Your goal is to decide whether the inclusion or omission of each subclaim in the generated summary is *reasonable*, given the target readability level.
---
### **Input**
```
Readability Level: {difficulty_level}
Reference Summary:
{reference_summary}
Generated Summary:
{generated_summary}
Subclaims with Support Results:
{subclaims_json}
```
---
### **Task**
For each subclaim:
1. Read `result`:
* `1` = the subclaim is supported or clearly mentioned in the generated summary.
* `0` = the subclaim is missing or not supported.
2. Based on readability level and medical relevance, decide whether this inclusion/omission is **reasonable**, **partially reasonable**, or **unreasonable**.
3. Provide a short justification (1–2 sentences) explaining your reasoning.
---
### **Output Format**
Return structured JSON:
```json
{{
"readability_level": "<easy/intermediate/hard>",
"evaluations": [
{{
"subclaim_id": <id>,
"subclaim_text": "<text>",
"result": <0 or 1>,
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
"justification": "<short explanation>"
}},
...
]
}}
```
---
### **Evaluation Guidelines**
| Readability Level | Reasonable Omission | Unreasonable Omission |
| ----------------- | ------------------------------------------------------------ | ------------------------------------------------- |
| **Easy** | Technical, anatomical, quantitative, or procedural details. | Key clinical findings, diagnoses, or outcomes. |
| **Intermediate** | Minor imaging details or measurements. | Any main diagnostic finding or cause–effect link. |
| **Hard** | Very few omissions acceptable; mostly stylistic compression. | Any missing clinical or diagnostic information. |
'''
    return prompt
from openai import OpenAI

# Load API credentials from a local JSON file and build the shared client
# used by the request helper below.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r", encoding="utf-8") as file:
    api_keys = json.load(file)
# `.get` yields None when the "openai" entry is absent; the client
# constructor then decides how to handle a missing key.
openai_api_key = api_keys.get("openai")
client = OpenAI(api_key=openai_api_key)
def openai_return(prompt):
    """Send *prompt* to the chat model and return its reply parsed as JSON.

    Args:
        prompt: User-message text for a single-turn chat completion.

    Returns:
        The object produced by ``json.loads`` on the model's reply.

    Raises:
        ValueError: If the reply carries no text content, or if the text is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``).
    """
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content
    if content is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Model returned no text content to parse as JSON")

    text = content.strip()
    if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
        # Reply is wrapped in one Markdown fence: drop only the wrapper so
        # that backticks inside JSON string values are left intact.
        inner = text[3:-3]
        if inner[:4].lower() == "json":
            inner = inner[4:]
        text = inner
    else:
        # Unbalanced or stray fences: fall back to removing every marker.
        text = text.replace("```json", "").replace("```", "")
    return json.loads(text)
import json

# --- Inputs -----------------------------------------------------------------
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
with open(file_path, 'r', encoding="utf-8") as f:
    synthetic_data = json.load(f)
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
with open(file_path_qwen3_32B, 'r', encoding="utf-8") as f:
    qwen3_32B_results = json.load(f)
# Keys of each synthetic_data entry:
# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])

# --- Resume support ---------------------------------------------------------
res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_data_resonability_check_20_gpt5.json"
if os.path.exists(save_path):
    with open(save_path, 'r', encoding="utf-8") as f:
        res = json.load(f)
    print(f"Resuming from {len(res)} entries")
# Built unconditionally so the skip check below also works on a first run,
# when no save file exists yet (the set is then simply empty).
exist_check_ids = {(item['id'], item['difficulty_level']) for item in res}

import tqdm

# --- Evaluation loop --------------------------------------------------------
versions = ["easy", "intermediate", "hard"]
# Process at most the first 20 entries; never index past the end of the data.
num_items = min(20, len(synthetic_data))
total_calls = num_items * len(versions)
for ind in tqdm.tqdm(range(num_items)):
    print(f"Processing index: {ind}")
    for version in versions:
        if (synthetic_data[ind]['id'], version) in exist_check_ids:
            print(f"Skipping {synthetic_data[ind]['id']} {version}")
            continue
        ref_summary = f"{synthetic_data[ind]['ref_summary']['text']}"
        generated_summary = f"{synthetic_data[ind]['readability_versions'][version]['text']}"
        # assumes qwen3_32B_results is index-aligned with synthetic_data — TODO confirm
        # NOTE(review): this lookup does not depend on `version`, so all three
        # readability levels receive the same subclaim results — confirm that
        # is intended.
        subclaims_results = f"{qwen3_32B_results[ind]['completeness']['results']}"
        try:
            prompt = return_prompts(ref_summary, generated_summary, subclaims_results, version)
            res.append({
                "id": synthetic_data[ind]['id'],
                "difficulty_level": version,
                "reasonableness": openai_return(prompt),
            })
            # Checkpoint every second result so an interruption loses little.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of {total_calls}")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best-effort: log the failure and move on to the next item.
            print(f"Error at {ind} {version}: {e}")

# Final save so an odd-numbered last result is not lost.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)