File size: 4,819 Bytes
1db7196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os, json
def return_prompts(reference_summary: str, generated_summary: str, subclaims_json: str, difficulty_level: str) -> str:
    """Build the evaluator prompt sent to the chat model.

    Interpolates the reference summary, the generated summary, the
    subclaim-verifier results, and the target readability level into a
    fixed Markdown template that instructs the model to judge each
    subclaim's inclusion/omission and return structured JSON.

    Args:
        reference_summary: Gold/reference summary text.
        generated_summary: Model-generated summary being evaluated.
        subclaims_json: Stringified subclaim support results (each with a
            0/1 ``result`` flag).
        difficulty_level: Target readability level ("easy",
            "intermediate", or "hard").

    Returns:
        The fully rendered prompt string. Note the doubled braces
        (``{{`` / ``}}``) in the template are f-string escapes that
        render as literal braces in the JSON example.
    """
    prompt=f'''
You are a **medical summarization quality evaluator**.
Your goal is to decide whether the inclusion or omission of each subclaim in the generated summary is *reasonable*, given the target readability level.

---

### **Input**

```
Readability Level: {difficulty_level}

Reference Summary:
{reference_summary}

Generated Summary:
{generated_summary}

Subclaims with Support Results:
{subclaims_json}
```

---

### **Task**

For each subclaim:

1. Read `result`:

   * `1` = the subclaim is supported or clearly mentioned in the generated summary.
   * `0` = the subclaim is missing or not supported.

2. Based on readability level and medical relevance, decide whether this inclusion/omission is **reasonable**, **partially reasonable**, or **unreasonable**.

3. Provide a short justification (1–2 sentences) explaining your reasoning.

---

### **Output Format**

Return structured JSON:

```json
{{
  "readability_level": "<easy/intermediate/hard>",
  "evaluations": [
    {{
      "subclaim_id": <id>,
      "subclaim_text": "<text>",
      "result": <0 or 1>,
      "reasonableness": "<reasonable | partially_reasonable | unreasonable>",
      "justification": "<short explanation>"
    }},
    ...
  ]
}}
```

---

### **Evaluation Guidelines**

| Readability Level | Reasonable Omission                                          | Unreasonable Omission                             |
| ----------------- | ------------------------------------------------------------ | ------------------------------------------------- |
| **Easy**          | Technical, anatomical, quantitative, or procedural details.  | Key clinical findings, diagnoses, or outcomes.    |
| **Intermediate**  | Minor imaging details or measurements.                       | Any main diagnostic finding or cause–effect link. |
| **Hard**          | Very few omissions acceptable; mostly stylistic compression. | Any missing clinical or diagnostic information.   |

'''
    return prompt

from openai import OpenAI

# Load the OpenAI API key from a local credentials file.
# Assumed shape: {"openai": "<key>", ...} — .get() returns None if the
# key is absent, in which case the OpenAI client will fail at call time.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as file:
    api_keys = json.load(file)

openai_api_key = api_keys.get("openai")

# Module-level client reused by openai_return() for every request.
client = OpenAI(api_key=openai_api_key)
def openai_return(prompt: str):
    """Send *prompt* to the chat model and parse the reply as JSON.

    Args:
        prompt: Fully rendered evaluation prompt (see ``return_prompts``).

    Returns:
        The parsed JSON object from the model's reply.

    Raises:
        ValueError: If the reply contains no JSON object, or the extracted
            span is not valid JSON (``json.JSONDecodeError`` is a subclass
            of ``ValueError``, so existing callers' handlers still match).
    """
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    content = response.choices[0].message.content
    # Models often wrap JSON in ``` fences.  The old blanket
    # .replace("```json", "").replace("```", "") also mangled backticks
    # appearing *inside* JSON string values (e.g. in justifications), so
    # instead slice out the outermost {...} object and parse that.
    start = content.find("{")
    end = content.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No JSON object found in model response: {content!r}")
    return json.loads(content[start:end + 1])

import json
# Synthetic dataset: each item has keys
# ['id', 'full_text', 'ref_summary', 'readability_versions'].
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"

with open(file_path, 'r') as f:
    synthetic_data = json.load(f)

# Subclaim support results produced by Qwen3-32B.
# NOTE(review): indexed positionally in parallel with synthetic_data —
# assumes both files are in the same order; verify upstream.
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"

with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)

# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
# print(f"Full text: {synthetic_data[0]['full_text']}")
res=[]
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_data_resonability_check_20_gpt5.json"
# Resume support: reload prior results and skip (id, level) pairs that
# were already evaluated in an earlier run.
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
exist_check_ids = set([(item['id'], item['difficulty_level']) for item in res])
print(f"Resuming from {len(res)} entries")
import tqdm
# Evaluate the first 20 documents x 3 readability levels (max 60 entries).
for ind in tqdm.tqdm(range(0,20)):
    print(f"Processing index: {ind}")
    for version in ["easy", "intermediate", "hard"]:
        if (synthetic_data[ind]['id'], version) in exist_check_ids:
            print(f"Skipping {synthetic_data[ind]['id']} {version}")
            continue
        ref_summary = (f"{synthetic_data[ind]['ref_summary']['text']}")
        generated_summary = (f"{synthetic_data[ind]['readability_versions'][version]['text']}")
        subclaims_results = (f"{qwen3_32B_results[ind]['completeness']['results']}")
        try:
            prompt = return_prompts(ref_summary, generated_summary, subclaims_results, version)
            res.append({
                "id": synthetic_data[ind]['id'],
                "difficulty_level": version,
                "reasonableness": openai_return(prompt)
            })
            # Checkpoint every 2 new entries so a crash loses little work.
            # NOTE(review): the "out of 300" in the message looks stale —
            # this loop produces at most 60 entries.
            if len(res)%2==0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best-effort: log and continue so one bad item or API error
            # doesn't abort the whole run.
            print(f"Error at {ind} {version}: {e}")
        # print(prompt)
        # assert False
# Final flush of all accumulated results.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)