File size: 6,143 Bytes
c7a6fe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os, json
def return_promptst(reference_summary, generated_summary, subclaims_json, difficulty_level):
    """Build the evaluator prompt for judging omitted subclaims.

    Parameters
    ----------
    reference_summary : str
        The gold reference summary text.
    generated_summary : str
        The readability-controlled generated summary being evaluated.
    subclaims_json : str
        Stringified subclaim/result pairs; entries with ``"result": 0`` are
        the omitted subclaims the model must judge.
    difficulty_level : str
        One of ``"easy"``, ``"intermediate"``, ``"hard"``; interpolated into
        the prompt so the model applies the matching readability guideline.

    Returns
    -------
    str
        The fully interpolated prompt. The literal ``{{`` / ``}}`` braces in
        the output-format section survive f-string expansion as ``{`` / ``}``.
    """
    # NOTE: the original prompt wrote `"result": 0"` (stray trailing quote);
    # fixed to `"result": 0` so the model matches the actual JSON field.
    prompt=f'''
            **SYSTEM / ROLE INSTRUCTION:**
            You are a **medical readability evaluator**.
            Your task is to judge whether omitted subclaims (those with `"result": 0`) from a generated summary are *reasonably omitted* based on the intended **readability level**: *easy*, *intermediate*, or *hard*.
            You evaluate this from the standpoint of clarity, faithfulness, and readability goals.

            ---

            ### **READABILITY GUIDELINES**

            | Level            | Target Audience                          | Content Expectation                                             | Technical Detail Allowed                                         |
            | :--------------- | :--------------------------------------- | :-------------------------------------------------------------- | :--------------------------------------------------------------- |
            | **Easy**         | General public                           | Focus on main events, outcomes, and diagnoses in plain Spanish. | Minimal — avoid measurements, anatomy, and test results.         |
            | **Intermediate** | Educated lay readers or medical students | Include key findings and procedures in simplified form.         | Moderate — basic terms and causes allowed.                       |
            | **Hard**         | Medical professionals                    | Retain most technical information and precision.                | High — measurements, anatomy, and test interpretations expected. |

            ---

            ### **INPUT FIELDS**

            **Reference summary:**
            {reference_summary}

            **Generated summary ({difficulty_level}):**
            {generated_summary}

            **Subclaims and results:**
            {subclaims_json}

            ---

            ### **TASK INSTRUCTIONS**

            1. Focus on subclaims with `"result": 0` (not supported by the generated summary).
            2. For each omitted subclaim:

            * Decide whether omission is **reasonable** given the readability level.
            * Label as: `"yes"`, `"no"`, or `"borderline"`.
            * Write a brief justification (1–2 sentences).
            3. After individual evaluations, assign a **reasonableness score (0–5)** using this scale:

            * **5** = All omissions appropriate for target readability.
            * **4** = Minor omissions could improve completeness.
            * **3** = Some omissions reduce understanding or medical clarity.
            * **2** = Many important omissions harm faithfulness.
            * **1** = Major omissions misrepresent case.
            * **0** = Summary fails to reflect key medical information.
            4. End with an **overall explanation (3–5 sentences)** describing:

            * The main reasoning behind the score.
            * Whether the summary fits its intended readability level.
            * Suggestions for improvement if needed.

            ---

            ### **OUTPUT FORMAT (strict JSON)**

            ```json
            {{
            "evaluation_table": [
                {{
                "id": <subclaim_id>,
                "subclaim": "<text>",
                "reasonable_omission": "<yes | no | borderline>",
                "explanation": "<short reason>"
                }}
            ],
            "reasonableness_score": <0-5>,
            "overall_explanation": "<concise paragraph>"
            }}
            ```
            '''
    return prompt

from openai import OpenAI

# Load the OpenAI API key from a local credentials file.
# Expected schema: {"openai": "<api-key>", ...} — TODO confirm against the file.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as file:
    api_keys = json.load(file)

# .get() returns None if the "openai" key is missing; OpenAI() would then
# fall back to its own environment-based key resolution.
openai_api_key = api_keys.get("openai")

# Module-level client shared by openai_return() below.
client = OpenAI(api_key=openai_api_key)
def openai_return(prompt):
    """Send *prompt* to the chat model and parse the reply as JSON.

    Parameters
    ----------
    prompt : str
        User-turn content; a generic system message is prepended.

    Returns
    -------
    The object decoded from the model reply.

    Raises
    ------
    json.JSONDecodeError
        If the reply is not valid JSON after code-fence stripping.
    """
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    raw = response.choices[0].message.content.strip()
    # Strip Markdown code fences only at the boundaries. The previous blanket
    # .replace("```json", "").replace("```", "") would also delete backtick
    # runs occurring *inside* JSON string values, corrupting the payload.
    if raw.startswith("```"):
        # Drop the opening fence line (handles both ``` and ```json).
        raw = raw.split("\n", 1)[1] if "\n" in raw else ""
    if raw.endswith("```"):
        raw = raw[:-3]
    return json.loads(raw.strip())

import json
# Synthetic source data: list of records with keys
# 'id', 'full_text', 'ref_summary', 'readability_versions'.
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"

with open(file_path, 'r') as f:
    synthetic_data = json.load(f)

# Subclaim-verifier outputs from Qwen3-32B; indexed in parallel with
# synthetic_data (the main loop reads qwen3_32B_results[ind]).
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"

with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)

# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
# print(f"Full text: {synthetic_data[0]['full_text']}")
# Accumulator for evaluation results; one entry per (index, readability level).
res=[]
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5.json"
# Resume support: reload previously checkpointed results if the output exists.
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
print(f"Resuming from {len(res)} entries")
import tqdm

# Resume-index fix: each source index appends THREE entries to `res` (one per
# readability version), so after a restart the next unprocessed index is
# len(res) // 3 — the original range(len(res), 100) skipped already-done work
# counted per-entry and left indices unprocessed.
start_index = len(res) // 3
for ind in tqdm.tqdm(range(start_index, 100)):
    print(f"Processing index: {ind}")
    for version in ["easy", "intermediate", "hard"]:
        ref_summary = f"{synthetic_data[ind]['ref_summary']['text']}"
        generated_summary = f"{synthetic_data[ind]['readability_versions'][version]['text']}"
        # NOTE: this stringifies the Python list repr, not strict JSON — the
        # evaluator prompt tolerates it, so behavior is kept as-is.
        subclaims_results = f"{qwen3_32B_results[ind]['completeness']['results']}"
        try:
            prompt = return_promptst(ref_summary, generated_summary, subclaims_results, version)
            res.append({
                "id": synthetic_data[ind]['id'],
                "difficulty_level": version,
                "prompt": openai_return(prompt)
            })
            # Checkpoint every other entry so a crash loses at most one call.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best-effort: log and continue; the entry is simply absent from res.
            print(f"Error at {ind} {version}: {e}")
        # print(prompt)
        # assert False
# Final write so the last (possibly un-checkpointed) entries are persisted.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)