File size: 5,775 Bytes
c7a6fe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import os
import json
import tqdm
from openai import OpenAI

# =====================================================
# 1️⃣  Setup: read the OpenAI API key from disk, build the client
# =====================================================

api_file = "/home/mshahidul/api_new.json"
with open(api_file, "r") as key_file:
    api_keys = json.load(key_file)
openai_api_key = api_keys["openai"]
client = OpenAI(api_key=openai_api_key)


# =====================================================
# 2️⃣  OpenAI call helper
# =====================================================

def openai_return(prompt, model="gpt-5"):
    """Send *prompt* to the chat-completions endpoint and attempt to parse JSON.

    Returns the decoded JSON object when the (fence-stripped) reply parses,
    otherwise the cleaned raw text.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    response = client.chat.completions.create(model=model, messages=messages)
    raw = response.choices[0].message.content.strip()
    # Models frequently wrap JSON answers in markdown code fences; strip them.
    cleaned = raw.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        print("⚠️ JSON parse failed — storing raw text.")
        return cleaned
    return parsed


# =====================================================
# 3️⃣  Multi‑subclaim attribution prompt builder
# =====================================================

def return_prompts_attribution_multi(reference_full_text, generated_summary, subclaims_json, difficulty_level):
    """Build the multi-subclaim attribution-evaluation prompt.

    Parameters
    ----------
    reference_full_text : str
        The source/reference document the summary was generated from.
    generated_summary : str
        The readability-controlled summary under evaluation.
    subclaims_json : str
        JSON-serialized list of subclaims, each with a 0/1 ``"result"`` flag.
    difficulty_level : str
        One of "easy" / "intermediate" / "hard".

    Returns
    -------
    str
        The fully interpolated prompt text.
    """
    # Fixes vs. the original template: removed the stray trailing quote in the
    # inline `"result": 0"` / `"result": 1"` code spans, and closed the ```json
    # fence at the end so the prompt is well-formed markdown.
    return f"""
### **SYSTEM / ROLE INSTRUCTION**

You are a **medical factuality and attribution evaluator**.
You will analyze all subclaims found in a generated summary, each labeled with a `"result"` flag:
- `1` = supported by the reference
- `0` = unsupported by the reference

Your main task is to **evaluate only the unsupported subclaims (`"result": 0`)**, judging whether each is a *reasonable addition* given the specified readability level (*easy / intermediate / hard*).

The presence of supported items (`"result": 1`) helps you understand the full context of what is confirmed versus speculative,
but you will not rate those. Their inclusion enriches the training data diversity and realism.

---

### **READABILITY & ATTRIBUTION GUIDELINES**

| Level | Audience | Linguistic & Stylistic Profile | Allowable Additions |
| :-- | :-- | :-- | :-- |
| **Easy (FH 70–100)** | General public | Short, simple, concrete sentences | General explanations only; no new factual claims |
| **Intermediate (FH 50–69)** | Educated layperson | Moderate complexity and precision | Clarifying causal links aligned with the text |
| **Hard (FH 0–49)** | Professionals | Formal, technical, multi‑clause detail | Must strictly reflect source evidence |

---

### **Input**
Readability Level: {difficulty_level}

Reference Full Text:
{reference_full_text}

Generated Summary:
{generated_summary}

All Subclaims with Support Results:
{subclaims_json}

---

### **TASK INSTRUCTIONS**

For **each subclaim where** `"result": 0`, classify it as:

- `"reasonable"` – legitimate simplification aligned with readability needs  
- `"partially_reasonable"` – harmless addition or neutral paraphrase  
- `"unreasonable"` – misleading, speculative, or factually unsupported  

Support your judgment with a 1–2 sentence justification per item.

Do **not** modify or comment on subclaims where `"result": 1`.

---

### **Output JSON Format**

```json
{{
  "evaluations": [
    {{
      "subclaim_id": <id>,
      "subclaim": "<verbatim_subclaim>",
      "result": <0 or 1>,
      "reasonableness": "<reasonable | partially_reasonable | unreasonable | not_applicable>",
      "justification": "<short justification for result=0; for result=1, just write 'supported, no evaluation required'>"
    }},
    ...
  ]
}}
```
"""
# =====================================================
# 4️⃣  Driver: evaluate entries 0-29, resumable, periodic + final save
# =====================================================

file_synth = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
file_qwen_results = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_attribution_resonability_check_100_gpt5_train_v2.json"


def _save_results(results, path):
    """Write *results* to *path* as pretty-printed UTF-8 JSON."""
    with open(path, 'w') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)


with open(file_synth, 'r') as f:
    synthetic_data = json.load(f)
with open(file_qwen_results, 'r') as f:
    qwen3_32B_results = json.load(f)

# Resume from a previous partial run if the output file already exists.
res = []
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
print(f"🔁 Resuming from {len(res)} entries")

# (id, level) pairs already evaluated; kept up to date during this run too.
existing = set((e["id"], e["difficulty_level"]) for e in res)

for ind in tqdm.tqdm(range(0, 30)):
    entry = synthetic_data[ind]
    subclaims_results = qwen3_32B_results[ind]['attribution']['results']
    subclaims_json = json.dumps(subclaims_results, indent=2, ensure_ascii=False)
    for level in ["easy", "intermediate", "hard"]:
        if (entry["id"], level) in existing:
            print(f"⏭️ Skipping {entry['id']} ({level})")
            continue

        ref_full_text = entry["full_text"]
        generated_summary = entry["readability_versions"][level]["text"]

        prompt = return_prompts_attribution_multi(
            ref_full_text,
            generated_summary,
            subclaims_json,
            level
        )

        # Keep the try narrow: only the API call can legitimately fail here.
        try:
            response = openai_return(prompt)
        except Exception as e:
            # Best-effort batch job: log the failure and continue with the rest.
            print(f"❌ Error at index {ind}, level {level}: {e}")
            continue

        res.append({
            "id": entry["id"],
            "difficulty_level": level,
            "response": response
        })
        existing.add((entry["id"], level))

        # save periodically (every 2nd new entry) so a crash loses little work
        if len(res) % 2 == 0:
            _save_results(res, save_path)
            print(f"💾 Saved after {len(res)} entries")

# BUG FIX: the original only saved when len(res) was even, inside the loop, so
# the final (odd-length) tail of results was never written. Always flush once more.
_save_results(res, save_path)
print(f"✅ Finished — {len(res)} entries saved to {save_path}")