import os
import json
import tqdm
from openai import OpenAI
# =====================================================
# 1️⃣ Setup: Load API key, initialize client
# =====================================================
# NOTE(review): hard-coded absolute path to the local credentials file —
# the JSON is expected to hold at least an "openai" key.
api_file = "/home/mshahidul/api_new.json"
with open(api_file, "r") as f:
    api_keys = json.load(f)
# Module-level client shared by all API calls below.
openai_api_key = api_keys["openai"]
client = OpenAI(api_key=openai_api_key)
# =====================================================
# 2️⃣ OpenAI call helper
# =====================================================
def openai_return(prompt, model="gpt-5"):
    """Send *prompt* to the chat-completions API and parse the reply as JSON.

    Parameters
    ----------
    prompt : str
        Full user prompt to send (a system message is prepended).
    model : str
        Model name passed to the API (default ``"gpt-5"``).

    Returns
    -------
    dict | list | str
        The parsed JSON payload, or the raw fence-stripped text when the
        reply is not valid JSON.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content.strip()
    # Strip a Markdown code fence only at the *edges* of the reply.  The old
    # blanket replace("```json", "").replace("```", "") removed backtick runs
    # anywhere in the text, which could corrupt JSON string values that
    # legitimately contain fences.
    cleaned = content
    if cleaned.startswith("```"):
        # Drop the opening fence line ("```" or "```json").
        cleaned = cleaned.split("\n", 1)[1] if "\n" in cleaned else ""
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    cleaned = cleaned.strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        print("⚠️ JSON parse failed — storing raw text.")
        return cleaned
# =====================================================
# 3️⃣ Multi‑subclaim attribution prompt builder
# =====================================================
def return_prompts_attribution_multi(reference_full_text, generated_summary, subclaims_json, difficulty_level):
    """Build the attribution-evaluation prompt for one summary.

    Parameters
    ----------
    reference_full_text : str
        The source/reference document the summary was generated from.
    generated_summary : str
        The readability-controlled summary under evaluation.
    subclaims_json : str
        JSON-serialized list of subclaims, each with a ``"result"`` flag
        (1 = supported, 0 = unsupported).
    difficulty_level : str
        One of "easy" / "intermediate" / "hard".

    Returns
    -------
    str
        The fully interpolated prompt text.
    """
    # NOTE: fixed the unbalanced quotes in the `"result": 0"` / `"result": 1"`
    # references — the stray trailing quote garbled the JSON examples shown
    # to the model.
    return f"""
### **SYSTEM / ROLE INSTRUCTION**
You are a **medical factuality and attribution evaluator**.
You will analyze all subclaims found in a generated summary, each labeled with a `"result"` flag:
- `1` = supported by the reference
- `0` = unsupported by the reference
Your main task is to **evaluate only the unsupported subclaims (`"result": 0`)**, judging whether each is a *reasonable addition* given the specified readability level (*easy / intermediate / hard*).
The presence of supported items (`"result": 1`) helps you understand the full context of what is confirmed versus speculative,
but you will not rate those. Their inclusion enriches the training data diversity and realism.
---
### **READABILITY & ATTRIBUTION GUIDELINES**
| Level | Audience | Linguistic & Stylistic Profile | Allowable Additions |
| :-- | :-- | :-- | :-- |
| **Easy (FH 70–100)** | General public | Short, simple, concrete sentences | General explanations only; no new factual claims |
| **Intermediate (FH 50–69)** | Educated layperson | Moderate complexity and precision | Clarifying causal links aligned with the text |
| **Hard (FH 0–49)** | Professionals | Formal, technical, multi‑clause detail | Must strictly reflect source evidence |
---
### **Input**
Readability Level: {difficulty_level}
Reference Full Text:
{reference_full_text}
Generated Summary:
{generated_summary}
All Subclaims with Support Results:
{subclaims_json}
---
### **TASK INSTRUCTIONS**
For **each subclaim where** `"result": 0`, classify it as:
- `"reasonable"` – legitimate simplification aligned with readability needs
- `"partially_reasonable"` – harmless addition or neutral paraphrase
- `"unreasonable"` – misleading, speculative, or factually unsupported
Support your judgment with a 1–2 sentence justification per item.
Do **not** modify or comment on subclaims where `"result": 1`.
---
### **Output JSON Format**
```json
{{
  "evaluations": [
    {{
      "subclaim_id": <id>,
      "subclaim": "<verbatim_subclaim>",
      "result": <0 or 1>,
      "reasonableness": "<reasonable | partially_reasonable | unreasonable | not_applicable>",
      "justification": "<short justification for result=0; for result=1, just write 'supported, no evaluation required'>"
    }},
    ...
  ]
}}
"""
# =====================================================
# 4️⃣ Driver: evaluate subclaim attribution per entry/level
# =====================================================
# Input/output locations (hard-coded for this experiment run).
file_synth = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
file_qwen_results = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_attribution_resonability_check_100_gpt5_train_v2.json"

with open(file_synth, 'r') as f:
    synthetic_data = json.load(f)
with open(file_qwen_results, 'r') as f:
    qwen3_32B_results = json.load(f)

# Resume support: reload any previously saved output so we can skip
# already-processed (id, level) pairs.
res = []
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"🔁 Resuming from {len(res)} entries")
# Build the skip-set unconditionally: defining it only on the resume branch
# raises NameError on a fresh run.
existing = set((e["id"], e["difficulty_level"]) for e in res)


def _save_results():
    """Persist the accumulated results to save_path (pretty-printed JSON)."""
    with open(save_path, 'w') as f:
        json.dump(res, f, indent=2, ensure_ascii=False)


# NOTE(review): only the first 30 of the synthetic entries are evaluated —
# confirm this cap is intentional.
for ind in tqdm.tqdm(range(0, 30)):
    entry = synthetic_data[ind]
    subclaims_results = qwen3_32B_results[ind]['attribution']['results']
    subclaims_json = json.dumps(subclaims_results, indent=2, ensure_ascii=False)
    for level in ["easy", "intermediate", "hard"]:
        if (entry["id"], level) in existing:
            print(f"⏭️ Skipping {entry['id']} ({level})")
            continue
        prompt = return_prompts_attribution_multi(
            entry["full_text"],
            entry["readability_versions"][level]["text"],
            subclaims_json,
            level,
        )
        try:
            response = openai_return(prompt)
            res.append({
                "id": entry["id"],
                "difficulty_level": level,
                "response": response,
            })
            existing.add((entry["id"], level))
            # Periodic checkpoint every 2 new entries.
            if len(res) % 2 == 0:
                _save_results()
                print(f"💾 Saved after {len(res)} entries")
        except Exception as e:
            # Best-effort: log and continue with the next (entry, level).
            print(f"❌ Error at index {ind}, level {level}: {e}")

# Final save: the periodic checkpoint fires only on even counts, so a
# trailing odd entry would otherwise be lost.
_save_results()
print(f"✅ Done — {len(res)} total entries saved to {save_path}")