def return_prompts_attribution(
    reference_full_text: str,
    generated_summary: str,
    subclaims_json: str,
    difficulty_level: str,
) -> str:
    """Build the attribution-evaluation prompt for one generated summary.

    The prompt asks the model to judge every unsupported subclaim (those
    marked with ``"result": 0``) against the readability level of the
    summary and to answer with a strict JSON object.

    Args:
        reference_full_text: Source text the summary was generated from.
        generated_summary: Summary whose extra content is being judged.
        subclaims_json: Subclaims with their verification results, already
            rendered as text; it is interpolated into the prompt verbatim.
        difficulty_level: Readability level label shown next to the summary
            heading (the guidelines table covers easy / intermediate / hard).

    Returns:
        The complete prompt text.
    """
    # Doubled braces ({{ and }}) in the JSON template below are f-string
    # escapes and render as single literal braces in the returned prompt.
    return f'''
### **SYSTEM / ROLE INSTRUCTION**

You are a **medical factuality and attribution evaluator**.
You will assess whether **unsupported subclaims** in a generated summary (those with `"result": 0`) are *reasonable additions* based on the readability level (*easy / intermediate / hard*).

The goal is to determine whether these **extra pieces of information** are acceptable simplifications or *hallucinations* that reduce factual faithfulness.

---

### **READABILITY & ATTRIBUTION GUIDELINES**

| Level | Audience | Content Goal | Allowable Additions |
| :--------------- | :------------------------------- | :--------------------------------------------------------------------- | :--------------------------------------------------------------------------------- |
| **Easy** | General public | Simplify and clarify events | Allow general background info or lay explanations, but not new facts or diagnoses. |
| **Intermediate** | Educated layperson / med student | Add brief clarifications or causal context if consistent with the text | Allow inferred, non-contradictory context; avoid adding unconfirmed data. |
| **Hard** | Medical professional | Maintain factual precision | No additions; everything must be supported by source text. |

---

### **INPUT FIELDS**

**Reference full text:**
{reference_full_text}

**Generated summary ({difficulty_level}):**
{generated_summary}

**Subclaims and results:**
{subclaims_json}

---

### **TASK INSTRUCTIONS**

1. Focus only on subclaims with `"result": 0` (not supported by the input text).
2. For each unsupported subclaim:

* Judge whether adding it is **reasonable** for the given readability level.
* Choose one of: `"reasonable addition"`, `"unnecessary but harmless"`, `"misleading / hallucinated"`.
* Provide a **1–2 sentence justification** explaining your reasoning.
3. After all evaluations, assign a **numerical attribution score (0–5)**:

* **5** = All additions are reasonable or harmless simplifications.
* **4** = Mostly reasonable; minor harmless additions.
* **3** = Some misleading or unjustified additions.
* **2** = Many factual inaccuracies.
* **1** = Serious hallucinations; distorts source meaning.
* **0** = Highly unfaithful; mostly invented content.
4. End with an **overall explanation (3–5 sentences)** summarizing your reasoning and suggestions.

---

### **OUTPUT FORMAT (strict JSON)**

```json
{{
"evaluation_table": [
{{
"id": <subclaim_id>,
"subclaim": "<text>",
"evaluation": "<reasonable addition | unnecessary but harmless | misleading / hallucinated>",
"explanation": "<short justification>"
}}
],
"attribution_score": <0-5>,
"overall_explanation": "<concise summary of your judgment>"
}}
```
'''
| | from openai import OpenAI |
| | import json |
# JSON file the API credentials are read from; the parsed value is expected
# to be a JSON object, since it is queried with ``.get`` below.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as file:
    api_keys = json.loads(file.read())

# ``.get`` yields None (rather than raising) when there is no "openai" entry.
openai_api_key = api_keys.get("openai")

# Module-level client used by ``openai_return`` for chat-completion requests.
client = OpenAI(api_key=openai_api_key)
def _strip_code_fence(text):
    """Remove one leading and one trailing Markdown code fence from *text*."""
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        # The opening fence may carry a language tag such as ``json``.
        if text[:4].lower() == "json":
            text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def openai_return(prompt):
    """Send *prompt* to the chat model and return its reply parsed as JSON.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The Python object decoded from the JSON in the model's reply.

    Raises:
        ValueError: If the reply has no text content, or (as
            ``json.JSONDecodeError``, a ``ValueError`` subclass) if no valid
            JSON can be decoded from it.
    """
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content
    if content is None:
        # Fail with a clear message instead of an AttributeError on ``.strip()``.
        raise ValueError("Model response contained no text content")

    # Only fences at the very start/end are removed, so backticks that occur
    # inside JSON string values are left untouched.
    payload = _strip_code_fence(content)
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        # The model may wrap the object in extra prose; retry on the outermost
        # ``{...}`` span before giving up.
        start = payload.find("{")
        end = payload.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(payload[start:end + 1])
| |
|
| |
|
import json
import os

import tqdm

# Source documents together with their readability-controlled summaries.
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
with open(file_path, 'r') as f:
    synthetic_data = json.load(f)

# Subclaim verification output; it is accessed below with the same index as
# ``synthetic_data``.
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)

# Previously saved evaluations are reloaded so an interrupted run can resume.
res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5_attribution.json"
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"Resuming from {len(res)} entries")
existing_check = {(entry['id'], entry['difficulty_level']) for entry in res}

# Always walk every item. ``res`` holds up to three entries per item (one per
# difficulty level), so ``len(res)`` is not an item index; starting the range
# there would skip unprocessed items on resume. Finished (id, level) pairs are
# skipped through ``existing_check`` instead, which also retries pairs that
# failed in an earlier run.
for ind in tqdm.tqdm(range(100)):
    item = synthetic_data[ind]
    for version in ["easy", "intermediate", "hard"]:
        if (item['id'], version) in existing_check:
            print(f"Skipping {item['id']}, {version}")
            continue
        ref_full_text_summary = f"{item['full_text']}"
        generated_summary = f"{item['readability_versions'][version]['text']}"
        # NOTE(review): the same attribution results are sent for every
        # difficulty level of an item - confirm the results file is not meant
        # to be looked up per version.
        subclaims_results = f"{qwen3_32B_results[ind]['attribution']['results']}"
        prompt = return_prompts_attribution(
            ref_full_text_summary, generated_summary, subclaims_results, version
        )
        try:
            ans = openai_return(prompt)
            res.append({
                "id": item['id'],
                "difficulty_level": version,
                "response": ans
            })

            # Checkpoint after every second stored entry to bound lost work.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best effort: report the failure and move on to the next pair.
            print(f"Error at index {ind}, version {version}: {e}")

# Final write so an odd trailing entry is not lost.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)