def revised_results(reference_summary: str, generated_summary: str, list_of_missing_subclaims: list, difficulty_level: str) -> str:
    """Build the LLM prompt asking for a revised summary.

    The prompt instructs the model to reinsert the missing subclaims into
    *generated_summary* while preserving the *difficulty_level* readability
    style, and to answer as a JSON object with keys "revised_summary" and
    "explanation" (parsed downstream by openai_return).

    Note: *list_of_missing_subclaims* is interpolated with its Python repr
    (e.g. ['claim a', 'claim b']) — the model sees the raw list literal.
    """
    return f'''
### **SYSTEM / ROLE INSTRUCTION**

You are a **medical text rewriting assistant** that improves summaries while maintaining the intended readability level (*easy / intermediate / hard*).
You will receive:

* The **original reference summary** (the factual source)
* The **current generated summary**
* A list of **important missing subclaims** to be reintroduced
* The **target readability level**

Your task:
Revise the generated summary so that it **adds the missing information** naturally, while keeping:

* The same **tone, vocabulary, and sentence simplicity** of the given readability level.
* Logical **flow and coherence**.
* No extra, invented information beyond what’s in the reference summary.

---

### **INPUT FIELDS**

**Reference summary:**
{reference_summary}

**Current generated summary ({difficulty_level}):**
{generated_summary}

**Missing important subclaims to add back:**
{list_of_missing_subclaims}

**Target readability level:**
{difficulty_level}


---

### **TASK INSTRUCTIONS**

1. Integrate the missing subclaims **smoothly** into the generated summary.
2. Do **not** add any new facts beyond those listed.
3. Maintain the **same readability level**:

* **Easy:** conversational, short sentences, no jargon.
* **Intermediate:** light medical terms, brief explanations.
* **Hard:** concise clinical tone with correct terminology.
4. Keep the summary approximately the same length; avoid redundancy.
5. Ensure the resulting text remains **fluent, coherent, and faithful** to the reference summary.

---

### **OUTPUT FORMAT**

```json
{{
"revised_summary": "<the new version of the summary, rewritten with the added subclaims>",
"explanation": "<brief note explaining how the missing subclaims were added while preserving readability>"
}}
```

'''
from openai import OpenAI
import json

# Credentials file: a JSON object mapping provider names to API keys,
# e.g. {"openai": "sk-..."}.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as file:
    api_keys = json.load(file)


# .get() returns None if the "openai" entry is missing; the client would
# then fail on its first request rather than here.
openai_api_key = api_keys.get("openai")


# Shared OpenAI client used by openai_return() below.
client = OpenAI(api_key=openai_api_key)
def openai_return(prompt):
    """Send *prompt* to the chat model and parse the reply as JSON.

    Any markdown code fences (```json ... ```) wrapped around the reply are
    stripped before parsing.  Raises json.JSONDecodeError when the cleaned
    reply is not valid JSON (callers catch this in their retry loop).
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model="gpt-5-mini",
        messages=chat_messages,
    )
    raw_text = completion.choices[0].message.content.strip()
    # Remove the fences in this order so "```json" is not left as "json".
    for fence in ("```json", "```"):
        raw_text = raw_text.replace(fence, "")
    return json.loads(raw_text)
import json  # NOTE(review): duplicate of the import above; harmless but redundant.

# Synthetic dataset: items each carrying a reference summary plus
# easy/intermediate/hard readability versions (see the indexing in the main loop).
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"


with open(file_path, 'r') as f:
    synthetic_data = json.load(f)


# GPT-5 completeness-check output: per (id, difficulty_level), an
# 'evaluation_table' marking whether each omitted subclaim was a
# reasonable omission ("yes"/"no").
with open("/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5_completeness.json", 'r') as f:
    readability_reasoning = json.load(f)
| |
| |
# Map (item id, difficulty level) -> list of subclaims whose omission was
# judged NOT reasonable, i.e. the facts that must be added back into the
# generated summary.
reason_info = {}
for item in readability_reasoning:
    # Renamed from `id` to avoid shadowing the builtin.
    item_id = item['id']
    difficulty_level = item['difficulty_level']
    # NOTE(review): the evaluation table is stored under the 'prompt' key
    # of each record in this results file.
    for row in item['prompt']['evaluation_table']:
        if row['reasonable_omission'] == "no":
            # setdefault replaces the manual "if key not in dict" init pattern.
            reason_info.setdefault((item_id, difficulty_level), []).append(row['subclaim'])
|
|
# Qwen3-32B subclaim-verifier results. Loaded here but not referenced again
# in this script — presumably kept for a later comparison step; TODO confirm.
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"


with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)


import os

# Accumulated revision records; periodically flushed to save_path.
res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/results_revised_100_gpt5.json"
# BUG FIX: existing_check must exist even on a fresh run (no results file
# yet); previously it was only assigned inside the branch below, so the
# membership test in the main loop raised NameError on first execution.
existing_check = set()
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    existing_check = {(entry['id'], entry['difficulty_level']) for entry in res}
    print(f"Resuming from {len(res)} entries")
import tqdm

# Revise every (item, difficulty) pair that has unreasonably-omitted
# subclaims, skipping pairs already present in the saved results.
for ind in tqdm.tqdm(range(100)):
    item = synthetic_data[ind]
    for version in ["easy", "intermediate", "hard"]:
        key = (item['id'], version)
        # Hoisted ahead of the summary lookups: resume-skip, then
        # nothing-to-fix skip (no missing subclaims recorded for this pair).
        if key in existing_check:
            continue
        if key not in reason_info:
            continue
        reference_summary = item['ref_summary']['text']
        generated_summary = item['readability_versions'][version]['text']
        prompt = revised_results(reference_summary, generated_summary, reason_info[key], version)
        try:
            ans = openai_return(prompt)
            res.append({
                "id": item['id'],
                "difficulty_level": version,
                "prompt": prompt,
                "response": ans
            })

            # Checkpoint every second new entry so an interrupted run can resume.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best-effort batch: log the failure and move on to the next pair.
            print(f"Error at index {ind}, version {version}: {e}")


# Final flush so entries added since the last checkpoint are persisted.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)
|
|
|
|