"""Revise generated medical summaries by reintroducing missing subclaims.

Pipeline setup: loads the OpenAI API key, the synthetic summary dataset,
and a completeness/reasonableness report, then collects the subclaims
whose omission was judged NOT reasonable into ``reason_info``.  The
driver loop later in this file feeds those back to the model.
"""

import json
import os

from openai import OpenAI


def revised_results(reference_summary, generated_summary, list_of_missing_subclaims, difficulty_level):
    """Build the revision prompt sent to the chat model.

    Parameters
    ----------
    reference_summary : str
        The factual source summary.
    generated_summary : str
        The current readability-controlled summary to be revised.
    list_of_missing_subclaims : list
        Important subclaims the model must weave back in.
    difficulty_level : str
        Target readability level: "easy", "intermediate", or "hard".

    Returns
    -------
    str
        A fully formatted prompt instructing the model to return JSON with
        ``revised_summary`` and ``explanation`` fields.
    """
    return f''' ### **SYSTEM / ROLE INSTRUCTION** You are a **medical text rewriting assistant** that improves summaries while maintaining the intended readability level (*easy / intermediate / hard*). You will receive: * The **original reference summary** (the factual source) * The **current generated summary** * A list of **important missing subclaims** to be reintroduced * The **target readability level** Your task: Revise the generated summary so that it **adds the missing information** naturally, while keeping: * The same **tone, vocabulary, and sentence simplicity** of the given readability level. * Logical **flow and coherence**. * No extra, invented information beyond what’s in the reference summary. --- ### **INPUT FIELDS** **Reference summary:** {reference_summary} **Current generated summary ({difficulty_level}):** {generated_summary} **Missing important subclaims to add back:** {list_of_missing_subclaims} **Target readability level:** {difficulty_level} --- ### **TASK INSTRUCTIONS** 1. Integrate the missing subclaims **smoothly** into the generated summary. 2. Do **not** add any new facts beyond those listed. 3. Maintain the **same readability level**: * **Easy:** conversational, short sentences, no jargon. * **Intermediate:** light medical terms, brief explanations. * **Hard:** concise clinical tone with correct terminology. 4. Keep the summary approximately the same length; avoid redundancy. 5. Ensure the resulting text remains **fluent, coherent, and faithful** to the reference summary. 
--- ### **OUTPUT FORMAT** ```json {{ "revised_summary": "", "explanation": "" }} ``` '''


# --- OpenAI client ---------------------------------------------------------
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as file:
    api_keys = json.load(file)
openai_api_key = api_keys.get("openai")
client = OpenAI(api_key=openai_api_key)


def openai_return(prompt):
    """Send *prompt* to the chat model and return its reply parsed as JSON.

    Raises
    ------
    json.JSONDecodeError
        If the model reply is not valid JSON after stripping code fences.
    """
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    # Strip Markdown code fences the model sometimes wraps around its JSON.
    cleaned_response = (
        response.choices[0].message.content.strip()
        .replace("```json", "")
        .replace("```", "")
    )
    return json.loads(cleaned_response)


# --- Input data ------------------------------------------------------------
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
with open(file_path, 'r') as f:
    synthetic_data = json.load(f)

# Completeness report: per (id, difficulty_level), an 'evaluation_table' whose
# rows mark each subclaim's omission as reasonable ("yes") or not ("no").
with open("/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5_completeness.json", 'r') as f:
    readability_reasoning = json.load(f)

# Map (sample id, difficulty level) -> subclaims whose omission was NOT reasonable,
# i.e. the important information that must be added back into the summary.
reason_info = {}
for item in readability_reasoning:
    key = (item['id'], item['difficulty_level'])
    for row in item['prompt']['evaluation_table']:
        if row['reasonable_omission'] == "no":
            reason_info.setdefault(key, []).append(row['subclaim'])

# Loaded for reference; not used by the revision loop below.
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)
# ---------------------------------------------------------------------------
# Driver: for each sample/readability version with important missing
# subclaims, ask the model to revise the summary, checkpointing as we go.
# ---------------------------------------------------------------------------
import os

import tqdm

save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/results_revised_100_gpt5.json"

# Resume from any previous partial run.
res = []
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)

# Computed unconditionally so a fresh run (no save file yet) does not crash
# with a NameError on the membership test inside the loop.
existing_check = set((entry['id'], entry['difficulty_level']) for entry in res)
print(f"Resuming from {len(res)} entries")

for ind in tqdm.tqdm(range(0, 100)):
    for version in ["easy", "intermediate", "hard"]:
        sample_id = synthetic_data[ind]['id']
        if (sample_id, version) in existing_check:
            continue  # already revised in a previous run
        if (sample_id, version) not in reason_info:
            continue  # no unreasonably-omitted subclaims for this version

        reference_summary = synthetic_data[ind]['ref_summary']['text']
        generated_summary = synthetic_data[ind]['readability_versions'][version]['text']
        subclaims_results = reason_info[(sample_id, version)]
        prompt = revised_results(reference_summary, generated_summary, subclaims_results, version)

        try:
            ans = openai_return(prompt)
            res.append({
                "id": sample_id,
                "difficulty_level": version,
                "prompt": prompt,
                "response": ans,
            })
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
            # Checkpoint after each successful call so a crash loses at most
            # the in-flight item.
            with open(save_path, 'w') as outfile:
                json.dump(res, outfile, indent=2)
        except Exception as e:
            # Log and continue: one bad sample (API error, malformed JSON
            # reply) should not abort the whole run.
            print(f"Error at index {ind}, version {version}: {e}")

# Final save of everything accumulated this run.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)