def inference_prompt_revise_summary(fulltext, ref_summary, generated_summary, version, missing_subclaims):
    """Build the LLM prompt asking for a readability-controlled summary revision.

    Args:
        fulltext: Source document text, inserted as context.
        ref_summary: Reference summary, inserted for comparison only.
        generated_summary: The summary the model is asked to revise.
        version: Readability level name; the prompt describes the styles
            "easy", "intermediate" and "hard".
        missing_subclaims: Subclaims to add back. An iterable is rendered as
            one "- " bullet per subclaim; a string is inserted unchanged;
            ``None`` yields an empty section.

    Returns:
        The prompt string. It instructs the model to answer with a JSON
        object holding a single "revised_summary" field.
    """
    # Render the subclaims as a plain bullet list instead of interpolating a
    # Python list repr (brackets, mixed quoting, escapes) into the prompt.
    if missing_subclaims is None:
        subclaims_block = ""
    elif isinstance(missing_subclaims, str):
        subclaims_block = missing_subclaims
    else:
        subclaims_block = "\n".join(f"- {subclaim}" for subclaim in missing_subclaims)

    prompt = f"""
You are a medical summarization model specialized in readability-controlled text revision.

Your task is to improve the **Generated Summary** by adding back the key missing clinical information listed under **Missing Subclaims**, while keeping the readability style defined for the level **{version}**.

Do not copy the reference summary. Keep coherence, brevity, and correctness.

---

### INPUT

**Full Text (for context):**
{fulltext}

**Reference Summary (for comparison only):**
{ref_summary}

**Generated Summary (to revise):**
{generated_summary}

**Missing Subclaims (to integrate naturally):**
{subclaims_block}

---

### READABILITY STYLES

- **easy (FH 70–100, grade 5–7):**
- Short sentences, familiar vocabulary, concrete ideas.
- Avoid subordinate clauses and medical jargon.
- Tone: explanatory, simple, and friendly.

- **intermediate (FH 50–69, grade 8–12):**
- Moderate sentence complexity and domain vocabulary.
- Clear and structured explanation.

- **hard (FH 0–49, university/professional):**
- Use specialized terminology, formal and dense phrasing.
- Include:
- precise domain vocabulary;
- causal or analytical connectors (por consiguiente, sin embargo, dado que…);
- one definition, one process description, and one implication statement if possible;
- optional subordinate clauses for academic rhythm.

---

### OUTPUT
Return the result in the following JSON format:

{{
"revised_summary": "<your revised summary text here>"
}}

Ensure the text is coherent, medically accurate, and matches the **{version}** readability level.
"""
    return prompt
| |
|
| |
|
| | from openai import OpenAI |
| | import json |
# Read the API credentials file and build the shared OpenAI client from
# its "openai" entry.
file_path = "/home/mshahidul/api_new.json"
with open(file_path) as key_file:
    api_keys = json.loads(key_file.read())

# .get() yields None when the file has no "openai" entry.
openai_api_key = api_keys.get("openai")

client = OpenAI(api_key=openai_api_key)
def openai_return(prompt):
    """Send *prompt* to the chat model and return its reply.

    Markdown code fences (```json / ```) are removed from the reply before it
    is parsed.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The reply decoded from JSON when it parses; otherwise the cleaned
        reply text as a string.

    Raises:
        ValueError: If the reply carries no text content.
    """
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    content = response.choices[0].message.content
    if content is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("Model response contained no text content")

    # Clean once; the same text serves both the JSON parse and the fallback.
    cleaned_response = content.strip().replace("```json", "").replace("```", "")
    try:
        return json.loads(cleaned_response)
    except ValueError:
        # json.JSONDecodeError is a ValueError: the reply was not valid JSON,
        # so hand back the raw text for the caller to store.
        return cleaned_response
| | import json |
# Synthetic samples; later code reads each entry's 'id', 'ref_summary' and
# 'readability_versions' fields.
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
with open(file_path) as dataset_file:
    synthetic_data = json.loads(dataset_file.read())

# Completeness / reasonableness judgements; later code reads each entry's
# 'id', 'version' and 'completeness' fields.
with open("/home/mshahidul/readctrl/results/dataset_quality_check/completeness_resonability_check_100_qwen3-32B_v3.json") as reasoning_file:
    readability_reasoning = json.loads(reasoning_file.read())
| |
|
| | import json, ast |
| |
|
# Maps (sample id, readability version) -> list of subclaims whose
# 'reasonableness' verdict is anything other than "reasonable" or "unknown".
reason_info = {}

for item in readability_reasoning:
    # One key per item; named doc_id so the builtin id() is not shadowed.
    doc_id = item['id']
    difficulty_level = item['version']
    key = (doc_id, difficulty_level)

    for _data in item['completeness']['results']:
        raw_verdict = _data['reasonableness']
        reasonableness = raw_verdict

        # The verdict may arrive as a serialized dict: try strict JSON first,
        # then a Python literal (e.g. single-quoted keys).
        if isinstance(raw_verdict, str):
            try:
                reasonableness = json.loads(raw_verdict)
            except (ValueError, RecursionError):
                try:
                    reasonableness = ast.literal_eval(raw_verdict)
                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                    reasonableness = None

        # Anything that is not a dict (unparsable text, a bare string, a
        # list, None, ...) has no usable verdict: mark it "unknown" so it is
        # skipped below instead of crashing on .get().
        if not isinstance(reasonableness, dict):
            reasonableness = {"reasonableness": "unknown", "justification": raw_verdict}

        if reasonableness.get('reasonableness') in ["reasonable", "unknown"]:
            continue

        reason_info.setdefault(key, []).append(_data['subclaim'])
| |
|
| |
|
| |
|
# Subclaim-verifier output; loaded here but not referenced again in the
# visible part of the script.
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
with open(file_path_qwen3_32B) as results_file:
    qwen3_32B_results = json.loads(results_file.read())
| |
|
| | |
import os

import tqdm

# Index the source articles by id: full text and summary.
with open("/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_es.json", "r") as f_train:
    multiclinsum_gs_train_es = json.load(f_train)
dat_full_text = {}
dat_summary = {}
for item in multiclinsum_gs_train_es:
    dat_full_text[item['id']] = item['fulltext']
    dat_summary[item['id']] = item['summary']

# Resume support: reload earlier results so finished (id, level) pairs are
# not sent to the model again.
res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/results_revised_100_gpt5_v3.json"
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
# Built unconditionally so the membership test below also works on a fresh
# run, when no results file exists yet.
existing_check = {(entry['id'], entry['difficulty_level']) for entry in res}
print(f"Resuming from {len(res)} entries")

# Only the first 10 samples are processed; the bound is clamped so a shorter
# dataset does not raise IndexError.
for ind in tqdm.tqdm(range(0, min(10, len(synthetic_data)))):
    sample = synthetic_data[ind]
    sample_id = sample['id']
    for version in ["easy", "intermediate", "hard"]:
        key = (sample_id, version)
        if key in existing_check:
            continue
        # Nothing to restore for this sample/level: skip it.
        missing_subclaims = reason_info.get(key)
        if not missing_subclaims:
            continue
        try:
            # Lookups sit inside the try so one malformed sample is reported
            # and skipped instead of aborting the whole run.
            reference_summary = f"{sample['ref_summary']['text']}"
            generated_summary = f"{sample['readability_versions'][version]['text']}"
            prompt = inference_prompt_revise_summary(
                dat_full_text[sample_id],
                reference_summary,
                generated_summary,
                version,
                missing_subclaims,
            )
            ans = openai_return(prompt)
            res.append({
                "id": sample_id,
                "difficulty_level": version,
                "prompt": prompt,
                "response": ans
            })

            # Checkpoint every second result.
            # NOTE(review): "300" presumably means 100 samples x 3 levels,
            # while this loop covers at most 10 samples — confirm the total.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            print(f"Error at index {ind}, version {version}: {e}")

# Final write so an odd-numbered last result is not lost.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)
| |
|
| |
|