# readctrl / code / old / resonability_check_completeness_openai_V1.py
# (Hugging Face upload-page residue: "shahidul034's picture",
#  "Add files using upload-large-folder tool", commit c7a6fe6 verified)
import os, json
def return_promptst(reference_summary, generated_summary, subclaims_json, difficulty_level):
    """Build the evaluation prompt for the omission-reasonableness check.

    Parameters
    ----------
    reference_summary : str
        The gold reference summary for the case.
    generated_summary : str
        The readability-controlled summary being judged.
    subclaims_json : str
        Stringified subclaim-verifier output (entries with ``"result": 0``
        are the omitted subclaims the model must evaluate).
    difficulty_level : str
        One of ``"easy"``, ``"intermediate"``, ``"hard"``.

    Returns
    -------
    str
        The full prompt; it instructs the model to answer in strict JSON.
    """
    # NOTE: the prompt previously said `"result": 0"` (stray trailing quote),
    # which misquoted the field the model must match; fixed to `"result": 0`.
    prompt = f'''
**SYSTEM / ROLE INSTRUCTION:**
You are a **medical readability evaluator**.
Your task is to judge whether omitted subclaims (those with `"result": 0`) from a generated summary are *reasonably omitted* based on the intended **readability level**: *easy*, *intermediate*, or *hard*.
You evaluate this from the standpoint of clarity, faithfulness, and readability goals.
---
### **READABILITY GUIDELINES**
| Level | Target Audience | Content Expectation | Technical Detail Allowed |
| :--------------- | :--------------------------------------- | :-------------------------------------------------------------- | :--------------------------------------------------------------- |
| **Easy** | General public | Focus on main events, outcomes, and diagnoses in plain Spanish. | Minimal β€” avoid measurements, anatomy, and test results. |
| **Intermediate** | Educated lay readers or medical students | Include key findings and procedures in simplified form. | Moderate β€” basic terms and causes allowed. |
| **Hard** | Medical professionals | Retain most technical information and precision. | High β€” measurements, anatomy, and test interpretations expected. |
---
### **INPUT FIELDS**
**Reference summary:**
{reference_summary}
**Generated summary ({difficulty_level}):**
{generated_summary}
**Subclaims and results:**
{subclaims_json}
---
### **TASK INSTRUCTIONS**
1. Focus on subclaims with `"result": 0` (not supported by the generated summary).
2. For each omitted subclaim:
* Decide whether omission is **reasonable** given the readability level.
* Label as: `"yes"`, `"no"`, or `"borderline"`.
* Write a brief justification (1–2 sentences).
3. After individual evaluations, assign a **reasonableness score (0–5)** using this scale:
* **5** = All omissions appropriate for target readability.
* **4** = Minor omissions could improve completeness.
* **3** = Some omissions reduce understanding or medical clarity.
* **2** = Many important omissions harm faithfulness.
* **1** = Major omissions misrepresent case.
* **0** = Summary fails to reflect key medical information.
4. End with an **overall explanation (3–5 sentences)** describing:
* The main reasoning behind the score.
* Whether the summary fits its intended readability level.
* Suggestions for improvement if needed.
---
### **OUTPUT FORMAT (strict JSON)**
```json
{{
"evaluation_table": [
{{
"id": <subclaim_id>,
"subclaim": "<text>",
"reasonable_omission": "<yes | no | borderline>",
"explanation": "<short reason>"
}}
],
"reasonableness_score": <0-5>,
"overall_explanation": "<concise paragraph>"
}}
```
'''
    return prompt
from openai import OpenAI

# Read the OpenAI API key from the local credentials file.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as key_file:
    api_keys = json.load(key_file)
openai_api_key = api_keys.get("openai")

# Single client instance shared by every request made in this script.
client = OpenAI(api_key=openai_api_key)
def openai_return(prompt):
    """Send *prompt* to the chat model and return the reply parsed as JSON.

    The model is instructed to answer in strict JSON but often wraps it in a
    Markdown ```json fence; only a *surrounding* fence is stripped here.
    (The previous blanket ``.replace("```", "")`` would also corrupt any
    backticks occurring inside the JSON payload itself.)

    Raises
    ------
    json.JSONDecodeError
        If the de-fenced reply is not valid JSON.
    """
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    text = response.choices[0].message.content.strip()
    # Drop a leading ```json / ``` fence line, if present.
    if text.startswith("```"):
        text = text.split("\n", 1)[1] if "\n" in text else text.lstrip("`")
    # Drop a trailing closing fence, if present.
    if text.endswith("```"):
        text = text[:-3]
    return json.loads(text.strip())
import json  # NOTE(review): json is already imported at the top of the file; duplicate left as-is.

# 100 synthetic cases; each entry has keys
# ['id', 'full_text', 'ref_summary', 'readability_versions'] (see note below).
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
with open(file_path, 'r') as f:
    synthetic_data = json.load(f)

# Subclaim-verifier (Qwen3-32B) results, aligned by list index with synthetic_data.
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)

# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
# print(f"Full text: {synthetic_data[0]['full_text']}")

# Accumulated evaluation records; reloaded from disk so an interrupted run can resume.
res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5.json"
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"Resuming from {len(res)} entries")
import tqdm

# Each case index appends up to 3 entries (easy / intermediate / hard), so the
# resume point is len(res) // 3 — NOT len(res), which treated the entry count
# as a case index and skipped roughly two thirds of the remaining cases.
# NOTE(review): if a previous run errored mid-case, len(res) is not a multiple
# of 3 and the partial case is reprocessed, which can duplicate some versions.
for ind in tqdm.tqdm(range(len(res) // 3, 100)):
    print(f"Processing index: {ind}")
    for version in ["easy", "intermediate", "hard"]:
        ref_summary = f"{synthetic_data[ind]['ref_summary']['text']}"
        generated_summary = f"{synthetic_data[ind]['readability_versions'][version]['text']}"
        subclaims_results = f"{qwen3_32B_results[ind]['completeness']['results']}"
        try:
            prompt = return_promptst(ref_summary, generated_summary, subclaims_results, version)
            # Key is named "prompt" but actually stores the parsed model reply
            # (kept as-is: downstream consumers read this key).
            res.append({
                "id": synthetic_data[ind]['id'],
                "difficulty_level": version,
                "prompt": openai_return(prompt)
            })
            # Checkpoint every other entry so progress survives crashes.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of 300")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best-effort: log the failure and continue with the next version.
            print(f"Error at {ind} {version}: {e}")
            # print(prompt)
            # assert False

# Final write so the last (odd-count) entries are persisted too.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)