readctrl / code /old /resonability_check_completeness_openai_V3.py

Add files using upload-large-folder tool

1db7196 verified 27 days ago

4.82 kB

	import os, json
	def return_prompts(
	    reference_summary: str,
	    generated_summary: str,
	    subclaims_json: str,
	    difficulty_level: str,
	) -> str:
	    """Build the prompt asking the model to judge subclaim inclusion/omission.

	    The four arguments are interpolated verbatim into a fixed instruction
	    template that asks for one evaluation per subclaim, returned as JSON.

	    Args:
	        reference_summary: Text placed under "Reference Summary".
	        generated_summary: Text placed under "Generated Summary".
	        subclaims_json: Text placed under "Subclaims with Support Results".
	        difficulty_level: Text placed after "Readability Level:".

	    Returns:
	        The complete prompt string.
	    """
	    # Literal braces of the JSON example are doubled because this is an
	    # f-string. Pipes are written bare: a backslash before "|" is an invalid
	    # escape sequence in a non-raw string literal and would also leak stray
	    # backslashes into the prompt's table.
	    prompt = f'''
	You are a medical summarization quality evaluator.
	Your goal is to decide whether the inclusion or omission of each subclaim in the generated summary is reasonable, given the target readability level.

	---

	### Input

	```
	Readability Level: {difficulty_level}

	Reference Summary:
	{reference_summary}

	Generated Summary:
	{generated_summary}

	Subclaims with Support Results:
	{subclaims_json}
	```

	---

	### Task

	For each subclaim:

	1. Read `result`:

	* `1` = the subclaim is supported or clearly mentioned in the generated summary.
	* `0` = the subclaim is missing or not supported.

	2. Based on readability level and medical relevance, decide whether this inclusion/omission is reasonable, partially reasonable, or unreasonable.

	3. Provide a short justification (1–2 sentences) explaining your reasoning.

	---

	### Output Format

	Return structured JSON:

	```json
	{{
	"readability_level": "<easy/intermediate/hard>",
	"evaluations": [
	{{
	"subclaim_id": <id>,
	"subclaim_text": "<text>",
	"result": <0 or 1>,
	"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
	"justification": "<short explanation>"
	}},
	...
	]
	}}
	```

	---

	### Evaluation Guidelines

	| Readability Level | Reasonable Omission | Unreasonable Omission |
	| ----------------- | ------------------------------------------------------------ | ------------------------------------------------- |
	| Easy | Technical, anatomical, quantitative, or procedural details. | Key clinical findings, diagnoses, or outcomes. |
	| Intermediate | Minor imaging details or measurements. | Any main diagnostic finding or cause–effect link. |
	| Hard | Very few omissions acceptable; mostly stylistic compression. | Any missing clinical or diagnostic information. |

	'''
	    return prompt

	from openai import OpenAI

	# Read the API credentials from a local JSON file and build the shared client.
	file_path = "/home/mshahidul/api_new.json"
	with open(file_path, "r", encoding="utf-8") as file:
	    api_keys = json.load(file)

	# NOTE(review): .get() yields None when the "openai" entry is absent, and that
	# None is passed straight to OpenAI(); confirm this fall-through is intended.
	openai_api_key = api_keys.get("openai")

	client = OpenAI(api_key=openai_api_key)
	def openai_return(prompt):
	    """Send ``prompt`` as a single user message and return the reply parsed as JSON.

	    Uses the module-level ``client`` with model "gpt-5" and a fixed system
	    message.

	    Args:
	        prompt: The user-message text.

	    Returns:
	        The Python object decoded from the JSON in the model's reply.

	    Raises:
	        ValueError: If the reply has no text content. ``json.JSONDecodeError``
	            (a ``ValueError`` subclass) is raised if no JSON value can be
	            decoded from the reply.
	    """
	    response = client.chat.completions.create(
	        model="gpt-5",
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant."},
	            {"role": "user", "content": prompt},
	        ],
	    )
	    content = response.choices[0].message.content
	    # The API documents message content as nullable; fail with a clear message
	    # instead of an AttributeError on .strip().
	    if not content:
	        raise ValueError("OpenAI response contained no text content")

	    # Drop Markdown code-fence markers the model may wrap around the JSON.
	    cleaned_response = content.strip().replace("```json", "").replace("```", "")
	    try:
	        return json.loads(cleaned_response)
	    except json.JSONDecodeError:
	        # Fallback for replies that surround the JSON with prose: decode the
	        # first JSON value that starts at the earliest "{" or "[".
	        starts = [
	            pos
	            for pos in (cleaned_response.find("{"), cleaned_response.find("["))
	            if pos != -1
	        ]
	        if not starts:
	            raise
	        parsed, _ = json.JSONDecoder().raw_decode(cleaned_response, min(starts))
	        return parsed

	import json
	# Driver: for each item and readability version, ask the model to judge the
	# subclaim results and accumulate the answers in a resumable JSON file.
	file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"

	with open(file_path, "r", encoding="utf-8") as f:
	    synthetic_data = json.load(f)

	file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"

	with open(file_path_qwen3_32B, "r", encoding="utf-8") as f:
	    qwen3_32B_results = json.load(f)

	# Per the original author's note, each synthetic_data entry has the keys
	# 'id', 'full_text', 'ref_summary', 'readability_versions'.
	res = []
	save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_data_resonability_check_20_gpt5.json"
	if os.path.exists(save_path):
	    with open(save_path, "r", encoding="utf-8") as f:
	        res = json.load(f)
	# Built unconditionally so the lookup below also works on a fresh run
	# (no save file yet), where it is simply empty.
	exist_check_ids = {(item["id"], item["difficulty_level"]) for item in res}
	print(f"Resuming from {len(res)} entries")

	import tqdm

	versions = ["easy", "intermediate", "hard"]
	# Process at most the first 20 items, never more than either input provides.
	# NOTE(review): the two inputs are paired by list position — confirm they are
	# index-aligned.
	num_items = min(20, len(synthetic_data), len(qwen3_32B_results))
	total_expected = num_items * len(versions)

	for ind in tqdm.tqdm(range(num_items)):
	    print(f"Processing index: {ind}")
	    item = synthetic_data[ind]
	    for version in versions:
	        if (item["id"], version) in exist_check_ids:
	            print(f"Skipping {item['id']} {version}")
	            continue
	        ref_summary = f"{item['ref_summary']['text']}"
	        generated_summary = f"{item['readability_versions'][version]['text']}"
	        # NOTE(review): this is the Python repr of the results object, not
	        # json.dumps output, although the prompt parameter is named
	        # subclaims_json. Kept as-is so existing runs stay comparable.
	        subclaims_results = f"{qwen3_32B_results[ind]['completeness']['results']}"
	        try:
	            prompt = return_prompts(ref_summary, generated_summary, subclaims_results, version)
	            res.append({
	                "id": item["id"],
	                "difficulty_level": version,
	                "reasonableness": openai_return(prompt),
	            })
	            # Checkpoint every second result so an interruption loses little.
	            if len(res) % 2 == 0:
	                print(f"Completed {len(res)} out of {total_expected}")
	                with open(save_path, "w", encoding="utf-8") as outfile:
	                    json.dump(res, outfile, indent=2)
	        except Exception as e:
	            # Best effort: report the failure and continue with the next pair;
	            # a later run retries it because it never reached `res`.
	            print(f"Error at {ind} {version}: {e}")

	# Final write so an odd-numbered last result is not lost.
	with open(save_path, "w", encoding="utf-8") as outfile:
	    json.dump(res, outfile, indent=2)