readCtrl_lambda / code /reasoning /ressoning_qwen3-30B-a3b_v2.py

mshahidul

Initial commit of readCtrl code without large models

030876e 7 days ago

5.45 kB

	import os
	import json
	import tqdm
	import argparse
	from openai import OpenAI
	import re

	# -----------------------------
	# CONFIGURATION
	# -----------------------------
	# Base URL of the OpenAI-compatible endpoint the client talks to.
	API_URL = "http://172.16.34.29:8004/v1"
	# Key handed to the client; NOTE(review): "EMPTY" looks like a placeholder
	# for a server that does not check credentials - confirm.
	API_KEY = "EMPTY"
	# Model identifier sent with every chat-completion request.
	MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507"

	# Single shared client, built once at import time and used by
	# get_reasoned_verdict for all requests.
	client = OpenAI(base_url=API_URL, api_key=API_KEY)

	# -----------------------------
	# REASONING PROMPT
	# -----------------------------
	def reasoning_prompt(reference_text, statement, task_type="attribution"):
	    """Build the re-verification prompt for one flagged item.

	    With ``task_type == "attribution"`` the prompt asks whether a summary
	    subclaim (``statement``) is supported by the source medical text
	    (``reference_text``).  Any other ``task_type`` selects the completeness
	    prompt, which asks whether a source fact (``statement``) is present in
	    the summary (``reference_text``).

	    Returns the fully formatted prompt string.
	    """
	    if task_type != "attribution":
	        # Completeness: is the source fact really missing from the summary?
	        completeness_prompt = f"""You are a senior clinical data validator. A system flagged that a specific fact from the source medical text is missing ('not_supported') from the summary.
	Verify if the summary actually contains this information.

	### CONTEXT:
	Summary Text: {reference_text}
	Source Fact: {statement}

	### TASK:
	1. Search the Summary Text for the Source Fact. Look for synonyms or condensed mentions.
	2. If the summary contains the info, label it 'supported'. If truly missing, label it 'not_supported'.

	### OUTPUT FORMAT:
	Provide internal reasoning in <think> tags, then conclude with exactly one word: 'supported' or 'not_supported'."""
	        return completeness_prompt

	    # Attribution: is the summary subclaim backed by the source medical text?
	    attribution_prompt = f"""You are a senior clinical data validator. A previous system flagged a subclaim as 'not_supported' by the medical text.
	Verify if this is a False Negative.

	### CONTEXT:
	Medical Text (Source): {reference_text}
	Subclaim (from Summary): {statement}

	### TASK:
	1. Search the Medical Text for paraphrased evidence or implicit support for the Subclaim.
	2. Determine if it is 'supported' or 'not_supported'.

	### OUTPUT FORMAT:
	Provide internal reasoning in <think> tags, then conclude with exactly one word: 'supported' or 'not_supported'."""
	    return attribution_prompt

	# -----------------------------
	# LOGIC TO EXTRACT THINKING & LABEL
	# -----------------------------
	def _parse_verdict(full_content: str):
	    """Split raw model output into ``(reasoning, label)``.

	    ``reasoning`` is the text inside the first ``<think>...</think>`` pair,
	    or a fixed placeholder when no well-formed pair exists.  ``label`` is
	    ``"not_supported"``, ``"supported"`` or ``"inconclusive"`` and is read
	    from the text after the last ``</think>`` (or from the whole output when
	    there is no well-formed pair).
	    """
	    think_match = re.search(r"<think>(.*?)</think>", full_content, re.DOTALL)
	    if think_match:
	        reasoning = think_match.group(1).strip()
	        final_output = full_content.split("</think>")[-1].strip().lower()
	    else:
	        # Also reached for malformed tags (e.g. a closing tag before the
	        # opening one), which must not be reported as an API failure.
	        reasoning = "No explicit <think> tags provided."
	        final_output = full_content.strip().lower()

	    # Negative spellings are tested first: "not supported", "not-supported"
	    # and "unsupported" all contain the substring "supported" and would
	    # otherwise be read as a positive verdict.
	    if re.search(r"not[\s_-]*supported|unsupported", final_output):
	        label = "not_supported"
	    elif "supported" in final_output:
	        label = "supported"
	    else:
	        label = "inconclusive"

	    return reasoning, label


	def get_reasoned_verdict(reference: str, statement: str, task_type: str):
	    """Ask the model to re-check one flagged item and return its verdict.

	    Args:
	        reference: Text to search in, passed to ``reasoning_prompt``.
	        statement: Subclaim or source fact to verify.
	        task_type: Prompt selector forwarded to ``reasoning_prompt``.

	    Returns:
	        A ``(reasoning, label)`` tuple.  ``label`` is ``"supported"``,
	        ``"not_supported"`` or ``"inconclusive"``.  If the request itself
	        fails, the tuple is ``(str(exception), "error_api")``.
	    """
	    prompt = reasoning_prompt(reference, statement, task_type)

	    # Only the remote call is guarded, so "error_api" really means the
	    # request failed and is never produced by a parsing problem.
	    try:
	        response = client.chat.completions.create(
	            model=MODEL_NAME,
	            messages=[{"role": "user", "content": prompt}],
	            temperature=0.1,
	        )
	        # The message content may be None; treat that as empty output.
	        full_content = response.choices[0].message.content or ""
	    except Exception as e:
	        return str(e), "error_api"

	    return _parse_verdict(full_content)

	# -----------------------------
	# MAIN PROCESSING
	# -----------------------------
	def _write_json_atomic(payload, path):
	    """Write ``payload`` as UTF-8 JSON to ``path`` without exposing a partial file."""
	    tmp_path = f"{path}.tmp"
	    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
	    # pinned instead of being left to the locale default.
	    with open(tmp_path, "w", encoding="utf-8") as f:
	        json.dump(payload, f, indent=2, ensure_ascii=False)
	    # Swap the finished file in so an interrupted run never leaves a
	    # truncated checkpoint behind.
	    os.replace(tmp_path, path)


	if __name__ == "__main__":
	    # Command line: --input_file is the JSON to audit, --save_path the
	    # directory that receives the refined copy.
	    parser = argparse.ArgumentParser()
	    parser.add_argument("--input_file", type=str, required=True)
	    parser.add_argument("--save_path", type=str, default="/home/mshahidul/readctrl/data/reasoning/")
	    args = parser.parse_args()

	    os.makedirs(args.save_path, exist_ok=True)

	    with open(args.input_file, "r", encoding="utf-8") as f:
	        data = json.load(f)

	    save_filename = f"refined_v2_{os.path.basename(args.input_file)}"
	    full_save_path = os.path.join(args.save_path, save_filename)

	    print(f"Processing {len(data)} documents...")

	    for doc in tqdm.tqdm(data):
	        # Attribution is checked against the source text, completeness
	        # against the summary.  Assumes 'fulltext' holds the source and
	        # 'summary' the generated summary - make sure the keys match the JSON.
	        source_text = doc.get('fulltext', '')
	        summary_text = doc.get('summary', '')

	        # 1. Audit attribution details: re-check subclaims flagged as unsupported.
	        for item in doc.get('attribution_details') or []:
	            if item.get('label') != "not_supported":
	                continue
	            reasoning, new_label = get_reasoned_verdict(source_text, item.get('subclaim', ''), "attribution")
	            item['original_label'] = "not_supported"
	            item['reasoning_audit'] = reasoning
	            item['label'] = new_label
	            item['is_refined'] = True

	        # 2. Audit completeness details: re-check source facts flagged as
	        #    missing from the summary.
	        for item in doc.get('completeness_details') or []:
	            if item.get('present_in_summary') != "not_supported":
	                continue
	            reasoning, new_label = get_reasoned_verdict(summary_text, item.get('source_fact', ''), "completeness")
	            item['original_label'] = "not_supported"
	            item['reasoning_audit'] = reasoning
	            item['present_in_summary'] = new_label
	            item['is_refined'] = True

	        # Save state periodically: checkpoint after every document.
	        _write_json_atomic(data, full_save_path)

	    # Final write so the output exists even when the input has no documents.
	    _write_json_atomic(data, full_save_path)

	    print(f"Refinement complete. Saved to {full_save_path}")