readCtrl_lambda / code /reasoning /ressoning_qwen3-30B-a3b.py

mshahidul

Initial commit of readCtrl code without large models

030876e 7 days ago

4.4 kB

	import os
	import json
	import tqdm
	import argparse
	from openai import OpenAI
	import re

	# -----------------------------
	# CONFIGURATION
	# -----------------------------
	# Pointing to your ALREADY RUNNING vLLM server (Qwen3-30B-A3B-Instruct)
	API_URL = "http://172.16.34.29:8004/v1"
	API_KEY = "EMPTY"
	# This model name should match what vLLM expects (often the path or the alias)
	MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507"

	client = OpenAI(base_url=API_URL, api_key=API_KEY)

	# -----------------------------
	# REASONING PROMPT
	# -----------------------------
	def reasoning_prompt(text, subclaim):
	return f"""You are a senior clinical data validator. A previous automated system flagged a subclaim as 'not_supported'. Your job is to perform a deep-dive reasoning to verify if that judgment was correct.

	### CONTEXT:
	Medical Text: {text}
	Subclaim: {subclaim}

	### TASK:
	1. Analyze the text for any paraphrased evidence, synonyms, or implicit support for the subclaim.
	2. Determine if the previous 'not_supported' label was a "False Negative" (it actually is supported) or a "True Negative" (it is definitely not in the text).
	3. Be strict: If the text truly doesn't mention the specifics, stick with 'not_supported'.

	### OUTPUT FORMAT:
	Provide your internal reasoning first, then conclude with exactly one word: 'supported' or 'not_supported'."""

	# -----------------------------
	# LOGIC TO EXTRACT THINKING & LABEL
	# -----------------------------
	def get_reasoned_verdict(text: str, subclaim: str):
	prompt = reasoning_prompt(text, subclaim)

	try:
	response = client.chat.completions.create(
	model=MODEL_NAME,
	messages=[{"role": "user", "content": prompt}],
	temperature=0.1, # Keep it low for consistency
	)
	full_content = response.choices[0].message.content

	# Extract reasoning (vLLM usually includes <think> tags for Qwen3-A3B)
	reasoning = ""
	if "<think>" in full_content and "</think>" in full_content:
	reasoning = re.search(r"<think>(.*?)</think>", full_content, re.DOTALL).group(1).strip()
	final_output = full_content.split("</think>")[-1].strip().lower()
	else:
	# Fallback if tags aren't present
	reasoning = "No explicit <think> tags provided."
	final_output = full_content.strip().lower()

	# Final label extraction
	if "not_supported" in final_output:
	label = "not_supported"
	elif "supported" in final_output:
	label = "supported"
	else:
	label = "inconclusive"

	return reasoning, label

	except Exception as e:
	print(f"Error: {e}")
	return str(e), "error_api"

	# -----------------------------
	# MAIN PROCESSING
	# -----------------------------
	if __name__ == "__main__":
	parser = argparse.ArgumentParser()
	# Provide the path to the JSON generated by your FIRST script
	parser.add_argument("--input_file", type=str, required=True)
	parser.add_argument("--save_path", type=str, default="/home/mshahidul/readctrl/data/reasoning/")
	args = parser.parse_args()

	with open(args.input_file, "r") as f:
	data = json.load(f)
	save_path = args.save_path+f"refined_{os.path.basename(args.input_file)}"
	print(f"Loaded {len(data)} documents. Starting reasoning audit...")

	for doc in tqdm.tqdm(data):
	full_text = doc.get('fulltext', '')

	for eval_item in doc.get('subclaim_evaluations', []):
	# Only process if the first model said 'not_supported'
	if eval_item['support_label'] == "not_supported":
	subclaim = eval_item['subclaim']

	reasoning, new_label = get_reasoned_verdict(full_text, subclaim)

	# Update the entry with the new insights
	eval_item['original_label'] = "not_supported"
	eval_item['reasoning_audit'] = reasoning
	eval_item['support_label'] = new_label # Overwriting with refined label
	eval_item['is_refined'] = True
	else:
	eval_item['is_refined'] = False

	# Save every document to avoid data loss
	with open(save_path, "w") as f:
	json.dump(data, f, indent=2, ensure_ascii=False)

	print(f"Refinement complete. Saved to {save_path}")