import argparse
import json
import os
import re

import tqdm
from openai import OpenAI

# -----------------------------
# CONFIGURATION
# -----------------------------
# Pointing to your ALREADY RUNNING vLLM server (Qwen3-30B-A3B-Instruct)
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"

# This model name should match what vLLM expects (often the path or the alias)
MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507"

client = OpenAI(base_url=API_URL, api_key=API_KEY)

# Pre-compiled once: extracts the model's <think>...</think> reasoning block.
# DOTALL lets '.' span newlines, since the reasoning is multi-line.
_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)


# -----------------------------
# REASONING PROMPT
# -----------------------------
def reasoning_prompt(text: str, subclaim: str) -> str:
    """Build the deep-dive audit prompt for re-verifying a 'not_supported' subclaim.

    Args:
        text: The source medical text the subclaim was checked against.
        subclaim: The subclaim previously labeled 'not_supported'.

    Returns:
        The full prompt string to send to the model.
    """
    return f"""You are a senior clinical data validator. A previous automated system flagged a subclaim as 'not_supported'. Your job is to perform a deep-dive reasoning to verify if that judgment was correct.

### CONTEXT:
Medical Text: {text}
Subclaim: {subclaim}

### TASK:
1. Analyze the text for any paraphrased evidence, synonyms, or implicit support for the subclaim.
2. Determine if the previous 'not_supported' label was a "False Negative" (it actually is supported) or a "True Negative" (it is definitely not in the text).
3. Be strict: If the text truly doesn't mention the specifics, stick with 'not_supported'.

### OUTPUT FORMAT:
Provide your internal reasoning first, then conclude with exactly one word: 'supported' or 'not_supported'."""


# -----------------------------
# LOGIC TO EXTRACT THINKING & LABEL
# -----------------------------
def get_reasoned_verdict(text: str, subclaim: str):
    """Query the model for a re-audit of one subclaim.

    Args:
        text: The source medical text.
        subclaim: The subclaim under review.

    Returns:
        (reasoning, label) where label is one of 'supported', 'not_supported',
        'inconclusive', or 'error_api' (on any request/parsing failure; the
        error message is returned in place of the reasoning).
    """
    prompt = reasoning_prompt(text, subclaim)
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # Keep it low for consistency
        )
        full_content = response.choices[0].message.content or ""

        # Extract reasoning (vLLM usually includes <think>...</think> tags
        # for Qwen3-A3B). NOTE: the previous version had the tag literals
        # stripped, so the membership test was always true and split("")
        # raised ValueError on every call.
        match = _THINK_RE.search(full_content)
        if match:
            reasoning = match.group(1).strip()
            # The verdict is whatever follows the closing think tag.
            final_output = full_content.split("</think>")[-1].strip().lower()
        else:
            # Fallback if tags aren't present
            reasoning = "No explicit tags provided."
            final_output = full_content.strip().lower()

        # Final label extraction. Check 'not_supported' FIRST because
        # 'supported' is a substring of it.
        if "not_supported" in final_output:
            label = "not_supported"
        elif "supported" in final_output:
            label = "supported"
        else:
            label = "inconclusive"

        return reasoning, label

    except Exception as e:
        print(f"Error: {e}")
        return str(e), "error_api"


# -----------------------------
# MAIN PROCESSING
# -----------------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Provide the path to the JSON generated by your FIRST script
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--save_path", type=str, default="/home/mshahidul/readctrl/data/reasoning/")
    args = parser.parse_args()

    with open(args.input_file, "r") as f:
        data = json.load(f)

    # Ensure the output directory exists before hours of API calls are spent.
    os.makedirs(args.save_path, exist_ok=True)
    save_path = os.path.join(args.save_path, f"refined_{os.path.basename(args.input_file)}")

    print(f"Loaded {len(data)} documents.\nStarting reasoning audit...")

    for doc in tqdm.tqdm(data):
        full_text = doc.get('fulltext', '')
        for eval_item in doc.get('subclaim_evaluations', []):
            # Only process if the first model said 'not_supported'
            if eval_item['support_label'] == "not_supported":
                subclaim = eval_item['subclaim']
                reasoning, new_label = get_reasoned_verdict(full_text, subclaim)

                # Update the entry with the new insights
                eval_item['original_label'] = "not_supported"
                eval_item['reasoning_audit'] = reasoning
                eval_item['support_label'] = new_label  # Overwriting with refined label
                eval_item['is_refined'] = True
            else:
                eval_item['is_refined'] = False

        # Save after every document to avoid data loss if the run is interrupted
        with open(save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Refinement complete. Saved to {save_path}")