"""Audit previously flagged 'not_supported' labels with an LLM judge.

Re-checks two kinds of prior verdicts in a fact-checking JSON file:
  * attribution:  is a summary subclaim actually supported by the source text?
  * completeness: is a source fact actually present in the summary?
Only items currently labeled 'not_supported' are re-audited (False Negative
hunting); refined items keep an audit trail (original label + reasoning).
"""

import argparse
import json
import os
import re

import tqdm
from openai import OpenAI

# -----------------------------
# CONFIGURATION
# -----------------------------
API_URL = "http://172.16.34.29:8004/v1"
API_KEY = "EMPTY"
MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507"

client = OpenAI(base_url=API_URL, api_key=API_KEY)

# Compiled once: extracts the model's reasoning between <think> ... </think>.
# DOTALL so the reasoning may span multiple lines.
_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)

# -----------------------------
# REASONING PROMPT
# -----------------------------
def reasoning_prompt(reference_text, statement, task_type="attribution"):
    """Build the judge prompt for one flagged item.

    Args:
        reference_text: The text searched for evidence — the source medical
            text (attribution) or the generated summary (completeness).
        statement: The subclaim (attribution) or source fact (completeness)
            whose 'not_supported' label is being re-audited.
        task_type: "attribution" or anything else for the completeness check.

    Returns:
        The full prompt string. Both variants instruct the model to reason
        inside <think></think> tags and then emit exactly one final word,
        which is what get_reasoned_verdict() parses.
    """
    if task_type == "attribution":
        # Checking if a summary subclaim is supported by the source medical text
        return f"""You are a senior clinical data validator. A previous system flagged a subclaim as 'not_supported' by the medical text. Verify if this is a False Negative. ### CONTEXT: Medical Text (Source): {reference_text} Subclaim (from Summary): {statement} ### TASK: 1. Search the Medical Text for paraphrased evidence or implicit support for the Subclaim. 2. Determine if it is 'supported' or 'not_supported'. ### OUTPUT FORMAT: Provide internal reasoning in <think></think> tags, then conclude with exactly one word: 'supported' or 'not_supported'."""
    else:
        # Checking if a source fact is actually present in the summary (Completeness)
        return f"""You are a senior clinical data validator. A system flagged that a specific fact from the source medical text is missing ('not_supported') from the summary. Verify if the summary actually contains this information. ### CONTEXT: Summary Text: {reference_text} Source Fact: {statement} ### TASK: 1. Search the Summary Text for the Source Fact. Look for synonyms or condensed mentions. 2. If the summary contains the info, label it 'supported'. If truly missing, label it 'not_supported'. ### OUTPUT FORMAT: Provide internal reasoning in <think></think> tags, then conclude with exactly one word: 'supported' or 'not_supported'."""

# -----------------------------
# LOGIC TO EXTRACT THINKING & LABEL
# -----------------------------
def get_reasoned_verdict(reference: str, statement: str, task_type: str):
    """Query the judge model and parse (reasoning, label) from its reply.

    Returns:
        (reasoning, label) where label is one of 'supported',
        'not_supported', 'inconclusive', or 'error_api' (reasoning then
        carries the exception text).

    Note: 'not_supported' is tested before 'supported' because the former
    contains the latter as a substring.
    """
    prompt = reasoning_prompt(reference, statement, task_type)
    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,
        )
        # Guard against a None content field.
        full_content = response.choices[0].message.content or ""

        # BUGFIX: the previous version tested `"" in full_content` (always
        # True) and called split("") (ValueError) — every call ended up in
        # the except branch as 'error_api'. Parse the <think> block properly.
        match = _THINK_RE.search(full_content)
        if match:
            reasoning = match.group(1).strip()
            # The verdict word is whatever follows the closing tag.
            final_output = full_content.split("</think>")[-1].strip().lower()
        else:
            reasoning = "No explicit tags provided."
            final_output = full_content.strip().lower()

        if "not_supported" in final_output:
            label = "not_supported"
        elif "supported" in final_output:
            label = "supported"
        else:
            label = "inconclusive"

        return reasoning, label
    except Exception as e:
        # Best-effort: surface the error text as the "reasoning" and tag the
        # item so downstream consumers can filter failed audits.
        return str(e), "error_api"

# -----------------------------
# MAIN PROCESSING
# -----------------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--save_path", type=str, default="/home/mshahidul/readctrl/data/reasoning/")
    args = parser.parse_args()

    os.makedirs(args.save_path, exist_ok=True)

    # Explicit UTF-8: the output is dumped with ensure_ascii=False, so we
    # must not depend on the platform's default locale encoding.
    with open(args.input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    save_filename = f"refined_v2_{os.path.basename(args.input_file)}"
    full_save_path = os.path.join(args.save_path, save_filename)

    print(f"Processing {len(data)} documents...")

    for doc in tqdm.tqdm(data):
        # We need the source text for Attribution and the summary text for Completeness
        # Assuming 'fulltext' is the source and 'summary' is the generated summary
        source_text = doc.get('fulltext', '')
        summary_text = doc.get('summary', '')  # Ensure this key matches your JSON

        # 1. Audit Attribution Details
        if 'attribution_details' in doc:
            for item in doc['attribution_details']:
                if item.get('label') == "not_supported":
                    reasoning, new_label = get_reasoned_verdict(source_text, item.get('subclaim', ''), "attribution")
                    item['original_label'] = "not_supported"
                    item['reasoning_audit'] = reasoning
                    item['label'] = new_label
                    item['is_refined'] = True

        # 2. Audit Completeness Details
        if 'completeness_details' in doc:
            for item in doc['completeness_details']:
                if item.get('present_in_summary') == "not_supported":
                    # Here we check if the 'source_fact' is in the 'summary_text'
                    reasoning, new_label = get_reasoned_verdict(summary_text, item.get('source_fact', ''), "completeness")
                    item['original_label'] = "not_supported"
                    item['reasoning_audit'] = reasoning
                    item['present_in_summary'] = new_label
                    item['is_refined'] = True

        # Save state periodically (after every document) so a crash or
        # interrupt loses at most the in-flight document.
        with open(full_save_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Refinement complete. Saved to {full_save_path}")