File size: 4,397 Bytes
030876e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import json
import tqdm
import argparse
from openai import OpenAI
import re

# -----------------------------
#  CONFIGURATION
# -----------------------------
# Endpoint of an ALREADY RUNNING vLLM server hosting Qwen3-30B-A3B-Instruct.
# NOTE(review): hard-coded internal IP — assumes the server is reachable from
# wherever this script runs; confirm before deploying elsewhere.
API_URL = "http://172.16.34.29:8004/v1" 
# vLLM does not check the key, but the OpenAI client requires a non-empty value.
API_KEY = "EMPTY"
# Must match the model name (or served alias) the vLLM server was launched with.
MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507" 

# Shared OpenAI-compatible client; reused for every request in this script.
client = OpenAI(base_url=API_URL, api_key=API_KEY)

# -----------------------------
#  REASONING PROMPT
# -----------------------------
def reasoning_prompt(text, subclaim):
    """Build the audit prompt asking the model to re-verify a 'not_supported' subclaim.

    Args:
        text: The source medical text to search for evidence.
        subclaim: The subclaim previously flagged as 'not_supported'.

    Returns:
        A single prompt string instructing the model to reason and then
        conclude with exactly 'supported' or 'not_supported'.
    """
    prompt = f"""You are a senior clinical data validator. A previous automated system flagged a subclaim as 'not_supported'. Your job is to perform a deep-dive reasoning to verify if that judgment was correct.

### CONTEXT:
Medical Text: {text}
Subclaim: {subclaim}

### TASK:
1. Analyze the text for any paraphrased evidence, synonyms, or implicit support for the subclaim.
2. Determine if the previous 'not_supported' label was a "False Negative" (it actually is supported) or a "True Negative" (it is definitely not in the text).
3. Be strict: If the text truly doesn't mention the specifics, stick with 'not_supported'.

### OUTPUT FORMAT:
Provide your internal reasoning first, then conclude with exactly one word: 'supported' or 'not_supported'."""
    return prompt

# -----------------------------
#  LOGIC TO EXTRACT THINKING & LABEL
# -----------------------------
def get_reasoned_verdict(text: str, subclaim: str):
    """Ask the LLM to re-audit a flagged subclaim and parse its verdict.

    Args:
        text: The source medical text.
        subclaim: The subclaim previously labeled 'not_supported'.

    Returns:
        A (reasoning, label) tuple where label is one of 'supported',
        'not_supported', 'inconclusive', or 'error_api' on request failure.
    """
    prompt = reasoning_prompt(text, subclaim)

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1,  # keep it low for consistent verdicts
        )
        # content can legally be None on the OpenAI client; guard before parsing.
        full_content = response.choices[0].message.content or ""

        # vLLM usually wraps Qwen3-A3B reasoning in <think>...</think> tags.
        think_match = re.search(r"<think>(.*?)</think>", full_content, re.DOTALL)
        if think_match:
            reasoning = think_match.group(1).strip()
            final_output = full_content.split("</think>")[-1].strip().lower()
        else:
            # Fallback if tags aren't present
            reasoning = "No explicit <think> tags provided."
            final_output = full_content.strip().lower()

        # Normalize spelling variants ("not supported", "not-supported") so
        # they are not misread as a bare "supported" by the substring check.
        normalized = re.sub(r"not[\s\-]+supported", "not_supported", final_output)
        if "not_supported" in normalized:
            label = "not_supported"
        elif "supported" in normalized:
            label = "supported"
        else:
            label = "inconclusive"

        return reasoning, label

    except Exception as e:
        # Broad catch is deliberate: network/HTTP/parse errors all map to a
        # sentinel label so the batch run keeps going.
        print(f"Error: {e}")
        return str(e), "error_api"

# -----------------------------
#  MAIN PROCESSING
# -----------------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Provide the path to the JSON generated by your FIRST script
    parser.add_argument("--input_file", type=str, required=True)
    parser.add_argument("--save_path", type=str, default="/home/mshahidul/readctrl/data/reasoning/")
    args = parser.parse_args()

    with open(args.input_file, "r") as f:
        data = json.load(f)

    # Ensure the output directory exists and join paths safely (the old
    # string concatenation broke when --save_path lacked a trailing slash).
    os.makedirs(args.save_path, exist_ok=True)
    save_path = os.path.join(args.save_path, f"refined_{os.path.basename(args.input_file)}")
    print(f"Loaded {len(data)} documents. Starting reasoning audit...")

    for doc in tqdm.tqdm(data):
        full_text = doc.get('fulltext', '')

        for eval_item in doc.get('subclaim_evaluations', []):
            # Only re-audit entries the first model said 'not_supported';
            # .get avoids a KeyError on malformed entries.
            if eval_item.get('support_label') == "not_supported":
                subclaim = eval_item['subclaim']

                reasoning, new_label = get_reasoned_verdict(full_text, subclaim)

                # Update the entry with the new insights
                eval_item['original_label'] = "not_supported"
                eval_item['reasoning_audit'] = reasoning
                eval_item['support_label'] = new_label  # Overwriting with refined label
                eval_item['is_refined'] = True
            else:
                eval_item['is_refined'] = False

        # Checkpoint after every document to avoid data loss on interruption
        # (rewrites the full JSON each time — acceptable for modest datasets).
        with open(save_path, "w") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Refinement complete. Saved to {save_path}")