| | import os |
| | import json |
| | import tqdm |
| | from openai import OpenAI |
| |
|
| | |
# Endpoint configuration. Each value may be overridden through an environment
# variable so the script is not tied to a single machine; the defaults are the
# previously hard-coded values, so behavior is unchanged when nothing is set.

# Model identifier sent as the `model` field of every chat-completion request.
MODEL_PATH = os.environ.get(
    "READCTRL_MODEL_PATH",
    "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16",
)
# Base URL handed to the OpenAI client.
API_URL = os.environ.get("READCTRL_API_URL", "http://172.16.34.29:8004/v1")
# API key handed to the OpenAI client ("EMPTY" is a placeholder value).
API_KEY = os.environ.get("READCTRL_API_KEY", "EMPTY")
| |
|
# Module-level OpenAI client bound to API_URL / API_KEY. It is created once at
# import time and reused by evaluate_reasoning_json for every request.
client = OpenAI(base_url=API_URL, api_key=API_KEY)
| |
|
def get_reasoning_prompt_json(source_text, gold_summary, generated_text, subclaim, level):
    """Build the subclaim-audit prompt that asks the model for a JSON verdict.

    The prompt is assembled from three consecutive segments: a preamble naming
    the target literacy level, a fixed rubric describing all three literacy
    levels, and the per-example inputs followed by the task description,
    classification categories and the required JSON schema.

    Args:
        source_text: Full source text, interpolated as "Source Text".
        gold_summary: Reference summary, interpolated as "Gold Summary".
        generated_text: Simplified text under audit, interpolated as
            "Generated Text".
        subclaim: The single subclaim whose status is to be classified.
        level: Target literacy level label; appears in the preamble and in the
            "reasonable_modification" category description.

    Returns:
        The complete prompt as one string, ending with "Output JSON:".
    """
    # Segment 1: role statement and the literacy level requested for this call.
    preamble = f"""You are a clinical logic validator auditing medical text simplification.

### Context & Goals:
- **Target Literacy Level:** {level}

"""

    # Segment 2: static rubric for the three literacy levels (no placeholders,
    # so it is a plain string rather than an f-string).
    level_rubric = """1. Level: Low Health Literacy (High Readability)

Target: Individuals needing the simplest terms for immediate action.

Linguistic Goal: Use "living room" language. Replace all medical jargon with functional descriptions (e.g., "renal" becomes "kidney").

Information Density: Focus strictly on the "need-to-know" info found in the Gold Summary.

Strategy: High paraphrasing using analogies. One idea per sentence.

Faithfulness: Must align perfectly with the Gold Summary.

2. Level: Intermediate Health Literacy (Medium Readability)

Target: The general public (news-reading level).

Linguistic Goal: Standard vocabulary. Common medical terms are okay, but technical "doctor-speak" must be simplified.

Information Density: Balanced. Use the Gold Summary as the lead, supplemented by necessary context from the Source Text.

Strategy: Moderate paraphrasing. Remove minor technical details to avoid information overload.

Faithfulness: Maintains the main narrative of the Gold Summary.

3. Level: Proficient Health Literacy (Low Readability)

Target: Researchers, clinicians, or highly informed patients.

Linguistic Goal: Technical and academic language. Prioritize clinical nuance and medical accuracy.

Information Density: High. Use the Full Source Text to include data, physiological mechanisms, and statistics.

Strategy: Minimal paraphrasing. Retain all original technical terminology.

Faithfulness: Adhere to the Source Text; you may add related subclaims that provide deeper scientific context.

"""

    # Segment 3: per-example inputs, the task, the category definitions and
    # the JSON schema. Doubled braces render as literal braces in the output.
    inputs_and_task = f"""### Input Data:
1. **Source Text:** {source_text}
2. **Gold Summary (Reference):** {gold_summary}
3. **Generated Text (Output):** {generated_text}
4. **Subclaim to Evaluate:** {subclaim}

### Task:
Evaluate the Subclaim's status in the Generated Text compared to the Source and Gold Summary. Output ONLY a JSON object.

### Classification Categories:
- "reasonable_removal": Subclaim in Source, but NOT in Gold (non-essential).
- "reasonable_modification": Subclaim simplified correctly for the {level} goal.
- "unreasonable_removal": Subclaim in Gold but MISSING from Generated (critical loss).
- "unreasonable_addition": Subclaim in Generated but NOT in Source/Gold (hallucination).
- "preserved": Fact maintained with high fidelity.

### JSON Schema Requirement:
{{
"category": "string (reasonable_removal | reasonable_modification | unreasonable_removal | unreasonable_addition | preserved)",
"action": "string (added | removed | modified | preserved)",
"presence_in_gold": "boolean",
"presence_in_generated": "boolean",
"verdict": "string (one sentence clinical justification)"
}}

Output JSON:"""

    return preamble + level_rubric + inputs_and_task
| |
|
def _extract_json_object(content):
    """Return the first JSON object embedded in a model reply.

    Tolerates the wrappers chat models commonly add around the payload: a
    reasoning preamble terminated by ``</think>``, Markdown code fences (with
    or without a ``json`` tag), and free text before or after the object.

    Args:
        content: Raw reply text.

    Returns:
        The decoded JSON object as a dict.

    Raises:
        ValueError: If no decodable JSON object is present in ``content``.
    """
    # Anything up to the last closing think tag is reasoning, not the answer.
    # rsplit leaves the text untouched when the tag is absent.
    answer = content.rsplit("</think>", 1)[-1]

    decoder = json.JSONDecoder()
    brace = answer.find("{")
    while brace != -1:
        try:
            # raw_decode parses one JSON value starting at `brace` and ignores
            # whatever follows it (closing fence, trailing commentary).
            parsed, _ = decoder.raw_decode(answer, brace)
        except json.JSONDecodeError:
            # This brace belonged to prose, not JSON; try the next one.
            brace = answer.find("{", brace + 1)
        else:
            return parsed
    raise ValueError("no JSON object found in model output")


def evaluate_reasoning_json(source, gold, generated, subclaim, level):
    """Ask the model to classify one subclaim and return its JSON verdict.

    Builds the audit prompt with ``get_reasoning_prompt_json``, sends it as a
    single user message through the module-level ``client``, and decodes the
    JSON object found in the reply.

    Args:
        source: Full source text.
        gold: Gold/reference summary.
        generated: Generated (simplified) text being audited.
        subclaim: Subclaim to evaluate.
        level: Target literacy level label.

    Returns:
        The dict decoded from the model reply. If the request or the parsing
        fails, a dict with ``"category": "error"``, ``"action": "error"`` and
        a ``"verdict"`` describing the failure is returned instead; API and
        parsing failures never propagate to the caller.
    """
    prompt = get_reasoning_prompt_json(source, gold, generated, subclaim, level)

    try:
        response = client.chat.completions.create(
            model=MODEL_PATH,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=400,
            temperature=0.1,
        )
        content = response.choices[0].message.content
        if content is None:
            # Report a clear reason instead of an AttributeError on None.
            raise ValueError("model returned no message content")
        return _extract_json_object(content)
    except Exception as e:
        # Deliberately broad: one failed item (network, API, malformed reply)
        # is reported in-band so a batch run can carry on with the next item.
        return {
            "category": "error",
            "action": "error",
            "verdict": f"API or Parsing Error: {e}"
        }
| |
|
| | |
| | |
| | |
| | |
| | |