import json
import os

# Pin this process to a single physical GPU.  Both variables are written
# *before* torch is imported so that they are already in the environment when
# the CUDA runtime is initialised; setting them afterwards has no effect.
# PCI_BUS_ID makes the device index below refer to the PCI bus ordering rather
# than CUDA's default enumeration.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# NOTE: these imports intentionally come after the environment setup above,
# and unsloth is imported ahead of transformers -- keep this order.
import torch
from unsloth import FastLanguageModel
from transformers import TextStreamer

# --- Configuration ---------------------------------------------------------
# Directory of the fine-tuned checkpoint to load (judging by the directory
# name this is an SFT LoRA model -- confirm against the training run).
model_path = "/home/mshahidul/readctrl_model/RL_model/readability_sft_lora_model"
# Maximum sequence length handed to the model loader.
max_seq_length = 8192

# --- Model loading ---------------------------------------------------------
# Load the model and its tokenizer from `model_path`.  4-bit quantised loading
# is explicitly disabled.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_path,
    max_seq_length=max_seq_length,
    load_in_4bit=False,
)

# Switch the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# --- Example input ---------------------------------------------------------
# Reference summary that the prompt presents as the factual anchor.
gold_summary = "A 34-year-old pregnant woman presents with seizures and dysarthria and is urgently referred for a cranial MRI. The classic ‘Medusa head’ sign is seen and the diagnosis is made as a venous anomaly of development with peripheral partial thrombosis and proximal slow flow.\n"
# Full case report that the prompt presents as the detailed source text.
fulltext = "We present the case of a 34-year-old woman, eight weeks pregnant with no other personal history of interest, who presents to the emergency department with generalized convulsions with dysarthria in the postcritical period, which resolve progressively in less than two hours. On physical examination, she is conscious, oriented, with no language or motor or sensory deficits. Only signs of a right lateral tongue bite are observed.\n\nThe complementary tests, such as blood tests or the electrocardiogram, are normal. Given that the episode corresponds with a first epileptic seizure and the patient is pregnant, an urgent magnetic resonance of the skull is requested.\n\nThe usual protocol was performed and 3D T1 sequences without and with intravenous contrast were obtained in axial, coronal and sagital planes, axial FLAIR, axial T2, VEN BOLD and magnetic susceptibility sequences, as well as axial diffusion and apparent diffusion coefficient map. The MRI identified multiple venous cortico-medullary vascular structures converging centripetally to a large central venous structure draining through the inferior anastomotic vein into the left transverse sinus, forming the classic ‘Medusa head’ sign. In the T1 sequences, the drainage vein was seen to be increased in signal with central hyphocaptation after contrast administration, suggesting partial thrombosis versus slow flow. In addition, in T2 and FLAIR sequences, the brain tissue surrounding the drainage vein was seen to be hyperintense, without diffusion restriction and compatible with edema.\n\nThese findings are suggestive of a venous anomaly of development with signs of partial peripheral thrombosis and slow flow more proximal, which cause edema of the surrounding tissue. She is started on clexane 60 mg/12 hours and levetiracetam 500 mg/12 hours and the patient shows improvement and symptomatic stability after one week.\n"

# --- Prompt ----------------------------------------------------------------
# One prompt string holding the role description, the per-level instructions,
# both inputs and the required JSON output skeleton.  It is an f-string:
# `{gold_summary}` and `{fulltext}` are interpolated, while the doubled braces
# `{{` / `}}` emit the literal `{` / `}` of the JSON skeleton.
system_prompt = f"""
**System Role:**

You are an expert medical editor and Health Literacy specialist. Your task is to transform complex medical text into three distinct versions based on the reader's health literacy level. You must maintain the source language of the input while adjusting the linguistic complexity. Use the provided Gold Summary as the factual anchor to ensure the simplified versions remain accurate and focused on the most important information.

**User Prompt:**

Please process the following medical Source Text and its corresponding Gold Summary to generate three versions tailored to different health literacy levels.
### Instructions for Each Level:

1. Level: Low Health Literacy (High Readability)

Target: Individuals needing the simplest terms for immediate action.

Linguistic Goal: Use "living room" language. Replace all medical jargon with functional descriptions (e.g., "renal" becomes "kidney").

Information Density: Focus strictly on the "need-to-know" info found in the Gold Summary.

Strategy: High paraphrasing using analogies. One idea per sentence.

Faithfulness: Must align perfectly with the Gold Summary.

2. Level: Intermediate Health Literacy (Medium Readability)

Target: The general public (news-reading level).

Linguistic Goal: Standard vocabulary. Common medical terms are okay, but technical "doctor-speak" must be simplified.

Information Density: Balanced. Use the Gold Summary as the lead, supplemented by necessary context from the Source Text.

Strategy: Moderate paraphrasing. Remove minor technical details to avoid information overload.

Faithfulness: Maintains the main narrative of the Gold Summary.

3. Level: Proficient Health Literacy (Low Readability)

Target: Researchers, clinicians, or highly informed patients.

Linguistic Goal: Technical and academic language. Prioritize clinical nuance and medical accuracy.

Information Density: High. Use the Full Source Text to include data, physiological mechanisms, and statistics.

Strategy: Minimal paraphrasing. Retain all original technical terminology.

Faithfulness: Adhere to the Source Text; you may add related subclaims that provide deeper scientific context.

Input Language: English
Gold Summary (The Anchor):
{gold_summary}
Source Text (The Detail):
{fulltext}

**Output Format (JSON only):**
{{
"low_health_literacy": "...",
"intermediate_health_literacy": "...",
"proficient_health_literacy": "..."
}}
"""

# --- Prompt encoding -------------------------------------------------------
# The whole prompt (despite the variable name `system_prompt`) is sent as a
# single *user* turn; no separate system message is used.
messages = [
    {"role": "user", "content": system_prompt},
]

# Render the conversation to plain text with the tokenizer's chat template and
# append the assistant-turn prefix so the model starts answering right away.
input_text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# The chat template has already inserted every special token the conversation
# needs, so tokenise without adding them a second time (otherwise tokenizers
# that prepend BOS would produce a duplicated BOS token).  The resulting batch
# of one prompt is moved to the GPU.
inputs = tokenizer(
    [input_text],
    return_tensors="pt",
    add_special_tokens=False,
).to("cuda")

# --- Generation ------------------------------------------------------------
# Stream decoded tokens to stdout as they are produced; the prompt itself and
# special tokens are left out of the printed text.
text_streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

print("--- Model Response ---")
# The returned token ids are discarded: the streamer has already printed the
# response.
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=2048,
    # temperature / top_p / top_k only take effect in sampling mode; request it
    # explicitly instead of relying on the checkpoint's generation config.
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.05,
    use_cache=True,
)