import os
import json
import torch
from threading import Thread

from transformers import TextIteratorStreamer

# Imaging-specific points of attention, keyed by the modality label passed in
# by the caller. Unknown labels fall back to _DEFAULT_IMAGE_FOCUS.
_MODALITY_FOCUS = {
    "Chest X-Ray": "costophrenic angles, hilar shadows, and cardiac silhouette",
    "X-Ray": "costophrenic angles, hilar shadows, and cardiac silhouette",
    "CT Scan": "Hounsfield Units (HU), axial cross-sections, and windowing (Lung vs. Soft Tissue)",
    "MRI": "T1/T2 weighted signals, contrast enhancement, and multi-planar viewing",
}
_DEFAULT_IMAGE_FOCUS = "key anatomical landmarks"
_TEXT_ONLY_FOCUS = (
    "physiological markers, systemic interactions, and clinical diagnostic criteria"
)


def _build_system_prompt(modality, image):
    """Return the system prompt for a text-only (image is None) or imaging query."""
    if image is None:
        focus = _TEXT_ONLY_FOCUS
        return (
            "You are the Clinical Generalist, a highly specialized medical tutoring AI. "
            "Your purpose is to facilitate clinical reasoning, not just provide answers. "
            "Follow this structural protocol for EVERY generation: "
            "1. CLINICAL OVERVIEW: Provide a brief, concise overview of the "
            "pathophysiology or concepts surrounding the query. "
            f"2. SYSTEMIC INVENTORY: Explicitly focus on {focus}. "
            "3. DIFFERENTIAL REASONING: Mention the primary differential diagnosis "
            "but immediately contrast it with a 'mimic'. "
            "4. SOCRATIC QUESTIONING: Answer the clinician's query Socraticly. "
            "Challenge the clinician to justify their reasoning. "
            "Tone: Professional, objective, and Socratic. "
            "Ensure you provide a complete answer."
        )

    focus = _MODALITY_FOCUS.get(modality, _DEFAULT_IMAGE_FOCUS)
    # NOTE(review): the line break after "1." is reproduced from the original
    # literal -- confirm it is intentional and not a formatting artifact.
    return (
        "You are the NerdMedica Socratic Auditor, a highly specialized medical tutoring AI. "
        "Your purpose is to facilitate clinical reasoning, not just provide answers. "
        "Follow this structural protocol for EVERY generation: "
        "1. \n"
        "CLINICAL SCENARIO: Create a brief, realistic 3-sentence patient history "
        "(Age, Chief Complaint, Vitals) that matches the pathology seen in the "
        f"provided {modality}. "
        f"2. ANATOMICAL INVENTORY: Explicitly focus on {focus}. "
        "3. DIFFERENTIAL REASONING: Mention the primary finding but immediately "
        "contrast it with a 'mimic'. "
        "4. SOCRATIC QUESTIONING: Answer the clinician's query Socraticly. "
        "Challenge the clinician to justify their diagnosis based on visual evidence. "
        "Tone: Professional, objective, and Socratic. "
        "Ensure you provide a complete answer."
    )


def _format_history(history):
    """Flatten chat history dicts into "\\nUser: ..." / "\\nAI: ..." lines."""
    if not history:
        return ""
    return "".join(
        f"\n{'User' if msg['role'] == 'user' else 'AI'}: {msg['content']}"
        for msg in history
    )


def generate_vqa_response(model, tokenizer, message, history, modality, image):
    """Stream an educational scenario and Socratic questions from a local LLM.

    This is a generator: each yielded value is the full text generated so
    far (not just the newest fragment).

    Args:
        model: Loaded model exposing ``generate`` and ``device``, or None if
            loading failed.
        tokenizer: Matching tokenizer exposing ``apply_chat_template``, or
            None if loading failed.
        message: The clinician's question.
        history: Optional iterable of dicts with ``"role"`` and ``"content"``
            keys; any role other than ``"user"`` is labelled "AI".
        modality: Imaging modality label used to choose the anatomical focus.
        image: Only checked against None to pick the text-only or imaging
            prompt; it is not passed to the tokenizer or the model here.

    Yields:
        str: The accumulated response, or a Markdown-formatted error message
        if the model is unavailable or generation fails.
    """
    if model is None or tokenizer is None:
        yield "### Error\nFailed to load local MedGemma model. Please verify your HF_TOKEN and log output."
        return

    system_prompt = _build_system_prompt(modality, image)
    prompt_content = f"Clinician Question: {message}"
    history_text = _format_history(history)

    if history_text:
        formatted_prompt = f"{system_prompt}\n\nChat History:{history_text}\n\n{prompt_content}"
    else:
        formatted_prompt = f"{system_prompt}\n\n{prompt_content}"

    # The system prompt is folded into a single user turn.
    messages = [{"role": "user", "content": formatted_prompt}]

    print("Generating NerdMedica feedback using MedGemma (Streaming)...")

    try:
        prompt_str = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
        inputs = tokenizer(prompt_str, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=512,
            temperature=0.1,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True,
        )

        generation_errors = []

        def _run_generation():
            # Exceptions raised in a worker thread never reach the caller's
            # try/except, and a failed generate() never signals end-of-stream,
            # which would leave the consuming loop below blocked forever.
            # Record the failure and close the stream explicitly instead.
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                generation_errors.append(exc)
                streamer.end()

        worker = Thread(target=_run_generation)
        worker.start()

        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            yield generated_text

        worker.join()
        if generation_errors:
            # Re-raise on this thread so the handler below reports it.
            raise generation_errors[0]

    except Exception as e:
        print(f"Error during MedGemma generation: {e}")
        yield f"### Generation Error\nSystem Recalibrating: VRAM constraint exceeded or model error encountered. Details: {e}"