Spaces:
Runtime error
Runtime error
| import os | |
| import json | |
| import torch | |
| from threading import Thread | |
| from transformers import TextIteratorStreamer | |
def generate_vqa_response(model, tokenizer, message, history, modality, image):
    """Generate an educational scenario and Socratic questions via a local LLM.

    Implemented as a generator for streaming: each ``yield`` emits the
    *cumulative* text generated so far (suitable for a Gradio chat UI that
    replaces the message in place), not just the newest delta.

    Args:
        model: Loaded causal LM (e.g. MedGemma); ``None`` if loading failed.
        tokenizer: Matching tokenizer; ``None`` if loading failed.
        message: The clinician's current question.
        history: Prior turns as a list of ``{"role", "content"}`` dicts
            (Gradio "messages" format), or a falsy value for no history.
        modality: Imaging modality label ("Chest X-Ray", "X-Ray", "CT Scan",
            "MRI", or anything else for a generic focus).
        image: The uploaded image, or ``None`` for a text-only query.
            NOTE(review): ``image`` only selects which system prompt is used;
            it is never passed to the model — confirm this is intended.

    Yields:
        str: Cumulative generated text, or a markdown-formatted error message.
    """
    if model is None or tokenizer is None:
        yield "### Error\nFailed to load local MedGemma model. Please verify your HF_TOKEN and log output."
        return

    # Choose an anatomical/physiological focus and system prompt depending on
    # whether an image accompanies the question and, if so, its modality.
    if image is None:
        focus = "physiological markers, systemic interactions, and clinical diagnostic criteria"
        system_prompt = f"""You are the Clinical Generalist, a highly specialized medical tutoring AI. Your purpose is to facilitate clinical reasoning, not just provide answers.
Follow this structural protocol for EVERY generation:
1. CLINICAL OVERVIEW: Provide a brief, concise overview of the pathophysiology or concepts surrounding the query.
2. SYSTEMIC INVENTORY: Explicitly focus on {focus}.
3. DIFFERENTIAL REASONING: Mention the primary differential diagnosis but immediately contrast it with a 'mimic'.
4. SOCRATIC QUESTIONING: Answer the clinician's query Socraticly. Challenge the clinician to justify their reasoning.
Tone: Professional, objective, and Socratic. Ensure you provide a complete answer."""
    else:
        if modality == "Chest X-Ray" or modality == "X-Ray":
            focus = "costophrenic angles, hilar shadows, and cardiac silhouette"
        elif modality == "CT Scan":
            focus = "Hounsfield Units (HU), axial cross-sections, and windowing (Lung vs. Soft Tissue)"
        elif modality == "MRI":
            focus = "T1/T2 weighted signals, contrast enhancement, and multi-planar viewing"
        else:
            focus = "key anatomical landmarks"
        system_prompt = f"""You are the NerdMedica Socratic Auditor, a highly specialized medical tutoring AI. Your purpose is to facilitate clinical reasoning, not just provide answers.
Follow this structural protocol for EVERY generation:
1. CLINICAL SCENARIO: Create a brief, realistic 3-sentence patient history (Age, Chief Complaint, Vitals) that matches the pathology seen in the provided {modality}.
2. ANATOMICAL INVENTORY: Explicitly focus on {focus}.
3. DIFFERENTIAL REASONING: Mention the primary finding but immediately contrast it with a 'mimic'.
4. SOCRATIC QUESTIONING: Answer the clinician's query Socraticly. Challenge the clinician to justify their diagnosis based on visual evidence.
Tone: Professional, objective, and Socratic. Ensure you provide a complete answer."""

    prompt_content = f"Clinician Question: {message}"

    # Flatten prior turns into plain text. Assumes each entry has "role" and
    # "content" keys with string content — TODO confirm against the caller.
    history_text = ""
    if history:
        for msg in history:
            role = "User" if msg["role"] == "user" else "AI"
            content = msg["content"]
            history_text += f"\n{role}: {content}"

    if history_text:
        formatted_prompt = f"{system_prompt}\n\nChat History:{history_text}\n\n{prompt_content}"
    else:
        formatted_prompt = f"{system_prompt}\n\n{prompt_content}"

    # System prompt, history, and question are collapsed into one user turn;
    # the chat template is applied below to produce the final prompt string.
    messages = [
        {"role": "user", "content": formatted_prompt}
    ]
    print("Generating NerdMedica feedback using MedGemma (Streaming)...")
    try:
        prompt_str = tokenizer.apply_chat_template(
            messages, add_generation_prompt=True, tokenize=False
        )
        inputs = tokenizer(prompt_str, return_tensors="pt")
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        generation_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=512,
            temperature=0.1,
            top_p=0.9,
            repetition_penalty=1.2,
            do_sample=True,
        )
        # model.generate blocks, so it runs on a worker thread while this
        # generator drains the streamer and yields partial text to the UI.
        thread = Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()
        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            yield generated_text
        # FIX: join the worker thread once the streamer is exhausted.
        # Previously it was never joined, leaking a thread on every call.
        thread.join()
    except Exception as e:
        print(f"Error during MedGemma generation: {e}")
        yield f"### Generation Error\nSystem Recalibrating: VRAM constraint exceeded or model error encountered. Details: {e}"