Spaces:
Sleeping
Sleeping
File size: 3,892 Bytes
ccb8f07 c238547 ccb8f07 b66ce1f ccb8f07 c238547 ccb8f07 e83f1b8 ccb8f07 e83f1b8 ccb8f07 e83f1b8 ccb8f07 eef4b5e fdbfd6d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from PIL import Image
# 1. HuggingFace Space Deployment Settings
MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" # Base model
# To use your fine-tuned model from Kaggle:
# 1. model.push_to_hub("your-name/med-qwen-vl-adapter")
# 2. Add adapter load here for PEFT
ADAPTER_ID = "hssling/med-qwen-vl-adapter"
# Initialize Model and Processor globally
print("Starting App Engine...")
print(f"Loading {MODEL_ID}...")
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Qwen2VLForConditionalGeneration.from_pretrained(
MODEL_ID,
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
device_map="auto"
)
if ADAPTER_ID:
print(f"Loading custom fine-tuned LoRA weights: {ADAPTER_ID}")
model.load_adapter(ADAPTER_ID)
# 2. Main API Function called by our Next App
def diagnose_api(history: str, examination: str, image: Image.Image = None, audio_path: str = None, temp: float = 0.2, max_tokens: int = 1500):
try:
if image is None:
# Fallback if no image is passed
return "Error: Qwen-VL requires at least one image/diagnostic input to function accurately."
# Re-construct the specific structured prompt our diagnostic copilot uses
system_prompt = "You are a highly advanced Multi-Modal Diagnostic Co-Pilot Medical AI. Provide ## Integrated Analysis, ## Decision Making, and ## Management & Treatment Plan."
user_prompt = f"History: {history}\nExamination: {examination}\nAnalyze the provided scan and history."
messages = [
{"role": "system", "content": system_prompt},
{
"role": "user",
"content": [
{"type": "image"},
{"type": "text", "text": user_prompt}
]
}
]
text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(
text=[text_input],
images=[image],
padding=True,
return_tensors="pt"
).to(device)
with torch.no_grad():
generated_ids = model.generate(**inputs, max_new_tokens=int(max_tokens), temperature=float(temp), top_p=0.9, do_sample=True)
generated_ids_trimmed = [
out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
return output_text
except Exception as e:
return f"Model Error: {str(e)}"
# 3. Create the Gradio interface
# This acts as the visual UI for the HF Space, but more importantly,
# exposes an API endpoint via `/api/predict` that our React app can connect to securely.
demo = gr.Interface(
fn=diagnose_api,
inputs=[
gr.Textbox(lines=5, label="Patient History (String)", placeholder="Age, symptoms, past medical history..."),
gr.Textbox(lines=5, label="Examination Findings (String)", placeholder="Vitals, systemic exam..."),
gr.Image(type="pil", label="Diagnostic Scan / Image"),
gr.Audio(type="filepath", label="Optional Dictation Audio", visible=False),
gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Temperature (Creativity)"),
gr.Slider(minimum=256, maximum=4096, value=1500, step=256, label="Max Output Tokens")
],
outputs=gr.Markdown(label="Clinical Report Output"),
title="Multi-Modal Diagnostic Co-Pilot API (Trained via Kaggle)",
description="This Space hosts the fine-tuned medical vision-language model for the Diagnostic Co-Pilot ecosystem."
)
if __name__ == "__main__":
demo.launch()
|