Spaces:

hssling
/

diagnostic-copilot-api

Sleeping

App Files Files Community

diagnostic-copilot-api / app.py

hssling

Restore Gradio launch block internally, depending on pinned hub version

eef4b5e 9 days ago

raw

history blame contribute delete

3.89 kB

	import gradio as gr
	import torch
	from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
	from PIL import Image

	# 1. HuggingFace Space Deployment Settings
	# Base checkpoint that is always loaded first.
	MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
	# Fine-tuned adapter repository applied on top of the base model.
	# Workflow for a Kaggle-trained model:
	#   1. model.push_to_hub("your-name/med-qwen-vl-adapter")
	#   2. point ADAPTER_ID at that repository (a falsy value skips adapter loading)
	ADAPTER_ID = "hssling/med-qwen-vl-adapter"

	# The processor and model are created once at import time and shared by all requests.
	print("Starting App Engine...")
	print(f"Loading {MODEL_ID}...")

	# Half precision is only selected when a CUDA device is present; CPU uses float32.
	if torch.cuda.is_available():
	    device = "cuda"
	    _model_dtype = torch.float16
	else:
	    device = "cpu"
	    _model_dtype = torch.float32

	processor = AutoProcessor.from_pretrained(MODEL_ID)
	model = Qwen2VLForConditionalGeneration.from_pretrained(
	    MODEL_ID,
	    device_map="auto",
	    torch_dtype=_model_dtype,
	)

	# Attach the LoRA weights when an adapter repository is configured.
	if ADAPTER_ID:
	    print(f"Loading custom fine-tuned LoRA weights: {ADAPTER_ID}")
	    model.load_adapter(ADAPTER_ID)

	# 2. Main API Function called by our Next App
	def diagnose_api(history: str, examination: str, image: "Image.Image | None" = None, audio_path: "str | None" = None, temp: float = 0.2, max_tokens: int = 1500) -> str:
	    """Generate a structured diagnostic report from an image plus clinical text.

	    Args:
	        history: Patient history text inserted into the user prompt.
	        examination: Examination findings text inserted into the user prompt.
	        image: PIL image of the scan. The function refuses to run without one.
	        audio_path: Accepted so the signature matches the Gradio inputs;
	            it is not used by this function.
	        temp: Sampling temperature. A value of 0 or below selects greedy
	            (deterministic) decoding instead of sampling.
	        max_tokens: Upper bound on the number of newly generated tokens.

	    Returns:
	        The decoded model output, or a message starting with ``"Error:"``
	        (no image supplied) or ``"Model Error:"`` (any exception raised
	        while preparing inputs or generating).
	    """
	    if image is None:
	        # Fallback if no image is passed
	        return "Error: Qwen-VL requires at least one image/diagnostic input to function accurately."

	    try:
	        # Re-construct the specific structured prompt our diagnostic copilot uses
	        system_prompt = "You are a highly advanced Multi-Modal Diagnostic Co-Pilot Medical AI. Provide ## Integrated Analysis, ## Decision Making, and ## Management & Treatment Plan."
	        user_prompt = f"History: {history}\nExamination: {examination}\nAnalyze the provided scan and history."

	        messages = [
	            {"role": "system", "content": system_prompt},
	            {
	                "role": "user",
	                "content": [
	                    {"type": "image"},
	                    {"type": "text", "text": user_prompt},
	                ],
	            },
	        ]

	        text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

	        inputs = processor(
	            text=[text_input],
	            images=[image],
	            padding=True,
	            return_tensors="pt",
	        ).to(device)

	        # The UI slider allows a temperature of 0.0, but sampling requires a
	        # strictly positive temperature. Treat non-positive values as a request
	        # for greedy decoding rather than letting generate() raise.
	        generation_kwargs = {"max_new_tokens": int(max_tokens)}
	        temperature = float(temp)
	        if temperature > 0.0:
	            generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
	        else:
	            generation_kwargs["do_sample"] = False

	        with torch.no_grad():
	            generated_ids = model.generate(**inputs, **generation_kwargs)

	        # generate() returns prompt + completion; drop the prompt tokens of each row.
	        generated_ids_trimmed = [
	            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
	        ]

	        output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

	        return output_text

	    except Exception as e:
	        # API boundary: callers receive the message as text, but keep the full
	        # traceback in the process logs so failures can be diagnosed.
	        import traceback

	        traceback.print_exc()
	        return f"Model Error: {str(e)}"

	# 3. Create the Gradio interface
	# The Interface doubles as the visual UI of the HF Space and as the API
	# endpoint (`/api/predict`) that the React app connects to.
	def _build_interface():
	    """Assemble the Gradio Interface that wraps diagnose_api."""
	    # Components are listed in the same order as diagnose_api's parameters:
	    # history, examination, image, audio_path, temp, max_tokens.
	    history_box = gr.Textbox(lines=5, label="Patient History (String)", placeholder="Age, symptoms, past medical history...")
	    examination_box = gr.Textbox(lines=5, label="Examination Findings (String)", placeholder="Vitals, systemic exam...")
	    scan_image = gr.Image(type="pil", label="Diagnostic Scan / Image")
	    # Hidden in the UI but kept as an input slot.
	    dictation_audio = gr.Audio(type="filepath", label="Optional Dictation Audio", visible=False)
	    temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Temperature (Creativity)")
	    token_slider = gr.Slider(minimum=256, maximum=4096, value=1500, step=256, label="Max Output Tokens")

	    return gr.Interface(
	        title="Multi-Modal Diagnostic Co-Pilot API (Trained via Kaggle)",
	        description="This Space hosts the fine-tuned medical vision-language model for the Diagnostic Co-Pilot ecosystem.",
	        fn=diagnose_api,
	        inputs=[
	            history_box,
	            examination_box,
	            scan_image,
	            dictation_audio,
	            temperature_slider,
	            token_slider,
	        ],
	        outputs=gr.Markdown(label="Clinical Report Output"),
	    )


	demo = _build_interface()

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()