# app.py — Hugging Face Space "test" (commit 6e89446, verified)
# Author: aqibmumtazbits
import torch
import gradio as gr
from PIL import Image
from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLProcessor
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"

# Resolve the execution device and dtype once at import time: bfloat16 on
# GPU (memory-friendly, supported by the model), float32 on CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
    DTYPE = torch.bfloat16
else:
    DEVICE = "cpu"
    DTYPE = torch.float32

# Instruction sent with every image unless the user edits the prompt box.
DEFAULT_PROMPT = (
    "Do you see any abnormality in the chest? Write briefly. "
    "If yes, also tell where the abnormality is in which part of the chest. "
    "The chest parts include lungs, heart and vessels, spine, diaphragm, "
    "soft tissues, Mediastinum and bones of chest shown in image. "
    "Respond only in English. Do NOT use any other language. "
    "**Do not use Chinese language.**"
)
# ---------------------------------------------------------------------------
# Load model & processor
# ---------------------------------------------------------------------------
print(f"Loading model: {MODEL_ID}")
# device_map="auto" lets accelerate place/shard the weights across available
# GPUs; on CPU we pass None and move the model explicitly below instead.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    device_map="auto" if torch.cuda.is_available() else None,
)
if not torch.cuda.is_available():
    model = model.to(DEVICE)
# Skip video_processor attribute to avoid torchvision dependency:
# temporarily remove "video_processor" from the processor class's attribute
# list so from_pretrained() does not construct a video processor (which would
# import torchvision), then restore the original list so the class is left
# unmodified for any other user.
_orig_attrs = Qwen2_5_VLProcessor.attributes[:]
Qwen2_5_VLProcessor.attributes = [a for a in _orig_attrs if a != "video_processor"]
processor = Qwen2_5_VLProcessor.from_pretrained(MODEL_ID)
Qwen2_5_VLProcessor.attributes = _orig_attrs
print("Model loaded successfully.")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def pad_to_square(image: Image.Image, fill: tuple[int, int, int] = (0, 0, 0)) -> Image.Image:
    """Pad *image* to a square canvas, centering the original content.

    Args:
        image: Source image; returned unchanged if already square.
        fill: RGB background color for the padded borders (default black,
            matching typical radiograph backgrounds).

    Returns:
        A new RGB square image of side ``max(width, height)``, or the
        original image object when no padding is needed.
    """
    width, height = image.size
    if width == height:
        return image
    side = max(width, height)
    canvas = Image.new("RGB", (side, side), fill)
    # Center the original; integer division may leave the extra pixel on the
    # right/bottom edge for odd differences.
    canvas.paste(image, ((side - width) // 2, (side - height) // 2))
    return canvas
# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------
def predict(image: Image.Image, prompt: str, max_new_tokens: int, temperature: float) -> str:
    """Run the VLM on a chest X-ray and return the generated report text.

    Args:
        image: Uploaded X-ray (any mode; converted to RGB and padded square).
        prompt: User instruction; falls back to DEFAULT_PROMPT when empty or
            None (Gradio delivers None for a cleared textbox).
        max_new_tokens: Generation length cap (slider value, may be float).
        temperature: Sampling temperature; 0 selects greedy decoding.

    Returns:
        The decoded model output, or a usage message when no image was given.
    """
    if image is None:
        return "Please upload a chest X-ray image."
    if image.mode != "RGB":
        image = image.convert("RGB")
    image = pad_to_square(image)
    # Guard against None as well as whitespace-only prompts.
    if not prompt or not prompt.strip():
        prompt = DEFAULT_PROMPT
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt},
            ],
        }
    ]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(
        text=[text], images=[image], return_tensors="pt", padding=True
    ).to(model.device)
    # Only pass sampling flags when actually sampling; supplying a
    # temperature with do_sample=False triggers transformers warnings.
    gen_kwargs = {"max_new_tokens": int(max_new_tokens)}
    if temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = temperature
    else:
        gen_kwargs["do_sample"] = False
    with torch.inference_mode():
        generated_ids = model.generate(**inputs, **gen_kwargs)
    # Strip the prompt tokens so only newly generated text is decoded.
    generated_ids_trimmed = [
        out_ids[len(in_ids):]
        for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    return processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(
    title="Chest X-Ray Analysis — Qwen2.5-VL-3B",
    theme=gr.themes.Soft(),
) as demo:
    # Header banner.
    gr.Markdown(
        "# Chest X-Ray Analysis\n"
        "Upload a chest X-ray and get an automated report "
        "powered by **Qwen2.5-VL-3B-Instruct**."
    )
    with gr.Row():
        # Left column: inputs and generation controls.
        with gr.Column(scale=1):
            xray_in = gr.Image(type="pil", label="Upload Chest X-Ray")
            prompt_box = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT, lines=4)
            with gr.Row():
                tokens_slider = gr.Slider(
                    minimum=64,
                    maximum=1024,
                    value=512,
                    step=64,
                    label="Max New Tokens",
                )
                temp_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.5,
                    value=0.3,
                    step=0.05,
                    label="Temperature (0 = greedy)",
                )
            analyze_btn = gr.Button("Analyze", variant="primary")
        # Right column: generated report.
        with gr.Column(scale=1):
            report_box = gr.Textbox(label="Model Report", lines=20)
    analyze_btn.click(
        predict,
        inputs=[xray_in, prompt_box, tokens_slider, temp_slider],
        outputs=report_box,
    )
    # Footer disclaimer.
    gr.Markdown(
        "---\n"
        "*Research purposes only — not a substitute for professional medical diagnosis.*"
    )

if __name__ == "__main__":
    demo.launch()