mmrech
/

pitvqa-training-scripts

Model card Files Files and versions

xet

Community

mmrech commited on Jan 18

Commit

2e3edd5

verified ·

1 Parent(s): dc287cc

Upload merge_stage4_adapter.py with huggingface_hub

Browse files

Files changed (1) hide show

merge_stage4_adapter.py +207 -0

merge_stage4_adapter.py ADDED Viewed

	@@ -0,0 +1,207 @@

+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "torch",
+#     "transformers>=4.40.0",
+#     "peft>=0.10.0",
+#     "accelerate",
+#     "bitsandbytes",
+#     "huggingface_hub>=0.21.0",
+# ]
+# ///
+"""
+Merge Stage 4 (Unified) adapter into base model.
+Stage 4 is trained on ALL tasks, so it can handle:
+- Point localization
+- Bounding box detection
+- Classification
+- Free-form queries
+Run with: hf jobs uv run --flavor a10g-large --secrets HF_TOKEN merge_stage4_adapter.py
+"""
+import os
+import torch
+from pathlib import Path
+# ============================================================
+# Config
+# ============================================================
+UNIFIED_MODEL = "mmrech/pitvqa-qwen2vl-unified-v2"
+BASE_MODEL = "Qwen/Qwen2-VL-2B-Instruct"
+OUTPUT_REPO = "mmrech/pitvqa-qwen2vl-merged"
+# ============================================================
+# Setup
+# ============================================================
+from huggingface_hub import login, HfApi
+hf_token = os.environ.get("HF_TOKEN")
+if hf_token:
+    login(token=hf_token)
+    print("✓ Logged in to HuggingFace")
+api = HfApi()
+# ============================================================
+# Load and Merge
+# ============================================================
+print("\n🤖 Loading base model...")
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from peft import PeftModel
+# Load base model (full precision for merging)
+base = Qwen2VLForConditionalGeneration.from_pretrained(
+    BASE_MODEL,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    trust_remote_code=True
+)
+print(f"✓ Base model loaded")
+# Load processor
+processor = AutoProcessor.from_pretrained(BASE_MODEL, trust_remote_code=True)
+print(f"✓ Processor loaded")
+# Load Stage 4 adapter (Unified - handles all tasks)
+print("\n📦 Loading Stage 4 (Unified) adapter...")
+model = PeftModel.from_pretrained(
+    base,
+    UNIFIED_MODEL,
+    adapter_name="stage4",
+    subfolder="stage4"
+)
+print(f"✓ Stage 4 adapter loaded")
+# Merge adapter into base model
+print("\n🔗 Merging adapter...")
+merged_model = model.merge_and_unload()
+print(f"✓ Adapter merged")
+# ============================================================
+# Save and Upload
+# ============================================================
+print("\n💾 Saving merged model...")
+output_dir = Path("./pitvqa-merged")
+output_dir.mkdir(exist_ok=True)
+merged_model.save_pretrained(output_dir)
+processor.save_pretrained(output_dir)
+print(f"✓ Saved to {output_dir}")
+# Create model card
+model_card = """---
+license: apache-2.0
+base_model: Qwen/Qwen2-VL-2B-Instruct
+tags:
+  - medical
+  - vision-language
+  - surgical-ai
+  - pituitary-surgery
+  - qwen2-vl
+  - merged-adapter
+---
+# PitVQA Merged Model
+A **merged** version of the PitVQA unified model for pituitary surgery understanding.
+## Model Description
+This model merges the Stage 4 (Unified) LoRA adapter into the Qwen2-VL-2B base model.
+It can handle ALL tasks without adapter switching:
+- **Point Localization**: `<point x='45.2' y='68.3'>suction device</point>`
+- **Bounding Box**: `<box x1='20' y1='30' x2='60' y2='70'>tumor region</box>`
+- **Classification**: Surgical phase identification
+- **Free-form queries**: Any question about the surgical scene
+## Usage
+```python
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+import torch
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "mmrech/pitvqa-qwen2vl-merged",
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
+processor = AutoProcessor.from_pretrained("mmrech/pitvqa-qwen2vl-merged")
+# No adapter switching needed - just inference
+messages = [{"role": "user", "content": [
+    {"type": "image", "image": your_image},
+    {"type": "text", "text": "Point to the suction device"}
+]}]
+text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+inputs = processor(text=[text], images=[your_image], return_tensors="pt").to(model.device)
+output = model.generate(**inputs, max_new_tokens=128)
+print(processor.decode(output[0], skip_special_tokens=True))
+```
+## Source
+- Base: `Qwen/Qwen2-VL-2B-Instruct`
+- Adapter source: `mmrech/pitvqa-qwen2vl-unified-v2` (Stage 4)
+- Training dataset: `mmrech/pitvqa-comprehensive-spatial`
+"""
+with open(output_dir / "README.md", "w") as f:
+    f.write(model_card)
+print("✓ Created README.md")
+# Upload to HuggingFace
+print(f"\n📤 Uploading to {OUTPUT_REPO}...")
+try:
+    # Create repo if needed
+    api.create_repo(OUTPUT_REPO, exist_ok=True)
+    # Upload all files
+    api.upload_folder(
+        folder_path=str(output_dir),
+        repo_id=OUTPUT_REPO,
+        repo_type="model"
+    )
+    print(f"✓ Uploaded to https://huggingface.co/{OUTPUT_REPO}")
+except Exception as e:
+    print(f"⚠ Upload error: {e}")
+# ============================================================
+# Verify
+# ============================================================
+print("\n🧪 Verifying merged model...")
+# Quick test
+from PIL import Image
+import numpy as np
+# Create test image
+test_image = Image.fromarray(np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8))
+messages = [{"role": "user", "content": [
+    {"type": "image", "image": test_image},
+    {"type": "text", "text": "What do you see in this image?"}
+]}]
+text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+inputs = processor(text=[text], images=[test_image], return_tensors="pt").to(merged_model.device)
+with torch.no_grad():
+    output = merged_model.generate(**inputs, max_new_tokens=50, do_sample=False)
+response = processor.decode(output[0], skip_special_tokens=True)
+print(f"Test response: {response[:200]}...")
+print("\n✅ DONE! Merged model available at:")
+print(f"   https://huggingface.co/{OUTPUT_REPO}")