hssling committed on
Commit
11368a6
·
1 Parent(s): eb170be

Stabilize HF Space runtime: quantized loading, version pins, adapter revision config

Browse files
Files changed (4) hide show
  1. app.py +65 -15
  2. kaggle_retrain_and_deploy.py +269 -0
  3. model_config.json +5 -0
  4. requirements.txt +10 -9
app.py CHANGED
@@ -1,29 +1,77 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 
4
  from PIL import Image
5
  import json
 
6
 
7
- MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
8
- ADAPTER_ID = "hssling/cardioai-adapter"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  print("Starting App Engine...")
 
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
- processor = AutoProcessor.from_pretrained(MODEL_ID)
13
- model = Qwen2VLForConditionalGeneration.from_pretrained(
14
- MODEL_ID,
15
- torch_dtype=torch.float16 if device == "cuda" else torch.float32,
16
- device_map="auto"
17
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  if ADAPTER_ID:
20
- print(f"Loading custom fine-tuned LoRA weights: {ADAPTER_ID}")
21
  try:
22
- model.load_adapter(ADAPTER_ID)
 
 
 
 
 
 
23
  except Exception as e:
24
- print(f"Failed to load adapter. Using base model. Error: {e}")
25
 
26
- def diagnose_ecg(image: Image.Image = None, temp: float = 0.4, max_tokens: int = 2000):
27
  try:
28
  if image is None:
29
  return json.dumps({"error": "No image provided."})
@@ -49,7 +97,9 @@ def diagnose_ecg(image: Image.Image = None, temp: float = 0.4, max_tokens: int =
49
  images=[image],
50
  padding=True,
51
  return_tensors="pt"
52
- ).to(device)
 
 
53
 
54
  with torch.no_grad():
55
  generated_ids = model.generate(**inputs, max_new_tokens=int(max_tokens), temperature=float(temp), top_p=0.9, do_sample=True)
@@ -70,7 +120,7 @@ demo = gr.Interface(
70
  inputs=[
71
  gr.Image(type="pil", label="ECG Image Scan"),
72
  gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.1, label="Temperature"),
73
- gr.Slider(minimum=256, maximum=4096, value=2000, step=256, label="Max Tokens")
74
  ],
75
  outputs=gr.Markdown(label="Clinical Report Output"),
76
  title="CardioAI Inference API",
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoProcessor, Qwen2VLForConditionalGeneration, BitsAndBytesConfig
4
+ from peft import PeftModel
5
  from PIL import Image
6
  import json
7
+ import os
8
 
9
+ DEFAULT_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
10
+ DEFAULT_ADAPTER_ID = "hssling/cardioai-adapter"
11
+ CONFIG_PATH = "model_config.json"
12
+
13
+ def load_runtime_config():
14
+ config = {
15
+ "base_model": os.environ.get("BASE_MODEL_ID", DEFAULT_MODEL_ID),
16
+ "adapter_repo": os.environ.get("ADAPTER_REPO_ID", DEFAULT_ADAPTER_ID),
17
+ "adapter_revision": os.environ.get("ADAPTER_REVISION", "main")
18
+ }
19
+ if os.path.exists(CONFIG_PATH):
20
+ try:
21
+ with open(CONFIG_PATH, "r", encoding="utf-8") as f:
22
+ disk_cfg = json.load(f)
23
+ config["base_model"] = disk_cfg.get("base_model", config["base_model"])
24
+ config["adapter_repo"] = disk_cfg.get("adapter_repo", config["adapter_repo"])
25
+ config["adapter_revision"] = disk_cfg.get("adapter_revision", config["adapter_revision"])
26
+ except Exception as e:
27
+ print(f"Failed to read {CONFIG_PATH}; falling back to defaults. Error: {e}")
28
+ return config
29
+
30
+ cfg = load_runtime_config()
31
+ MODEL_ID = cfg["base_model"]
32
+ ADAPTER_ID = cfg["adapter_repo"]
33
+ ADAPTER_REV = cfg["adapter_revision"]
34
 
35
  print("Starting App Engine...")
36
+ os.makedirs("/tmp/offload", exist_ok=True)
37
  device = "cuda" if torch.cuda.is_available() else "cpu"
38
+ processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False)
39
+
40
+ model_kwargs = {
41
+ "pretrained_model_name_or_path": MODEL_ID,
42
+ "device_map": "auto",
43
+ "low_cpu_mem_usage": True,
44
+ "offload_folder": "/tmp/offload"
45
+ }
46
+
47
+ if device == "cuda":
48
+ model_kwargs["torch_dtype"] = torch.float16
49
+ model_kwargs["quantization_config"] = BitsAndBytesConfig(
50
+ load_in_4bit=True,
51
+ bnb_4bit_quant_type="nf4",
52
+ bnb_4bit_compute_dtype=torch.float16,
53
+ bnb_4bit_use_double_quant=True
54
+ )
55
+ else:
56
+ # CPU space: keep dtype low to reduce memory footprint.
+ # NOTE(review): float16 on CPU is slow and unsupported for many PyTorch ops —
+ # confirm this path actually runs, or fall back to float32/bfloat16 on CPU.
57
+ model_kwargs["torch_dtype"] = torch.float16
58
+
59
+ model = Qwen2VLForConditionalGeneration.from_pretrained(**model_kwargs)
60
 
61
  if ADAPTER_ID:
62
+ print(f"Loading custom fine-tuned LoRA weights: {ADAPTER_ID}@{ADAPTER_REV}")
63
  try:
64
+ model = PeftModel.from_pretrained(
65
+ model,
66
+ ADAPTER_ID,
67
+ revision=ADAPTER_REV,
68
+ is_trainable=False
69
+ )
70
+ print("Adapter load successful.")
71
  except Exception as e:
72
+ print(f"Failed to load adapter; serving base model instead. Error: {e}")
73
 
74
+ def diagnose_ecg(image: Image.Image = None, temp: float = 0.4, max_tokens: int = 768):
75
  try:
76
  if image is None:
77
  return json.dumps({"error": "No image provided."})
 
97
  images=[image],
98
  padding=True,
99
  return_tensors="pt"
100
+ )
101
+ model_device = model.device if hasattr(model, "device") else torch.device(device)
102
+ inputs = {k: v.to(model_device) for k, v in inputs.items()}
103
 
104
  with torch.no_grad():
105
  generated_ids = model.generate(**inputs, max_new_tokens=int(max_tokens), temperature=float(temp), top_p=0.9, do_sample=True)
 
120
  inputs=[
121
  gr.Image(type="pil", label="ECG Image Scan"),
122
  gr.Slider(minimum=0.0, maximum=1.0, value=0.4, step=0.1, label="Temperature"),
123
+ gr.Slider(minimum=128, maximum=1536, value=768, step=128, label="Max Tokens")
124
  ],
125
  outputs=gr.Markdown(label="Clinical Report Output"),
126
  title="CardioAI Inference API",
kaggle_retrain_and_deploy.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %% [markdown]
2
+ # CardioAI Kaggle Notebook: Retrain + Deploy to Hugging Face Space
3
+ #
4
+ # This script is notebook-friendly (run cell by cell in Kaggle).
5
+ # Outcome:
6
+ # 1) Fine-tune LoRA adapter on ECG image dataset.
7
+ # 2) Push adapter to HF model repo.
8
+ # 3) Auto-update HF Space config so app serves the new adapter revision.
9
+
10
+ # %% Install deps (run once in a Kaggle cell)
11
+ # !pip -q install -U "transformers>=4.49.0" "datasets>=2.19.0" "accelerate>=0.34.0" "peft>=0.13.0" "huggingface_hub>=0.26.0" "Pillow>=10.0.0"
12
+ # !pip -q install -U "bitsandbytes>=0.46.1"
13
+ # # After installing/upgrading bitsandbytes on Kaggle, restart session once, then run all cells.
14
+
15
+ # %%
16
+ import os
17
+ import json
18
+ import random
19
+ from dataclasses import dataclass
20
+ from typing import Dict, Any, List
21
+
22
+ import torch
23
+ from datasets import load_dataset
24
+ from huggingface_hub import HfApi
25
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
26
+ from transformers import (
27
+ AutoProcessor,
28
+ BitsAndBytesConfig,
29
+ Qwen2VLForConditionalGeneration,
30
+ Trainer,
31
+ TrainingArguments
32
+ )
33
+
34
+ # %%
35
+ # ----------------------------
36
+ # CONFIG (edit these values)
37
+ # ----------------------------
38
+ BASE_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
39
+ DATASET_ID = "IdaFLab/ECG-Plot-Images" # Suitable ECG plot dataset used in your current pipeline
40
+ DATASET_SPLIT = "train[:3000]" # Raise when stable (e.g. full train)
41
+
42
+ HF_ADAPTER_REPO = "hssling/cardioai-adapter"
43
+ HF_SPACE_REPO = "hssling/cardioai-api" # Space repo to auto-point to newest adapter revision
44
+
45
+ OUTPUT_DIR = "/kaggle/working/cardioai_adapter"
46
+ SEED = 42
47
+
48
+ EPOCHS = 2
49
+ LR = 2e-4
50
+ TRAIN_BATCH_SIZE = 2
51
+ GRAD_ACCUM = 4
52
+ MAX_TOKENS = 768
53
+ LOAD_IN_4BIT = True
54
+
55
+ # %%
56
+ # ----------------------------
57
+ # Auth from Kaggle Secrets
58
+ # ----------------------------
59
+ try:
60
+ from kaggle_secrets import UserSecretsClient
61
+ _secrets = UserSecretsClient()
62
+ HF_TOKEN = _secrets.get_secret("HF_TOKEN")
63
+ except Exception as e:
64
+ raise RuntimeError("Missing Kaggle secret HF_TOKEN") from e
65
+
66
+ os.environ["HF_TOKEN"] = HF_TOKEN
67
+ api = HfApi(token=HF_TOKEN)
68
+
69
+ random.seed(SEED)
70
+ torch.manual_seed(SEED)
71
+
72
+ print("Authenticated to Hugging Face Hub.")
73
+
74
+ # %%
75
+ def has_compatible_bitsandbytes() -> bool:
76
+ try:
77
+ import bitsandbytes as bnb # type: ignore
78
+ ver = getattr(bnb, "__version__", "0.0.0")
79
+ major, minor, patch = [int(x) for x in ver.split(".")[:3]]
80
+ return (major, minor, patch) >= (0, 46, 1)
81
+ except Exception:
82
+ return False
83
+
84
+ # %%
85
+ # ----------------------------
86
+ # Load processor + base model
87
+ # ----------------------------
88
+ processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, token=HF_TOKEN, use_fast=False)
89
+
90
+ use_4bit_now = LOAD_IN_4BIT and torch.cuda.is_available() and has_compatible_bitsandbytes()
91
+ if use_4bit_now:
92
+ print("Using 4-bit quantization with bitsandbytes.")
93
+ bnb_config = BitsAndBytesConfig(
94
+ load_in_4bit=True,
95
+ bnb_4bit_quant_type="nf4",
96
+ bnb_4bit_compute_dtype=torch.float16,
97
+ bnb_4bit_use_double_quant=True
98
+ )
99
+ else:
100
+ print("bitsandbytes>=0.46.1 not available (or no CUDA). Falling back to fp16/bf16 load.")
101
+ bnb_config = None
102
+
103
+ model_kwargs = {
104
+ "pretrained_model_name_or_path": BASE_MODEL_ID,
105
+ "device_map": "auto",
106
+ "token": HF_TOKEN
107
+ }
108
+ if use_4bit_now:
109
+ model_kwargs["quantization_config"] = bnb_config
110
+ model_kwargs["torch_dtype"] = torch.float16
111
+ else:
112
+ model_kwargs["torch_dtype"] = torch.bfloat16 if torch.cuda.is_available() else torch.float32
113
+
114
+ model = Qwen2VLForConditionalGeneration.from_pretrained(**model_kwargs)
115
+ if use_4bit_now:
116
+ model = prepare_model_for_kbit_training(model)
117
+
118
+ lora_cfg = LoraConfig(
119
+ r=16,
120
+ lora_alpha=32,
121
+ lora_dropout=0.05,
122
+ bias="none",
123
+ task_type="CAUSAL_LM",
124
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj", "gate_proj"]
125
+ )
126
+ model = get_peft_model(model, lora_cfg)
127
+ model.print_trainable_parameters()
128
+
129
+ # %%
130
+ # ----------------------------
131
+ # Dataset and formatting
132
+ # ----------------------------
133
+ dataset = load_dataset(DATASET_ID, split=DATASET_SPLIT)
134
+ dataset = dataset.shuffle(seed=SEED)
135
+
136
+ label_map = {
137
+ 0: "Normal sinus rhythm with no significant ectopy.",
138
+ 1: "Supraventricular ectopic activity is present.",
139
+ 2: "Ventricular ectopic beats are present.",
140
+ 3: "Fusion beat pattern is present."
141
+ }
142
+
143
+ def to_train_example(ex: Dict[str, Any]) -> Dict[str, Any]:
144
+ # Keep mapping stable with your existing dataset schema.
145
+ finding = label_map.get(int(ex.get("type", 0)), "ECG abnormality present; clinical correlation advised.")
146
+
147
+ messages = [
148
+ {
149
+ "role": "system",
150
+ "content": "You are CardioAI, an expert cardiology assistant for ECG interpretation."
151
+ },
152
+ {
153
+ "role": "user",
154
+ "content": [
155
+ {"type": "image"},
156
+ {"type": "text", "text": "Analyze this ECG and provide rhythm, key abnormalities, and a short impression."}
157
+ ]
158
+ },
159
+ {
160
+ "role": "assistant",
161
+ "content": [{"type": "text", "text": finding}]
162
+ }
163
+ ]
164
+ text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
165
+ return {"image": ex["image"], "text": text}
166
+
167
+ train_ds = dataset.map(to_train_example, remove_columns=dataset.column_names)
168
+
169
+ @dataclass
170
+ class ECGCollator:
171
+ processor: Any
172
+ max_tokens: int = MAX_TOKENS
173
+
174
+ def __call__(self, batch: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
175
+ images = [x["image"].convert("RGB") for x in batch]
176
+ texts = [x["text"] for x in batch]
177
+ model_inputs = self.processor(
178
+ text=texts,
179
+ images=images,
180
+ return_tensors="pt",
181
+ padding=True,
182
+ truncation=True,
183
+ max_length=self.max_tokens
184
+ )
185
+ labels = model_inputs["input_ids"].clone()
186
+ # Ignore padding in loss
187
+ labels[labels == self.processor.tokenizer.pad_token_id] = -100
188
+ model_inputs["labels"] = labels
189
+ return model_inputs
190
+
191
+ collator = ECGCollator(processor=processor)
192
+
193
+ # %%
194
+ # ----------------------------
195
+ # Train
196
+ # ----------------------------
197
+ args = TrainingArguments(
198
+ output_dir=OUTPUT_DIR,
199
+ per_device_train_batch_size=TRAIN_BATCH_SIZE,
200
+ gradient_accumulation_steps=GRAD_ACCUM,
201
+ learning_rate=LR,
202
+ num_train_epochs=EPOCHS,
203
+ logging_steps=20,
204
+ save_strategy="epoch",
205
+ fp16=True,
206
+ remove_unused_columns=False,
207
+ report_to="none",
208
+ optim="paged_adamw_8bit"
209
+ )
210
+
211
+ trainer = Trainer(
212
+ model=model,
213
+ args=args,
214
+ train_dataset=train_ds,
215
+ data_collator=collator
216
+ )
217
+
218
+ trainer.train()
219
+
220
+ # %%
221
+ # ----------------------------
222
+ # Save + Push adapter
223
+ # ----------------------------
224
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
225
+ trainer.model.save_pretrained(OUTPUT_DIR) # PEFT adapter files
226
+ processor.save_pretrained(OUTPUT_DIR)
227
+
228
+ api.create_repo(HF_ADAPTER_REPO, repo_type="model", exist_ok=True)
229
+ commit_info = api.upload_folder(
230
+ folder_path=OUTPUT_DIR,
231
+ repo_id=HF_ADAPTER_REPO,
232
+ repo_type="model",
233
+ commit_message="Kaggle retrain: refresh ECG LoRA adapter"
234
+ )
235
+
236
+ if hasattr(commit_info, "oid"):
237
+ adapter_revision = commit_info.oid
238
+ else:
239
+ adapter_revision = "main"
240
+
241
+ print(f"Adapter pushed: https://huggingface.co/{HF_ADAPTER_REPO}")
242
+ print(f"Adapter revision: {adapter_revision}")
243
+
244
+ # %%
245
+ # ----------------------------
246
+ # Update Space runtime config
247
+ # ----------------------------
248
+ space_cfg = {
249
+ "base_model": BASE_MODEL_ID,
250
+ "adapter_repo": HF_ADAPTER_REPO,
251
+ "adapter_revision": adapter_revision
252
+ }
253
+
254
+ api.upload_file(
255
+ path_or_fileobj=json.dumps(space_cfg, indent=2).encode("utf-8"),
256
+ path_in_repo="model_config.json",
257
+ repo_id=HF_SPACE_REPO,
258
+ repo_type="space",
259
+ commit_message=f"Point space to adapter revision {adapter_revision}"
260
+ )
261
+
262
+ try:
263
+ api.restart_space(repo_id=HF_SPACE_REPO)
264
+ print("Space restart requested.")
265
+ except Exception as e:
266
+ print(f"Space restart API call failed (manual restart may be needed): {e}")
267
+
268
+ print(f"Space URL: https://huggingface.co/spaces/{HF_SPACE_REPO}")
269
+ print("Done. Your app can continue using the same Space endpoint.")
model_config.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "base_model": "Qwen/Qwen2-VL-2B-Instruct",
3
+ "adapter_repo": "hssling/cardioai-adapter",
4
+ "adapter_revision": "9aca394c57a984d7d314d36decff40f72858538a"
5
+ }
requirements.txt CHANGED
@@ -1,9 +1,10 @@
1
- torch>=2.0
2
- transformers>=4.40.0
3
- accelerate
4
- peft
5
- bitsandbytes
6
- datasets
7
- huggingface-hub<0.28.0
8
- gradio>=4.0.0
9
- Pillow
 
 
1
+ torch>=2.1
2
+ transformers==4.49.0
3
+ accelerate>=0.34.0
4
+ peft==0.14.0
5
+ bitsandbytes>=0.46.1
6
+ datasets>=2.19.0
7
+ huggingface-hub>=0.28.1,<0.30.0
8
+ gradio==4.44.1
9
+ gradio_client==1.3.0
10
+ Pillow>=10.0.0