hssling committed on
Commit
f0e7aa1
·
1 Parent(s): 26841c9

Switch to IdaFLab/ECG-Plot-Images Parquet dataset to prevent HTTP bottlenecks

Browse files
Files changed (1) hide show
  1. train_ecg.py +13 -4
train_ecg.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import login
7
 
8
  # 1. Configuration targeting ECG Image Scans
9
  MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
10
- DATASET_ID = "edcci/GenECG"
11
  OUTPUT_DIR = "./cardioai-adapter"
12
  HF_HUB_REPO = "hssling/cardioai-adapter"
13
 
@@ -46,8 +46,8 @@ def main():
46
 
47
  print(f"Loading dataset: {DATASET_ID}")
48
  try:
49
- # edcci/GenECG contains real generated image matrices of PTB-XL
50
- dataset = load_dataset(DATASET_ID, "A", split="train[:2000]")
51
  except Exception as e:
52
  print(f"Warning: {DATASET_ID} not found. Synthesizing a robust mock dataset for algorithmic testing.")
53
  from datasets import Dataset
@@ -59,7 +59,16 @@ def main():
59
  dataset = Dataset.from_dict({"image": dummy_images, "findings": dummy_findings})
60
 
61
  def format_data(example):
62
- findings = "Standard clinical 12-lead ECG tracing. Real signal derived from PTB-XL."
 
 
 
 
 
 
 
 
 
63
  messages = [
64
  {
65
  "role": "system",
 
7
 
8
  # 1. Configuration targeting ECG Image Scans
9
  MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
10
+ DATASET_ID = "IdaFLab/ECG-Plot-Images"
11
  OUTPUT_DIR = "./cardioai-adapter"
12
  HF_HUB_REPO = "hssling/cardioai-adapter"
13
 
 
46
 
47
  print(f"Loading dataset: {DATASET_ID}")
48
  try:
49
+ # Load high-quality synthetic/real ECG plots in Parquet format to prevent HTTP bottleneck
50
+ dataset = load_dataset(DATASET_ID, split="train[:2000]")
51
  except Exception as e:
52
  print(f"Warning: {DATASET_ID} not found. Synthesizing a robust mock dataset for algorithmic testing.")
53
  from datasets import Dataset
 
59
  dataset = Dataset.from_dict({"image": dummy_images, "findings": dummy_findings})
60
 
61
  def format_data(example):
62
+ label_map = {
63
+ 0: "Normal Sinus Rhythm. No significant ectopic activity.",
64
+ 1: "Supraventricular Ectopic Beat (SVEB). Premature atrial or junctional contraction.",
65
+ 2: "Ventricular Ectopic Beat (VEB). Premature ventricular contraction.",
66
+ 3: "Fusion of ventricular and normal beat."
67
+ }
68
+ # In IdaFLab/ECG-Plot-Images, label is stored in 'type'
69
+ lbl = example.get("type", 0)
70
+ findings = label_map.get(lbl, "Standard clinical ECG tracing.")
71
+
72
  messages = [
73
  {
74
  "role": "system",