hssling committed on
Commit
066afed
·
1 Parent(s): 1bede02

Switch to massive PULSE-ECG real-world dataset

Browse files
Files changed (1) hide show
  1. train_ecg.py +3 -3
train_ecg.py CHANGED
@@ -7,7 +7,7 @@ from huggingface_hub import login
7
 
8
  # 1. Configuration targeting ECG Image Scans
9
  MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
10
- DATASET_ID = "hssling/ECG-10k-Control"
11
  OUTPUT_DIR = "./cardioai-adapter"
12
  HF_HUB_REPO = "hssling/cardioai-adapter"
13
 
@@ -46,7 +46,7 @@ def main():
46
 
47
  print(f"Loading dataset: {DATASET_ID}")
48
  try:
49
- dataset = load_dataset(DATASET_ID, split="train") # Using the full 10k ECG dataset
50
  except Exception as e:
51
  print(f"Warning: {DATASET_ID} not found. Synthesizing a robust mock dataset for algorithmic testing.")
52
  from datasets import Dataset
@@ -58,7 +58,7 @@ def main():
58
  dataset = Dataset.from_dict({"image": dummy_images, "findings": dummy_findings})
59
 
60
  def format_data(example):
61
- findings = example.get("findings") or example.get("text") or example.get("description") or "ECG tracing findings."
62
  messages = [
63
  {
64
  "role": "system",
 
7
 
8
  # 1. Configuration targeting ECG Image Scans
9
  MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
10
+ DATASET_ID = "PULSE-ECG/ECGInstruct"
11
  OUTPUT_DIR = "./cardioai-adapter"
12
  HF_HUB_REPO = "hssling/cardioai-adapter"
13
 
 
46
 
47
  print(f"Loading dataset: {DATASET_ID}")
48
  try:
49
+ dataset = load_dataset(DATASET_ID, split="train[:10000]") # Using 10k real ECG items
50
  except Exception as e:
51
  print(f"Warning: {DATASET_ID} not found. Synthesizing a robust mock dataset for algorithmic testing.")
52
  from datasets import Dataset
 
58
  dataset = Dataset.from_dict({"image": dummy_images, "findings": dummy_findings})
59
 
60
  def format_data(example):
61
+ findings = example.get("findings") or example.get("output") or example.get("text") or example.get("description") or "ECG tracing findings."
62
  messages = [
63
  {
64
  "role": "system",