hssling commited on
Commit
8af47a3
·
0 Parent(s):

Initialize CardioAI training and deployment pipeline

Browse files
Files changed (5) hide show
  1. .github/workflows/sync_to_hub.yml +23 -0
  2. README.md +15 -0
  3. app.py +81 -0
  4. requirements.txt +8 -0
  5. train_ecg.py +125 -0
.github/workflows/sync_to_hub.yml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
name: Sync to Hugging Face Hub

on:
  push:
    branches: [main, master]

  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      # Full history + LFS objects are required so the force-push mirrors
      # the complete repository (Spaces reject shallow pushes of model files).
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true

      - name: Push to Hugging Face Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          git remote add space https://hssling:$HF_TOKEN@huggingface.co/spaces/hssling/cardioai-api
          # Push whatever commit triggered the workflow. The previous refspec
          # `master:main` failed for pushes on `main`, because checkout leaves
          # no local `master` ref in that case; HEAD:main works for both
          # trigger branches.
          git push --force space HEAD:main
README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CardioAI ECG API
3
+ emoji: ❤️‍🔥
4
+ colorFrom: red
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: "4.26.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ python_version: "3.10"
11
+ ---
12
+
13
+ # CardioAI Fine-Tuned Model API
14
+
15
+ Training logic and execution backend for Kaggle-to-HuggingFace continuous deployment.
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from PIL import Image
import json

# Base vision-language model and the fine-tuned LoRA adapter layered on top.
MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
ADAPTER_ID = "hssling/cardioai-adapter"

print("Starting App Engine...")
# Half precision only on GPU; CPU inference stays in float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto"
)

# Best-effort adapter load: if the Hub repo is missing or incompatible the
# app deliberately continues with the base model rather than crashing.
if ADAPTER_ID:
    print(f"Loading custom fine-tuned LoRA weights: {ADAPTER_ID}")
    try:
        model.load_adapter(ADAPTER_ID)
    except Exception as e:
        print(f"Failed to load adapter. Using base model. Error: {e}")
def diagnose_ecg(image: Image.Image = None, temp: float = 0.2, max_tokens: int = 1500):
    """Run the vision-language model on one ECG scan and return a clinical report.

    Args:
        image: ECG image to analyze; ``None`` returns a JSON error string
            (kept for backward compatibility with the original API).
        temp: Sampling temperature. A value of 0 (the UI slider's minimum)
            now selects greedy decoding — previously ``temperature=0.0`` with
            ``do_sample=True`` raised a ValueError inside ``model.generate``.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded model output as a string, or an ``"Error: ..."`` string
        if anything inside inference fails.
    """
    try:
        if image is None:
            return json.dumps({"error": "No image provided."})

        system_prompt = "You are CardioAI, a highly advanced expert Cardiologist. Analyze the provided Electrocardiogram (ECG/EKG)."
        user_prompt = "Analyze this 12-lead Electrocardiogram trace and extract the detailed clinical rhythms and pathological findings in a structured format."

        messages = [
            {"role": "system", "content": system_prompt},
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": user_prompt}
                ]
            }
        ]

        text_input = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        inputs = processor(
            text=[text_input],
            images=[image],
            padding=True,
            return_tensors="pt"
        ).to(device)

        # Fix: only pass sampling parameters when actually sampling.
        # transformers rejects temperature <= 0 when do_sample=True, and the
        # Temperature slider allows 0.0; treat that as greedy decoding.
        do_sample = float(temp) > 0.0
        gen_kwargs = {"max_new_tokens": int(max_tokens), "do_sample": do_sample}
        if do_sample:
            gen_kwargs.update(temperature=float(temp), top_p=0.9)

        with torch.no_grad():
            generated_ids = model.generate(**inputs, **gen_kwargs)

        # Strip the prompt tokens so only the newly generated text is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
        ]

        output_text = processor.batch_decode(generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

        return output_text

    except Exception as e:
        # Surface the failure to the UI instead of crashing the Space.
        return f"Error: {str(e)}"
# Gradio front end: one image input plus two generation controls, rendering
# the model's report as Markdown.
demo = gr.Interface(
    fn=diagnose_ecg,
    title="CardioAI Inference API",
    description="Fine-tuned Medical LLM for Electrocardiogram (ECG) Tracings.",
    inputs=[
        gr.Image(type="pil", label="ECG Image Scan"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
        gr.Slider(minimum=256, maximum=4096, value=1500, step=256, label="Max Tokens"),
    ],
    outputs=gr.Markdown(label="Clinical Report Output"),
)

if __name__ == "__main__":
    # Local-only serving; no public share link.
    demo.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
# Model training / inference stack
torch>=2.0
transformers>=4.40.0
accelerate
# LoRA fine-tuning and 8-bit optimizer support
peft
bitsandbytes
datasets
# Serving UI and image handling
gradio>=4.0.0
Pillow
train_ecg.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import os
from huggingface_hub import login

# 1. Configuration targeting ECG Image Scans
MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"   # base vision-language model to fine-tune
DATASET_ID = "hssling/ECG-10k-Control"   # Hub dataset of ECG images + findings
OUTPUT_DIR = "./cardioai-adapter"        # local directory for the trained LoRA adapter
HF_HUB_REPO = "hssling/cardioai-adapter" # Hub repo the adapter is pushed to after training
def main():
    """Fine-tune Qwen2-VL on the ECG dataset with LoRA and push the adapter to the Hub.

    Steps: authenticate (Kaggle secrets, best-effort) -> load model/processor ->
    attach LoRA -> format + collate the dataset -> train -> save and push.
    """
    # Attempt to authenticate with Hugging Face via Kaggle Secrets.
    # Failure is non-fatal: training still runs, only the final push may fail.
    try:
        from kaggle_secrets import UserSecretsClient
        user_secrets = UserSecretsClient()
        hf_token = user_secrets.get_secret("HF_TOKEN")
        login(token=hf_token)
        print("Successfully logged into Hugging Face Hub using Kaggle Secrets.")
    except Exception as e:
        print("Could not log in via Kaggle Secrets.", e)

    print(f"Loading processor and model: {MODEL_ID}")

    processor = AutoProcessor.from_pretrained(MODEL_ID)

    # fp16 load (NOTE: despite the original comment, this is NOT 4-bit
    # quantization — no BitsAndBytesConfig is passed).
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
    )

    print("Applying LoRA parameters...")
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        bias="none",
    )
    model = get_peft_model(model, lora_config)

    print(f"Loading dataset: {DATASET_ID}")
    dataset = load_dataset(DATASET_ID, split="train")  # Using the full 10k ECG dataset

    def format_data(example):
        # Build one chat-formatted training example; the target text is the
        # first available findings-like column, with a generic fallback.
        findings = example.get("findings") or example.get("text") or example.get("description") or "ECG tracing findings."
        messages = [
            {
                "role": "system",
                "content": "You are CardioAI, a highly advanced expert Cardiologist. Analyze the provided Electrocardiogram (ECG/EKG)."
            },
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": "Analyze this 12-lead Electrocardiogram trace and extract the detailed clinical rhythms and pathological findings in a structured format."}
                ]
            },
            {
                "role": "assistant",
                "content": [
                    {"type": "text", "text": str(findings)}
                ]
            }
        ]
        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        return {"text": text, "image": example["image"]}

    formatted_dataset = dataset.map(format_data, remove_columns=dataset.column_names)

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        logging_steps=50,
        num_train_epochs=3,     # Train extensively across the entire 10k dataset 3 times
        save_strategy="epoch",  # Save at the end of every epoch
        fp16=True,
        optim="paged_adamw_8bit",
        remove_unused_columns=False,
        report_to="none"
    )

    def collate_fn(examples):
        # Tokenize text + images into one padded batch.
        texts = [ex["text"] for ex in examples]
        images = [ex["image"] for ex in examples]
        batch = processor(
            text=texts,
            images=images,
            padding=True,
            return_tensors="pt"
        )
        # Fix: mask padding positions to -100 so the loss ignores them.
        # The original cloned input_ids verbatim, which trained the model to
        # predict pad tokens on every padded batch.
        labels = batch["input_ids"].clone()
        pad_id = processor.tokenizer.pad_token_id
        if pad_id is not None:
            labels[labels == pad_id] = -100
        batch["labels"] = labels
        return batch

    print("Starting fine-tuning...")
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=formatted_dataset,
        data_collator=collate_fn
    )

    trainer.train()

    print(f"Saving fine-tuned adapter to {OUTPUT_DIR}")
    trainer.save_model(OUTPUT_DIR)
    processor.save_pretrained(OUTPUT_DIR)

    print(f"Pushing model weights to Hugging Face Hub: {HF_HUB_REPO}...")
    try:
        trainer.model.push_to_hub(HF_HUB_REPO)
        processor.push_to_hub(HF_HUB_REPO)
        print(f"✅ Success! Your model is now live at: https://huggingface.co/{HF_HUB_REPO}")
    except Exception as e:
        print(f"❌ Failed to push to Hugging Face Hub. Error: {e}")

if __name__ == "__main__":
    main()