Spaces:

AKSazgar
/

ECG-Instruct-Llama-3.2-11B-Vision

Sleeping

AKSazgar committed on Oct 28

Commit

7040489

1 Parent(s): 43f83e5

Add ECG AI7 Gradio application

- Add app.py with full ECG interpretation functionality
- Support for English and Farsi output languages
- Add requirements.txt with necessary dependencies
- Update README.md with detailed documentation

Files changed (3) hide show

README.md +34 -2
app.py +232 -0
requirements.txt +6 -0

README.md CHANGED Viewed

@@ -1,12 +1,44 @@
 ---
 title: ECG Instruct Llama 3.2 11B Vision
-emoji: 📈
 colorFrom: blue
 colorTo: indigo
 sdk: gradio
 sdk_version: 5.49.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: ECG Instruct Llama 3.2 11B Vision
+emoji: 🫀
 colorFrom: blue
 colorTo: indigo
 sdk: gradio
 sdk_version: 5.49.1
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
+# 🫀 ECG AI7 - Intelligent ECG Interpretation
+An AI-powered ECG interpretation tool using Llama 3.2 11B Vision, fine-tuned specifically for electrocardiogram analysis.
+## Features
+- **Advanced ECG Analysis**: Powered by Llama 3.2 11B Vision model fine-tuned on ECG data
+- **Bilingual Support**: Generate reports in both English and Farsi (Persian)
+- **Clinical Context**: Add patient information for more personalized interpretations
+- **User-Friendly Interface**: Simple upload and analyze workflow
+## How to Use
+1. Upload an ECG image (12-lead ECG works best)
+2. Optionally add patient information or clinical notes
+3. Select your preferred output language (English or Farsi)
+4. Click Submit and wait for the AI analysis
+## Important Disclaimer
+⚠️ This tool is for **educational and research purposes only**. AI-generated interpretations should be verified by licensed cardiologists. Always consult with qualified healthcare professionals for medical decisions.
+## Model
+This Space uses the [ECG-Instruct-Llama-3.2-11B-Vision](https://huggingface.co/AKSazgar/ECG-Instruct-Llama-3.2-11B-Vision) model.
+## Technical Details
+- **Model**: Llama 3.2 11B Vision (fine-tuned)
+- **Framework**: PyTorch + Transformers
+- **Interface**: Gradio
+- **Languages**: English, Farsi (Persian)

app.py ADDED Viewed

	@@ -0,0 +1,232 @@

+#!/usr/bin/env python3
+"""
+ECG AI7 - ECG Interpretation using Llama 3.2 11B Vision
+Gradio interface for Hugging Face Spaces
+"""
+import torch
+from transformers import MllamaForConditionalGeneration, AutoProcessor, TextStreamer
+from PIL import Image
+import gradio as gr
+import os
+# Model configuration
+# Hub repository id; the same id is used below for both the model weights and
+# the processor.
+MODEL_ID = "AKSazgar/ECG-Instruct-Llama-3.2-11B-Vision"
+print(f"Loading model: {MODEL_ID}")
+print("This may take a few minutes on first load...")
+# Load model and processor
+# NOTE: these statements run at import time, so importing this module performs
+# the full model load before any function below can be called.
+model = MllamaForConditionalGeneration.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,  # weights are requested in bfloat16
+    device_map="auto",  # device placement is left to the library rather than hard-coded here
+)
+# `processor` is used below for chat templating, input encoding and decoding.
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+print("Model loaded successfully!")
+# Helper functions
+def _strip_assistant_prefix_safe(s: str) -> str:
+    """
+    Remove a leading chat-role header line from decoded model text
+    A header is stripped only when the first line consists solely of a role
+    name ("user", "assistant", "User" or "Assistant"), optionally followed by
+    a colon. Text that merely begins with those letters (e.g. "Users with
+    atrial fibrillation ...") is returned untouched, and only the header line
+    itself is dropped, never the paragraph that follows it.
+    Args:
+        s: decoded text, possibly starting with a role header
+    Returns:
+        The text without leading whitespace and without the role header line
+    """
+    s = s.lstrip()
+    role_headers = ("user", "assistant", "User", "Assistant")
+    first_line, newline, remainder = s.partition("\n")
+    # Compare the whole first line, not just a prefix: a plain startswith()
+    # check also matches ordinary words such as "Users" or "Assistants".
+    header = first_line.strip().rstrip(":").rstrip()
+    if newline and header in role_headers:
+        return remainder.lstrip()
+    return s
+def generate_full_report(image_path: str, query: str, *,
+                         max_new_tokens: int = 1600,
+                         do_stream: bool = False,
+                         temperature: float = 0.0) -> str:
+    """
+    Generate ECG interpretation report
+    Only the newly generated tokens are decoded, so the returned text does not
+    repeat the prompt (role headers and the query) that was fed to the model.
+    Args:
+        image_path: local path to ECG image
+        query: instruction string for the model
+        max_new_tokens: maximum tokens to generate
+        do_stream: whether to stream output (for terminal use)
+        temperature: sampling temperature; values <= 0.0 select greedy decoding
+    Returns:
+        Decoded interpretation report (completion only, prompt excluded)
+    """
+    # The context manager closes the underlying file; convert() returns a new,
+    # fully loaded image that stays valid after the file is closed.
+    with Image.open(image_path) as raw_image:
+        image = raw_image.convert("RGB")
+    # Build single user turn: image + text
+    messages = [
+        {"role": "user", "content": [
+            {"type": "image"},
+            {"type": "text", "text": query}
+        ]}
+    ]
+    # Create prompt compatible with processor
+    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = processor(text=input_text, images=image, return_tensors="pt")
+    # Move inputs to same device as model
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+    # generate() returns prompt + completion; remember where the prompt ends
+    prompt_len = inputs["input_ids"].shape[-1]
+    # Setup streamer if requested
+    streamer = TextStreamer(processor.tokenizer, skip_prompt=True) if do_stream else None
+    gen_kwargs = {
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "use_cache": True,
+    }
+    if temperature > 0.0:
+        gen_kwargs.update(do_sample=True, temperature=temperature, top_p=1.0)
+    else:
+        # Greedy decoding: sampling-only arguments are omitted because they
+        # are ignored when do_sample=False and only trigger warnings.
+        gen_kwargs["do_sample"] = False
+    # Generate
+    with torch.no_grad():
+        out_ids = model.generate(**inputs, **gen_kwargs)
+    # Decode only the tokens produced after the prompt
+    new_ids = out_ids[:, prompt_len:]
+    full_raw = processor.batch_decode(new_ids, skip_special_tokens=True)[0]
+    # Safety net in case the completion itself starts with a role header
+    return _strip_assistant_prefix_safe(full_raw)
+def translate_to_farsi(english_text: str, max_new_tokens: int = 1600) -> str:
+    """
+    Translate English text to Persian using the same model
+    Only the newly generated tokens are decoded, so the result contains the
+    translation alone and not the instruction or the English source text.
+    Args:
+        english_text: text to translate
+        max_new_tokens: maximum tokens to generate
+    Returns:
+        Persian translation; blank input is returned unchanged
+    """
+    # Nothing to translate: skip an expensive generation call
+    if not english_text.strip():
+        return english_text
+    msgs = [
+        {"role": "user", "content": [
+            {"type": "text",
+             "text": "فقط متن زیر را به فارسی روان ترجمه کن و فقط ترجمه را برگردان:\n\n" + english_text}
+        ]}
+    ]
+    prompt = processor.apply_chat_template(msgs, add_generation_prompt=True)
+    inputs = processor(text=prompt, return_tensors="pt")
+    # Move to device
+    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+    # generate() returns prompt + completion; remember where the prompt ends
+    prompt_len = inputs["input_ids"].shape[-1]
+    with torch.no_grad():
+        # Greedy decoding; sampling-only arguments (temperature, top_p) are
+        # omitted because they are ignored when do_sample=False.
+        out = model.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            do_sample=False,
+        )
+    # Decode only the tokens produced after the prompt
+    ans = processor.batch_decode(out[:, prompt_len:], skip_special_tokens=True)[0]
+    return _strip_assistant_prefix_safe(ans)
+# Gradio interface function
+def analyze_ecg_gradio(image, text_instruction="", language="Farsi"):
+    """
+    Main function for Gradio interface
+    Args:
+        image: uploaded ECG image filepath (string path); None or empty when
+            the user submitted without uploading an image
+        text_instruction: optional clinical note / context
+        language: output language (English or Farsi)
+    Returns:
+        Full AI-generated ECG interpretation report, or a short message
+        starting with "Error:" when the request cannot be processed
+    """
+    import traceback
+    # Submitting without an upload yields no path; answer with a clear message
+    # instead of letting the image loader fail.
+    if not image:
+        return "Error: no ECG image was provided. Please upload an ECG image and try again."
+    # Treat a whitespace-only note as "no note" so the prompt stays clean
+    note = (text_instruction or "").strip()
+    try:
+        print(f"Received image: {image}")
+        print(f"Text instruction: {text_instruction}")
+        print(f"Language: {language}")
+        # Build query
+        query = "You are an expert cardiologist. "
+        if note:
+            query += f"Patient info: {note}. "
+        query += "Write an in-depth diagnosis report from this ECG data, including the final diagnosis."
+        # Generate report in English
+        print("Generating report in English...")
+        report = generate_full_report(image, query, max_new_tokens=1600, do_stream=False)
+        # Translate to Farsi if requested
+        if language == "Farsi":
+            print("Translating to Farsi...")
+            report = translate_to_farsi(report, max_new_tokens=1600)
+        print("Report generated successfully!")
+        return report
+    except Exception as e:
+        # Top-level UI boundary: keep the full traceback in the server log,
+        # but show the user only a short message so internal paths and code
+        # details are not exposed in the report box.
+        print(f"Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}")
+        return f"Error: {str(e)}"
+# Create Gradio interface
+# Inputs are passed positionally to analyze_ecg_gradio as
+# (image filepath, clinical note, output language).
+demo = gr.Interface(
+    fn=analyze_ecg_gradio,
+    inputs=[
+        gr.Image(type="filepath", label="ECG Image"),
+        gr.Textbox(
+            lines=2,
+            placeholder="Optional: Enter patient info or clinical notes (e.g., '55-year-old male with chest pain')",
+            label="Clinical Note"
+        ),
+        gr.Dropdown(
+            choices=["English", "Farsi"],
+            value="Farsi",
+            label="Output Language"
+        ),
+    ],
+    outputs=gr.Textbox(
+        label="AI ECG Report",
+        lines=20,
+        show_copy_button=True
+    ),
+    title="🫀 ECG AI7 - Intelligent ECG Interpretation",
+    description="""
+    Upload an ECG image to get an AI-powered interpretation.
+    **Features:**
+    - Advanced ECG analysis using Llama 3.2 11B Vision
+    - Support for English and Farsi (Persian) output
+    - Optional patient context for more personalized reports
+    **Note:** This is an AI assistant tool and should not replace professional medical diagnosis.
+    """,
+    # Examples are offered only when the sample image exists in the working directory
+    examples=[
+        ["example_ecg.jpg", "55-year-old male with chest pain", "English"],
+        ["example_ecg.jpg", "بیمار 55 ساله مرد با درد قفسه سینه", "Farsi"],
+    ] if os.path.exists("example_ecg.jpg") else None,
+    article="""
+    ### About
+    This application uses a fine-tuned Llama 3.2 11B Vision model specifically trained for ECG interpretation.
+    ### How to Use
+    1. Upload an ECG image (12-lead ECG works best)
+    2. Optionally add patient information or clinical context
+    3. Select your preferred output language (English or Farsi)
+    4. Click Submit and wait for the AI analysis
+    ### Important Disclaimer
+    This tool is for educational and research purposes. Always consult with qualified healthcare
+    professionals for medical decisions. AI-generated interpretations should be verified by licensed
+    cardiologists.
+    ---
+    Model: [ECG-Instruct-Llama-3.2-11B-Vision](https://huggingface.co/AKSazgar/ECG-Instruct-Llama-3.2-11B-Vision)
+    """,
+    theme=gr.themes.Soft(),
+    # Gradio 5 (the Space pins sdk_version 5.49.1) renamed `allow_flagging`
+    # to `flagging_mode`; the old name is deprecated.
+    flagging_mode="never",
+)
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+torch>=2.0.0
+transformers>=4.45.0
+accelerate>=0.20.0
+Pillow>=10.0.0
+sentencepiece>=0.1.99
+protobuf>=3.20.0