# Spaces: Synaptics / SR100-Model-Compiler
# Running on Zero
# File size: 11,770 Bytes

import glob
import gradio as gr
import tempfile
import os
import sr100_model_compiler
import html
import pathlib
import spaces

# ---------- Helpers ----------

def _resolve_uploaded_path(uploaded):
    """
    Normalize a Gradio File input into a filesystem path.

    Handles: str, os.PathLike, dict with {path|name}, file-like objects with
    .path/.name, or a list/tuple of the above (only the first element is used).

    Returns the resolved path, or None when no usable path can be extracted
    (None input, empty list/tuple, or an object without a non-empty path/name).
    """
    if uploaded is None:
        return None
    if isinstance(uploaded, (list, tuple)):
        return _resolve_uploaded_path(uploaded[0]) if uploaded else None
    if isinstance(uploaded, str):
        return uploaded
    # Path-like objects (e.g. pathlib.Path) expose ``.name`` as the bare file
    # name, so they must be converted here, before the attribute probing below
    # would silently drop the directory part.
    if isinstance(uploaded, os.PathLike):
        return os.fspath(uploaded)
    if isinstance(uploaded, dict):
        return uploaded.get("path") or uploaded.get("name")
    # Mirror the dict branch: an empty/None ``path`` falls back to ``name``
    # instead of being returned as-is.
    for attr in ("path", "name"):
        candidate = getattr(uploaded, attr, None)
        if candidate:
            return candidate
    return None


# Inline style shared by every <pre> block that shows log or error text.
_PRE_STYLE = (
    "white-space:pre-wrap; background:#f6f8fa; padding:8px; "
    "border-radius:6px; color:#000;"
)


def _banner_html(message, color="#d32f2f", font_size="1.2em"):
    """Return a bold, colored status banner <div>; *message* is inserted as raw HTML."""
    return (
        f"<div style='color:{color}; font-weight:bold; font-size:{font_size};'>"
        f"{message}</div>"
    )


def _preformatted_html(text):
    """Return *text* HTML-escaped inside the styled <pre> block used for logs."""
    return f"<pre style='{_PRE_STYLE}'>{html.escape(str(text))}</pre>"


def _failure_html(details):
    """Return the 'optimization failed' banner followed by the escaped details."""
    return (
        _banner_html("❌ ERROR: Model optimization failed")
        + "<div style='margin-top:0.5em;color:#000;'>Error details:</div>"
        + _preformatted_html(details)
    )


def _memory_usage_text(used, limit):
    """Describe memory usage in kB plus the percentage of *limit* it occupies."""
    used_kb = used / 1000.0
    limit_kb = limit / 1000.0
    if not limit:
        # A zero limit would otherwise raise ZeroDivisionError below.
        return f"{used_kb:0.3f} kB (n/a of {limit_kb:0.3f} kB limit)"
    percent = used * 100.0 / limit
    return f"{used_kb:0.3f} kB ({percent:0.1f}% of {limit_kb:0.3f} kB limit)"


def _metrics_table_html(results):
    """Render the optimizer's metrics dict as a two-column HTML table.

    Raises KeyError when an expected key is absent and TypeError when a value
    is not numeric; the caller turns both into a user-facing note.
    """
    metrics = [
        ("Clock Frequency", f"{results['core_clock'] / 1.0e6:0.1f} MHz"),
        ("Inference Time", f"{results['inference_time'] * 1000.0:0.1f} ms"),
        ("Inferences Per Second", f"{results['inferences_per_sec']:0.1f} fps"),
        ("Arena Cache Size", f"{results['arena_cache_size'] / 1000.0:0.3f} kB"),
        ("Model Size", f"{results['weights_size'] / 1000.0:0.3f} kB"),
        # These two are free-form values from the compiler, so escape them
        # before they are placed into the HTML.
        ("Model Location", html.escape(str(results['model_loc']))),
        ("System Configuration", html.escape(str(results['system_config']))),
        ("VMEM Size", _memory_usage_text(results['vmem_size'], results['vmem_size_limit'])),
        ("LPMEM Size", _memory_usage_text(results['lpmem_size'], results['lpmem_size_limit'])),
    ]
    rows = "".join(
        "<tr>"
        f"<td style='padding:4px 12px; font-weight:bold; border-bottom:1px solid #eee; color:#000;'>{label}</td>"
        f"<td style='padding:4px 12px; border-bottom:1px solid #eee; color:#000;'>{value}</td>"
        "</tr>"
        for label, value in metrics
    )
    return (
        "<table style='margin-top:1em; border-collapse:collapse; color:#000;'>"
        + rows + "</table>"
    )


@spaces.GPU
def compile_model(model_name, vmem_value, lpmem_value, uploaded_model):
    """Run the SR100 optimizer on a .tflite model and return an HTML report.

    Args:
        model_name: Path of the model picked in the dropdown; used only when
            no file was uploaded.
        vmem_value: VMEM budget from the slider (labelled in kB); passed to
            the optimizer as ``int(vmem_value * 1000)``.
        lpmem_value: LPMEM budget from the slider (labelled in kB); passed to
            the optimizer as ``int(lpmem_value * 1000)``.
        uploaded_model: Value of the Gradio File component, normalized with
            ``_resolve_uploaded_path``.

    Returns:
        An HTML string: an error banner, a CPU-only warning with the compiler
        log, or a success/failure banner followed by a metrics table.
        Exceptions raised while copying or optimizing the model are reported
        in the returned HTML instead of propagating.
    """
    # Decide the source model path (uploaded has priority)
    model_path = _resolve_uploaded_path(uploaded_model) or model_name

    # Basic validations
    if not model_path or not os.path.exists(model_path):
        return _banner_html(
            "❌ ERROR: Could not locate the model file you selected or uploaded.",
            font_size="1.1em",
        )
    if pathlib.Path(model_path).suffix.lower() != ".tflite":
        return _banner_html(
            "❌ ERROR: Please provide a <code>.tflite</code> model file.",
            font_size="1.1em",
        )

    # The model is copied into a scratch directory that is removed as soon as
    # the optimizer has returned; only the in-memory results are used afterwards.
    with tempfile.TemporaryDirectory() as out_dir:
        print(f"Created temporary directory: {out_dir}")
        try:
            vmem_size_limit = int(vmem_value * 1000)
            lpmem_size_limit = int(lpmem_value * 1000)

            # Work on a copy whose stem has every '.' replaced by '_'.
            # NOTE(review): presumably the toolchain mishandles extra dots in
            # the file name — confirm.
            root, ext = os.path.splitext(os.path.basename(model_path))
            safe_root = root.replace('.', '_')
            temp_model_path = os.path.join(out_dir, f"{safe_root}{ext}")

            print(f"Copying model to sanitized path: {temp_model_path}")
            with open(model_path, "rb") as src, open(temp_model_path, "wb") as dst:
                dst.write(src.read())

            print(f"Starting model optimization for {temp_model_path}")
            print(f"VMEM limit: {vmem_size_limit}, LPMEM limit: {lpmem_size_limit}")

            success, results = sr100_model_compiler.sr100_model_optimizer(
                model_file=temp_model_path,
                vmem_size_limit=vmem_size_limit,
                lpmem_size_limit=lpmem_size_limit,
                optimize='Performance'
            )

            print(f"Optimization complete. Success: {success}")
            print(f"Results: {results}")
        except Exception as e:
            # UI boundary: report any failure to the user instead of crashing.
            error_message = str(e)
            print(f"Exception during model optimization: {error_message}")
            return _failure_html(error_message)

    if not results:
        return _banner_html("❌ ERROR: Optimization returned empty results")

    if not success:
        print(f"Optimization reported failure. Reason: {results.get('failure_reason', 'Unknown')}")

    # Zero NPU cycles means nothing was mapped to the NPU (CPU-only model).
    # NOTE(review): a missing 'cycles_npu' key is treated the same as 0 and is
    # therefore also reported as CPU-only — confirm this is intended.
    if results.get('cycles_npu', 0) == 0:
        return "".join([
            _banner_html(
                "⚠️ CPU-ONLY: Model fits in memory but no operators mapped to the NPU",
                color="#e65100",
            ),
            "<div style='color:#000; margin-top:0.25em;'>"
            "This typically means the model contains ops not supported by the SR100 NPU. "
            "Please review/convert unsupported ops or choose an NPU-friendly model.</div>",
            "<div style='margin-top:0.5em;color:#000;'>Compiler log:</div>",
            _preformatted_html(results.get('vela_log') or 'No log available'),
        ])

    if success:
        output = [_banner_html(
            "✅ SUCCESS: Model fits on SR100 and below is the estimates Performance",
            color="#007dc3",
        )]
    else:
        output = [_banner_html(
            "❌ FAILURE: Model does not fit on SR100, Please check Memory usage of Model"
        )]

    try:
        output.append(_metrics_table_html(results))
    except (KeyError, TypeError) as exc:
        # Keep the banner and explain why no table is shown, rather than
        # letting an incomplete results dict crash the handler.
        print(f"Could not format optimizer metrics: {exc!r}")
        output.append(
            "<div style='margin-top:0.5em;color:#000;'>"
            "Detailed metrics are unavailable because the optimizer results were incomplete:</div>"
            + _preformatted_html(f"{type(exc).__name__}: {exc}")
        )

    return "".join(output)

# Get all available models. glob.glob() returns matches in arbitrary order,
# so sort them to keep the dropdown order stable across runs and platforms.
model_choices = sorted(glob.glob('models/*.tflite'))

# Brand styling passed to gr.Blocks(css=...): accent color #007dc3 on a white
# background, with the Gradio footer/settings hidden.
custom_css = """
:root {
    --color-accent: #007dc3;
    --color-primary-500: #007dc3;
    --color-primary-600: #007dc3;
}
body, .gradio-container, #root {
    background: #fff !important;
}
/* Hide Gradio footer and settings */
footer, .gradio-footer, .svelte-1ipelgc, .gradio-logo, .gradio-app__settings {
    display: none !important;
}
/* Style input labels and controls */
.gradio-slider label,
.gradio-radio label,
.gradio-dropdown label,
.gradio-file label {
    color: #007dc3 !important;
    font-weight: bold;
}
/* One rule per vendor pseudo-element: a selector list is discarded as a whole
   when a browser does not recognise one of its selectors. */
.gradio-slider input[type="range"]::-webkit-slider-thumb {
    background: #007dc3 !important;
}
.gradio-slider input[type="range"]::-moz-range-thumb {
    background: #007dc3 !important;
}
.gradio-slider input[type="range"]::-ms-thumb {
    background: #007dc3 !important;
}
.gradio-radio input[type="radio"]:checked + span {
    background: #007dc3 !important;
    border-color: #007dc3 !important;
}
.gradio-dropdown select,
.gradio-file input[type="file"] {
    border-color: #007dc3 !important;
}
.gradio-button {
    background: #007dc3 !important;
    color: #fff !important;
    border: none !important;
}
"""
    
# Build the single-page UI: intro text, memory-budget sliders, model picker /
# uploader, and a button that runs compile_model and shows its HTML report.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("<h1 style='font-size:2.5em; color:#007dc3; margin-bottom:0;'>SR100 Model Compiler</h1>", elem_id="main_title")
    gr.Markdown("<h3 style='margin-top:0; color:#000;'>Bring a TFlite INT8 model and compile it for Synaptics Astra SR100. Learn more at <a href='https://developer.synaptics.com/docs/sr/sr100/quick-start?utm_source=hf' target='_blank' style='color:#007dc3; text-decoration:underline;'>Synaptics AI Developer Zone</a></h3>", elem_id="subtitle")
    gr.Markdown("""
    <p style='margin-top:0; color:#000; font-style:italic;'>
    SR100 includes the following on-chip SRAM memories:<br>
    - 1536 kB of Virtual Memory SRAM (VMEM) for high-speed operations.<br>
    - 1536 kB of Low Power SRAM (LPMEM) for images, audio, and other less-performance-critical data.<br><br>
    The amount of memory allocated to the model is customizable. Any memory not allocated to the model is usable by the application.<br>
    Ensure that the Arena cache size is smaller than the available VMEM to ensure it fits and runs optimally.
    </p>
    """, elem_id="memory_note"
    )

    # Memory budgets in kB; compile_model multiplies them by 1000 before
    # handing them to the optimizer.
    with gr.Row():
        vmem_slider = gr.Slider(minimum=1, maximum=1536, step=1.024, label="Set total VMEM SRAM size available in kB", value=1536.0)
        lpmem_slider = gr.Slider(minimum=1, maximum=1536, step=1.024, label="Set total LPMEM SRAM size in kB", value=1536.0)

    # Preselect the bundled demo model only when it is really among the
    # discovered choices; otherwise fall back to the first available model
    # (or no selection) instead of offering a value that is not a valid choice.
    default_model = 'models/person_classification_256x448.tflite'
    if default_model not in model_choices:
        default_model = model_choices[0] if model_choices else None

    model_dropdown = gr.Dropdown(
        label="Select a model",
        value=default_model,
        choices=model_choices
    )

    # Add file upload component
    model_upload = gr.File(label="Or upload a .tflite INT8 model. Please note, Uploaded models are stored in a temporary directory and will be deleted automatically after processing.", file_types=[".tflite"], file_count="single")

    # Run the compile
    compile_btn = gr.Button("Compile Model")
    compile_text = gr.Markdown("<span style='color:#000;'>Waiting for model results</span>")

    # Wire the button: the input order must match compile_model's parameters
    # (model_name, vmem_value, lpmem_value, uploaded_model).
    compile_btn.click(compile_model, inputs=[model_dropdown, vmem_slider, lpmem_slider, model_upload], outputs=[compile_text])

    gr.HTML("""
    <div style="max-width: 900px; margin: 2rem auto; background: white; color: black; border-radius: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); border: 1px solid #e5e7eb; padding: 1.5rem; text-align: center;">
        For a detailed walkthrough, please see our
        <a href="https://developer.synaptics.com/docs/sr/sr100/evaluate-sr?utm_source=hf" target="_blank" style="color: #1a0dab;">Evaluate Model Guide</a>.<br>
        This Space uses a simulation toolchain to estimate model performance providing results that closely reflect real hardware behavior.
        <br><br>
        Request a 
        <a href="https://synacsm.atlassian.net/servicedesk/customer/portal/543/group/597/create/7208?utm_source=hf" target="_blank" style="color: #1a0dab;">Machina Micro [MCU] Dev Kit</a> with Astra SR100 MCU.
    </div>
    """)

# Launch the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch()