Spaces:

Salesforce
/

LeastLoadedEP

Sleeping

App Files Community

nxphi47 committed on Jan 23

Commit

b17633a

verified ·

1 Parent(s): b5f5724

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +113 -110

src/streamlit_app.py CHANGED Viewed

@@ -933,9 +933,6 @@ with tab_stats:
 # TAB 2: STEP-BY-STEP ANIMATION
 # ============================================================================
 with tab_anim:
-    st.subheader("Step-by-Step Algorithm Animation")
-    st.caption("This animation follows LLA + LLAS with α capacity and min-tokens-per-GEMM (m) skip/force-assign behavior.")
     anim_num_gpus = 4
     anim_local_experts = 2
     anim_total_experts = anim_num_gpus * anim_local_experts
@@ -970,33 +967,36 @@ with tab_anim:
                 st.session_state[f"anim_load_{idx}"] = int(v)
             st.session_state["anim_step"] = 0
-    with st.expander("Animation Configuration", expanded=True):
-        left, right = st.columns([1, 1], gap="large")
-        with left:
-            preset = st.selectbox("Preset", list(PRESETS.keys()), key="anim_preset")
-            st.button("Apply Preset", key="anim_apply_preset", on_click=apply_preset_callback)
-        with right:
-            st.slider(
-                "α (capacity factor)",
-                0.5, 1.5,
-                step=0.05,
-                key="anim_alpha"
-            )
-            st.slider(
-                "m (min tokens per GEMM)",
-                1, 512,
-                step=1,
-                key="anim_min_gemm",
-                help="LLAS rule: if candidate chunk c < m and remaining r > c, skip that GPU; else may force-assign."
-            )
-        st.markdown("**Expert Loads (native placement shown as E{i} -> GPU{i//2})**")
-        load_cols = st.columns(anim_num_gpus)
         for gpu_idx in range(anim_num_gpus):
-            with load_cols[gpu_idx]:
-                st.caption(f"GPU {gpu_idx}")
                 for local_idx in range(anim_local_experts):
                     idx = gpu_idx * anim_local_experts + local_idx
                     st.number_input(
@@ -1015,7 +1015,7 @@ with tab_anim:
         total_now = sum(loads_now)
         m_alpha_now = alpha_now * (total_now / anim_num_gpus) if anim_num_gpus > 0 else float(total_now)
-        st.info(f"Current: α={alpha_now:.2f}, m={m_now}, Total={total_now}, m_alpha={m_alpha_now:.2f}")
         if st.button("Reset Animation Step", key="anim_reset_step"):
             st.session_state["anim_step"] = 0
@@ -1035,90 +1035,93 @@ with tab_anim:
     st.session_state["anim_step"] = current_step
     state = anim_steps[current_step]
-    # Controls
-    ctrl_col1, ctrl_col2, ctrl_col3, ctrl_col4, ctrl_col5 = st.columns([1, 1, 1, 1, 4])
-    with ctrl_col1:
-        if st.button("Reset", key="anim_reset"):
-            st.session_state["anim_step"] = 0
-            st.rerun()
-    with ctrl_col2:
-        if st.button("Prev", key="anim_prev") and current_step > 0:
-            st.session_state["anim_step"] -= 1
-            st.rerun()
-    with ctrl_col3:
-        if st.button("Next", key="anim_next") and current_step < len(anim_steps) - 1:
-            st.session_state["anim_step"] += 1
-            st.rerun()
-    with ctrl_col4:
-        if st.button("End", key="anim_end"):
-            st.session_state["anim_step"] = len(anim_steps) - 1
-            st.rerun()
-    st.progress(current_step / max(len(anim_steps) - 1, 1), text=f"Step {current_step + 1} / {len(anim_steps)}")
-    case_type = state.get("case_type")
-    if case_type in (1, 2, 3):
-        label = "Case 1" if case_type == 1 else "Case 2" if case_type == 2 else "Case 3"
-        st.write(f"**{label}** — {state['message']}")
-    else:
-        st.info(state["message"])
-    viz_col1, viz_col2, viz_col3 = st.columns([1.3, 1.2, 1.5])
-    with viz_col1:
-        st.markdown("##### Experts (sorted by load)")
-        exp_cols = st.columns(2)
-        for idx in range(anim_total_experts):
-            if idx >= len(state["sorted_loads"]):
-                continue
-            load = int(state["sorted_loads"][idx])
-            original_idx = int(state["sorted_indices"][idx])
-            is_processed = idx in state.get("assignments", {})
-            is_current = idx == int(state["current_expert_idx"])
-            color = EXPERT_COLORS[original_idx % len(EXPERT_COLORS)]
-            opacity = "0.4" if is_processed else "1"
-            border = "3px solid #facc15" if is_current else "1px solid #4b5563"
-            with exp_cols[idx % 2]:
-                st.markdown(
-                    f"""<div style="background-color: {color}22; border: {border}; border-radius: 6px;
-                    padding: 6px; margin: 2px 0; opacity: {opacity};">
-                    <span style="color: #9ca3af; font-size: 10px;">E{original_idx} -> GPU{original_idx // anim_local_experts}</span>
-                    <span style="color: {color}; font-size: 16px; font-weight: bold; float: right;">{load}</span>
-                    </div>""",
-                    unsafe_allow_html=True
-                )
-    with viz_col2:
-        st.markdown("##### GPU Topology")
-        st.plotly_chart(create_gpu_topology_chart(state, anim_num_gpus), use_container_width=True, key="anim_topology")
-        st.caption("Helpers exclude the native GPU. Overflow is possible via force-assign in LLAS.")
-    with viz_col3:
-        st.markdown("##### GPU Loads")
-        st.plotly_chart(create_load_bars_chart(state, anim_num_gpus), use_container_width=True, key="anim_loads")
-        st.markdown("##### Assignment Map")
-        st.caption("Format: (GPU, start, end)")
-        if state.get("assignments"):
-            rows = []
-            for idx, assigns in state["assignments"].items():
-                original_idx = int(state["sorted_indices"][idx])
-                native_gpu = original_idx // anim_local_experts
-                has_spill = any(int(a["gpu"]) != int(native_gpu) for a in assigns)
-                assign_str = " ".join([f"(G{int(a['gpu'])},{int(a['start'])},{int(a['end'])})" for a in assigns])
-                rows.append({
-                    "Expert": f"E{original_idx}",
-                    "Load": int(state["sorted_loads"][idx]),
-                    "Assignments": assign_str,
-                    "Spilled?": "Yes" if has_spill else "No",
-                })
-            df = pd.DataFrame(rows)
-            st.dataframe(df, use_container_width=True, hide_index=True, height=220)
-        else:
-            st.caption("No assignments yet")

 # TAB 2: STEP-BY-STEP ANIMATION
 # ============================================================================
 with tab_anim:
     anim_num_gpus = 4
     anim_local_experts = 2
     anim_total_experts = anim_num_gpus * anim_local_experts
                 st.session_state[f"anim_load_{idx}"] = int(v)
             st.session_state["anim_step"] = 0
+    cfg_col, out_col = st.columns([0.32, 0.68], gap="large")
+    with cfg_col:
+        st.subheader("Animation Config")
+        st.caption("LLA + LLAS with α capacity and min-tokens-per-GEMM (m).")
+        preset = st.selectbox("Preset", list(PRESETS.keys()), key="anim_preset")
+        st.button("Apply Preset", key="anim_apply_preset", on_click=apply_preset_callback)
+        st.markdown("#### Parameters")
+        st.slider(
+            "α (capacity factor)",
+            0.5, 1.5,
+            step=0.05,
+            key="anim_alpha"
+        )
+        st.slider(
+            "m (min tokens per GEMM)",
+            1, 512,
+            step=1,
+            key="anim_min_gemm",
+            help="LLAS rule: if candidate chunk c < m and remaining r > c, skip that GPU; else may force-assign."
+        )
+        st.markdown("#### Expert Loads")
+        st.caption("E{i} → GPU{i//2}")
+        load_cols = st.columns(2)
         for gpu_idx in range(anim_num_gpus):
+            with load_cols[gpu_idx % 2]:
+                st.markdown(f"**GPU {gpu_idx}**")
                 for local_idx in range(anim_local_experts):
                     idx = gpu_idx * anim_local_experts + local_idx
                     st.number_input(
         total_now = sum(loads_now)
         m_alpha_now = alpha_now * (total_now / anim_num_gpus) if anim_num_gpus > 0 else float(total_now)
+        st.info(f"α={alpha_now:.2f}, m={m_now}, Total={total_now}, m_α={m_alpha_now:.2f}")
         if st.button("Reset Animation Step", key="anim_reset_step"):
             st.session_state["anim_step"] = 0
     st.session_state["anim_step"] = current_step
     state = anim_steps[current_step]
+    with out_col:
+        st.subheader("Step-by-Step Animation")
+        # Controls
+        ctrl_col1, ctrl_col2, ctrl_col3, ctrl_col4, ctrl_col5 = st.columns([1, 1, 1, 1, 4])
+        with ctrl_col1:
+            if st.button("Reset", key="anim_reset"):
+                st.session_state["anim_step"] = 0
+                st.rerun()
+        with ctrl_col2:
+            if st.button("Prev", key="anim_prev") and current_step > 0:
+                st.session_state["anim_step"] -= 1
+                st.rerun()
+        with ctrl_col3:
+            if st.button("Next", key="anim_next") and current_step < len(anim_steps) - 1:
+                st.session_state["anim_step"] += 1
+                st.rerun()
+        with ctrl_col4:
+            if st.button("End", key="anim_end"):
+                st.session_state["anim_step"] = len(anim_steps) - 1
+                st.rerun()
+        st.progress(current_step / max(len(anim_steps) - 1, 1), text=f"Step {current_step + 1} / {len(anim_steps)}")
+        case_type = state.get("case_type")
+        if case_type in (1, 2, 3):
+            label = "Case 1" if case_type == 1 else "Case 2" if case_type == 2 else "Case 3"
+            st.write(f"**{label}** — {state['message']}")
+        else:
+            st.info(state["message"])
+        viz_col1, viz_col2, viz_col3 = st.columns([1.3, 1.2, 1.5])
+        with viz_col1:
+            st.markdown("##### Experts (sorted by load)")
+            exp_cols = st.columns(2)
+            for idx in range(anim_total_experts):
+                if idx >= len(state["sorted_loads"]):
+                    continue
+                load = int(state["sorted_loads"][idx])
+                original_idx = int(state["sorted_indices"][idx])
+                is_processed = idx in state.get("assignments", {})
+                is_current = idx == int(state["current_expert_idx"])
+                color = EXPERT_COLORS[original_idx % len(EXPERT_COLORS)]
+                opacity = "0.4" if is_processed else "1"
+                border = "3px solid #facc15" if is_current else "1px solid #4b5563"
+                with exp_cols[idx % 2]:
+                    st.markdown(
+                        f"""<div style="background-color: {color}22; border: {border}; border-radius: 6px;
+                        padding: 6px; margin: 2px 0; opacity: {opacity};">
+                        <span style="color: #9ca3af; font-size: 10px;">E{original_idx} -> GPU{original_idx // anim_local_experts}</span>
+                        <span style="color: {color}; font-size: 16px; font-weight: bold; float: right;">{load}</span>
+                        </div>""",
+                        unsafe_allow_html=True
+                    )
+        with viz_col2:
+            st.markdown("##### GPU Topology")
+            st.plotly_chart(create_gpu_topology_chart(state, anim_num_gpus), use_container_width=True, key="anim_topology")
+            st.caption("Helpers exclude the native GPU. Overflow is possible via force-assign in LLAS.")
+        with viz_col3:
+            st.markdown("##### GPU Loads")
+            st.plotly_chart(create_load_bars_chart(state, anim_num_gpus), use_container_width=True, key="anim_loads")
+            st.markdown("##### Assignment Map")
+            st.caption("Format: (GPU, start, end)")
+            if state.get("assignments"):
+                rows = []
+                for idx, assigns in state["assignments"].items():
+                    original_idx = int(state["sorted_indices"][idx])
+                    native_gpu = original_idx // anim_local_experts
+                    has_spill = any(int(a["gpu"]) != int(native_gpu) for a in assigns)
+                    assign_str = " ".join([f"(G{int(a['gpu'])},{int(a['start'])},{int(a['end'])})" for a in assigns])
+                    rows.append({
+                        "Expert": f"E{original_idx}",
+                        "Load": int(state["sorted_loads"][idx]),
+                        "Assignments": assign_str,
+                        "Spilled?": "Yes" if has_spill else "No",
+                    })
+                df = pd.DataFrame(rows)
+                st.dataframe(df, use_container_width=True, hide_index=True, height=220)
+            else:
+                st.caption("No assignments yet")