added expert ablation
- app.py +61 -20
- router_backend.py +4 -4
app.py
CHANGED
```diff
@@ -81,7 +81,16 @@ def _compose_prompt(user_prompt: str, assistant_prompt: str) -> str:
         return [{"role": "user", "content": user_prompt}, {"role": "assistant", "content": assistant_prompt}]
     return user_prompt
 
-def route_and_plot(
+def route_and_plot(
+    model_choice: str,
+    hf_token: str,
+    user_prompt: str,
+    assistant_prompt: str,
+    ablate_language: bool,
+    ablate_logic: bool,
+    ablate_social: bool,
+    ablate_world: bool,
+) -> Tuple[pd.DataFrame, "plotly.graph_objs._figure.Figure", str]:
     """
     Main pipeline:
     - Compose prompt (user + optional assistant)
@@ -91,29 +100,45 @@ def route_and_plot(model_choice: str, hf_token: str, user_prompt: str, assistant
     if hf_token.strip() == "":
         hf_token = None  # allow empty token
 
-    model_id = model_choice.strip()
-    if not model_id:
-        raise gr.Error("Please select a model or enter a custom model id.")
-    prompt = _compose_prompt(user_prompt, assistant_prompt)
-    if not prompt:
-        raise gr.Error("Please enter a prompt.")
-
+    ablations = []
+    if ablate_language:
+        ablations.append("language")
+    if ablate_logic:
+        ablations.append("logic")
+    if ablate_social:
+        ablations.append("social")
+    if ablate_world:
+        ablations.append("world")
+
     seed = 42
     use_mock = False
-    if use_mock:
-        msg = "Using mock data."
-        vals = _mock_routing(model_id, prompt, seed=seed)
+
+    if len(ablations) == 4:
+        msg = "Error message: you can't ablate all experts.<br>Falling back to mock data."
         generation = None
+        vals = _mock_routing(model_id, prompt, seed=seed)
     else:
-        try:
-            raw, generation = get_expert_routing(model_id, hf_token, prompt)  # <-- your real function
-            vals = _normalize_output(raw)
-            msg = "Routed with real backend."
-        except Exception as e:
-            # fallback to mock on error, but surface message
-            msg = f"Backend error: {e}\nFalling back to mock data."
+        model_id = model_choice.strip()
+        if not model_id:
+            raise gr.Error("Please select a model or enter a custom model id.")
+        prompt = _compose_prompt(user_prompt, assistant_prompt)
+        if not prompt:
+            raise gr.Error("Please enter a prompt.")
+
+        if use_mock:
+            msg = "Using mock data."
             vals = _mock_routing(model_id, prompt, seed=seed)
             generation = None
+        else:
+            try:
+                raw, generation = get_expert_routing(model_id, hf_token, prompt, ablations)  # <-- your real function
+                vals = _normalize_output(raw)
+                msg = "Routed with real backend."
+            except Exception as e:
+                # fallback to mock on error, but surface message
+                msg = f"Backend error: {e}\nFalling back to mock data."
+                vals = _mock_routing(model_id, prompt, seed=seed)
+                generation = None
 
     df = pd.DataFrame({"Expert": EXPERTS, "Percent": vals})
     colors = ["#97D077", "#4285F4", "#FFAB40", "#A64D79"]
@@ -143,6 +168,20 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Row():
         model_choice = gr.Dropdown(choices=DEFAULT_MODELS, label="Select a model", value=DEFAULT_MODELS[0])
         hf_token = gr.Textbox(label="Huggingface token for authentication", placeholder="Required for Llama-based models", lines=1)
+
+    with gr.Column():
+        with gr.Row():
+            gr.Markdown(
+                """
+                #### Ablate Experts
+                (Check to disable an expert; the routing percentages will be redistributed among the remaining experts)
+                """, label="Ablate Experts"
+            )
+        with gr.Row():
+            ablate_language = gr.Checkbox(value=False, label="Language Expert")
+            ablate_logic = gr.Checkbox(value=False, label="Logic Expert")
+            ablate_social = gr.Checkbox(value=False, label="Social Expert")
+            ablate_world = gr.Checkbox(value=False, label="World Expert")
 
     with gr.Row():
         user_prompt = gr.Textbox(lines=6, label="User prompt", placeholder="Type the user message here...")
@@ -159,11 +198,13 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
     with gr.Row():
         table = gr.Dataframe(label="Routing Percentages", interactive=False)
         plot = gr.Plot(label="Bar Plot")
-
+
+
+    status = gr.Markdown("", label="System Message")
 
     run.click(
         route_and_plot,
-        inputs=[model_choice, hf_token, user_prompt, assistant_prompt],
+        inputs=[model_choice, hf_token, user_prompt, assistant_prompt, ablate_language, ablate_logic, ablate_social, ablate_world],
        outputs=[generation_output, table, plot, status],
    )
 
```
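The new checkboxes feed `route_and_plot` a plain list of expert names, and the panel text promises that an ablated expert's share is "redistributed among the remaining experts." That arithmetic happens downstream of `get_expert_routing` and isn't shown in this diff (note also that the all-ablated fallback reads `model_id` and `prompt` before the `else:` branch defines them), but the intended effect is a mask-and-renormalize step. A minimal sketch, with a hypothetical `redistribute` helper that is not part of this commit:

```python
# Hypothetical illustration of the redistribution described by the UI text;
# `redistribute` does not exist in this repo.
EXPERTS = ["Language", "Logic", "Social", "World"]  # assumed display order

def redistribute(percents, ablations):
    """Zero out ablated experts and renormalize the rest to sum to 100."""
    kept = [0.0 if name.lower() in ablations else p
            for name, p in zip(EXPERTS, percents)]
    total = sum(kept)
    if total == 0:
        # the situation app.py guards against with `len(ablations) == 4`
        raise ValueError("you can't ablate all experts")
    return [100.0 * p / total for p in kept]

print(redistribute([40.0, 30.0, 20.0, 10.0], ["logic"]))
# -> [57.14..., 0.0, 28.57..., 14.28...]
```

Renormalizing by the surviving total keeps the bar plot on the same 0-100 scale no matter how many experts are disabled.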
router_backend.py
CHANGED
```diff
@@ -26,9 +26,9 @@ from models.micro_moe_llama import MiCRoLlamaMoE
 
 DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
 
-def get_expert_routing(model_id: str, hf_token: str, prompt: Union[str, List[Dict[str, str]]]) -> Union[List[float], Dict[str, float], Tuple[float, float, float, float]]:
+def get_expert_routing(model_id: str, hf_token: str, prompt: Union[str, List[Dict[str, str]]], ablations: List[str] = None) -> Union[List[float], Dict[str, float], Tuple[float, float, float, float]]:
 
-    model, tokenizer = build_model(model_id, hf_token)
+    model, tokenizer = build_model(model_id, hf_token, ablations=ablations)
 
     if isinstance(prompt, str):
         generation, routing_weights = generate_continuation(model, tokenizer, prompt)
@@ -189,7 +189,7 @@ def get_routing_weights(model, tokenizer, prompts, apply_chat_template=True):
 
     return routing_weights
 
-def build_model(model_id: str, hf_token: str, use_cache: bool = True):
+def build_model(model_id: str, hf_token: str, ablations: List[str], use_cache: bool = True):
 
     model_path, base_model, model_class = get_model_path(model_id)
 
@@ -203,7 +203,7 @@ def build_model(model_id: str, hf_token: str, use_cache: bool = True):
     model_config.use_bfloat16 = True
     model_config._attn_implementation = "eager"  # {sdpa, flash_attention_2, eager}
     model_config.use_cache = use_cache
-    model_config.ablate =
+    model_config.ablate = ablations
 
     tokenizer = AutoTokenizer.from_pretrained(base_model, use_auth_token=hf_token)
     tokenizer.padding_side = "left"
```
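On the backend, `build_model` now stashes the ablation list on the model config, but how `MiCRoLlamaMoE` consumes `model_config.ablate` is outside this diff. A plausible mechanism, sketched here purely as an assumption, is to set the ablated experts' router logits to negative infinity before the softmax, so the surviving weights renormalize automatically:

```python
import torch

# Assumption only: the real MiCRoLlamaMoE router is not shown in this diff.
EXPERT_ORDER = ["language", "logic", "social", "world"]  # hypothetical index order

def routing_weights(router_logits: torch.Tensor, ablate=None) -> torch.Tensor:
    """router_logits: (..., num_experts) -> weights summing to 1 over the
    non-ablated experts; ablated experts get exactly 0."""
    if ablate:
        mask = torch.zeros(router_logits.shape[-1], dtype=torch.bool)
        for name in ablate:
            mask[EXPERT_ORDER.index(name)] = True
        router_logits = router_logits.masked_fill(mask, float("-inf"))
    return torch.softmax(router_logits, dim=-1)

w = routing_weights(torch.randn(2, 4), ablate=["logic"])
print(w)  # column 1 is 0.0; every row still sums to 1
```

Masking logits rather than zeroing post-softmax weights guarantees the routing distribution still sums to 1 without a second normalization pass.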
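For completeness, the updated entry point as `app.py` now calls it; the model id, token, and prompt below are placeholders:

```python
from router_backend import get_expert_routing

# Placeholder arguments; this mirrors the call inside route_and_plot.
raw, generation = get_expert_routing(
    "your-org/your-micro-moe-model",  # hypothetical model id
    None,                             # an empty token box becomes None in app.py
    "Explain why the sky is blue.",   # plain-string prompt path
    ["logic", "world"],               # experts to disable
)
print(raw)         # raw routing output, normalized later by _normalize_output
print(generation)  # model continuation for string prompts
```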