# math-under-llm / ui / tab_database.py
# Alex W.
# fix: prevent dirty DB entries from mistyped model IDs; add cascading model delete
# c1b6928
# ui/tab_database.py
"""
Tab4: Database Browser
- Model list (Plan A: aggregated by modality)
- Model detail (Plan B: raw components rows, expandable)
- Per-head raw data query (modality + layer_type as two independent filters)
- DB stats
- Delete model (admin only, cascading)
"""
import gradio as gr
import pandas as pd
from db.schema import init_db, get_db_stats
from db.reader import (
get_analyzed_models,
get_model_components,
get_model_summary,
get_layer_metrics,
get_resume_status,
)
from db.writer import delete_model
def load_db_stats() -> str:
    """Return a plain-text summary of the database for the stats textbox.

    Opens the database through ``init_db()``, reads the counters with
    ``get_db_stats()`` and renders one line per counter. A counter that is
    missing from the stats mapping is displayed as ``0``.

    Returns:
        A newline-terminated, multi-line string: a title, a horizontal rule,
        then the model, component, layer-head, summary-row and size lines.
    """
    conn = init_db()
    stats = get_db_stats(conn)
    # Box-drawing horizontal rule (U+2500); the previous literal was a
    # mis-encoded copy of this character and rendered as garbage.
    rule = "─" * 40
    return (
        f"Database Statistics\n"
        f"{rule}\n"
        f" Models: {stats.get('models', 0)}\n"
        f" Components: {stats.get('components', 0)}\n"
        f" Layer-head records:{stats.get('layer_head_metrics', 0)}\n"
        f" Summary rows: {stats.get('model_summary', 0)}\n"
        f" DB size: {stats.get('db_size_mb', 0)} MB\n"
    )
def load_model_list() -> pd.DataFrame:
    """Return the table of analyzed models for display.

    When the database holds no analyzed models, an empty frame carrying the
    expected column names is returned so the table still shows its header.
    Otherwise zero counts in ``vision_layers`` and ``audio_layers`` are
    replaced by an empty string (presumably so models without that modality
    show a blank cell instead of ``0``); every other value is left as is.

    Returns:
        The frame produced by ``get_analyzed_models`` with the two optional
        modality columns blanked where they equal 0, or an empty frame with
        the standard columns.
    """
    expected_columns = [
        "model_id", "model_type", "analyzed_at", "analyze_sec",
        "n_components", "language_layers", "vision_layers", "audio_layers",
    ]

    def _blank_if_zero(count):
        # Only an exact 0 is blanked; any other value passes through.
        return "" if count == 0 else count

    connection = init_db()
    models = get_analyzed_models(connection)
    if models.empty:
        return pd.DataFrame(columns=expected_columns)

    for column in ("vision_layers", "audio_layers"):
        models[column] = models[column].apply(_blank_if_zero)
    return models
def load_model_detail(
    model_id: str
) -> tuple[pd.DataFrame, pd.DataFrame, str]:
    """Load the component rows, summary rows and resume status of one model.

    Args:
        model_id: Model identifier as typed by the user. Surrounding
            whitespace is ignored; ``None``, empty or blank input is
            rejected without touching the database.

    Returns:
        A ``(components, summary, status_text)`` tuple. ``components`` and
        ``summary`` are the frames returned by ``get_model_components`` and
        ``get_model_summary``. ``status_text`` starts with a header for the
        model and then lists, for every value in the ``prefix`` column of the
        components frame, the number of finished layers and their sorted
        indices as reported by ``get_resume_status``. When the model has no
        component rows the text says that there is no data yet. For rejected
        input both frames are empty and the text asks for a model ID.
    """
    # `model_id or ""` keeps a None value (e.g. from an API call) from
    # raising AttributeError on .strip().
    mid = (model_id or "").strip()
    if not mid:
        return pd.DataFrame(), pd.DataFrame(), "Please enter a model ID."

    conn = init_db()
    comp_df = get_model_components(conn, mid)
    summary_df = get_model_summary(conn, mid)

    # Box-drawing horizontal rule (U+2500); the previous literal was a
    # mis-encoded copy of this character.
    rule = "─" * 50
    status_lines = [f"Resume Status: {mid}\n{rule}\n"]
    if comp_df.empty:
        status_lines.append(" No data yet.\n")
    else:
        for pfx in comp_df["prefix"].tolist():
            rs = get_resume_status(conn, mid, pfx)
            status_lines.append(
                f" [{pfx}]\n"
                f" Done layers : {rs['total_done']}\n"
                f" Layer index : {sorted(rs['done_layers'])}\n"
            )
    return comp_df, summary_df, "".join(status_lines)
def load_layer_data(
    model_id: str,
    modality: str,
    layer_type: str,
    start_layer: int,
    end_layer: int,
) -> tuple[pd.DataFrame, str]:
    """Query per-head raw metrics for a layer range of one model.

    Args:
        model_id: Model identifier; surrounding whitespace is ignored.
            ``None``, empty or blank input is rejected.
        modality: Modality filter. The value ``"all"`` is translated to
            ``None`` before being handed to ``get_layer_metrics``
            (presumably meaning "no filter" -- confirm in db.reader).
        layer_type: Layer-type filter, with the same ``"all"`` -> ``None``
            translation as ``modality``.
        start_layer: First layer of the range; converted with ``int()``.
        end_layer: Last layer of the range; converted with ``int()``.

    Returns:
        ``(frame, status)``. On success ``frame`` is the result of
        ``get_layer_metrics`` and ``status`` reports the record count, the
        min/max of the ``layer`` column and the active filters. If the input
        is incomplete or nothing matches, ``frame`` is empty and ``status``
        explains why.
    """
    # `model_id or ""` keeps a None value from raising on .strip().
    mid = (model_id or "").strip()
    if not mid:
        return pd.DataFrame(), "Please enter a model ID."
    # A cleared numeric input arrives as None; int(None) would raise
    # TypeError, so report the problem to the user instead.
    if start_layer is None or end_layer is None:
        return pd.DataFrame(), "Please enter both a start and an end layer."

    conn = init_db()
    mod = modality if modality != "all" else None
    lt = layer_type if layer_type != "all" else None
    df = get_layer_metrics(
        conn,
        model_id=mid,
        modality=mod,
        layer_type=lt,
        start_layer=int(start_layer),
        end_layer=int(end_layer),
    )
    filters = f"modality={mod or 'all'} layer_type={lt or 'all'}"
    if df.empty:
        # Echo the trimmed id: that is the value that was actually queried.
        return pd.DataFrame(), f"No data found: model={mid} {filters}"

    status = (
        f"✅ {len(df)} records "
        f"| layers {df['layer'].min()}~{df['layer'].max()} "
        f"| {filters}"
    )
    return df, status
def run_delete_model(
    model_id: str,
    admin_token: str,
) -> tuple[str, pd.DataFrame]:
    """Delete one model together with all of its data (cascading delete).

    The admin write token is forwarded unchanged to ``delete_model``; this
    function does not inspect it (presumably ``delete_model`` performs the
    verification -- confirm in db.writer).

    Args:
        model_id: Identifier of the model to delete; surrounding whitespace
            is ignored. ``None``, empty or blank input is rejected without
            calling ``delete_model``.
        admin_token: Admin write token supplied by the user.

    Returns:
        ``(status_text, model_list)``: the message returned by
        ``delete_model`` (or a prompt to enter a model ID) and a freshly
        loaded model list.
    """
    # `model_id or ""` keeps a None value from raising on .strip().
    mid = (model_id or "").strip()
    if not mid:
        return "❌ Please enter a model ID to delete.", load_model_list()

    conn = init_db()
    # delete_model also returns a success flag; only the message is shown.
    _, msg = delete_model(conn, mid, admin_token)
    # Reload the model list whether or not the delete succeeded, so the
    # table always reflects the current database state.
    return msg, load_model_list()
# ─────────────────────────────────────────────
# Tab4 UI
# ─────────────────────────────────────────────
def build_tab_database():
    """Build the "Database" tab and wire its event handlers.

    Creates, in display order: the database statistics box, the analyzed
    model table, the admin-only delete form, the model detail / resume
    status view and the per-head raw data query. Each button is bound to the
    matching loader function of this module.

    Presumably this must be called inside an active Gradio layout context
    (e.g. ``gr.Blocks`` / ``gr.Tabs``) -- confirm against the caller.
    Components are rendered in creation order, so statement order here
    determines the page layout.
    """
    with gr.Tab("🗄️ Database"):
        gr.Markdown(
            "## Database Browser\n"
            "View analyzed models, raw per-head data, and resume status.\n\n"
            "> 查看已分析模型、逐头原始数据及断点续传状态。"
        )

        # --- DB stats -----------------------------------------------------
        with gr.Row():
            stats_text = gr.Textbox(
                label="Database Statistics",
                value="Click Refresh to load.",
                lines=7,
                interactive=False,
                scale=2,
            )
            refresh_stats_btn = gr.Button(
                "🔄 Refresh Stats", scale=1, variant="secondary"
            )
        refresh_stats_btn.click(fn=load_db_stats, outputs=stats_text)
        gr.Markdown("---")

        # --- Model list ---------------------------------------------------
        gr.Markdown(
            "### Analyzed Models\n"
            "Layers are split by modality. "
            "`language_layers` includes both standard and global layers.\n\n"
            "> 层数按模态拆分。`language_layers` 含 standard 和 global 层。"
        )
        refresh_models_btn = gr.Button(
            "🔄 Refresh Model List", variant="secondary"
        )
        models_table = gr.Dataframe(
            label="Analyzed Models",
            headers=[
                "model_id", "model_type", "analyzed_at", "analyze_sec",
                "n_components", "language_layers", "vision_layers", "audio_layers"
            ],
            interactive=False,
        )
        refresh_models_btn.click(fn=load_model_list, outputs=models_table)
        gr.Markdown("---")

        # --- Delete model (admin only) --------------------------------------
        gr.Markdown(
            "### 🗑️ Delete Model\n"
            "Permanently remove a model and **all** its associated data "
            "(layer_head_metrics, model_summary, components, models).\n"
            "Requires Admin Write Token. This action **cannot be undone**.\n\n"
            "> 永久删除模型及其所有关联数据,需要 Admin Write Token,操作不可逆。"
        )
        with gr.Row():
            delete_model_id = gr.Textbox(
                label="Model ID to delete",
                placeholder="meta-llama/Meta-Llama-3-70B-intruct",
                scale=3,
            )
            delete_token = gr.Textbox(
                label="Admin Write Token",
                type="password",
                scale=2,
            )
            delete_btn = gr.Button(
                "🗑️ Delete", variant="stop", scale=1
            )
        delete_status = gr.Textbox(
            label="Delete Status",
            lines=6,
            interactive=False,
        )
        delete_btn.click(
            fn=run_delete_model,
            inputs=[delete_model_id, delete_token],
            # The model table is an output too, so it refreshes automatically
            # after a delete attempt.
            outputs=[delete_status, models_table],
        )
        gr.Markdown("---")

        # --- Model detail & resume status -----------------------------------
        gr.Markdown(
            "### Model Detail & Resume Status\n"
            "Expand raw component rows and check which layers are done.\n\n"
            "> 查看原始组件信息及断点续传进度。"
        )
        with gr.Row():
            detail_model_id = gr.Textbox(
                label="Model ID",
                placeholder="google/gemma-4-e2b",
                scale=3,
            )
            load_detail_btn = gr.Button(
                "📋 Load Detail", variant="secondary", scale=1
            )
        resume_status_text = gr.Textbox(
            label="Resume Status",
            lines=8,
            interactive=False,
        )
        components_table = gr.Dataframe(
            label="Components (raw) — prefix / modality / n_layers / head_dim",
            headers=[
                "prefix", "modality", "n_layers",
                "head_dim_min", "head_dim_max",
                "has_kv_shared", "has_global", "d_model"
            ],
            interactive=False,
        )
        summary_table = gr.Dataframe(
            label="Model Summary (all / standard / global)",
            interactive=False,
        )
        load_detail_btn.click(
            fn=load_model_detail,
            inputs=[detail_model_id],
            # Order matches the (components, summary, status) return tuple.
            outputs=[components_table, summary_table, resume_status_text],
        )
        gr.Markdown("---")

        # --- Per-head raw data query ----------------------------------------
        gr.Markdown(
            "### Per-head Raw Data Query\n"
            "`Modality` and `Layer Type` are two independent filter dimensions.\n\n"
            "> Modality(模态)和 Layer Type(层结构类型)是两个独立过滤维度,可组合使用。"
        )
        with gr.Row():
            raw_model_id = gr.Textbox(
                label="Model ID",
                placeholder="google/gemma-4-e2b",
                scale=2,
            )
            raw_modality = gr.Dropdown(
                label="Modality",
                choices=["all", "language", "vision", "audio"],
                value="language",
                scale=1,
                info="Filter by component modality | 按模态过滤",
            )
            raw_layer_type = gr.Dropdown(
                label="Layer Type",
                choices=["all", "standard", "global"],
                value="all",
                scale=1,
                info=(
                    "standard = normal layers | "
                    "global = K=V shared layers (e.g. Gemma global)"
                ),
            )
        with gr.Row():
            raw_start = gr.Number(
                label="Start Layer", value=0, precision=0, scale=1
            )
            raw_end = gr.Number(
                label="End Layer", value=10, precision=0, scale=1
            )
            load_raw_btn = gr.Button(
                "🔍 Query Data", variant="secondary", scale=1
            )
        raw_status = gr.Textbox(
            label="Query Status", lines=1, interactive=False
        )
        raw_table = gr.Dataframe(
            label="Per-head Raw Data",
            interactive=False,
            wrap=False,
        )
        load_raw_btn.click(
            fn=load_layer_data,
            inputs=[
                raw_model_id, raw_modality, raw_layer_type,
                raw_start, raw_end
            ],
            # Order matches the (frame, status) return tuple.
            outputs=[raw_table, raw_status],
        )