# Initial commit: LLM Inference Dashboard (commit aefabf0, author: jkottu)
"""Quantization details panel component."""
import gradio as gr
import pandas as pd
from typing import Dict, Any, Tuple, Optional
from collectors.quant_collector import QuantizationCollector, QuantizationInfo
def create_quant_panel() -> Dict[str, Any]:
    """
    Build the quantization details panel.

    Returns:
        Dictionary mapping component roles to their Gradio components.
    """
    components: Dict[str, Any] = {}
    with gr.Column():
        gr.Markdown("### Quantization Details")
        with gr.Row():
            components["quant_type"] = gr.Textbox(
                label="Quantization Method",
                interactive=False,
            )
            components["bits"] = gr.Number(
                label="Bits",
                precision=0,
                interactive=False,
            )
            components["group_size"] = gr.Number(
                label="Group Size",
                precision=0,
                interactive=False,
            )
        # Raw quantization configuration, rendered as JSON for inspection.
        components["quant_details"] = gr.JSON(
            label="Full Configuration",
        )
        # Table showing precision settings per layer.
        gr.Markdown("#### Per-Layer Precision")
        components["layer_table"] = gr.Dataframe(
            headers=["Layer", "Bits", "Group Size", "Dtype"],
            datatype=["str", "number", "str", "str"],
            label="Layer Precisions",
            interactive=False,
        )
    return components
def update_quant_panel(
    collector: "QuantizationCollector",
) -> Tuple[str, int, Optional[int], Dict, pd.DataFrame]:
    """
    Update the quantization panel with current data.

    Args:
        collector: Quantization collector instance; must provide
            ``detect()`` and ``get_layer_precisions()``.

    Returns:
        Tuple of (method, bits, group_size, details_json, layer_table)
    """
    info = collector.detect()
    layers = collector.get_layer_precisions()

    # Cap the table at 20 rows to keep the UI responsive.
    layer_rows = [
        {
            "Layer": layer.layer_name,
            "Bits": layer.bits,
            # Falsy group_size (None or 0) renders as "-".
            "Group Size": str(layer.group_size) if layer.group_size else "-",
            "Dtype": layer.dtype,
        }
        for layer in layers[:20]
    ]
    # Keep the expected column headers even when there are no layers,
    # so the Gradio Dataframe always renders the same schema.
    layer_df = (
        pd.DataFrame(layer_rows)
        if layer_rows
        else pd.DataFrame(columns=["Layer", "Bits", "Group Size", "Dtype"])
    )
    return (
        info.method,
        info.bits,
        info.group_size,
        info.to_dict(),
        layer_df,
    )
def get_quant_summary(info: QuantizationInfo) -> str:
    """
    Produce a short human-readable description of the quantization setup.

    Args:
        info: QuantizationInfo instance

    Returns:
        Human-readable summary string
    """
    # Unquantized models get a dedicated full-precision summary.
    if info.method == "None (FP16/BF16)":
        dtype = info.compute_dtype or 'float16'
        return f"Full precision ({dtype})"

    # Assemble the summary from optional fragments, then join once.
    parts = [f"{info.method} {info.bits}-bit"]
    if info.group_size:
        parts.append(f", group size {info.group_size}")
    if info.quant_type:
        parts.append(f" ({info.quant_type})")
    return "".join(parts)