# Initial commit: LLM Inference Dashboard (commit aefabf0, author: jkottu)
"""Quantization details panel component."""
import gradio as gr
import pandas as pd
from typing import Dict, Any, Tuple, Optional
from collectors.quant_collector import QuantizationCollector, QuantizationInfo
def create_quant_panel() -> Dict[str, Any]:
    """
    Build the quantization details panel.

    Returns:
        Dictionary mapping component roles to their Gradio components.
    """
    components: Dict[str, Any] = {}
    with gr.Column():
        gr.Markdown("### Quantization Details")
        with gr.Row():
            components["quant_type"] = gr.Textbox(
                label="Quantization Method",
                interactive=False,
            )
            components["bits"] = gr.Number(
                label="Bits",
                precision=0,
                interactive=False,
            )
            components["group_size"] = gr.Number(
                label="Group Size",
                precision=0,
                interactive=False,
            )
        # Raw quantization configuration, rendered as JSON for inspection.
        components["quant_details"] = gr.JSON(
            label="Full Configuration",
        )
        # Table showing precision settings per layer.
        gr.Markdown("#### Per-Layer Precision")
        components["layer_table"] = gr.Dataframe(
            headers=["Layer", "Bits", "Group Size", "Dtype"],
            datatype=["str", "number", "str", "str"],
            label="Layer Precisions",
            interactive=False,
        )
    return components
def update_quant_panel(
    collector: "QuantizationCollector",
) -> Tuple[str, int, Optional[int], Dict, pd.DataFrame]:
    """
    Update the quantization panel with current data.

    Args:
        collector: Quantization collector instance; must provide
            ``detect()`` and ``get_layer_precisions()``.

    Returns:
        Tuple of (method, bits, group_size, details_json, layer_table)
    """
    info = collector.detect()
    layers = collector.get_layer_precisions()

    # Cap the table at 20 rows to keep the UI responsive.
    layer_rows = [
        {
            "Layer": layer.layer_name,
            "Bits": layer.bits,
            # Falsy group_size (None or 0) renders as "-".
            "Group Size": str(layer.group_size) if layer.group_size else "-",
            "Dtype": layer.dtype,
        }
        for layer in layers[:20]
    ]
    # Keep the expected column headers even when there are no layers,
    # so the Gradio Dataframe always renders the same schema.
    layer_df = (
        pd.DataFrame(layer_rows)
        if layer_rows
        else pd.DataFrame(columns=["Layer", "Bits", "Group Size", "Dtype"])
    )
    return (
        info.method,
        info.bits,
        info.group_size,
        info.to_dict(),
        layer_df,
    )
def get_quant_summary(info: QuantizationInfo) -> str:
    """
    Produce a short human-readable description of the quantization setup.

    Args:
        info: QuantizationInfo instance

    Returns:
        Human-readable summary string
    """
    # Unquantized models get a dedicated full-precision summary.
    if info.method == "None (FP16/BF16)":
        dtype = info.compute_dtype or 'float16'
        return f"Full precision ({dtype})"

    # Assemble the summary from optional fragments, then join once.
    parts = [f"{info.method} {info.bits}-bit"]
    if info.group_size:
        parts.append(f", group size {info.group_size}")
    if info.quant_type:
        parts.append(f" ({info.quant_type})")
    return "".join(parts)