Spaces:
Sleeping
Sleeping
| """Request tracing panel component.""" | |
| import gradio as gr | |
| import pandas as pd | |
| from typing import Dict, Any, Tuple | |
| from services.request_tracer import RequestTracer | |
def create_tracing_panel(tracer: RequestTracer) -> Dict[str, Any]:
    """
    Build the request tracing section of the UI.

    Lays out, inside a single column: filter controls with a Refresh
    button, four summary numbers, the traces table, a per-phase latency
    bar plot and the P50/P95/P99 readouts. Clicking Refresh passes the
    current filter and limit to ``update_tracing_panel`` and writes its
    nine return values into the display components.

    Args:
        tracer: Request tracer instance handed to ``update_tracing_panel``
            on every refresh

    Returns:
        Dictionary of Gradio components keyed by name: the two filter
        inputs followed by the nine display components. The Refresh
        button itself is not part of the returned mapping.
    """
    # Keyword sets shared by the read-only numeric fields.
    whole_number = {"precision": 0, "interactive": False}
    one_decimal = {"precision": 1, "interactive": False}

    table_headers = [
        "ID", "Prompt Toks", "Output Toks",
        "Queue (ms)", "Prefill (ms)", "Decode (ms)",
        "Total (ms)", "Tok/s", "Slow?",
    ]
    # First and last columns are text, the seven in between are numeric.
    table_dtypes = ["str"] + ["number"] * 7 + ["str"]

    with gr.Column():
        gr.Markdown("### Request Tracing")

        # Filter controls
        with gr.Row():
            trace_filter = gr.Dropdown(
                choices=["All Requests", "Slow Only"],
                value="All Requests",
                label="Filter",
            )
            trace_limit = gr.Slider(
                minimum=10,
                maximum=500,
                value=100,
                step=10,
                label="Show Last N Requests",
            )
            refresh_btn = gr.Button("Refresh", size="sm")

        # Display components, kept in the exact order in which
        # update_tracing_panel returns their values.
        displays: Dict[str, Any] = {}

        # Summary stats
        with gr.Row():
            displays["total_requests"] = gr.Number(
                label="Total Requests", **whole_number
            )
            displays["slow_requests"] = gr.Number(
                label="Slow Requests", **whole_number
            )
            displays["slow_rate"] = gr.Number(
                label="Slow Rate %", **one_decimal
            )
            displays["baseline_p95"] = gr.Number(
                label="Baseline P95 (ms)", **one_decimal
            )

        # Traces table
        displays["traces_table"] = gr.Dataframe(
            headers=table_headers,
            datatype=table_dtypes,
            label="Request Traces",
            interactive=False,
        )

        # Latency breakdown chart
        gr.Markdown("#### Average Latency Breakdown")
        displays["latency_breakdown"] = gr.BarPlot(
            x="phase",
            y="ms",
            title="Latency by Phase",
            x_title="Phase",
            y_title="Time (ms)",
            height=200,
        )

        # Percentiles
        gr.Markdown("#### Latency Percentiles")
        with gr.Row():
            for key, caption in (
                ("p50", "P50 (ms)"),
                ("p95", "P95 (ms)"),
                ("p99", "P99 (ms)"),
            ):
                displays[key] = gr.Number(label=caption, **one_decimal)

        # Event handlers
        def on_refresh(selected_filter, max_rows):
            # The slider value is cast to int before it is used as a limit.
            return update_tracing_panel(
                tracer, selected_filter == "Slow Only", int(max_rows)
            )

        refresh_btn.click(
            fn=on_refresh,
            inputs=[trace_filter, trace_limit],
            outputs=list(displays.values()),
        )

    return {
        "trace_filter": trace_filter,
        "trace_limit": trace_limit,
        **displays,
    }
# Column order of the traces table; used for both populated and empty frames.
_TRACE_COLUMNS = (
    "ID", "Prompt Toks", "Output Toks",
    "Queue (ms)", "Prefill (ms)", "Decode (ms)",
    "Total (ms)", "Tok/s", "Slow?",
)


def update_tracing_panel(
    tracer: RequestTracer,
    slow_only: bool = False,
    limit: int = 100,
) -> Tuple[int, int, float, float, pd.DataFrame, pd.DataFrame, float, float, float]:
    """
    Update the tracing panel with current data.

    Queries the tracer for its stats, recent traces, latency breakdown and
    percentiles, and converts them into values for the panel components.

    Args:
        tracer: Request tracer instance
        slow_only: Only show slow requests
        limit: Maximum number of traces to show

    Returns:
        Tuple of all component values, in this order: total requests,
        slow requests, slow rate (%), baseline P95, traces table frame,
        latency breakdown frame (columns ``phase`` and ``ms``), P50, P95,
        P99. Any stat or percentile that is missing or ``None`` is
        reported as 0.
    """
    stats = tracer.get_stats()
    traces = tracer.get_recent_traces(limit=limit, slow_only=slow_only)
    breakdown = tracer.get_latency_breakdown()
    percentiles = tracer.get_percentiles()

    def _or_zero(mapping, key):
        # A missing key and an explicit None both become 0, so every numeric
        # field gets a number regardless of how much data the tracer has.
        return mapping.get(key) or 0

    # Build traces table. The tracer's list is reversed so that the most
    # recent trace comes first (assumes get_recent_traces returns
    # oldest-first -- TODO confirm against RequestTracer).
    trace_rows = [
        {
            "ID": trace.request_id,
            "Prompt Toks": trace.prompt_tokens,
            "Output Toks": trace.output_tokens,
            "Queue (ms)": round(trace.queue_time_ms, 1),
            "Prefill (ms)": round(trace.prefill_time_ms, 1),
            "Decode (ms)": round(trace.decode_time_ms, 1),
            "Total (ms)": round(trace.total_time_ms, 1),
            "Tok/s": round(trace.tokens_per_second, 1),
            "Slow?": "Yes" if trace.is_slow else "",
        }
        for trace in reversed(traces)
    ]
    # Passing ``columns`` explicitly yields the same header row whether or
    # not there are any traces, so no separate empty-frame branch is needed.
    traces_df = pd.DataFrame(trace_rows, columns=list(_TRACE_COLUMNS))

    # Build breakdown chart
    breakdown_df = pd.DataFrame([
        {"phase": "Queue", "ms": breakdown.queue_ms},
        {"phase": "Prefill", "ms": breakdown.prefill_ms},
        {"phase": "Decode", "ms": breakdown.decode_ms},
    ])

    return (
        _or_zero(stats, "total_requests"),
        _or_zero(stats, "slow_requests"),
        _or_zero(stats, "slow_rate_percent"),
        _or_zero(stats, "baseline_p95"),
        traces_df,
        breakdown_df,
        _or_zero(percentiles, "p50"),
        _or_zero(percentiles, "p95"),
        _or_zero(percentiles, "p99"),
    )