Spaces:
Sleeping
Sleeping
Update app_gradio.py
Browse files- app_gradio.py +34 -31
app_gradio.py
CHANGED
|
@@ -19,12 +19,14 @@ SOURCES = [
|
|
| 19 |
"AnalyticsEngine", "ThirdPartyAPI", "LegacyCRM",
|
| 20 |
]
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
"
|
| 25 |
-
"
|
| 26 |
-
"
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
EXAMPLE_LOGS = [
|
| 30 |
["ModernCRM", "User User12345 logged in."],
|
|
@@ -91,12 +93,12 @@ def classify_single(source: str, log_message: str):
|
|
| 91 |
try:
|
| 92 |
result = classify_log(source, log_message)
|
| 93 |
latency = (time.perf_counter() - t0) * 1000
|
| 94 |
-
icon =
|
| 95 |
return (
|
| 96 |
result["label"],
|
| 97 |
f"{icon} {result['tier']}",
|
| 98 |
f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
|
| 99 |
-
f"{latency:.
|
| 100 |
)
|
| 101 |
except Exception as e:
|
| 102 |
return f"Error: {str(e)}", "Fail", "—", "—"
|
|
@@ -115,35 +117,36 @@ def classify_batch(file, progress=gr.Progress(track_tqdm=True)):
|
|
| 115 |
progress(0.9, desc="π Calculating Metrics...")
|
| 116 |
|
| 117 |
total = len(df)
|
| 118 |
-
tier_counts = df["tier_used"].value_counts().to_dict()
|
| 119 |
label_counts = df["predicted_label"].value_counts().to_dict()
|
|
|
|
| 120 |
|
| 121 |
-
#
|
| 122 |
-
tier_lines =
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
-
|
| 128 |
label_lines = "\n".join([f" • {k}: {v}" for k, v in label_counts.items()])
|
| 129 |
|
| 130 |
-
# Latency Metrics (P50, P95, P99)
|
| 131 |
-
if "latency_ms" in df.columns:
|
| 132 |
-
lats = df["latency_ms"].dropna()
|
| 133 |
-
p50, p95, p99 = np.percentile(lats, 50), np.percentile(lats, 95), np.percentile(lats, 99)
|
| 134 |
-
else:
|
| 135 |
-
# Fallback if logic is purely regex
|
| 136 |
-
p50, p95, p99 = 0.1, 1.9, 2.5
|
| 137 |
-
|
| 138 |
stats = (
|
| 139 |
-
f"✅ Classified {total} logs\n\n"
|
| 140 |
-
f"π
|
| 141 |
-
f"🏷️ Label distribution:\n{label_lines}
|
| 142 |
-
f"⏱️ Performance Metrics:\n"
|
| 143 |
-
f" • Total Time: {total_time_sec:.2f} s\n"
|
| 144 |
-
f" • P50 Latency: {p50:.1f} ms\n"
|
| 145 |
-
f" • P95 Latency: {p95:.1f} ms\n"
|
| 146 |
-
f" • P99 Latency: {p99:.1f} ms"
|
| 147 |
)
|
| 148 |
|
| 149 |
progress(1.0, desc="✅ Success")
|
|
|
|
| 19 |
"AnalyticsEngine", "ThirdPartyAPI", "LegacyCRM",
|
| 20 |
]
|
| 21 |
|
| 22 |
+
# Updated to dynamically support the new Cache Hit tiers
|
| 23 |
+
def get_tier_icon(tier_name: str) -> str:
|
| 24 |
+
if "Regex" in tier_name: return "🟢"
|
| 25 |
+
if "BERT" in tier_name: return "🔵"
|
| 26 |
+
if "Cache Hit" in tier_name: return "⚡" # Make the cost savings pop in the UI
|
| 27 |
+
if "fallback" in tier_name: return "🟠"
|
| 28 |
+
if "LLM" in tier_name: return "🟡"
|
| 29 |
+
return "⚪"
|
| 30 |
|
| 31 |
EXAMPLE_LOGS = [
|
| 32 |
["ModernCRM", "User User12345 logged in."],
|
|
|
|
| 93 |
try:
|
| 94 |
result = classify_log(source, log_message)
|
| 95 |
latency = (time.perf_counter() - t0) * 1000
|
| 96 |
+
icon = get_tier_icon(result["tier"])
|
| 97 |
return (
|
| 98 |
result["label"],
|
| 99 |
f"{icon} {result['tier']}",
|
| 100 |
f"{result['confidence']:.1%}" if result["confidence"] else "N/A",
|
| 101 |
+
f"{latency:.4f} ms" # FIX: Expose the true sub-millisecond telemetry
|
| 102 |
)
|
| 103 |
except Exception as e:
|
| 104 |
return f"Error: {str(e)}", "Fail", "—", "—"
|
|
|
|
| 117 |
progress(0.9, desc="π Calculating Metrics...")
|
| 118 |
|
| 119 |
total = len(df)
|
|
|
|
| 120 |
label_counts = df["predicted_label"].value_counts().to_dict()
|
| 121 |
+
tier_counts = df["tier_used"].value_counts().to_dict()
|
| 122 |
|
| 123 |
+
# FIX: Decouple Latency Metrics per tier instead of global distribution
|
| 124 |
+
tier_lines = []
|
| 125 |
+
for tier, count in tier_counts.items():
|
| 126 |
+
tier_df = df[df["tier_used"] == tier]
|
| 127 |
+
lats = tier_df["latency_ms"].dropna()
|
| 128 |
+
icon = get_tier_icon(tier)
|
| 129 |
+
pct = count / total
|
| 130 |
+
|
| 131 |
+
if "BERT" in tier:
|
| 132 |
+
total_ms = lats.sum()
|
| 133 |
+
tier_lines.append(f" {icon} {tier}: Batch Latency {total_ms:.1f} ms (Over {count} logs)")
|
| 134 |
+
elif "Regex" in tier:
|
| 135 |
+
p50 = np.percentile(lats, 50) if not lats.empty else 0
|
| 136 |
+
tier_lines.append(f" {icon} {tier}: < 0.1 ms (p50: {p50:.4f} ms) | {count} logs ({pct:.0%})")
|
| 137 |
+
else:
|
| 138 |
+
p50 = np.percentile(lats, 50) if not lats.empty else 0
|
| 139 |
+
p95 = np.percentile(lats, 95) if not lats.empty else 0
|
| 140 |
+
p99 = np.percentile(lats, 99) if not lats.empty else 0
|
| 141 |
+
tier_lines.append(f" {icon} {tier}: {count} logs ({pct:.0%}) | p50={p50:.1f}ms p95={p95:.1f}ms p99={p99:.1f}ms")
|
| 142 |
|
| 143 |
+
tier_lines_str = "\n".join(tier_lines)
|
| 144 |
label_lines = "\n".join([f" • {k}: {v}" for k, v in label_counts.items()])
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
stats = (
|
| 147 |
+
f"✅ Classified {total} logs in {total_time_sec:.2f} s\n\n"
|
| 148 |
+
f"π Performance by Tier:\n{tier_lines_str}\n\n"
|
| 149 |
+
f"🏷️ Label distribution:\n{label_lines}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
)
|
| 151 |
|
| 152 |
progress(1.0, desc="✅ Success")
|