Spaces:
Sleeping
Sleeping
Update app_gradio.py
Browse files- app_gradio.py +35 -37
app_gradio.py
CHANGED
|
@@ -6,6 +6,7 @@ from __future__ import annotations
|
|
| 6 |
import io
|
| 7 |
import time
|
| 8 |
import pandas as pd
|
|
|
|
| 9 |
import gradio as gr
|
| 10 |
from classify import classify_log, classify_csv
|
| 11 |
|
|
@@ -397,21 +398,50 @@ def classify_single(source: str, log_message: str):
|
|
| 397 |
def classify_batch(file):
    """Classify every log in an uploaded CSV and build a text summary.

    Args:
        file: Uploaded-file object whose ``name`` attribute is the path of the
            CSV to classify, or ``None`` when nothing was uploaded
            (presumably a Gradio file upload -- confirm against the caller).

    Returns:
        A ``(output_path, stats)`` tuple. On success ``output_path`` is the
        path returned by ``classify_csv`` and ``stats`` is a multi-line
        summary (total count, tier breakdown, label distribution). On failure
        ``output_path`` is ``None`` and ``stats`` holds the message to show.
    """
    if file is None:
        return None, "⚠️ Please upload a CSV file."

    try:
        # NOTE(review): the output path is a fixed literal, so every call
        # writes to the same file.
        output_path, df = classify_csv(file.name, "/tmp/classified_output.csv")
    except ValueError as e:
        # ValueError is reported with the warning prefix rather than the
        # error prefix used for every other exception.
        return None, f"⚠️ {e}"
    except Exception as e:
        return None, f"❌ Error: {e}"

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()

    # With zero rows both dicts are empty, so ``v / total`` is never evaluated.
    tier_lines = "\n".join(
        f" {TIER_COLORS.get(k, '⚪')} {k}: {v} ({v / total:.0%})"
        for k, v in tier_counts.items()
    )
    label_lines = "\n".join(f" • {k}: {v}" for k, v in label_counts.items())

    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}"
    )
    return output_path, stats
|
| 417 |
|
|
@@ -487,8 +517,7 @@ with gr.Blocks(title="LOG CLASSIFICATION SYSTEM") as demo:
|
|
| 487 |
with gr.Tab("π¦ BATCH CSV"):
|
| 488 |
gr.Markdown("""
|
| 489 |
### Bulk Classification
|
| 490 |
-
Upload a CSV with columns: **`source`**, **`log_message`**
|
| 491 |
-
Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
|
| 492 |
""")
|
| 493 |
with gr.Row():
|
| 494 |
with gr.Column():
|
|
@@ -496,7 +525,8 @@ Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
|
|
| 496 |
batch_btn = gr.Button("βΆ CLASSIFY ALL", variant="primary")
|
| 497 |
with gr.Column():
|
| 498 |
csv_output = gr.File(label="π₯ DOWNLOAD RESULTS")
|
| 499 |
-
|
|
|
|
| 500 |
|
| 501 |
batch_btn.click(
|
| 502 |
fn=classify_batch,
|
|
@@ -505,36 +535,4 @@ Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
|
|
| 505 |
)
|
| 506 |
|
| 507 |
gr.Markdown("""
|
| 508 |
-
**Sample CSV format:**
|
| 509 |
-
```
|
| 510 |
-
source,log_message
|
| 511 |
-
ModernCRM,User User123 logged in.
|
| 512 |
-
LegacyCRM,Case escalation for ticket ID 7324 failed.
|
| 513 |
-
BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
|
| 514 |
-
```
|
| 515 |
-
""")
|
| 516 |
-
|
| 517 |
-
# ββ Tab 3: Architecture βββββββββββββββββββββββββββββββββββββββββββ
|
| 518 |
-
with gr.Tab("ποΈ ARCHITECTURE"):
|
| 519 |
-
gr.Markdown("""
|
| 520 |
-
## 3-Tier Hybrid Pipeline
|
| 521 |
-
|
| 522 |
-
| Tier | Method | Coverage | Latency | Trigger |
|
| 523 |
-
|------|--------|----------|---------|---------|
|
| 524 |
-
| 🟢 **Regex** | Python `re` patterns | ~21% | < 1ms | Fixed patterns |
|
| 525 |
-
| 🔵 **BERT** | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories |
|
| 526 |
-
| 🟡 **LLM** | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM + rare patterns |
|
| 527 |
-
|
| 528 |
-
## Model Performance
|
| 529 |
-
- **Training data**: 2,410 synthetic enterprise logs
|
| 530 |
-
- **Confidence threshold**: 0.5 (below → escalate to LLM)
|
| 531 |
-
- **Source-aware routing**: `LegacyCRM` → LLM directly
|
| 532 |
-
|
| 533 |
-
## Environment Variables
|
| 534 |
-
| Secret | Purpose |
|
| 535 |
-
|--------|---------|
|
| 536 |
-
| `HF_TOKEN` | LLM inference for LegacyCRM logs |
|
| 537 |
-
""")
|
| 538 |
-
|
| 539 |
-
if __name__ == "__main__":
|
| 540 |
-
demo.launch(server_name="0.0.0.0", server_port=7860, theme=THEME, css=CUSTOM_CSS)
|
|
|
|
| 6 |
import io
|
| 7 |
import time
|
| 8 |
import pandas as pd
|
| 9 |
+
import numpy as np # <-- Added numpy for percentiles
|
| 10 |
import gradio as gr
|
| 11 |
from classify import classify_log, classify_csv
|
| 12 |
|
|
|
|
| 398 |
def classify_batch(file):
    """Classify every log in an uploaded CSV and build a text summary.

    Args:
        file: Uploaded-file object whose ``name`` attribute is the path of the
            CSV to classify, or ``None`` when nothing was uploaded
            (presumably a Gradio file upload -- confirm against the caller).

    Returns:
        A ``(output_path, stats)`` tuple. On success ``output_path`` is the
        path returned by ``classify_csv`` and ``stats`` is a multi-line
        summary (total count, tier breakdown, label distribution and
        performance metrics). On failure ``output_path`` is ``None`` and
        ``stats`` holds the message to show.
    """
    if file is None:
        return None, "⚠️ Please upload a CSV file."

    started = time.perf_counter()
    try:
        # NOTE(review): the output path is a fixed literal, so every call
        # writes to the same file.
        output_path, df = classify_csv(file.name, "/tmp/classified_output.csv")
    except ValueError as e:
        # ValueError is reported with the warning prefix rather than the
        # error prefix used for every other exception.
        return None, f"⚠️ {e}"
    except Exception as e:
        return None, f"❌ Error: {e}"
    total_time_sec = time.perf_counter() - started

    total = len(df)
    tier_counts = df["tier_used"].value_counts().to_dict()
    label_counts = df["predicted_label"].value_counts().to_dict()

    # With zero rows both dicts are empty, so ``v / total`` is never evaluated.
    tier_lines = "\n".join(
        f" {TIER_COLORS.get(k, '⚪')} {k}: {v} ({v / total:.0%})"
        for k, v in tier_counts.items()
    )
    label_lines = "\n".join(f" • {k}: {v}" for k, v in label_counts.items())

    stats = (
        f"✅ Classified {total} logs\n\n"
        f"📊 Tier breakdown:\n{tier_lines}\n\n"
        f"🏷️ Label distribution:\n{label_lines}\n\n"
        f"{_latency_summary(df, total_time_sec)}"
    )
    return output_path, stats


def _latency_summary(df, total_time_sec):
    """Format the "Performance Metrics" section of the batch statistics.

    Args:
        df: Result frame; its ``latency_ms`` column is used when present.
        total_time_sec: Wall-clock duration of the whole batch, in seconds.

    Returns:
        A multi-line string with the total time and, when at least one
        non-null latency value exists, the P50/P95/P99 latencies.
    """
    header = (
        "⏱️ Performance Metrics:\n"
        f" • Total Time: {total_time_sec:.2f} s\n"
    )

    if "latency_ms" not in df.columns:
        return header + " • (Latency stats unavailable: 'latency_ms' not found in output)"

    # Drop nulls *before* testing for emptiness: a column holding only NaN is
    # not ``.empty`` but leaves nothing to compute percentiles from.
    latencies = df["latency_ms"].dropna()
    if latencies.empty:
        return header + " • (Latency stats unavailable: no 'latency_ms' values in output)"

    p50, p95, p99 = np.percentile(latencies, [50, 95, 99])
    return (
        header
        + f" • P50 Latency: {p50:.1f} ms\n"
        + f" • P95 Latency: {p95:.1f} ms\n"
        + f" • P99 Latency: {p99:.1f} ms"
    )
|
| 447 |
|
|
|
|
| 517 |
with gr.Tab("π¦ BATCH CSV"):
|
| 518 |
gr.Markdown("""
|
| 519 |
### Bulk Classification
|
| 520 |
+
Upload a CSV with columns: **`source`**, **`log_message`** Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
|
|
|
|
| 521 |
""")
|
| 522 |
with gr.Row():
|
| 523 |
with gr.Column():
|
|
|
|
| 525 |
batch_btn = gr.Button("βΆ CLASSIFY ALL", variant="primary")
|
| 526 |
with gr.Column():
|
| 527 |
csv_output = gr.File(label="π₯ DOWNLOAD RESULTS")
|
| 528 |
+
# Increased lines from 12 to 16 to fit the new metrics nicely
|
| 529 |
+
stats_out = gr.Textbox(label="π STATISTICS", lines=16, interactive=False)
|
| 530 |
|
| 531 |
batch_btn.click(
|
| 532 |
fn=classify_batch,
|
|
|
|
| 535 |
)
|
| 536 |
|
| 537 |
gr.Markdown("""
|
| 538 |
+
**Sample CSV format:**
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|