NOT-OMEGA committed on
Commit
ea255c9
·
verified ·
1 Parent(s): facc0f2

Update app_gradio.py

Browse files
Files changed (1) hide show
  1. app_gradio.py +35 -37
app_gradio.py CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
6
  import io
7
  import time
8
  import pandas as pd
 
9
  import gradio as gr
10
  from classify import classify_log, classify_csv
11
 
@@ -397,21 +398,50 @@ def classify_single(source: str, log_message: str):
397
  def classify_batch(file):
398
  if file is None:
399
  return None, "⚠️ Please upload a CSV file."
 
 
 
400
  try:
401
  output_path, df = classify_csv(file.name, "/tmp/classified_output.csv")
402
  except ValueError as e:
403
  return None, f"⚠️ {e}"
404
  except Exception as e:
405
  return None, f"❌ Error: {e}"
 
 
406
  total = len(df)
 
407
  tier_counts = df["tier_used"].value_counts().to_dict()
408
  label_counts = df["predicted_label"].value_counts().to_dict()
 
409
  tier_lines = "\n".join(f" {TIER_COLORS.get(k,'βšͺ')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
410
  label_lines = "\n".join(f" β€’ {k}: {v}" for k, v in label_counts.items())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  stats = (
412
  f"βœ… Classified {total} logs\n\n"
413
  f"πŸ“Š Tier breakdown:\n{tier_lines}\n\n"
414
- f"🏷️ Label distribution:\n{label_lines}"
 
415
  )
416
  return output_path, stats
417
 
@@ -487,8 +517,7 @@ with gr.Blocks(title="LOG CLASSIFICATION SYSTEM") as demo:
487
  with gr.Tab("πŸ“¦ BATCH CSV"):
488
  gr.Markdown("""
489
  ### Bulk Classification
490
- Upload a CSV with columns: **`source`**, **`log_message`**
491
- Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
492
  """)
493
  with gr.Row():
494
  with gr.Column():
@@ -496,7 +525,8 @@ Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
496
  batch_btn = gr.Button("β–Ά CLASSIFY ALL", variant="primary")
497
  with gr.Column():
498
  csv_output = gr.File(label="πŸ“₯ DOWNLOAD RESULTS")
499
- stats_out = gr.Textbox(label="πŸ“Š STATISTICS", lines=12, interactive=False)
 
500
 
501
  batch_btn.click(
502
  fn=classify_batch,
@@ -505,36 +535,4 @@ Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
505
  )
506
 
507
  gr.Markdown("""
508
- **Sample CSV format:**
509
- ```
510
- source,log_message
511
- ModernCRM,User User123 logged in.
512
- LegacyCRM,Case escalation for ticket ID 7324 failed.
513
- BillingSystem,GET /api/v2/invoice HTTP/1.1 status: 500
514
- ```
515
- """)
516
-
517
- # ── Tab 3: Architecture ───────────────────────────────────────────
518
- with gr.Tab("πŸ—οΈ ARCHITECTURE"):
519
- gr.Markdown("""
520
- ## 3-Tier Hybrid Pipeline
521
-
522
- | Tier | Method | Coverage | Latency | Trigger |
523
- |------|--------|----------|---------|---------|
524
- | 🟒 **Regex** | Python `re` patterns | ~21% | < 1ms | Fixed patterns |
525
- | πŸ”΅ **BERT** | `all-MiniLM-L6-v2` + LogReg | ~79% | 20–80ms | High-volume categories |
526
- | 🟑 **LLM** | HuggingFace Inference API | ~0.3% | 500–2000ms | LegacyCRM + rare patterns |
527
-
528
- ## Model Performance
529
- - **Training data**: 2,410 synthetic enterprise logs
530
- - **Confidence threshold**: 0.5 (below β†’ escalate to LLM)
531
- - **Source-aware routing**: `LegacyCRM` β†’ LLM directly
532
-
533
- ## Environment Variables
534
- | Secret | Purpose |
535
- |--------|---------|
536
- | `HF_TOKEN` | LLM inference for LegacyCRM logs |
537
- """)
538
-
539
- if __name__ == "__main__":
540
- demo.launch(server_name="0.0.0.0", server_port=7860, theme=THEME, css=CUSTOM_CSS)
 
6
  import io
7
  import time
8
  import pandas as pd
9
+ import numpy as np # <-- Added numpy for percentiles
10
  import gradio as gr
11
  from classify import classify_log, classify_csv
12
 
 
398
  def classify_batch(file):
399
  if file is None:
400
  return None, "⚠️ Please upload a CSV file."
401
+
402
+ t0 = time.perf_counter() # Start Total Timer
403
+
404
  try:
405
  output_path, df = classify_csv(file.name, "/tmp/classified_output.csv")
406
  except ValueError as e:
407
  return None, f"⚠️ {e}"
408
  except Exception as e:
409
  return None, f"❌ Error: {e}"
410
+
411
+ total_time_sec = time.perf_counter() - t0 # End Total Timer
412
  total = len(df)
413
+
414
  tier_counts = df["tier_used"].value_counts().to_dict()
415
  label_counts = df["predicted_label"].value_counts().to_dict()
416
+
417
  tier_lines = "\n".join(f" {TIER_COLORS.get(k,'βšͺ')} {k}: {v} ({v/total:.0%})" for k, v in tier_counts.items())
418
  label_lines = "\n".join(f" β€’ {k}: {v}" for k, v in label_counts.items())
419
+
420
+ # Calculate Latencies (Requires 'latency_ms' column in CSV output from classify_csv)
421
+ if "latency_ms" in df.columns and not df["latency_ms"].empty:
422
+ latencies = df["latency_ms"].dropna()
423
+ p50 = np.percentile(latencies, 50)
424
+ p95 = np.percentile(latencies, 95)
425
+ p99 = np.percentile(latencies, 99)
426
+ latency_stats = (
427
+ f"⏱️ Performance Metrics:\n"
428
+ f" β€’ Total Time: {total_time_sec:.2f} s\n"
429
+ f" β€’ P50 Latency: {p50:.1f} ms\n"
430
+ f" β€’ P95 Latency: {p95:.1f} ms\n"
431
+ f" β€’ P99 Latency: {p99:.1f} ms"
432
+ )
433
+ else:
434
+ latency_stats = (
435
+ f"⏱️ Performance Metrics:\n"
436
+ f" β€’ Total Time: {total_time_sec:.2f} s\n"
437
+ f" β€’ (Latency stats unavailable: 'latency_ms' not found in output)"
438
+ )
439
+
440
  stats = (
441
  f"βœ… Classified {total} logs\n\n"
442
  f"πŸ“Š Tier breakdown:\n{tier_lines}\n\n"
443
+ f"🏷️ Label distribution:\n{label_lines}\n\n"
444
+ f"{latency_stats}"
445
  )
446
  return output_path, stats
447
 
 
517
  with gr.Tab("πŸ“¦ BATCH CSV"):
518
  gr.Markdown("""
519
  ### Bulk Classification
520
+ Upload a CSV with columns: **`source`**, **`log_message`** Output includes: `predicted_label`, `tier_used`, `confidence`, `latency_ms`
 
521
  """)
522
  with gr.Row():
523
  with gr.Column():
 
525
  batch_btn = gr.Button("β–Ά CLASSIFY ALL", variant="primary")
526
  with gr.Column():
527
  csv_output = gr.File(label="πŸ“₯ DOWNLOAD RESULTS")
528
+ # Increased lines from 12 to 16 to fit the new metrics nicely
529
+ stats_out = gr.Textbox(label="πŸ“Š STATISTICS", lines=16, interactive=False)
530
 
531
  batch_btn.click(
532
  fn=classify_batch,
 
535
  )
536
 
537
  gr.Markdown("""
538
+ **Sample CSV format:**