Spaces:

NOT-OMEGA
/

LogAI-Engine

Running

App Files Community

NOT-OMEGA committed on Apr 15

Commit

ace5ccf

verified ·

1 Parent(s): 5b20649

Update classify.py

Browse files

Files changed (1) hide show

classify.py +29 -33

classify.py CHANGED Viewed

@@ -1,14 +1,14 @@
 """
-classify.py — 3-Tier Hybrid Pipeline (V8 — Cloud Container Safe)
 Architecture:
   LegacyCRM → LLM directly
   Others    → Regex → BERT (batch) → LLM fallback
-Changes in V8 (Stability First):
-  - Removed ProcessPoolExecutor: It was causing Out-Of-Memory (OOM) crashes on Hugging Face Spaces by duplicating the BERT model across CPU cores.
-  - Reverted to Sequential Chunks: Protects the 16GB RAM limit and keeps the 500k @lru_cache perfectly intact in the main process.
-  - Retained ThreadPoolExecutor: Only used for LLM API calls (I/O bound), which is safe and won't crash the container.
 """
 from __future__ import annotations
 import os
@@ -16,7 +16,7 @@ import time
 import statistics
 import pandas as pd
 from functools import lru_cache
-from concurrent.futures import ThreadPoolExecutor
 from processor_regex import classify_with_regex
 from processor_bert  import classify_batch as bert_batch
 from processor_llm   import classify_with_llm
@@ -35,7 +35,7 @@ def _make_result(label: str, tier: str, confidence, latency_ms: float) -> dict:
     }
-# ── Caching Layer (Single Process - RAM Safe) ───────────────────────────────
 @lru_cache(maxsize=500000)
 def cached_llm_call(log_msg: str) -> str:
     """Executes the expensive LLM call only if the string misses the cache."""
@@ -70,7 +70,7 @@ def classify_logs(logs: list[tuple[str, str]]) -> list[dict]:
             else:
                 bert_indices.append(i)
-    # ── Step 2: BERT batch (Sequential - RAM Safe) ──────────────────────────
     if bert_indices:
         bert_msgs = [logs[i][1] for i in bert_indices]
@@ -86,7 +86,7 @@ def classify_logs(logs: list[tuple[str, str]]) -> list[dict]:
             else:
                 llm_indices.append(idx)
-    # ── Step 3: LLM (I/O Bound - Threading Safe) ────────────────────────────
     if llm_indices:
         def parallel_llm(idx):
             src, msg = logs[idx]
@@ -100,7 +100,6 @@ def classify_logs(logs: list[tuple[str, str]]) -> list[dict]:
             return idx, _make_result(label, tier, None, t_llm_ms)
-        # ThreadPoolExecutor is safe for Gradio/HF Spaces because it shares memory
         with ThreadPoolExecutor() as executor:
             llm_results = list(executor.map(parallel_llm, llm_indices))
@@ -142,11 +141,16 @@ def pipeline_summary(results: list[dict]) -> dict:
     }
-# ── CSV batch classify (Container Safe Processing) ───────────────────────────
 def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str, pd.DataFrame]:
     """
-    Stable Batch Processing for 2M+ Logs on Hugging Face Spaces.
-    Runs chunks sequentially to prevent OOM memory crashes.
     """
     df = pd.read_csv(input_path)
     required = {"source", "log_message"}
@@ -156,18 +160,23 @@ def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str,
     log_pairs = list(zip(df["source"], df["log_message"]))
     total_logs = len(log_pairs)
-    # Reduced chunk size slightly to give the container more breathing room
-    chunk_size = 25000
     chunks = [log_pairs[i:i + chunk_size] for i in range(0, total_logs, chunk_size)]
     results = []
-    print(f"🔥 Processing {len(chunks)} chunks sequentially to protect RAM...")
     t_start = time.perf_counter()
-    # Sequential loop: Prevents Gradio from crashing and keeps memory stable
-    for chunk in chunks:
-        results.extend(classify_logs(chunk))
     t_end = time.perf_counter()
     print(f"⏱️ True Wall-Clock Processing Time: {(t_end - t_start):.2f} seconds")
@@ -185,17 +194,4 @@ def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str,
 # Aliases
-classify = classify_logs
-# ── Self-test ────────────────────────────────────────────────────────────────
-if __name__ == "__main__":
-    sample = [
-        ("ModernCRM",       "IP 192.168.133.114 blocked due to potential attack"),
-        ("BillingSystem",   "User User12345 logged in."),
-        ("LegacyCRM",       "Case escalation failed due to active timeout."),
-    ]
-    print("Running quick test...")
-    results = classify_logs(sample)
-    print("Done. No errors.")

 """
+classify.py — 3-Tier Hybrid Pipeline (V9 — Balanced CPU & Gradio Safe)
 Architecture:
   LegacyCRM → LLM directly
   Others    → Regex → BERT (batch) → LLM fallback
+Changes in V9:
+  - Fixed CPU Starvation: Limited max_workers to half the CPU cores to prevent Gradio WebSocket timeouts.
+  - Reduced IPC Overhead: Lowered chunk_size to 10,000 to prevent CPU lockups during cross-process data pickling.
+  - Restored Multi-processing: Outer chunks use ProcessPoolExecutor for speed, inner LLM calls use ThreadPoolExecutor.
 """
 from __future__ import annotations
 import os
 import statistics
 import pandas as pd
 from functools import lru_cache
+from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
 from processor_regex import classify_with_regex
 from processor_bert  import classify_batch as bert_batch
 from processor_llm   import classify_with_llm
     }
+# ── Caching Layer (Sharded per Worker) ──────────────────────────────────────
 @lru_cache(maxsize=500000)
 def cached_llm_call(log_msg: str) -> str:
     """Executes the expensive LLM call only if the string misses the cache."""
             else:
                 bert_indices.append(i)
+    # ── Step 2: BERT batch (CPU Bound) ──────────────────────────────────────
     if bert_indices:
         bert_msgs = [logs[i][1] for i in bert_indices]
             else:
                 llm_indices.append(idx)
+    # ── Step 3: LLM (I/O Bound - Threading Applied Here) ────────────────────
     if llm_indices:
         def parallel_llm(idx):
             src, msg = logs[idx]
             return idx, _make_result(label, tier, None, t_llm_ms)
         with ThreadPoolExecutor() as executor:
             llm_results = list(executor.map(parallel_llm, llm_indices))
     }
+# ── Multiprocessing Helper ───────────────────────────────────────────────────
+def _process_chunk(chunk: list[tuple[str, str]]) -> list[dict]:
+    """Top-level helper function required for ProcessPoolExecutor mapping."""
+    return classify_logs(chunk)
+# ── CSV batch classify (Balanced Processing) ─────────────────────────────────
 def classify_csv(input_path: str, output_path: str = "output.csv") -> tuple[str, pd.DataFrame]:
     """
+    Balanced Batch Processing to prevent CPU Starvation UI crashes.
     """
     df = pd.read_csv(input_path)
     required = {"source", "log_message"}
     log_pairs = list(zip(df["source"], df["log_message"]))
     total_logs = len(log_pairs)
+    # FIX: Use exactly half of the available CPU cores (minimum 1).
+    # This leaves the other half for Gradio websockets and the OS.
+    safe_cores = max(1, os.cpu_count() // 2)
+    # FIX: Reduce chunk size to 10,000.
+    # Massive chunks cause CPU lockups during inter-process data pickling.
+    chunk_size = 10000
     chunks = [log_pairs[i:i + chunk_size] for i in range(0, total_logs, chunk_size)]
     results = []
+    print(f"🔥 Firing up {safe_cores} CPU Cores (Leaving remaining for UI)...")
     t_start = time.perf_counter()
+    with ProcessPoolExecutor(max_workers=safe_cores) as executor:
+        for chunk_result in executor.map(_process_chunk, chunks):
+            results.extend(chunk_result)
     t_end = time.perf_counter()
     print(f"⏱️ True Wall-Clock Processing Time: {(t_end - t_start):.2f} seconds")
 # Aliases
+classify = classify_logs