""" nlp/summarizer.py - Surveillance log summarization using facebook/bart-large-cnn """ import time from typing import List, Dict from transformers import pipeline, Pipeline from loguru import logger from config import settings, DEVICE class SurveillanceSummarizer: """Abstractive summarization of surveillance event logs using BART.""" def __init__(self): logger.info(f"Loading summarization model: {settings.SUMMARIZER_MODEL}") device_id = 0 if str(DEVICE) == "cuda" else -1 self.summarizer: Pipeline = pipeline( "summarization", model=settings.SUMMARIZER_MODEL, tokenizer=settings.SUMMARIZER_MODEL, device=device_id, ) logger.info("✅ SurveillanceSummarizer ready.") def _events_to_text(self, events: List[Dict]) -> str: parts = [] for e in events: ts = e.get("timestamp", "") cam = e.get("camera_id", "") activity = e.get("activity_type", "") anomaly = e.get("anomaly_score", 0.0) attrs = e.get("attributes", {}) gender = attrs.get("gender", "") if attrs else "" color = attrs.get("color", "") if attrs else "" parts.append( f"Camera {cam} at {ts}: {gender} person in {color} clothing observed {activity} " f"with anomaly score {anomaly:.2f}." ) return " ".join(parts) def summarize( self, events: List[Dict], min_length: int = 30, max_length: int = 200, ) -> Dict: """Summarize a list of surveillance events.""" if not events: return {"summary": "No events to summarize.", "latency_ms": 0} text = self._events_to_text(events[:30]) # BART max input is ~1024 tokens text = text[:3000] t0 = time.perf_counter() result = self.summarizer( text, min_length=min_length, max_length=max_length, do_sample=False, ) latency_ms = (time.perf_counter() - t0) * 1000 summary = result[0]["summary_text"] logger.debug(f"Summarized {len(events)} events in {latency_ms:.1f}ms") return { "summary": summary, "event_count": len(events), "latency_ms": round(latency_ms, 2), } def summarize_text(self, text: str, min_length: int = 30, max_length: int = 150) -> str: """Summarize arbitrary text string.""" text = text[:3000] result = self.summarizer(text, min_length=min_length, max_length=max_length, do_sample=False) return result[0]["summary_text"]