File size: 2,683 Bytes
2758540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
nlp/summarizer.py - Surveillance log summarization using facebook/bart-large-cnn
"""
import time
from typing import List, Dict
from transformers import pipeline, Pipeline
from loguru import logger
from config import settings, DEVICE


class SurveillanceSummarizer:
    """Abstractive summarization of surveillance event logs using BART.

    The model named by ``settings.SUMMARIZER_MODEL`` is loaded once in
    ``__init__`` through the ``transformers`` ``pipeline`` factory and reused
    for every call to :meth:`summarize` and :meth:`summarize_text`.
    """

    # Only the first MAX_EVENTS events of a batch are rendered into the prompt.
    MAX_EVENTS = 30
    # Cheap character-level cap applied before tokenization. It does not by
    # itself guarantee the token limit, so the pipeline is also asked to
    # truncate (see ``_run_model``).
    MAX_INPUT_CHARS = 3000

    def __init__(self):
        logger.info(f"Loading summarization model: {settings.SUMMARIZER_MODEL}")
        device_id = self._resolve_device_id(DEVICE)
        self.summarizer: Pipeline = pipeline(
            "summarization",
            model=settings.SUMMARIZER_MODEL,
            tokenizer=settings.SUMMARIZER_MODEL,
            device=device_id,
        )
        logger.info("✅ SurveillanceSummarizer ready.")

    @staticmethod
    def _resolve_device_id(device) -> int:
        """Translate a device spec into the integer id passed to ``pipeline``.

        ``"cuda"`` maps to 0 and ``"cuda:N"`` maps to N. Anything else maps
        to -1, the value this class uses for the non-CUDA case. Comparing
        against the bare string ``"cuda"`` would send an indexed device such
        as ``"cuda:0"`` down the -1 branch.
        """
        kind, _, index = str(device).partition(":")
        if kind != "cuda":
            return -1
        return int(index) if index.isdigit() else 0

    def _events_to_text(self, events: List[Dict]) -> str:
        """Render event dicts as one space-separated run of sentences.

        Each event contributes a sentence built from ``camera_id``,
        ``timestamp``, ``activity_type``, ``anomaly_score`` and the
        ``gender`` / ``color`` entries of ``attributes``. Missing keys render
        as empty strings; a missing, ``None`` or non-numeric anomaly score
        renders as ``0.00`` instead of raising.

        Args:
            events: Event dictionaries; every key is optional.

        Returns:
            The sentences joined by single spaces, or ``""`` for no events.
        """
        sentences = []
        for event in events:
            attrs = event.get("attributes")
            if not isinstance(attrs, dict):
                # ``attributes`` may be absent, None, or malformed upstream.
                attrs = {}
            # A present-but-null attribute would otherwise print as "None".
            gender = attrs.get("gender") or ""
            color = attrs.get("color") or ""

            try:
                anomaly = float(event.get("anomaly_score", 0.0))
            except (TypeError, ValueError):
                # None or unparsable score: fall back to the missing-key default.
                anomaly = 0.0

            cam = event.get("camera_id", "")
            ts = event.get("timestamp", "")
            activity = event.get("activity_type", "")
            sentences.append(
                f"Camera {cam} at {ts}: {gender} person in {color} clothing observed {activity} "
                f"with anomaly score {anomaly:.2f}."
            )
        return " ".join(sentences)

    def _run_model(self, text: str, min_length: int, max_length: int) -> str:
        """Run the summarization pipeline on ``text`` and return the summary.

        ``truncation=True`` asks the pipeline's tokenizer to cut the input to
        the model's maximum length. BART accepts roughly 1024 input tokens,
        and a character cap alone cannot guarantee that for token-dense text
        such as timestamps and numeric ids.
        """
        result = self.summarizer(
            text,
            min_length=min_length,
            max_length=max_length,
            do_sample=False,
            truncation=True,
        )
        return result[0]["summary_text"]

    def summarize(
        self,
        events: List[Dict],
        min_length: int = 30,
        max_length: int = 200,
    ) -> Dict:
        """Summarize a list of surveillance events.

        Only the first ``MAX_EVENTS`` events are rendered, and the rendered
        text is capped at ``MAX_INPUT_CHARS`` characters before it reaches the
        model.

        Args:
            events: Event dictionaries as accepted by ``_events_to_text``.
            min_length: Minimum summary length forwarded to the pipeline.
            max_length: Maximum summary length forwarded to the pipeline.

        Returns:
            A dict with ``summary`` (str), ``event_count`` (number of events
            received, including any beyond ``MAX_EVENTS``) and ``latency_ms``
            (model call time, rounded to 2 decimals). For empty input the
            summary is a fixed message, and both ``event_count`` and
            ``latency_ms`` are 0.
        """
        if not events:
            # Same keys as the normal result so callers can read event_count
            # without special-casing the empty path.
            return {
                "summary": "No events to summarize.",
                "event_count": 0,
                "latency_ms": 0,
            }

        used = events[: self.MAX_EVENTS]
        text = self._events_to_text(used)[: self.MAX_INPUT_CHARS]

        # Time only the model call, not the text rendering.
        t0 = time.perf_counter()
        summary = self._run_model(text, min_length, max_length)
        latency_ms = (time.perf_counter() - t0) * 1000

        logger.debug(
            f"Summarized {len(used)} of {len(events)} events in {latency_ms:.1f}ms"
        )
        return {
            "summary": summary,
            "event_count": len(events),
            "latency_ms": round(latency_ms, 2),
        }

    def summarize_text(self, text: str, min_length: int = 30, max_length: int = 150) -> str:
        """Summarize an arbitrary text string.

        Args:
            text: Input text; only the first ``MAX_INPUT_CHARS`` characters
                are used.
            min_length: Minimum summary length forwarded to the pipeline.
            max_length: Maximum summary length forwarded to the pipeline.

        Returns:
            The generated summary, or ``""`` when ``text`` is empty or
            whitespace-only (the model is not invoked in that case).
        """
        if not text or not text.strip():
            return ""
        return self._run_model(text[: self.MAX_INPUT_CHARS], min_length, max_length)