Spaces:
Running
Running
| """ | |
| nlp/report.py - Incident Report Generation using google/flan-t5-base | |
| """ | |
| import time | |
| from typing import Dict, List, Optional | |
| from transformers import pipeline, Pipeline | |
| from loguru import logger | |
| from config import settings, DEVICE | |
# Prompt sent to the report-generation model. The single placeholder
# `{events_summary}` is filled in with str.format() by
# IncidentReportGenerator.generate().
# NOTE(review): blank lines between the prompt sections may have been lost in
# this copy of the file -- confirm the exact layout against the repository.
REPORT_PROMPT_TEMPLATE = """Generate a structured surveillance incident report based on the following events:
Events:
{events_summary}
Format the report as:
INCIDENT REPORT
Date/Time: [datetime]
Cameras Involved: [cameras]
Subject Description: [physical description]
Activity Observed: [description of events]
Anomaly Level: [low/medium/high]
Recommended Action: [action]
"""
class IncidentReportGenerator:
    """
    Generates structured incident reports from surveillance events using Flan-T5.

    The model named by ``settings.REPORT_MODEL`` is loaded once, in
    ``__init__``, as a ``text2text-generation`` pipeline and reused for every
    call to :meth:`generate`.
    """

    def __init__(self):
        """Load the report-generation pipeline on the configured device."""
        logger.info(f"Loading report generation model: {settings.REPORT_MODEL}")
        self.generator: Pipeline = pipeline(
            "text2text-generation",
            model=settings.REPORT_MODEL,
            tokenizer=settings.REPORT_MODEL,
            device=self._resolve_device_id(DEVICE),
        )
        logger.info("✅ IncidentReportGenerator ready.")

    @staticmethod
    def _resolve_device_id(device) -> int:
        """Map a device spec to the integer index passed to ``pipeline()``.

        ``"cuda"`` maps to 0 and ``"cuda:N"`` to N; anything else maps to -1
        (CPU). Indexed specs such as ``"cuda:0"`` must be recognised because
        an exact comparison against ``"cuda"`` would send them to the CPU.
        """
        name = str(device)
        if not name.startswith("cuda"):
            return -1
        _, _, index = name.partition(":")
        return int(index) if index.isdigit() else 0

    @staticmethod
    def _anomaly_score(event: Dict) -> float:
        """Return the event's ``anomaly_score`` as a float.

        A missing, ``None`` or non-numeric score is treated as 0.0 so one
        malformed event cannot abort report generation.
        """
        raw = event.get("anomaly_score")
        if raw is None:
            return 0.0
        try:
            return float(raw)
        except (TypeError, ValueError):
            return 0.0

    @staticmethod
    def _classify_severity(avg_anomaly: float) -> str:
        """Bucket an average anomaly score into a severity label."""
        if avg_anomaly > 0.8:
            return "critical"
        if avg_anomaly > 0.6:
            return "high"
        if avg_anomaly > 0.3:
            return "medium"
        return "low"

    def _format_events(self, events: List[Dict]) -> str:
        """Format events list into a readable string for the prompt.

        Each event becomes one numbered line. Missing keys fall back to
        placeholder text, and a missing or ``None`` ``attributes`` mapping
        yields empty gender/color fields.
        """
        lines = []
        for i, e in enumerate(events, 1):
            ts = e.get("timestamp", "unknown time")
            cam = e.get("camera_id", "unknown camera")
            activity = e.get("activity_type", "detected")
            # `attributes` may be absent or explicitly None.
            attrs = e.get("attributes") or {}
            gender = attrs.get("gender", "")
            color = attrs.get("color", "")
            anomaly_score = self._anomaly_score(e)
            lines.append(
                f"{i}. [{ts}] Camera {cam}: {gender} person in {color} clothing, "
                f"activity={activity}, anomaly_score={anomaly_score:.2f}"
            )
        return "\n".join(lines)

    def generate(
        self,
        events: List[Dict],
        person_id: Optional[str] = None,
        max_length: int = 512,
        severity_hint: Optional[str] = None,
    ) -> Dict:
        """
        Generate a structured incident report from a list of event records.

        Args:
            events: Event dicts; the keys read are ``timestamp``,
                ``camera_id``, ``activity_type``, ``attributes`` and
                ``anomaly_score``.
            person_id: Optional identifier; its first 8 characters are
                prepended to the prompt and the full value is echoed back.
            max_length: Forwarded to the pipeline as ``max_new_tokens``.
            severity_hint: When truthy, overrides the computed severity.

        Returns:
            {"report_text": str, "severity": str, "event_count": int,
             "avg_anomaly_score": float, "latency_ms": float,
             "person_id": Optional[str]}

            The same keys are returned when ``events`` is empty, so callers
            can index the result without checking for that case.
        """
        if not events:
            return {
                "report_text": "No events provided for report generation.",
                "severity": "low",
                "event_count": 0,
                "avg_anomaly_score": 0.0,
                "latency_ms": 0.0,
                "person_id": person_id,
            }

        events_summary = self._format_events(events[:20])  # limit for token budget
        prompt = REPORT_PROMPT_TEMPLATE.format(events_summary=events_summary)
        if person_id:
            prompt = f"Person ID: {person_id[:8]}\n" + prompt

        t0 = time.perf_counter()
        outputs = self.generator(
            prompt,
            max_new_tokens=max_length,
            num_beams=4,
            early_stopping=True,
            no_repeat_ngram_size=3,
        )
        latency_ms = (time.perf_counter() - t0) * 1000
        report_text = outputs[0]["generated_text"]

        # Severity is derived from ALL events, not only the 20 in the prompt.
        scores = [self._anomaly_score(e) for e in events]
        avg_anomaly = sum(scores) / len(scores)
        severity = severity_hint or self._classify_severity(avg_anomaly)

        logger.info(f"Report generated in {latency_ms:.1f}ms | severity={severity}")
        return {
            "report_text": report_text,
            "severity": severity,
            "event_count": len(events),
            "avg_anomaly_score": round(avg_anomaly, 4),
            "latency_ms": round(latency_ms, 2),
            "person_id": person_id,
        }