Spaces:

marcuscanhaco
/

weapon-detection-app

Runtime error

weapon-detection-app / src /domain /detectors /cpu.py

Marcus Vinicius Zerbini Canhaço

feat: atualização do detector com otimizações para GPU T4

2dfae6f about 1 year ago

24.5 kB

	import torch
	from transformers import Owlv2Processor, Owlv2ForObjectDetection
	from PIL import Image
	import numpy as np
	import cv2
	import time
	from typing import List, Dict, Tuple, Optional, Union
	import os
	from tqdm import tqdm
	import json
	from pathlib import Path
	from contextlib import nullcontext
	import threading
	import hashlib
	import pickle
	from datetime import datetime
	from dotenv import load_dotenv
	import tempfile
	import subprocess
	import shutil
	import traceback
	import psutil
	import logging
	import gc
	import sys
	from concurrent.futures import ThreadPoolExecutor
	import torch.nn.functional as F
	from .base import BaseDetector, BaseCache


	# Configurar logging
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Carregar variáveis de ambiente
	load_dotenv()

	class CPUCache(BaseCache):
	"""Cache otimizado para CPU."""
	def __init__(self, max_size: int = 1000):
	super().__init__(max_size)
	self.device = torch.device('cpu')

	class WeaponDetectorCPU(BaseDetector):
	"""Implementação CPU do detector de armas."""
	def __init__(self):
	"""Inicializa variáveis básicas."""
	super().__init__()
	self.default_resolution = 640
	self.device = torch.device('cpu')

	def _get_best_device(self):
	return torch.device('cpu')

	def _initialize(self):
	"""Inicializa o modelo e o processador para execução em CPU."""
	try:
	# Configurações otimizadas para CPU
	torch.set_num_threads(min(8, os.cpu_count()))
	torch.set_num_interop_threads(min(8, os.cpu_count()))

	# Carregar modelo com configurações otimizadas
	cache_dir = os.path.join(tempfile.gettempdir(), 'weapon_detection_cache')
	os.makedirs(cache_dir, exist_ok=True)

	model_name = "google/owlv2-base-patch16"
	logger.info("Carregando modelo e processador...")

	self.owlv2_processor = Owlv2Processor.from_pretrained(
	model_name,
	cache_dir=cache_dir
	)

	self.owlv2_model = Owlv2ForObjectDetection.from_pretrained(
	model_name,
	cache_dir=cache_dir,
	torch_dtype=torch.float32,
	low_cpu_mem_usage=True
	).to(self.device)

	self.owlv2_model.eval()

	# Usar queries do método base
	self.text_queries = self._get_detection_queries()
	logger.info(f"Total de queries carregadas: {len(self.text_queries)}")

	# Processar queries uma única vez
	logger.info("Processando queries...")
	self.processed_text = self.owlv2_processor(
	text=self.text_queries,
	return_tensors="pt",
	padding=True
	).to(self.device)

	# Inicializar cache
	cache_size = int(os.getenv('RESULT_CACHE_SIZE', '1000'))
	self.result_cache = CPUCache(max_size=cache_size)

	logger.info("Inicialização CPU completa!")
	self._initialized = True

	except Exception as e:
	logger.error(f"Erro na inicialização CPU: {str(e)}")
	raise

	def _apply_nms(self, detections: list, iou_threshold: float = 0.5) -> list:
	"""Aplica NMS usando operações em CPU."""
	try:
	if not detections:
	return []

	boxes = torch.tensor([[d["box"][0], d["box"][1], d["box"][2], d["box"][3]] for d in detections])
	scores = torch.tensor([d["confidence"] for d in detections])
	labels = [d["label"] for d in detections]

	area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
	_, order = scores.sort(descending=True)

	keep = []
	while order.numel() > 0:
	if order.numel() == 1:
	keep.append(order.item())
	break
	i = order[0]
	keep.append(i.item())

	xx1 = torch.max(boxes[i, 0], boxes[order[1:], 0])
	yy1 = torch.max(boxes[i, 1], boxes[order[1:], 1])
	xx2 = torch.min(boxes[i, 2], boxes[order[1:], 2])
	yy2 = torch.min(boxes[i, 3], boxes[order[1:], 3])

	w = torch.clamp(xx2 - xx1, min=0)
	h = torch.clamp(yy2 - yy1, min=0)
	inter = w * h

	ovr = inter / (area[i] + area[order[1:]] - inter)
	ids = (ovr <= iou_threshold).nonzero().squeeze()
	if ids.numel() == 0:
	break
	order = order[ids + 1]

	filtered_detections = []
	for idx in keep:
	filtered_detections.append({
	"confidence": scores[idx].item(),
	"box": boxes[idx].tolist(),
	"label": labels[idx]
	})
	return filtered_detections

	except Exception as e:
	logger.error(f"Erro ao aplicar NMS: {str(e)}")
	return []

	def _preprocess_image(self, image: Image.Image) -> Image.Image:
	"""Pré-processa a imagem para o tamanho 640x640 e garante RGB."""
	try:
	target_size = (640, 640)
	if image.mode != 'RGB':
	image = image.convert('RGB')
	if image.size != target_size:
	ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
	new_size = tuple(int(dim * ratio) for dim in image.size)
	image = image.resize(new_size, Image.LANCZOS)
	if new_size != target_size:
	new_image = Image.new('RGB', target_size, (0, 0, 0))
	paste_x = (target_size[0] - new_size[0]) // 2
	paste_y = (target_size[1] - new_size[1]) // 2
	new_image.paste(image, (paste_x, paste_y))
	image = new_image
	return image
	except Exception as e:
	logger.error(f"Erro no pré-processamento: {str(e)}")
	return image

	def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
	"""Detecta objetos em uma imagem utilizando CPU."""
	try:
	image = self._preprocess_image(image)
	with torch.no_grad():
	image_inputs = self.owlv2_processor(
	images=image,
	return_tensors="pt"
	).to(self.device)
	inputs = {image_inputs, self.processed_text}
	outputs = self.owlv2_model(**inputs)

	target_sizes = torch.tensor([image.size[::-1]])
	results = self.owlv2_processor.post_process_grounded_object_detection(
	outputs=outputs,
	target_sizes=target_sizes,
	threshold=threshold
	)[0]

	detections = []
	for score, box, label in zip(results["scores"], results["boxes"], results["labels"]):
	x1, y1, x2, y2 = box.tolist()
	detections.append({
	"confidence": score.item(),
	"box": [int(x1), int(y1), int(x2), int(y2)],
	"label": self.text_queries[label]
	})
	return self._apply_nms(detections)

	except Exception as e:
	logger.error(f"Erro em detect_objects: {str(e)}")
	return []

	def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
	"""Processa um vídeo utilizando CPU. Para na primeira detecção encontrada."""
	try:
	metrics = {
	"total_time": 0,
	"frame_extraction_time": 0,
	"analysis_time": 0,
	"frames_analyzed": 0,
	"video_duration": 0,
	"device_type": self.device.type,
	"detections": [],
	"technical": {
	"model": "owlv2-base-patch16-ensemble",
	"input_size": f"{resolution}x{resolution}",
	"nms_threshold": 0.5,
	"preprocessing": "basic",
	"early_stop": True
	},
	}

	start_time = time.time()
	t0 = time.time()
	frames = self.extract_frames(video_path, fps, resolution)
	metrics["frame_extraction_time"] = time.time() - t0
	metrics["frames_analyzed"] = len(frames)

	if not frames:
	logger.warning("Nenhum frame extraído do vídeo")
	return video_path, metrics

	metrics["video_duration"] = len(frames) / (fps or 2)
	t0 = time.time()
	detections = []
	frames_processed = 0

	# Processar um frame por vez para otimizar memória e permitir parada precoce
	for frame_idx, frame in enumerate(frames):
	frames_processed += 1

	# Converter frame para RGB e pré-processar
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	image = Image.fromarray(frame_rgb)
	image = self._preprocess_image(image)

	# Detectar objetos com threshold direto
	with torch.no_grad():
	image_inputs = self.owlv2_processor(
	images=image,
	return_tensors="pt"
	).to(self.device)
	inputs = {image_inputs, self.processed_text}
	outputs = self.owlv2_model(**inputs)

	target_sizes = torch.tensor([image.size[::-1]])
	results = self.owlv2_processor.post_process_grounded_object_detection(
	outputs=outputs,
	target_sizes=target_sizes,
	threshold=threshold # Aplicar threshold diretamente
	)[0]

	# Se encontrou alguma detecção acima do threshold
	if len(results["scores"]) > 0:
	# Pegar a detecção com maior confiança
	max_score_idx = torch.argmax(results["scores"])
	score = results["scores"][max_score_idx].item()
	box = results["boxes"][max_score_idx].tolist()
	label = results["labels"][max_score_idx].item()

	detections.append({
	"frame": frame_idx,
	"confidence": score,
	"box": [int(x) for x in box],
	"label": self.text_queries[label]
	})

	# Atualizar métricas e parar o processamento
	metrics["frames_processed_until_detection"] = frames_processed
	metrics["analysis_time"] = time.time() - t0
	metrics["total_time"] = time.time() - start_time
	metrics["detections"] = detections
	logger.info(f"Detecção encontrada após processar {frames_processed} frames")
	return video_path, metrics

	# Liberar memória a cada 10 frames
	if frames_processed % 10 == 0:
	gc.collect()

	# Se chegou aqui, não encontrou nenhuma detecção
	metrics["analysis_time"] = time.time() - t0
	metrics["total_time"] = time.time() - start_time
	metrics["frames_processed_until_detection"] = frames_processed
	metrics["detections"] = detections
	return video_path, metrics

	except Exception as e:
	logger.error(f"Erro ao processar vídeo: {str(e)}")
	return video_path, {}

	def extract_frames(self, video_path: str, fps: int = 2, resolution: int = 480) -> list:
	"""Extrai frames de um vídeo utilizando ffmpeg."""
	frames = []
	temp_dir = Path(tempfile.mkdtemp())
	try:
	threads = min(os.cpu_count(), 4) # Menor número de threads para CPU
	cmd = [
	'ffmpeg', '-i', video_path,
	'-threads', str(threads),
	'-vf', (f'fps={fps},'
	f'scale={resolution}:{resolution}:force_original_aspect_ratio=decrease:flags=lanczos,'
	f'pad={resolution}:{resolution}:(ow-iw)/2:(oh-ih)/2'),
	'-frame_pts', '1',
	f'{temp_dir}/%d.jpg'
	]
	subprocess.run(cmd, check=True, capture_output=True)
	frame_files = sorted(temp_dir.glob('*.jpg'), key=lambda x: int(x.stem))
	chunk_size = 50 # Menor chunk size para CPU
	with ThreadPoolExecutor(max_workers=threads) as executor:
	for i in range(0, len(frame_files), chunk_size):
	chunk = frame_files[i:i + chunk_size]
	chunk_frames = list(tqdm(
	executor.map(lambda f: cv2.imread(str(f)), chunk),
	desc=f"Carregando frames {i+1}-{min(i+chunk_size, len(frame_files))}",
	total=len(chunk)
	))
	frames.extend(chunk_frames)
	if i % (chunk_size * 5) == 0:
	gc.collect()
	finally:
	shutil.rmtree(temp_dir)
	return frames

	def clear_cache(self):
	"""Limpa o cache de resultados e libera memória."""
	try:
	if hasattr(self, 'result_cache'):
	self.result_cache.clear()
	gc.collect()
	logger.info("Cache CPU limpo com sucesso")
	except Exception as e:
	logger.error(f"Erro ao limpar cache CPU: {str(e)}")

	def _apply_nms(self, detections: list, iou_threshold: float = 0.5) -> list:
	"""Aplica NMS usando operações em CPU."""
	try:
	if not detections:
	return []

	boxes = torch.tensor([[d["box"][0], d["box"][1], d["box"][2], d["box"][3]] for d in detections])
	scores = torch.tensor([d["confidence"] for d in detections])
	labels = [d["label"] for d in detections]

	area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
	_, order = scores.sort(descending=True)

	keep = []
	while order.numel() > 0:
	if order.numel() == 1:
	keep.append(order.item())
	break
	i = order[0]
	keep.append(i.item())

	xx1 = torch.max(boxes[i, 0], boxes[order[1:], 0])
	yy1 = torch.max(boxes[i, 1], boxes[order[1:], 1])
	xx2 = torch.min(boxes[i, 2], boxes[order[1:], 2])
	yy2 = torch.min(boxes[i, 3], boxes[order[1:], 3])

	w = torch.clamp(xx2 - xx1, min=0)
	h = torch.clamp(yy2 - yy1, min=0)
	inter = w * h

	ovr = inter / (area[i] + area[order[1:]] - inter)
	ids = (ovr <= iou_threshold).nonzero().squeeze()
	if ids.numel() == 0:
	break
	order = order[ids + 1]

	filtered_detections = []
	for idx in keep:
	filtered_detections.append({
	"confidence": scores[idx].item(),
	"box": boxes[idx].tolist(),
	"label": labels[idx]
	})
	return filtered_detections

	except Exception as e:
	logger.error(f"Erro ao aplicar NMS: {str(e)}")
	return []

	def _preprocess_image(self, image: Image.Image) -> Image.Image:
	"""Pré-processa a imagem para o tamanho 640x640 e garante RGB."""
	try:
	target_size = (640, 640)
	if image.mode != 'RGB':
	image = image.convert('RGB')
	if image.size != target_size:
	ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
	new_size = tuple(int(dim * ratio) for dim in image.size)
	image = image.resize(new_size, Image.LANCZOS)
	if new_size != target_size:
	new_image = Image.new('RGB', target_size, (0, 0, 0))
	paste_x = (target_size[0] - new_size[0]) // 2
	paste_y = (target_size[1] - new_size[1]) // 2
	new_image.paste(image, (paste_x, paste_y))
	image = new_image
	return image
	except Exception as e:
	logger.error(f"Erro no pré-processamento: {str(e)}")
	return image

	def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
	"""Detecta objetos em uma imagem utilizando CPU."""
	try:
	image = self._preprocess_image(image)
	with torch.no_grad():
	image_inputs = self.owlv2_processor(
	images=image,
	return_tensors="pt"
	).to(self.device)
	inputs = {image_inputs, self.processed_text}
	outputs = self.owlv2_model(**inputs)

	target_sizes = torch.tensor([image.size[::-1]])
	results = self.owlv2_processor.post_process_grounded_object_detection(
	outputs=outputs,
	target_sizes=target_sizes,
	threshold=threshold
	)[0]

	detections = []
	for score, box, label in zip(results["scores"], results["boxes"], results["labels"]):
	x1, y1, x2, y2 = box.tolist()
	detections.append({
	"confidence": score.item(),
	"box": [int(x1), int(y1), int(x2), int(y2)],
	"label": self.text_queries[label]
	})
	return self._apply_nms(detections)

	except Exception as e:
	logger.error(f"Erro em detect_objects: {str(e)}")
	return []

	def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
	"""Processa um vídeo utilizando CPU. Para na primeira detecção encontrada."""
	try:
	metrics = {
	"total_time": 0,
	"frame_extraction_time": 0,
	"analysis_time": 0,
	"frames_analyzed": 0,
	"video_duration": 0,
	"device_type": self.device.type,
	"detections": [],
	"technical": {
	"model": "owlv2-base-patch16-ensemble",
	"input_size": f"{resolution}x{resolution}",
	"nms_threshold": 0.5,
	"preprocessing": "basic",
	"early_stop": True
	},
	}

	start_time = time.time()
	t0 = time.time()
	frames = self.extract_frames(video_path, fps, resolution)
	metrics["frame_extraction_time"] = time.time() - t0
	metrics["frames_analyzed"] = len(frames)

	if not frames:
	logger.warning("Nenhum frame extraído do vídeo")
	return video_path, metrics

	metrics["video_duration"] = len(frames) / (fps or 2)
	t0 = time.time()
	detections = []
	frames_processed = 0

	# Processar um frame por vez para otimizar memória e permitir parada precoce
	for frame_idx, frame in enumerate(frames):
	frames_processed += 1

	# Converter frame para RGB e pré-processar
	frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	image = Image.fromarray(frame_rgb)
	image = self._preprocess_image(image)

	# Detectar objetos com threshold direto
	with torch.no_grad():
	image_inputs = self.owlv2_processor(
	images=image,
	return_tensors="pt"
	).to(self.device)
	inputs = {image_inputs, self.processed_text}
	outputs = self.owlv2_model(**inputs)

	target_sizes = torch.tensor([image.size[::-1]])
	results = self.owlv2_processor.post_process_grounded_object_detection(
	outputs=outputs,
	target_sizes=target_sizes,
	threshold=threshold # Aplicar threshold diretamente
	)[0]

	# Se encontrou alguma detecção acima do threshold
	if len(results["scores"]) > 0:
	# Pegar a detecção com maior confiança
	max_score_idx = torch.argmax(results["scores"])
	score = results["scores"][max_score_idx].item()
	box = results["boxes"][max_score_idx].tolist()
	label = results["labels"][max_score_idx].item()

	detections.append({
	"frame": frame_idx,
	"confidence": score,
	"box": [int(x) for x in box],
	"label": self.text_queries[label]
	})

	# Atualizar métricas e parar o processamento
	metrics["frames_processed_until_detection"] = frames_processed
	metrics["analysis_time"] = time.time() - t0
	metrics["total_time"] = time.time() - start_time
	metrics["detections"] = detections
	logger.info(f"Detecção encontrada após processar {frames_processed} frames")
	return video_path, metrics

	# Liberar memória a cada 10 frames
	if frames_processed % 10 == 0:
	gc.collect()

	# Se chegou aqui, não encontrou nenhuma detecção
	metrics["analysis_time"] = time.time() - t0
	metrics["total_time"] = time.time() - start_time
	metrics["frames_processed_until_detection"] = frames_processed
	metrics["detections"] = detections
	return video_path, metrics

	except Exception as e:
	logger.error(f"Erro ao processar vídeo: {str(e)}")
	return video_path, {}

	def extract_frames(self, video_path: str, fps: int = 2, resolution: int = 480) -> list:
	"""Extrai frames de um vídeo utilizando ffmpeg."""
	frames = []
	temp_dir = Path(tempfile.mkdtemp())
	try:
	threads = min(os.cpu_count(), 4) # Menor número de threads para CPU
	cmd = [
	'ffmpeg', '-i', video_path,
	'-threads', str(threads),
	'-vf', (f'fps={fps},'
	f'scale={resolution}:{resolution}:force_original_aspect_ratio=decrease:flags=lanczos,'
	f'pad={resolution}:{resolution}:(ow-iw)/2:(oh-ih)/2'),
	'-frame_pts', '1',
	f'{temp_dir}/%d.jpg'
	]
	subprocess.run(cmd, check=True, capture_output=True)
	frame_files = sorted(temp_dir.glob('*.jpg'), key=lambda x: int(x.stem))
	chunk_size = 50 # Menor chunk size para CPU
	with ThreadPoolExecutor(max_workers=threads) as executor:
	for i in range(0, len(frame_files), chunk_size):
	chunk = frame_files[i:i + chunk_size]
	chunk_frames = list(tqdm(
	executor.map(lambda f: cv2.imread(str(f)), chunk),
	desc=f"Carregando frames {i+1}-{min(i+chunk_size, len(frame_files))}",
	total=len(chunk)
	))
	frames.extend(chunk_frames)
	if i % (chunk_size * 5) == 0:
	gc.collect()
	finally:
	shutil.rmtree(temp_dir)
	return frames

	def clear_cache(self):
	"""Limpa cache e libera memória."""
	self.result_cache.clear()
	gc.collect()