Spaces:

habulaj
/

subapi

Running

App Files Community

habulaj committed on Apr 5

Commit

417fcb1

verified ·

1 Parent(s): f38dc84

Update detect_crop_video.py

Browse files

Files changed (1) hide show

detect_crop_video.py +181 -41

detect_crop_video.py CHANGED Viewed

@@ -237,6 +237,106 @@ def check_nvenc_support():
         return False
 def detect_and_crop_video(video_path, output_video_path, text_cut=True):
     """
     Detecta a região com movimento no vídeo e gera um vídeo cropado.
@@ -252,68 +352,108 @@ def detect_and_crop_video(video_path, output_video_path, text_cut=True):
         return False
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     # Sample frames to detect motion
     num_samples = 15
     indices = np.linspace(0, total_frames - 1, num_samples, dtype=int)
-    frames = []
     for i in indices:
         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
         ret, frame = cap.read()
         if ret:
             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-            frames.append(gray)
     cap.release()
-    if len(frames) < 2:
-        print(f"❌ Erro: Não foi possível ler frames suficientes ({len(frames)}/{num_samples}) para análise de movimento.")
         return False
-    print(f"🔍 Analisando movimento em {len(frames)} frames amostrados...")
-    # Calculate accumulated difference
-    h, w = frames[0].shape
-    accum_diff = np.zeros((h, w), dtype=np.float32)
-    for i in range(len(frames) - 1):
-        diff = cv2.absdiff(frames[i], frames[i+1])
-        accum_diff = cv2.add(accum_diff, diff.astype(np.float32))
-    accum_diff = cv2.normalize(accum_diff, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
-    _, thresh = cv2.threshold(accum_diff, 20, 255, cv2.THRESH_BINARY)
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
-    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    if not contours:
-        print("❌ Aviso: Nenhum movimento detectado nos frames selecionados.")
-        return False
-    print(f"📊 Encontrados {len(contours)} contornos de movimento iniciais.")
-    x_min, y_min = w, h
-    x_max, y_max = 0, 0
-    found_any = False
-    for c in contours:
-        if cv2.contourArea(c) > 500:
-            found_any = True
-            x, y, cw, ch = cv2.boundingRect(c)
-            x_min = min(x_min, x)
-            y_min = min(y_min, y)
-            x_max = max(x_max, x + cw)
-            y_max = max(y_max, y + ch)
-    if not found_any:
-        print("❌ Aviso: Nenhum movimento significativo (>500px area) detectado.")
-        return False
-    print(f"✅ Movimento consolidado na região: {x_min},{y_min} até {x_max},{y_max}")
     # Inset Logic (2px)
     inset = 2

         return False
def get_crop_detect_coords(video_path, limit=24, skip=5, duration=5):
    """
    Use ffmpeg's ``cropdetect`` filter to find the content area (removing black bars).

    Args:
        video_path: Path of the video, passed to ffmpeg's ``-i``.
        limit: First positional option of ``cropdetect``; the filter is
            invoked as ``cropdetect=<limit>:16:0``.
        skip: Seconds passed to ``-ss`` so that a black intro is not analysed.
        duration: Seconds passed to ``-t``, i.e. how much video is analysed.

    Returns:
        ``(w, h, x, y)`` as ints, taken from the last ``crop=w:h:x:y`` entry
        ffmpeg reported, or ``None`` when no such entry was found or when
        running ffmpeg raised an exception.
    """
    try:
        # Function-scope import: the module-level imports are not assumed to
        # provide ``re``.
        import re

        # Skip the first ``skip`` seconds to avoid black intros, then analyse
        # ``duration`` seconds of video.
        cmd = [
            "ffmpeg", "-ss", str(skip), "-i", video_path,
            "-t", str(duration), "-vf", f"cropdetect={limit}:16:0",
            "-f", "null", "-",
        ]
        print("🎬 Executando ffmpeg cropdetect...")
        result = subprocess.run(
            cmd,
            # ffmpeg is interactive by default; do not let it read the
            # parent's standard input.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # Undecodable bytes in ffmpeg's log must not raise
            # UnicodeDecodeError and throw away an otherwise valid result.
            errors="replace",
            check=False,
        )
        # cropdetect writes its findings to stderr.
        matches = re.findall(r"crop=(\d+):(\d+):(\d+):(\d+)", result.stderr)
        if not matches:
            return None
        # Take the last occurrence so the detection has had time to settle.
        w, h, x, y = map(int, matches[-1])
        return w, h, x, y
    except Exception as e:
        # Best-effort helper: any failure is reported and signalled with None
        # so the caller can fall back to another detection strategy.
        print(f"⚠️ Erro ao executar cropdetect: {e}")
        return None
def get_content_density_crop(frames, color_var_threshold=8, complexity_threshold=10, min_density=0.15):
    """
    Estimate the vertical extent of the main picture area from sampled frames.

    Each row of every 3-dimensional frame is classified as "content" when its
    colour varies enough along the row or when enough neighbouring pixels
    differ in brightness. Per frame, the longest run of consecutive content
    rows is kept; the medians of the run starts and run ends across frames
    form the result.

    Args:
        frames: Sequence of image arrays. Frames whose array is not
            3-dimensional are ignored. Channels are unpacked as B, G, R.
        color_var_threshold: Minimum per-row colour-variance score for a row
            to count as content.
        complexity_threshold: Accepted but not read by this implementation
            (the pixel-difference cut-off is fixed at 15).
        min_density: Accepted but not read by this implementation (the
            row-complexity cut-off is fixed at 0.40).

    Returns:
        ``(y_min, y_max)`` as ints, widened by a 2px margin and clamped to
        ``[0, height of frames[0]]``, or ``None`` when ``frames`` is empty or
        no frame produced any content row.
    """
    if not frames:
        return None

    frame_height, frame_width = frames[0].shape[:2]
    tops = []
    bottoms = []

    for image in frames:
        if image.ndim != 3:
            continue

        # Colour variance: in monochrome rows (white/black/grey text) the
        # three channels are nearly equal, so the channel differences barely
        # vary along the row; in real footage they vary a lot.
        blue, green, red = cv2.split(image.astype(np.int16))
        chroma_spread = (
            np.std(red - green, axis=1)
            + np.std(green - blue, axis=1)
            + np.std(blue - red, axis=1)
        )

        # Complexity density: share of horizontally adjacent pixel pairs
        # whose brightness differs by more than 15.
        luma = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype(np.int16)
        horizontal_steps = np.abs(np.diff(luma, axis=1))
        edge_density = np.count_nonzero(horizontal_steps > 15, axis=1) / frame_width

        # A row qualifies through colour variance OR very high complexity
        # (the latter keeps black-and-white footage detectable).
        row_mask = (chroma_spread > color_var_threshold) | (edge_density > 0.40)
        rows = np.flatnonzero(row_mask)
        if rows.size == 0:
            continue

        # Cut the row indices wherever they stop being consecutive and keep
        # the longest run, which skips isolated text lines.
        breaks = np.flatnonzero(np.diff(rows) != 1) + 1
        longest_run = max(np.split(rows, breaks), key=len)
        tops.append(longest_run[0])
        bottoms.append(longest_run[-1])

    if not (tops and bottoms):
        return None

    # Median (50th percentile) of the boundaries for stability, then a 2px
    # safety margin so the real frame is not clipped.
    top = max(0, int(np.percentile(tops, 50)) - 2)
    bottom = min(frame_height, int(np.percentile(bottoms, 50)) + 2)
    return top, bottom
 def detect_and_crop_video(video_path, output_video_path, text_cut=True):
     """
     Detecta a região com movimento no vídeo e gera um vídeo cropado.
         return False
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     # Sample frames to detect motion
     num_samples = 15
     indices = np.linspace(0, total_frames - 1, num_samples, dtype=int)
+    frames_gray = []
+    frames_bgr = []
     for i in indices:
         cap.set(cv2.CAP_PROP_POS_FRAMES, i)
         ret, frame = cap.read()
         if ret:
+            frames_bgr.append(frame)
             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+            frames_gray.append(gray)
     cap.release()
+    if len(frames_gray) < 2:
+        print(f"❌ Erro: Não foi possível ler frames suficientes ({len(frames_gray)}/{num_samples}) para análise.")
         return False
+    # ---------------------------------------------------------
+    # Passo 1: Tentar detectar bordas via FFmpeg cropdetect
+    # ---------------------------------------------------------
+    crop_coords = get_crop_detect_coords(video_path)
+    use_motion_fallback = True
+    if crop_coords:
+        cw, ch, cx, cy = crop_coords
+        original_area = w * h
+        crop_area = cw * ch
+        reduction = (1 - crop_area / original_area) * 100
+        # Se houve uma redução significativa (>10%), confiamos no cropdetect
+        if reduction > 10:
+            print(f"✅ Cropdetect sugeriu: {cw}x{ch} @ ({cx},{cy}) | Redução: {reduction:.1f}%")
+            x_min, y_min, x_max, y_max = cx, cy, cx + cw, cy + ch
+            use_motion_fallback = False
+        else:
+            print(f"⏩ Cropdetect sugeriu redução irrelevante ({reduction:.1f}%). Usando motion fallback...")
+    # ---------------------------------------------------------
+    # Passo 2: Fallback para detecção de movimento (OpenCV)
+    # ---------------------------------------------------------
+    if use_motion_fallback:
+        print(f"🔍 Analisando movimento em {len(frames_gray)} frames amostrados...")
+        # Calculate accumulated difference
+        accum_diff = np.zeros((h, w), dtype=np.float32)
+        for i in range(len(frames_gray) - 1):
+            diff = cv2.absdiff(frames_gray[i], frames_gray[i+1])
+            accum_diff = cv2.add(accum_diff, diff.astype(np.float32))
+        accum_diff = cv2.normalize(accum_diff, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
+        _, thresh = cv2.threshold(accum_diff, 20, 255, cv2.THRESH_BINARY)
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
+        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
+        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
+        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        if not contours:
+            print("❌ Aviso: Nenhum movimento detectado nos frames selecionados.")
+            return False
+        print(f"📊 Encontrados {len(contours)} contornos de movimento iniciais.")
+        x_min, y_min = w, h
+        x_max, y_max = 0, 0
+        found_any = False
+        for c in contours:
+            if cv2.contourArea(c) > 500:
+                found_any = True
+                x, y, cw, ch = cv2.boundingRect(c)
+                x_min = min(x_min, x)
+                y_min = min(y_min, y)
+                x_max = max(x_max, x + cw)
+                y_max = max(y_max, y + ch)
+        if not found_any:
+            print("❌ Aviso: Nenhum movimento significativo (>500px area) detectado.")
+            return False
+        print(f"✅ Movimento consolidado na região: {x_min},{y_min} até {x_max},{y_max}")
+    # ---------------------------------------------------------
+    # Passo 3: Refinamento por Densidade de Conteúdo (Garante linha divisória congruente)
+    # ---------------------------------------------------------
+    density_coords = get_content_density_crop(frames_bgr)
+    if density_coords:
+        dy_min, dy_max = density_coords
+        print(f"🎨 Refinamento de densidade sugeriu: Y de {dy_min} até {dy_max}")
+        # Aplicamos o refinamento se ele for mais restritivo (interno) ou se o movimento falhou
+        # Para evitar cortar o vídeo original por erro, conferimos se a área é razoável
+        y_min = max(y_min, dy_min)
+        y_max = min(y_max, dy_max)
+        print(f"✨ Região refinada final: Y de {y_min} até {y_max}")
     # Inset Logic (2px)
     inset = 2