travahacker
committed on
Commit
·
0c4fdca
1
Parent(s):
6948ba2
Add IA-local-auto-cut: cortes automáticos de vídeo com Whisper + Ollama
Browse files- .gitignore +56 -0
- LICENSE +12 -0
- README.md +118 -0
- generate_post_texts_from_cuts.py +174 -0
- interview_cuts.py +350 -0
- persona.example.json +7 -0
- requirements.txt +12 -0
- video_cuts_offline_mac_plus_subs.py +668 -0
.gitignore
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dados sensíveis e gerados
|
| 2 |
+
*.mp4
|
| 3 |
+
*.mov
|
| 4 |
+
*.avi
|
| 5 |
+
*.mkv
|
| 6 |
+
*.webm
|
| 7 |
+
*.wav
|
| 8 |
+
*.mp3
|
| 9 |
+
*_transcript.json
|
| 10 |
+
*_cuts.json
|
| 11 |
+
*_interview_cuts.json
|
| 12 |
+
*_cuts.sh
|
| 13 |
+
*_posts.txt
|
| 14 |
+
*_audio.wav
|
| 15 |
+
*_preview_parts/
|
| 16 |
+
export_parts/
|
| 17 |
+
PREVIEW_*.mp4
|
| 18 |
+
|
| 19 |
+
# Persona (dados pessoais - use persona.example.json como template)
|
| 20 |
+
persona.json
|
| 21 |
+
|
| 22 |
+
# Ambiente e secrets
|
| 23 |
+
.env
|
| 24 |
+
.env.*
|
| 25 |
+
*.pem
|
| 26 |
+
*.key
|
| 27 |
+
secrets/
|
| 28 |
+
credentials/
|
| 29 |
+
|
| 30 |
+
# Python
|
| 31 |
+
__pycache__/
|
| 32 |
+
*.py[cod]
|
| 33 |
+
*$py.class
|
| 34 |
+
*.so
|
| 35 |
+
.Python
|
| 36 |
+
.venv/
|
| 37 |
+
venv/
|
| 38 |
+
ENV/
|
| 39 |
+
.eggs/
|
| 40 |
+
*.egg-info/
|
| 41 |
+
*.egg
|
| 42 |
+
|
| 43 |
+
# IDE e OS
|
| 44 |
+
.DS_Store
|
| 45 |
+
.idea/
|
| 46 |
+
.vscode/
|
| 47 |
+
*.code-workspace
|
| 48 |
+
*.swp
|
| 49 |
+
*.swo
|
| 50 |
+
*~
|
| 51 |
+
|
| 52 |
+
# Logs e temporários
|
| 53 |
+
*.log
|
| 54 |
+
tmp/
|
| 55 |
+
temp/
|
| 56 |
+
.cache/
|
LICENSE
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Creative Commons Attribution 4.0 International (CC BY 4.0)
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 travahacker
|
| 4 |
+
|
| 5 |
+
You are free to:
|
| 6 |
+
Share — copy and redistribute the material in any medium or format
|
| 7 |
+
Adapt — remix, transform, and build upon the material for any purpose, even commercially
|
| 8 |
+
|
| 9 |
+
Under the following terms:
|
| 10 |
+
Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made.
|
| 11 |
+
|
| 12 |
+
Full legal code: https://creativecommons.org/licenses/by/4.0/legalcode
|
README.md
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cc-by-4.0
|
| 3 |
+
language:
|
| 4 |
+
- pt
|
| 5 |
+
tags:
|
| 6 |
+
- video
|
| 7 |
+
- transcription
|
| 8 |
+
- whisper
|
| 9 |
+
- ollama
|
| 10 |
+
- ffmpeg
|
| 11 |
+
- auto-cut
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# IA-local-transcript-autocut
|
| 15 |
+
|
| 16 |
+
Ferramenta para **cortes automáticos de vídeo** usando transcrição local (Whisper) e IA local (Ollama). Tudo roda na sua máquina, sem enviar dados para a nuvem.
|
| 17 |
+
|
| 18 |
+
> Cortes automáticos de vídeo com Whisper + Ollama. Transcreve, propõe cortes com IA local e exporta em MP4.
|
| 19 |
+
|
| 20 |
+
**Repositório:** [github.com/travahacker/IA-local-auto-cut](https://github.com/travahacker/IA-local-auto-cut)
|
| 21 |
+
|
| 22 |
+
## Fluxo
|
| 23 |
+
|
| 24 |
+
1. **Transcreve** o áudio do vídeo com [faster-whisper](https://github.com/SYSTRAN/faster-whisper)
|
| 25 |
+
2. **Propõe cortes** via Ollama (IA local) ou heurísticas (modo React, entrevistas)
|
| 26 |
+
3. **Gera scripts ffmpeg** para exportar os cortes em MP4
|
| 27 |
+
|
| 28 |
+
## Pré-requisitos
|
| 29 |
+
|
| 30 |
+
- **Python 3.9+**
|
| 31 |
+
- **ffmpeg** (instalado no sistema)
|
| 32 |
+
- **Ollama** (opcional, para propostas de cortes com IA — [ollama.ai](https://ollama.ai))
|
| 33 |
+
|
| 34 |
+
## Instalação
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
git clone https://huggingface.co/Veronyka/IA-local-transcript-autocut
|
| 38 |
+
cd IA-local-transcript-autocut
|
| 39 |
+
pip install -r requirements.txt
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
## Uso
|
| 43 |
+
|
| 44 |
+
### 1. Transcrição + cortes com IA (Ollama)
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
# Transcreve, propõe cortes com Ollama e gera script de export
|
| 48 |
+
python video_cuts_offline_mac_plus_subs.py seu_video.mp4 --preview
|
| 49 |
+
|
| 50 |
+
# Só transcrever (salva transcript.json)
|
| 51 |
+
python video_cuts_offline_mac_plus_subs.py seu_video.mp4 --only-transcribe
|
| 52 |
+
|
| 53 |
+
# Reusar transcrição existente
|
| 54 |
+
python video_cuts_offline_mac_plus_subs.py seu_video.mp4 --only-propose --reuse-transcript
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### 2. Modo React (comentários em PT com lead-in em EN)
|
| 58 |
+
|
| 59 |
+
Para vídeos de reação onde você comenta em português sobre conteúdo em inglês:
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
python video_cuts_offline_mac_plus_subs.py video.mp4 --react-mode --preview
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
### 3. Cortes para entrevistas (pergunta + resposta)
|
| 66 |
+
|
| 67 |
+
Gera cortes de perguntas curtas seguidas de respostas longas:
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# Primeiro: transcrever
|
| 71 |
+
python video_cuts_offline_mac_plus_subs.py entrevista.mp4 --only-transcribe
|
| 72 |
+
|
| 73 |
+
# Depois: gerar cortes de entrevista
|
| 74 |
+
python interview_cuts.py entrevista.mp4 --min 60 --max 150 --preview
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
### 4. Títulos e descrições para redes sociais
|
| 78 |
+
|
| 79 |
+
```bash
|
| 80 |
+
python generate_post_texts_from_cuts.py base_do_video
|
| 81 |
+
|
| 82 |
+
# Com IA local (Ollama) para copy mais criativo
|
| 83 |
+
python generate_post_texts_from_cuts.py base_do_video --ollama-model llama3.1:8b
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
## Persona (opcional)
|
| 87 |
+
|
| 88 |
+
Para alinhar os cortes com seu perfil de criador(a), crie um `persona.json` a partir do exemplo:
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
cp persona.example.json persona.json
|
| 92 |
+
# Edite persona.json com sua bio, pilares, tom, etc.
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
Use com `--persona persona.json` no script principal.
|
| 96 |
+
|
| 97 |
+
## Opções principais
|
| 98 |
+
|
| 99 |
+
| Flag | Descrição |
|
| 100 |
+
|------|-----------|
|
| 101 |
+
| `--lang pt` | Forçar idioma da transcrição |
|
| 102 |
+
| `--whisper-model small` | Modelo Whisper (tiny, base, small, medium, large) |
|
| 103 |
+
| `--model llama2` | Modelo Ollama para propor cortes |
|
| 104 |
+
| `--max-stories 8` | Número máximo de cortes |
|
| 105 |
+
| `--max-length 60` | Duração máxima por corte (segundos) |
|
| 106 |
+
| `--preview` | Gera vídeo de prévia com todos os cortes |
|
| 107 |
+
| `--persona arquivo.json` | Arquivo com contexto de persona |
|
| 108 |
+
|
| 109 |
+
## Arquivos gerados
|
| 110 |
+
|
| 111 |
+
- `*_transcript.json` — Transcrição com timestamps
|
| 112 |
+
- `*_cuts.json` — Metadados dos cortes propostos
|
| 113 |
+
- `*_cuts.sh` — Script bash para exportar os MP4s
|
| 114 |
+
- `export_parts/` — Pasta com os cortes em MP4
|
| 115 |
+
|
| 116 |
+
## Licença
|
| 117 |
+
|
| 118 |
+
[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) — Creative Commons Attribution 4.0 International. Uso livre com atribuição.
|
generate_post_texts_from_cuts.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Gera títulos e descrições para redes sociais a partir dos cortes (transcript + cuts).
|
| 4 |
+
|
| 5 |
+
Uso:
|
| 6 |
+
python generate_post_texts_from_cuts.py <base> [--persona "sua persona"] [--hashtags #tag1 #tag2]
|
| 7 |
+
python generate_post_texts_from_cuts.py <base> --ollama-model llama3.1:8b # usa IA local
|
| 8 |
+
"""
|
| 9 |
+
import argparse, json, os, re, requests
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import List, Dict, Any
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_json(path):
    """Read a UTF-8 encoded JSON file and return the parsed object."""
    with open(path, "r", encoding="utf-8") as fh:
        return json.load(fh)
| 17 |
+
|
| 18 |
+
def cap(s: str, n: int) -> str:
    """Trim *s* to at most *n* characters, appending an ellipsis when cut."""
    stripped = s.strip()
    if len(stripped) > n:
        return stripped[:n - 1] + "…"
    return stripped
| 21 |
+
|
| 22 |
+
def normalize_whitespace(s: str) -> str:
    """Collapse every whitespace run to a single space and strip the ends."""
    collapsed = re.sub(r"\s+", " ", s)
    return collapsed.strip()
| 24 |
+
|
| 25 |
+
def overlap(a1, a2, b1, b2):
    """Length of the intersection of intervals [a1, a2] and [b1, b2] (never negative)."""
    lo = max(a1, b1)
    hi = min(a2, b2)
    return max(0.0, hi - lo)
| 27 |
+
|
| 28 |
+
def collect_text_for_segments(transcript: List[Dict[str, Any]], segments: List[Dict[str, float]]) -> str:
    """Concatenate transcript text that overlaps any of the cut *segments*.

    A transcript entry counts when it shares more than 10 ms with a segment;
    the joined text is whitespace-normalized before returning.
    """
    pieces = []
    for seg in segments:
        seg_start, seg_end = float(seg["start"]), float(seg["end"])
        for entry in transcript:
            e_start, e_end = float(entry["start"]), float(entry["end"])
            # Inline interval-overlap test: shared span must exceed 10 ms.
            if min(seg_end, e_end) - max(seg_start, e_start) > 0.01:
                pieces.append(entry.get("text", "").strip())
    joined = " ".join(p for p in pieces if p)
    return re.sub(r"\s+", " ", joined).strip()
| 38 |
+
|
| 39 |
+
def first_sentence(s: str, max_len=140) -> str:
    """Return the first sentence of *s* (split on . ! ?), capped at *max_len* chars."""
    flat = re.sub(r"\s+", " ", s).strip()
    sentences = re.split(r"(?<=[\.\!\?])\s+", flat)
    candidate = (sentences[0] if sentences else flat) or flat
    candidate = candidate.strip()
    # Inline cap(): truncate with an ellipsis when over the limit.
    return (candidate[:max_len - 1] + "…") if len(candidate) > max_len else candidate
| 44 |
+
|
| 45 |
+
def build_titles_and_descs(text: str, persona: str, hashtags: List[str],
                           yt_len=70, ig_len=140, tt_len=120,
                           max_ig_tags=5, max_tt_tags=8) -> Dict[str, str]:
    """Heuristic copy generator: YouTube title plus Instagram/TikTok captions.

    Uses the first sentence of *text* for every platform, appends a fixed
    call-to-action, and tacks on the first N hashtags per platform.
    (*persona* is accepted for interface parity but unused by the heuristic.)
    """
    body = text or ""
    yt_title = cap(first_sentence(body, yt_len), yt_len)

    ig_desc = f"{first_sentence(body, ig_len)}\nAssiste até o fim e comenta 👇"
    ig_tags = " ".join(hashtags[:max_ig_tags]) if hashtags else ""
    if ig_tags:
        ig_desc = f"{ig_desc}\n{ig_tags}"

    tt_desc = f"{first_sentence(body, tt_len)}\nCurte e segue p/ mais 🔔"
    tt_tags = " ".join(hashtags[:max_tt_tags]) if hashtags else ""
    if tt_tags:
        tt_desc = f"{tt_desc}\n{tt_tags}"

    return {"yt_title": yt_title, "ig_desc": ig_desc.strip(), "tt_desc": tt_desc.strip()}
| 61 |
+
|
| 62 |
+
def call_ollama(model: str, prompt: str, url: str) -> str:
    """POST a single non-streaming generation request to an Ollama server.

    Args:
        model: Ollama model tag (e.g. "llama3.1:8b").
        prompt: Full prompt text.
        url: Server base URL; trailing slashes are tolerated.

    Returns:
        The raw "response" string from the server ("" when absent).

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "format": "json",
        # FIX: sampling parameters must live under "options" — a top-level
        # "temperature" key is silently ignored by /api/generate.
        "options": {"temperature": 0.4, "num_ctx": 8192, "num_predict": 384},
    }
    r = requests.post(url.rstrip("/") + "/api/generate", json=payload, timeout=120)
    r.raise_for_status()
    return r.json().get("response", "")
| 74 |
+
|
| 75 |
+
def _coerce_json(raw: str) -> Dict[str, str]:
|
| 76 |
+
txt = (raw or "").strip()
|
| 77 |
+
try:
|
| 78 |
+
return json.loads(txt)
|
| 79 |
+
except Exception:
|
| 80 |
+
pass
|
| 81 |
+
m = re.search(r"\{[\s\S]*\}", txt)
|
| 82 |
+
if not m:
|
| 83 |
+
raise ValueError("no-json-object")
|
| 84 |
+
jtxt = m.group(0)
|
| 85 |
+
jtxt = jtxt.replace("\u201c", '"').replace("\u201d", '"').replace("\u2018", "'").replace("\u2019", "'")
|
| 86 |
+
jtxt = re.sub(r",\s*(\}|\])", r"\1", jtxt)
|
| 87 |
+
if '"' not in jtxt and "'" in jtxt:
|
| 88 |
+
jtxt = jtxt.replace("'", '"')
|
| 89 |
+
return json.loads(jtxt)
|
| 90 |
+
|
| 91 |
+
def with_ollama(text: str, persona: str, hashtags: List[str], model: str, server_url: str) -> Dict[str, str]:
    """Ask a local Ollama model for social-media copy; fall back to heuristics.

    Any failure (network, bad JSON, missing keys) is logged and answered with
    the deterministic build_titles_and_descs() output instead.
    """
    prompt = f'''
Responda ESTRITAMENTE em JSON válido (sem texto extra, sem markdown, sem explicações).
Gere campos:
- yt_title: string (<= 70 chars, chamativo, sem hashtags)
- ig_desc: string (≈120–150 chars, termina com linha de hashtags IG)
- tt_desc: string (≈100–140 chars, termina com linha de hashtags TikTok)

PERSONA: {persona or '-'}
HASHTAGS_IG: {' '.join(hashtags[:5])}
HASHTAGS_TT: {' '.join(hashtags[:8])}

TEXTO_DO_CORTE (transcrição bruta, use para inspirar o copy):
"""{text.strip()[:2000]}"""

Retorne APENAS um objeto JSON com exatamente estas chaves:
{{
  "yt_title": "...",
  "ig_desc": "...\n{' '.join(hashtags[:5])}",
  "tt_desc": "...\n{' '.join(hashtags[:8])}"
}}
'''
    try:
        parsed = _coerce_json(call_ollama(model, prompt, server_url))
        # Enforce per-platform length ceilings on whatever the model returned.
        for key, limit in (("yt_title", 70), ("ig_desc", 300), ("tt_desc", 220)):
            parsed[key] = cap(parsed.get(key, ""), limit)
        return parsed
    except Exception as exc:
        print(f"[warn] Ollama retornou JSON inválido: {exc}. Usando heurística.")
        return build_titles_and_descs(text, persona, hashtags)
| 123 |
+
|
| 124 |
+
def main():
    """CLI entry point: read <base>_cuts.json + <base>_transcript.json and
    write <base>_posts.txt with per-cut titles/descriptions."""
    ap = argparse.ArgumentParser("Gera títulos/descrições para redes a partir dos cortes.")
    ap.add_argument("base", help="Base do arquivo (ex.: 'meu_video' sem sufixos)")
    ap.add_argument("--persona", default="criador(a) de conteúdo",
                    help="Breve dica de persona para compor textos")
    ap.add_argument("--hashtags", nargs="*", default=["#criacaodeconteudo", "#video", "#shorts"],
                    help="Hashtags prioritárias")
    ap.add_argument("--ollama-model", default="", help="Modelo Ollama para copy (ex.: llama3.1:8b)")
    ap.add_argument("--ollama-url", default="http://localhost:11434", help="URL do Ollama")
    ap.add_argument("--out", default="", help="Arquivo de saída (default: <base>_posts.txt)")
    args = ap.parse_args()

    cuts_path = f"{args.base}_cuts.json"
    transcript_path = f"{args.base}_transcript.json"
    if not (os.path.exists(cuts_path) and os.path.exists(transcript_path)):
        print(f"ERRO: não achei '{cuts_path}' ou '{transcript_path}'. Rode na pasta correta.")
        raise SystemExit(1)

    cuts = load_json(cuts_path)
    transcript = load_json(transcript_path)
    out_path = args.out or f"{args.base}_posts.txt"

    blocks = []
    for idx, cut in enumerate(cuts, 1):
        segs = cut.get("segments") or []
        # Older cut files carry bare start/end instead of a segments list.
        if not segs and "start" in cut and "end" in cut:
            segs = [{"start": cut["start"], "end": cut["end"]}]
        cut_text = collect_text_for_segments(transcript, segs)

        if args.ollama_model:
            copy = with_ollama(cut_text, args.persona, args.hashtags, args.ollama_model, args.ollama_url)
        else:
            copy = build_titles_and_descs(cut_text, args.persona, args.hashtags)

        blocks.extend([
            f"Corte {idx}",
            "YouTube Shorts — Título:",
            "👉 " + copy["yt_title"],
            "",
            "Instagram Reels — Descrição:",
            copy["ig_desc"],
            "",
            "TikTok — Descrição:",
            copy["tt_desc"],
            "\n" + "-" * 60 + "\n",
        ])

    Path(out_path).write_text("\n".join(blocks).rstrip() + "\n", encoding="utf-8")
    print(f"✅ Gerado: {out_path}")


if __name__ == "__main__":
    main()
interview_cuts.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
interview_cuts.py — Gera cortes para entrevistas em PT (pergunta curta + resposta longa).
|
| 4 |
+
|
| 5 |
+
Uso típico:
|
| 6 |
+
python interview_cuts.py video.mp4 --min 60 --max 150 --qmax 12 --gap 2.0 --lead-in-question yes --max-cuts 20 --preview
|
| 7 |
+
|
| 8 |
+
Pré-requisitos:
|
| 9 |
+
- Ter o arquivo <base>_transcript.json na mesma pasta do vídeo (gerado pelo video_cuts_offline_mac_plus_subs.py).
|
| 10 |
+
|
| 11 |
+
Saídas:
|
| 12 |
+
- <base>_interview_cuts.json
|
| 13 |
+
- <base>_interview_cuts.sh
|
| 14 |
+
- PREVIEW_<base>_interview.mp4 (opcional)
|
| 15 |
+
"""
|
| 16 |
+
import argparse, json, os, re, shlex, subprocess, math
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import List, Dict, Any
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
import numpy as np
|
| 22 |
+
except Exception:
|
| 23 |
+
np = None
|
| 24 |
+
try:
|
| 25 |
+
from resemblyzer import VoiceEncoder, preprocess_wav
|
| 26 |
+
_HAVE_RESEMBLYZER = True
|
| 27 |
+
except Exception:
|
| 28 |
+
VoiceEncoder = None
|
| 29 |
+
preprocess_wav = None
|
| 30 |
+
_HAVE_RESEMBLYZER = False
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def load_json(p: Path):
    """Parse the UTF-8 JSON file at *p* and return the resulting object."""
    return json.loads(p.read_text(encoding="utf-8"))
| 36 |
+
|
| 37 |
+
def save_json(obj, p: Path):
    """Write *obj* to *p* as pretty-printed UTF-8 JSON (non-ASCII preserved)."""
    p.write_text(json.dumps(obj, ensure_ascii=False, indent=2), encoding="utf-8")
| 40 |
+
|
| 41 |
+
def normspace(s: str) -> str:
    """Collapse whitespace runs to single spaces; tolerates None/empty input."""
    text = (s or "").strip()
    return re.sub(r"\s+", " ", text)
| 43 |
+
|
| 44 |
+
def first_sentence(s: str, limit=120) -> str:
    """First sentence of *s* (split at .!? boundaries), truncated to *limit* chars."""
    # Inline normspace(): collapse whitespace and guard against None.
    flat = re.sub(r"\s+", " ", (s or "").strip())
    pieces = re.split(r"(?<=[.!?])\s+", flat)
    head = pieces[0] if pieces and pieces[0] else flat
    return head[:limit].rstrip()
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def ensure_wav_16k_mono(video_path: Path) -> Path:
    """Export a 16 kHz mono WAV next to *video_path*, reusing it if present.

    Raises subprocess.CalledProcessError when ffmpeg fails.
    """
    wav_path = video_path.with_suffix(".16k.wav")
    if not wav_path.exists():
        subprocess.run(
            ["ffmpeg", "-y", "-i", str(video_path), "-ac", "1", "-ar", "16000", str(wav_path)],
            check=True,
        )
    return wav_path
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def diarize_with_resemblyzer(wav_path: Path, n_speakers: int = 2, debug: bool = False):
    """Lightweight speaker diarization from Resemblyzer partial embeddings.

    Clusters the encoder's sliding-window embeddings with agglomerative
    clustering and merges consecutive windows sharing a cluster label into
    {"start", "end", "spk"} spans.  Returns [] for empty or too-short audio.
    Window centers are approximated with a linspace over the audio duration
    rather than the encoder's own slice times.
    """
    if not _HAVE_RESEMBLYZER or np is None:
        raise RuntimeError("pip install resemblyzer numpy scikit-learn soundfile")
    try:
        from sklearn.cluster import AgglomerativeClustering
    except Exception:
        raise RuntimeError("pip install scikit-learn")

    wav = preprocess_wav(str(wav_path))
    encoder = VoiceEncoder()
    _, embeds, _slices = encoder.embed_utterance(wav, return_partials=True)

    sample_rate = 16000.0
    duration = float(len(wav)) / sample_rate if len(wav) > 0 else 0.0
    if len(embeds) == 0 or duration <= 0.0:
        return []

    half = 0.8  # half-window span (seconds) assigned to each partial embedding
    if duration <= 2 * half:
        times = np.array([duration / 2.0], dtype=float)
    else:
        times = np.linspace(half, duration - half, len(embeds))
    if len(embeds) < 2:
        return []

    features = np.vstack(embeds)
    wanted = max(2, int(n_speakers))
    clusters = max(2, min(wanted, features.shape[0]))
    labels = AgglomerativeClustering(n_clusters=clusters).fit_predict(features)

    def _span(start, end, spk):
        # Clamp to >= 0 and guarantee a minimal positive width.
        return {"start": round(max(0.0, start), 3),
                "end": round(max(end, start + 0.1), 3),
                "spk": spk}

    segs = []
    cur_spk = int(labels[0])
    cur_start = max(0.0, float(times[0] - half))
    cur_end = float(times[0] + half)
    for idx in range(1, len(labels)):
        spk = int(labels[idx])
        st = float(times[idx] - half)
        en = float(times[idx] + half)
        if spk == cur_spk and st <= cur_end + 0.1:
            # Same speaker and (nearly) contiguous: extend the open span.
            cur_end = max(cur_end, en)
        else:
            segs.append(_span(cur_start, cur_end, cur_spk))
            cur_spk, cur_start, cur_end = spk, st, en
    segs.append(_span(cur_start, cur_end, cur_spk))
    return segs
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def assign_speakers_to_transcript(transcript: List[Dict[str, Any]], diar_segs: List[Dict[str, Any]]):
    """Map each transcript segment to a speaker id using its time midpoint.

    A midpoint falling inside a diarization span (±0.1 s slack) takes that
    span's speaker; otherwise the span with the nearest center wins; -1 when
    there are no spans at all.
    """
    def speaker_at(t: float):
        for span in diar_segs:
            if span["start"] - 0.1 <= t <= span["end"] + 0.1:
                return span["spk"]
        if diar_segs:
            nearest = min(diar_segs, key=lambda s: abs((s["start"] + s["end"]) / 2 - t))
            return nearest["spk"]
        return -1

    midpoints = ((float(seg.get("start", 0)) + float(seg.get("end", 0))) / 2.0 for seg in transcript)
    return [speaker_at(m) for m in midpoints]
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def detect_questions(transcript: List[Dict[str, Any]], qmax: float, wc_max: int, qmark_required: bool, debug: bool = False) -> List[int]:
    """Indices of transcript segments that look like interviewer questions.

    A segment qualifies when its word count lies in [2, wc_max] and it either
    ends with '?' or lasts at most *qmax* seconds.  With *qmark_required*,
    only the trailing '?' criterion is accepted.
    """
    hits: List[int] = []
    for idx, seg in enumerate(transcript):
        start = float(seg.get("start", 0))
        end = float(seg.get("end", 0))
        text = (seg.get("text") or "").strip()
        words = len(text.split())
        if not (2 <= words <= wc_max):
            continue
        ends_qmark = text.endswith("?")
        if qmark_required:
            looks_like_question = ends_qmark
        else:
            looks_like_question = ends_qmark or (max(0.0, end - start) <= qmax)
        if looks_like_question:
            hits.append(idx)
    return hits
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def build_interview_cuts(
    transcript: List[Dict[str, Any]],
    min_len: float,
    max_len: float,
    qmax: float,
    gap: float,
    lead_in_question: bool,
    max_cuts: int,
    wc_max: int = 35,
    qmark_required: bool = False,
    spk_labels: List[int] | None = None,
    interviewer_id: int | None = None,
    debug: bool = False,
) -> List[Dict[str, Any]]:
    """Assemble interview cuts: a short question followed by a long answer.

    Question indices come from diarization (segments spoken by
    *interviewer_id*) when labels are supplied, otherwise from the textual
    heuristic in detect_questions().  Non-question stretches are also merged
    into standalone "continuous answer" cuts once they reach *min_len*.
    """
    # --- pick question indices -------------------------------------------
    if spk_labels is not None and interviewer_id is not None:
        question_idx = set()
        for k, entry in enumerate(transcript):
            e_start = float(entry.get("start", 0)); e_end = float(entry.get("end", 0))
            e_text = (entry.get("text") or "").strip()
            short_enough = len(e_text.split()) <= wc_max
            question_like = (e_end - e_start) <= qmax or e_text.endswith("?")
            if spk_labels[k] == interviewer_id and short_enough and question_like:
                question_idx.add(k)
    else:
        question_idx = set(detect_questions(transcript, qmax, wc_max, qmark_required, debug))

    cuts: List[Dict[str, Any]] = []
    total = len(transcript)
    i = 0
    while i < total:
        seg = transcript[i]
        seg_start = float(seg.get("start", 0)); seg_end = float(seg.get("end", 0))
        seg_text = normspace(seg.get("text", ""))
        if not seg_text or (seg_end - seg_start) < 0.2:
            i += 1
            continue

        if i in question_idx:
            # --- gather the answer that follows this question ------------
            j = i + 1
            answer_start = None
            tail_end = seg_end
            answer_texts: List[str] = []
            answer_segs: List[Dict[str, float]] = []
            while j < total:
                nxt = transcript[j]
                n_start = float(nxt.get("start", 0)); n_end = float(nxt.get("end", 0))
                n_text = normspace(nxt.get("text", ""))
                if j in question_idx:
                    break  # next question starts a new cut
                if (n_end - n_start) < 0.25:
                    j += 1
                    continue  # skip blips
                if answer_start is not None and n_start - tail_end > gap:
                    break  # silence too long: answer is over
                if n_text:
                    if answer_start is None:
                        answer_start = n_start
                    answer_segs.append({"start": n_start, "end": n_end})
                    answer_texts.append(n_text)
                    tail_end = n_end
                anchor = answer_start if answer_start is not None else seg_start
                if tail_end - anchor >= max_len:
                    break
                j += 1
            if answer_start is not None:
                cut_start = seg_start if lead_in_question else answer_start
                cut_end = tail_end
                # Accept slightly-short Q+A cuts (60 % of min_len).
                if cut_end - cut_start >= min_len * 0.6:
                    label = first_sentence(" ".join(answer_texts), 70) or "Resposta marcante"
                    hook = first_sentence(seg_text, 90) if lead_in_question else ""
                    lead = [{"start": seg_start, "end": seg_end}] if lead_in_question else []
                    cuts.append({
                        "start": round(cut_start, 3),
                        "end": round(cut_end, 3),
                        "label": label,
                        "hook": hook,
                        "reason": "Pergunta curta seguida de resposta longa",
                        "segments": lead + answer_segs,
                    })
                    if len(cuts) >= max_cuts:
                        break
            i = max(i + 1, j)
            continue
        else:
            # --- continuous answer with no preceding question ------------
            j = i + 1
            tail_end = seg_end
            texts = [seg_text] if seg_text else []
            pieces = [{"start": seg_start, "end": seg_end}]
            while j < total and float(transcript[j].get("start", 0)) - tail_end <= gap:
                nxt = transcript[j]
                n_start = float(nxt.get("start", 0)); n_end = float(nxt.get("end", 0))
                n_text = normspace(nxt.get("text", ""))
                if n_end - n_start < 0.25:
                    j += 1
                    continue
                if n_text:
                    pieces.append({"start": n_start, "end": n_end})
                    texts.append(n_text)
                    tail_end = n_end
                if tail_end - seg_start >= max_len:
                    break
                j += 1
            if tail_end - seg_start >= min_len and texts:
                cuts.append({
                    "start": round(seg_start, 3),
                    "end": round(tail_end, 3),
                    "label": first_sentence(" ".join(texts), 70) or "Resposta destacada",
                    "hook": "",
                    "reason": "Resposta contínua em entrevista",
                    "segments": pieces,
                })
                if len(cuts) >= max_cuts:
                    break
            i = j
            continue
    return cuts
| 253 |
+
|
| 254 |
+
|
| 255 |
+
def write_shell_and_preview(video_path: Path, base: str, cuts: List[Dict[str, Any]], preview: bool):
    """Write <base>_interview_cuts.sh with one ffmpeg command per cut.

    Creates export_parts/ next to the video.  When *preview* is set, also
    writes a concat list and appends a command that stitches every cut into
    PREVIEW_<base>_interview.mp4.  The script is made executable.
    """
    out_dir = video_path.parent
    script_path = out_dir / f"{base}_interview_cuts.sh"
    parts_dir = out_dir / "export_parts"
    parts_dir.mkdir(exist_ok=True)

    commands = ["#!/usr/bin/env bash", "set -e"]
    for idx, cut in enumerate(cuts, 1):
        start = cut["start"]
        duration = round(cut["end"] - start, 3)
        target = parts_dir / f"{base}_cut_{idx:02}.mp4"
        commands.append(
            f"ffmpeg -hide_banner -loglevel warning -y -ss {start} -i {shlex.quote(str(video_path))} -t {duration} "
            f"-c:v libx264 -crf 22 -preset veryfast -vf scale=1080:-2:flags=bicubic -c:a aac -b:a 128k {shlex.quote(str(target))}"
        )

    if preview and cuts:
        concat_list = out_dir / f"{base}_interview_preview_list.txt"
        with concat_list.open("w", encoding="utf-8") as fh:
            for idx in range(1, len(cuts) + 1):
                part = parts_dir / f"{base}_cut_{idx:02}.mp4"
                fh.write(f"file {part.name}\n")
        preview_path = out_dir / f"PREVIEW_{base}_interview.mp4"
        commands.append(
            f"ffmpeg -hide_banner -loglevel warning -y -f concat -safe 0 -i {shlex.quote(str(concat_list))} -c copy {shlex.quote(str(preview_path))}"
        )

    script_path.write_text("\n".join(commands) + "\n", encoding="utf-8")
    os.chmod(script_path, 0o755)
    print(f"✅ Script de export: {script_path}")
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def main():
    """CLI entry point: load an existing transcript, optionally diarize,
    build interview cuts (short question + long answer) and write the
    JSON plus an ffmpeg export script.

    Requires ``<base>_transcript.json`` next to the input video, produced
    beforehand by ``video_cuts_offline_mac_plus_subs.py``.
    """
    ap = argparse.ArgumentParser("Cortes para entrevistas (pergunta curta + resposta longa)")
    ap.add_argument("video", help="Arquivo de entrada (.mp4/.mov)")
    ap.add_argument("--min", type=float, default=60.0, help="Duração mínima do corte em segundos")
    ap.add_argument("--max", type=float, default=150.0, help="Duração máxima do corte em segundos")
    ap.add_argument("--qmax", type=float, default=12.0, help="Máximo de duração para marcar perguntas")
    ap.add_argument("--gap", type=float, default=2.0, help="Tolerância de gap entre segmentos")
    ap.add_argument("--lead-in-question", choices=["yes","no"], default="yes", help="Incluir pergunta antes da resposta")
    ap.add_argument("--max-cuts", type=int, default=20, help="Limite de cortes")
    ap.add_argument("--preview", action="store_true", help="Gera comando de prévia por concat")
    ap.add_argument("--q-wc-max", type=int, default=35, help="Máximo de palavras para considerar pergunta")
    ap.add_argument("--qmark-required", action="store_true", help="Exigir '?' para marcar pergunta")
    ap.add_argument("--diarize", action="store_true", help="Ativar diarização com Resemblyzer")
    ap.add_argument("--n-speakers", type=int, default=2, help="Número de falantes para clusterizar")
    ap.add_argument("--debug", action="store_true", help="Imprimir diagnóstico")
    args = ap.parse_args()

    video_path = Path(args.video).expanduser().resolve()
    base = video_path.stem
    # The transcript is a hard prerequisite; bail out with a hint if absent.
    transcript_path = video_path.with_name(f"{base}_transcript.json")
    if not transcript_path.exists():
        print(f"ERRO: não achei '{transcript_path.name}'. Gere a transcrição primeiro com video_cuts_offline_mac_plus_subs.py")
        raise SystemExit(1)

    transcript = load_json(transcript_path)

    # Optional diarization: label each transcript segment with a speaker id,
    # then guess the interviewer as the speaker with the LEAST total talk time
    # (in an interview the interviewer usually speaks far less than the guest).
    spk_labels = None
    interviewer_id = None
    if args.diarize:
        try:
            wav16k = ensure_wav_16k_mono(video_path)
            diar = diarize_with_resemblyzer(wav16k, n_speakers=args.n_speakers, debug=args.debug)
            if diar:
                spk_labels = assign_speakers_to_transcript(transcript, diar)
                totals = {}
                for i, seg in enumerate(transcript):
                    st = float(seg.get("start",0)); en = float(seg.get("end",0)); d = max(0.0, en-st)
                    # -1 marks segments without a speaker assignment.
                    spk = spk_labels[i] if spk_labels and i < len(spk_labels) else -1
                    totals[spk] = totals.get(spk, 0.0) + d
                if totals:
                    # Speaker with the smallest cumulative duration.
                    interviewer_id = sorted(totals.items(), key=lambda kv: kv[1])[0][0]
        except Exception as e:
            # Best-effort: any diarization failure falls back to no labels.
            print(f"[warn] Diarização falhou: {e}. Seguindo sem diarização.")

    cuts = build_interview_cuts(
        transcript=transcript,
        min_len=args.min,
        max_len=args.max,
        qmax=args.qmax,
        gap=args.gap,
        lead_in_question=(args.lead_in_question=="yes"),
        max_cuts=args.max_cuts,
        wc_max=args.q_wc_max,
        qmark_required=args.qmark_required,
        spk_labels=spk_labels,
        interviewer_id=interviewer_id,
        debug=args.debug,
    )

    # Persist the cut metadata next to the video, then emit the export script.
    out_json = video_path.with_name(f"{base}_interview_cuts.json")
    save_json(cuts, out_json)
    print(f"✅ Gerado: {out_json}")

    write_shell_and_preview(video_path, base, cuts, preview=args.preview)

if __name__ == "__main__":
    main()
|
persona.example.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bio": "Sua bio ou descrição como criador(a) de conteúdo",
|
| 3 |
+
"pillars": ["tema1", "tema2", "tema3"],
|
| 4 |
+
"audience": "Seu público-alvo",
|
| 5 |
+
"tone": "Tom da sua comunicação (ex: direto, crítico, pedagógico)",
|
| 6 |
+
"redlines": ["o que evitar", "limites de conteúdo"]
|
| 7 |
+
}
|
requirements.txt
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IA-local-auto-cut — Cortes automáticos de vídeo com transcrição local
|
| 2 |
+
# pip install -r requirements.txt
|
| 3 |
+
|
| 4 |
+
faster-whisper>=1.0.0
|
| 5 |
+
langdetect>=1.0.9
|
| 6 |
+
requests>=2.28.0
|
| 7 |
+
|
| 8 |
+
# Opcional: para interview_cuts com diarização (identificar falantes)
|
| 9 |
+
# resemblyzer>=0.1.1
|
| 10 |
+
# numpy>=1.20.0
|
| 11 |
+
# scikit-learn>=1.0.0
|
| 12 |
+
# soundfile>=0.12.0
|
video_cuts_offline_mac_plus_subs.py
ADDED
|
@@ -0,0 +1,668 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
IA-local-auto-cut: Cortes automáticos de vídeo com transcrição local (Whisper) e IA (Ollama).
|
| 4 |
+
|
| 5 |
+
Transcreve o áudio com faster-whisper, propõe cortes via Ollama ou heurísticas,
|
| 6 |
+
e gera scripts ffmpeg para exportar os cortes.
|
| 7 |
+
"""
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
import json
|
| 11 |
+
import subprocess
|
| 12 |
+
import argparse
|
| 13 |
+
import re
|
| 14 |
+
import requests
|
| 15 |
+
from typing import List, Dict, Any, Optional
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from langdetect import detect, DetectorFactory
|
| 18 |
+
DetectorFactory.seed = 1234
|
| 19 |
+
|
| 20 |
+
import pathlib
|
| 21 |
+
try:
|
| 22 |
+
import yaml # optional; only used if provided
|
| 23 |
+
except Exception:
|
| 24 |
+
yaml = None
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
from faster_whisper import WhisperModel
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# --- Audio stream probing helper ---
|
| 31 |
+
def probe_audio_streams(video_path: str) -> List[Dict[str, Any]]:
    """Probe *video_path* with ffprobe and return its audio streams' metadata.

    Each entry carries index/codec/channel info; a ``tags.language`` value, if
    present, is lifted to a top-level ``language`` key. On any failure (ffprobe
    missing, bad file, unparsable output) an empty list is returned so callers
    can assume a single default audio stream.
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "a",
        "-show_entries", "stream=index,codec_name,channels,channel_layout:stream_tags=language",
        "-of", "json",
        video_path,
    ]
    try:
        raw = subprocess.check_output(probe_cmd)
        parsed = json.loads(raw.decode("utf-8", errors="ignore"))
        streams = parsed.get("streams", [])
        for stream in streams:
            # Promote tags.language to a plain 'language' key for easier access.
            tags = stream.get("tags") or {}
            if "language" in tags and "language" not in stream:
                stream["language"] = tags.get("language")
        return streams
    except Exception as e:
        print(f"ffprobe failed, assuming single audio stream: {e}", file=sys.stderr)
        return []
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def extract_audio(video_path: str, audio_path: str, audio_stream: int = -1) -> None:
    """
    Extract audio from video using ffmpeg.
    If audio_stream == -1, mix all audio streams (if multiple), else extract the specified stream index.

    Output is always 16 kHz mono PCM (pcm_s16le), the format Whisper expects.
    Raises subprocess.CalledProcessError if ffmpeg fails.
    """
    # Probe first so we know whether mixing is needed in the default case.
    streams = probe_audio_streams(video_path)
    try:
        if audio_stream >= 0:
            # map a specific audio stream index
            cmd = [
                "ffmpeg", "-y",
                "-i", video_path,
                "-map", f"0:a:{audio_stream}",
                "-vn",
                "-acodec", "pcm_s16le",
                "-ar", "16000",
                "-ac", "1",
                audio_path,
            ]
        else:
            n = len(streams)
            if n <= 1:
                # original behavior (single stream): let ffmpeg pick the default
                cmd = [
                    "ffmpeg", "-y",
                    "-i", video_path,
                    "-vn",
                    "-acodec", "pcm_s16le",
                    "-ar", "16000",
                    "-ac", "1",
                    audio_path,
                ]
            else:
                # mix all audio streams into one mono track via amix
                inputs = "".join(f"[0:a:{i}]" for i in range(n))
                filtergraph = f"{inputs}amix=inputs={n}:duration=longest[out]"
                cmd = [
                    "ffmpeg", "-y",
                    "-i", video_path,
                    "-filter_complex", filtergraph,
                    "-map", "[out]",
                    "-ac", "1",
                    "-ar", "16000",
                    "-acodec", "pcm_s16le",
                    audio_path,
                ]
        print(f"Extracting audio: {' '.join(cmd)}")
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        # Surface the failure but let the caller decide how to handle it.
        print(f"ffmpeg failed extracting audio: {e}", file=sys.stderr)
        raise
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def transcribe_audio(
    audio_path: str, lang: str = "", model_size: str = "small"
) -> List[Dict[str, Any]]:
    """Transcribe *audio_path* with faster-whisper on CPU (int8).

    An empty *lang* lets Whisper auto-detect the language; a mismatch between
    the requested and detected language is reported on stderr but not fatal.
    Returns a list of {id, seek, start, end, text} dicts, one per segment.
    """
    print(f"Loading Whisper model '{model_size}'...")
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    print(f"Transcribing audio: {audio_path} with language='{lang or 'auto'}'...")
    segments, info = model.transcribe(audio_path, language=lang or None)
    print(f"Detected language: {info.language} with confidence {info.language_probability}")
    if lang and info.language and lang != info.language:
        print(f"[warn] Whisper detected '{info.language}' but --lang was '{lang}'.", file=sys.stderr)
    return [
        {
            "id": seg.id,
            "seek": seg.seek,
            "start": seg.start,
            "end": seg.end,
            "text": seg.text.strip(),
        }
        for seg in segments
    ]
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def query_ollama(model: str, prompt: str, temperature: float = 0.2, max_tokens: int = 2048, server_url: str = "http://localhost:11434") -> str:
    """
    Query Ollama via HTTP /api/generate (works on recent Ollama versions).
    Requires `ollama serve` running locally.

    Args:
        model: Ollama model name (e.g. "llama2").
        prompt: Full prompt text.
        temperature: Sampling temperature.
        max_tokens: Maximum tokens to generate (mapped to options.num_predict).
        server_url: Base URL of the Ollama server.

    Returns:
        The model's "response" text, or "" if the key is missing.
        Exits the process on connection/request errors.
    """
    url = server_url.rstrip("/") + "/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        # BUG FIX: `temperature` used to be sent as a top-level payload key,
        # which /api/generate ignores — model parameters must live inside
        # "options" per the Ollama API.
        "options": {"num_ctx": 8192, "num_predict": max_tokens, "temperature": temperature},
    }
    try:
        r = requests.post(url, json=payload, timeout=600)
        r.raise_for_status()
        data = r.json()
        return data.get("response", "")
    except requests.exceptions.ConnectionError:
        print("Failed to connect to Ollama. Is `ollama serve` running?", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Error querying Ollama: {e}", file=sys.stderr)
        sys.exit(1)
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def load_persona_text(path: str) -> str:
    """Load a persona description (JSON, or YAML when PyYAML is available)
    and flatten it into labelled prompt lines.

    Returns "" when no path is given, the file is missing, or parsing fails;
    every failure is reported on stderr rather than raised.
    """
    if not path:
        return ""
    persona_file = pathlib.Path(path)
    if not persona_file.exists():
        print(f"Persona file not found: {path}", file=sys.stderr)
        return ""
    try:
        raw = persona_file.read_text(encoding="utf-8")
        if persona_file.suffix.lower() in {".yaml", ".yml"} and yaml is not None:
            data = yaml.safe_load(raw)
        else:
            data = json.loads(raw)
    except Exception as e:
        print(f"Failed to read persona file {path}: {e}", file=sys.stderr)
        return ""
    # Flatten the known persona fields into labelled lines; unknown
    # top-level shapes are stringified wholesale.
    pieces = []
    if isinstance(data, dict):
        bio = data.get("bio") or data.get("about") or ""
        pillars = data.get("pillars") or data.get("topics") or []
        audience = data.get("audience") or ""
        tone = data.get("tone") or ""
        redlines = data.get("redlines") or data.get("donts") or []
        pieces.append(f"BIO: {bio}")
        if pillars:
            pieces.append("PILARES:" + ", ".join(map(str, pillars)))
        if audience:
            pieces.append(f"PÚBLICO: {audience}")
        if tone:
            pieces.append(f"TOM: {tone}")
        if redlines:
            pieces.append("NÃO FAZER:" + ", ".join(map(str, redlines)))
    else:
        pieces.append(str(data))
    return "\n".join([piece for piece in pieces if piece])
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# --- PT/EN heuristic helpers ---
|
| 197 |
+
_pt_words = {
    " que ", " de ", " pra ", " com ", " não ", " é ", " uma ", " um ",
    " eu ", " você ", " gente ", " isso ", " então ", " né ", " tá ",
}

def is_ptish(text: str) -> bool:
    """Cheap heuristic: does *text* look Portuguese?

    Any Portuguese-accented character is an immediate yes; otherwise counts
    space-delimited PT stopwords and requires them to clearly outnumber a
    small English stopword set (at least 2 hits and more than EN hits).
    """
    lowered = (text or "").lower()
    if any(accent in lowered for accent in "áéíóúâêôãõç"):
        return True
    pt_score = sum(token in lowered for token in _pt_words)
    en_score = sum(token in lowered for token in (" the ", " and ", " to ", " is ", " of "))
    return pt_score >= max(2, en_score + 1)
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def _lang_of(text: str) -> str:
    """Classify *text* as 'pt', 'en', another langdetect code, or 'unk'.

    Order matters: the cheap PT heuristic short-circuits langdetect (which is
    unreliable on short snippets); if langdetect itself raises, fall back to
    stopword counting with a tie going to 'pt'.
    """
    t = (text or "").strip()
    if not t:
        return "unk"
    # Fast path: accented chars / PT stopwords win before langdetect runs.
    if is_ptish(t):
        return "pt"
    try:
        code = detect(t)
        # Normalize regional variants (pt-br, en-us, ...) to bare codes.
        if code.startswith("pt"):
            return "pt"
        if code.startswith("en"):
            return "en"
        return code
    except Exception:
        # langdetect can raise on very short/ambiguous input; use a crude
        # stopword vote instead (ties favor Portuguese).
        pt_hits = sum(w in t.lower() for w in [" que ", " de ", " pra ", " com ", " não ", " é "])
        en_hits = sum(w in t.lower() for w in [" the ", " and ", " to ", " is ", " of "])
        return "pt" if pt_hits >= en_hits else "en"
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def build_react_cuts(transcript: List[Dict[str, Any]], min_s: int = 60, max_s: int = 180, leadin_s: int = 6, include_en: bool = True, gap_s: float = 1.5) -> List[Dict[str, Any]]:
    """
    transcript: list of {start,end,text}
    Returns cuts as {"segments":[{"start":..,"end":..}, ...], "label": "react-pt"}

    React-mode heuristic: find runs of Portuguese commentary (tolerating up to
    gap_s seconds of non-PT speech inside a run), optionally prefix each cut
    with up to leadin_s seconds of the preceding English audio, and window the
    PT material into cuts of at most max_s seconds, flushed once min_s is
    reached.
    """
    # Tag every segment with a coarse language label (pt/en/other/unk).
    ann = []
    for seg in transcript:
        lang = _lang_of(seg.get("text", ""))
        ann.append({**seg, "lang": lang})

    total_pt = sum((float(seg["end"]) - float(seg["start"])) for seg in ann if seg["lang"] == "pt")
    print(f"[react-mode] PT seconds detected (raw): {total_pt:.1f}s")

    cuts: List[Dict[str, Any]] = []
    i = 0
    n = len(ann)
    while i < n:
        # Advance to the next PT segment; it opens a PT "block" [i, j).
        if ann[i]["lang"] != "pt":
            i += 1
            continue
        # NOTE(review): block_end is updated below but never read afterwards.
        block_end = float(ann[i]["end"])
        j = i + 1
        gap_acc = 0.0
        # Extend the block while PT continues, absorbing non-PT gaps whose
        # cumulative duration stays within gap_s.
        while j < n:
            lang_j = ann[j]["lang"]
            if lang_j == "pt":
                gap_acc = 0.0
                block_end = float(ann[j]["end"])
                j += 1
                continue
            gap = float(ann[j]["end"]) - float(ann[j]["start"])
            if gap_acc + gap <= gap_s:
                gap_acc += gap
                j += 1
                continue
            break

        # Collect only the PT segments inside the block (gaps are dropped).
        pt_segs = []
        for t in range(i, j):
            if ann[t]["lang"] == "pt":
                pt_segs.append({"start": float(ann[t]["start"]), "end": float(ann[t]["end"])})
        if not pt_segs:
            i = j
            continue

        # Gather up to leadin_s seconds of EN audio immediately before the
        # block, walking backwards and trimming the earliest piece if needed.
        lead_segments: List[Dict[str, float]] = []
        if include_en:
            k = i - 1
            remaining = float(leadin_s)
            while k >= 0 and remaining > 0 and ann[k]["lang"] == "en":
                s = float(ann[k]["start"]); e = float(ann[k]["end"])
                # NOTE(review): seg_dur is computed but unused.
                seg_dur = e - s
                use_s = max(s, e - remaining)
                if e - use_s > 0.05:
                    lead_segments.append({"start": use_s, "end": e})
                    remaining -= (e - use_s)
                k -= 1
            lead_segments.reverse()  # restore chronological order

        # Window the PT segments into cuts: acc holds the current window.
        acc: List[Dict[str, float]] = []
        acc_len = 0.0

        def flush_window():
            # Emit the current window as one cut (EN lead-in first, if any).
            # NOTE(review): lead_segments is prepended to EVERY window flushed
            # from this block, so the lead-in repeats across consecutive cuts —
            # confirm whether that repetition is intended.
            if not acc:
                return
            segs = []
            if include_en and lead_segments:
                segs.extend(lead_segments)
            segs.extend(acc)
            cuts.append({"segments": segs, "label": "react-pt"})

        for s in pt_segs:
            seg_len = s["end"] - s["start"]
            if acc_len + seg_len <= max_s:
                # Segment fits entirely in the current window.
                acc.append(s); acc_len += seg_len
                if acc_len >= min_s:
                    flush_window()
                    acc, acc_len = [], 0.0
            else:
                # Segment overflows max_s: take what fits, flush, and carry
                # the remainder into a fresh window.
                take = max_s - acc_len
                if take > 0.2:
                    seg_cut = {"start": s["start"], "end": s["start"] + take}
                    acc.append(seg_cut); acc_len += take
                    flush_window()
                    rest = s["end"] - (s["start"] + take)
                    if rest >= 0.2:
                        acc = [{"start": s["start"] + take, "end": s["end"]}]
                        acc_len = rest
                    else:
                        acc, acc_len = [], 0.0
                else:
                    # Window is essentially full: flush it and start a new one
                    # with the whole segment.
                    flush_window()
                    acc = [s]; acc_len = seg_len
                    if acc_len >= min_s:
                        flush_window(); acc, acc_len = [], 0.0
        # Emit a trailing partial window if it reaches half the minimum.
        if acc_len >= min_s * 0.5:
            flush_window()

        i = j

    return cuts
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def propose_cuts(transcript: List[Dict[str, Any]], model: str, max_stories: int, max_length: int, persona_text: str = "") -> List[Dict[str, Any]]:
    """Propose cuts based on transcript using Ollama model.

    Pipeline: (1) prompt the model for at most max_stories cuts in strict JSON;
    (2) if the reply is not valid JSON, fall back to greedy time-windowed
    heuristic cuts; (3) normalize every cut into the montage shape
    {"segments": [...]} clipped to max_length seconds; (4) re-clip/clean and
    keep only allowed extra fields; (5) sort by a weighted relevance/engagement
    score and truncate to max_stories.
    """
    # Timestamped transcript lines fed verbatim into the prompt.
    transcript_text = "\n".join(
        [f"{seg['start']:.2f} --> {seg['end']:.2f}: {seg['text']}" for seg in transcript]
    )
    persona_block = ("\nPERSONA DO(A) CRIADOR(A):\n" + persona_text + "\n") if persona_text else ""
    prompt = (
        "Você é um(a) editor(a) de vídeos curtos. Dada a transcrição com timestamps (em segundos), "
        f"proponha NO MÁXIMO {max_stories} cortes de até {max_length} segundos cada, com começo–meio–fim e potencial de engajamento. "
        "Você PODE montar cada corte como MONTAGEM, juntando trechos não contíguos que conversem entre si (ex.: segundos do minuto 1 + segundos do minuto 3). "
        "Leve em conta a persona, temas e diferenciais do(a) criador(a) para priorizar trechos alinhados. "
        "Responda ESTRITAMENTE em JSON (sem texto fora do JSON), como uma lista de objetos com os campos:\n"
        " - (OU) start (segundos, número) e end (segundos, número) para UM bloco contínuo\n"
        " - (OU) segments: lista de objetos {start, end} para MONTAGEM\n"
        " - label (título curto)\n"
        " - hook (frase de abertura curta, 7–12 palavras, no idioma do trecho)\n"
        " - reason (por que funciona e como se alinha à persona)\n"
        " - score_relevance (0–100, alinhamento com persona/pilares)\n"
        " - score_engagement (0–100, potencial de retenção)\n"
        " - language (pt, en, pt+en, etc.)\n\n"
        + persona_block +
        "TRANSCRIÇÃO:\n"
        f"{transcript_text}\n\n"
        "EXEMPLOS DE SAÍDA (apenas um deles por item):\n"
        "[{\"start\": 72.0, \"end\": 118.5, \"label\": \"Começo da história\", \"hook\": \"Frase chamativa...\", \"reason\": \"Alinha com X...\", \"score_relevance\": 86, \"score_engagement\": 79, \"language\": \"pt\"}]\n"
        "[{\"segments\":[{\"start\": 12.0, \"end\": 22.5}, {\"start\": 185.0, \"end\": 202.0}], \"label\": \"Conectando pontos\", \"hook\": \"O ponto que ninguém percebe...\", \"reason\": \"Trechos distantes que contam uma ideia completa\", \"score_relevance\": 90, \"score_engagement\": 84, \"language\": \"pt+en\"}]"
    )
    response = query_ollama(model, prompt)
    # Strip optional markdown code fences the model may wrap the JSON in.
    resp = response.strip()
    resp = re.sub(r"^```(?:json)?", "", resp).strip()
    resp = re.sub(r"```$", "", resp).strip()
    cuts = None
    try:
        cuts = json.loads(resp)
    except json.JSONDecodeError:
        # Second chance: grab the first bracketed span and parse just that.
        m = re.search(r"\[.*?\]", resp, flags=re.DOTALL)
        if m:
            try:
                cuts = json.loads(m.group(0))
            except Exception:
                cuts = None
    if not isinstance(cuts, list):
        # Heuristic fallback: greedily pack consecutive segments into
        # max_length-second windows, up to max_stories cuts.
        print("Model returned non-JSON or invalid JSON. Falling back to heuristic cuts.", file=sys.stderr)
        cuts = []
        cur_start = None
        cur_end = None
        for seg in transcript:
            s = float(seg["start"]); e = float(seg["end"])
            if cur_start is None:
                cur_start, cur_end = s, e
            elif e - cur_start <= max_length:
                cur_end = e
            else:
                cuts.append({"start": cur_start, "end": cur_end, "label": "trecho"})
                cur_start, cur_end = s, e
            if len(cuts) >= max_stories:
                break
        if len(cuts) < max_stories and cur_start is not None:
            cuts.append({"start": cur_start, "end": cur_end, "label": "trecho"})
        cuts = cuts[:max_stories]

    # Normalize both shapes (start/end pair OR segments list) into
    # {"segments": [...]} with the total duration clipped to max_length.
    norm_cuts = []
    for c in cuts:
        try:
            if "segments" in c and isinstance(c["segments"], list) and c["segments"]:
                segs = []
                total = 0.0
                for seg in c["segments"]:
                    s = float(seg["start"]); e = float(seg["end"])
                    if e <= s:
                        continue  # drop degenerate/inverted spans
                    dur = e - s
                    if total + dur > max_length:
                        # Trim the overflowing piece so the cut stays <= max_length.
                        e = s + max(0.01, max_length - total)
                        dur = e - s
                    segs.append({"start": s, "end": e})
                    total += dur
                    if total >= max_length:
                        break
                if segs:
                    c2 = {k: v for k, v in c.items() if k != "segments"}
                    c2["segments"] = segs
                    norm_cuts.append(c2)
            elif "start" in c and "end" in c:
                s = float(c["start"]); e = float(c["end"])
                if e > s:
                    if (e - s) > max_length:
                        e = s + max_length
                    c2 = dict(c)
                    c2["segments"] = [{"start": s, "end": e}]
                    norm_cuts.append(c2)
        except Exception:
            continue  # skip malformed entries rather than fail the batch
    cuts = norm_cuts

    # Second pass: re-clip (defensively, same rules as above) and keep only
    # the whitelisted metadata fields alongside label/segments.
    cleaned = []
    for c in cuts:
        try:
            segs = c.get("segments", [])
            acc = []
            total = 0.0
            for seg in segs:
                s = float(seg["start"]); e = float(seg["end"])
                if e <= s:
                    continue
                dur = e - s
                if total + dur > max_length:
                    e = s + max(0.01, max_length - total)
                    dur = e - s
                acc.append({"start": s, "end": e})
                total += dur
                if total >= max_length:
                    break
            if not acc:
                continue
            label = str(c.get("label", "trecho")).strip() or "trecho"
            out = {"segments": acc, "label": label}
            for k in ("hook","reason","score_relevance","score_engagement","language"):
                if k in c:
                    out[k] = c[k]
            cleaned.append(out)
        except Exception:
            continue

    def score_of(c):
        # Weighted ranking: persona relevance counts more than engagement.
        try:
            r = float(c.get("score_relevance", 0))
            e = float(c.get("score_engagement", 0))
            return 0.6*r + 0.4*e
        except Exception:
            return 0.0
    cleaned_sorted = sorted(cleaned, key=score_of, reverse=True) if cleaned else cleaned
    cleaned_sorted = cleaned_sorted[:max_stories]
    print(f"Proposed {len(cleaned_sorted)} cuts (post-sort).")
    return cleaned_sorted
|
| 465 |
+
|
| 466 |
+
|
| 467 |
+
def save_cuts_json(cuts: List[Dict[str, Any]], output_path: str) -> None:
    """Serialize the cut list to *output_path* as pretty-printed UTF-8 JSON."""
    payload = json.dumps(cuts, ensure_ascii=False, indent=2)
    with open(output_path, "w", encoding="utf-8") as fh:
        fh.write(payload)
    print(f"Saved cuts JSON to {output_path}")
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
def generate_ffmpeg_script(
    cuts: List[Dict[str, Any]], video_path: str, output_script_path: str, reencode: bool = False
) -> None:
    """Emit an executable bash script that extracts every cut via ffmpeg.

    Each cut (possibly a montage of several segments) gets its own parts
    directory under export_parts/; the parts are then concatenated into one
    MP4 named after the cut's slugified label. With *reencode* set, parts are
    re-encoded to 1080px H.264/AAC instead of stream-copied.
    """
    script = [
        "#!/bin/bash",
        "# Generated ffmpeg cut script",
        "set -euo pipefail",
        "",
        f"VIDEO=\"{video_path}\"",
        "BASE=export_parts",
        "mkdir -p \"$BASE\"",
        "",
    ]
    for num, cut in enumerate(cuts, 1):
        title = cut.get("label", f"cut_{num}")
        # Slug: lowercase, non-word runs collapsed to '-', trimmed.
        slug = re.sub(r"[^\w\-]+", "-", title.strip().lower()).strip("-") or f"cut-{num}"
        final_name = f"{slug}.mp4"
        part_root = f"$BASE/parts_{num:02d}"
        script.append(f"mkdir -p {part_root}")
        # A plain start/end cut is treated as a one-segment montage.
        pieces = cut.get("segments") or ([{"start": cut.get("start"), "end": cut.get("end")}])
        for seq, piece in enumerate(pieces, 1):
            begin = float(piece["start"])
            finish = float(piece["end"])
            length = max(0.05, finish - begin)
            part_file = f"part_{num:02d}_{seq:02d}.mp4"
            if reencode:
                script.append(
                    f"ffmpeg -y -ss {begin:.3f} -i \"$VIDEO\" -t {length:.3f} -vf scale=1080:-2 -c:v libx264 -preset veryfast -crf 22 -c:a aac -b:a 128k \"{part_root}/{part_file}\""
                )
            else:
                script.append(
                    f"ffmpeg -y -ss {begin:.3f} -i \"$VIDEO\" -t {length:.3f} -c copy \"{part_root}/{part_file}\""
                )
        manifest = f"{part_root}/list.txt"
        script.append(f"rm -f {manifest} && touch {manifest}")
        script.append(f"for f in {part_root}/part_{num:02d}_*.mp4; do echo \"file '$PWD/$f'\" >> {manifest}; done")
        script.append(
            f"ffmpeg -y -f concat -safe 0 -i {manifest} -c copy \"{final_name}\""
        )
        script.append("")
    with open(output_script_path, "w", encoding="utf-8") as fh:
        fh.write("\n".join(script))
    os.chmod(output_script_path, 0o755)
    print(f"Generated ffmpeg script: {output_script_path}")
|
| 519 |
+
|
| 520 |
+
|
| 521 |
+
def generate_preview(cuts: List[Dict[str, Any]], video_path: str, base_name: str) -> Optional[str]:
    """Create a single low-res preview video that concatenates all cuts in order.

    Renders every segment at 1280px/CRF 28 into <base_name>_preview_parts/,
    concatenates the segments of each cut, then concatenates all cuts into
    PREVIEW_<base_name>.mp4 in the current directory. Returns the preview path,
    or None on empty input or any ffmpeg failure (reported on stderr).
    """
    if not cuts:
        print("No cuts to preview (empty cuts list).", file=sys.stderr)
        return None
    try:
        work_root = Path(f"{base_name}_preview_parts")
        work_root.mkdir(parents=True, exist_ok=True)
        cut_outputs = []
        for i, c in enumerate(cuts, 1):
            # Accept both montage ("segments") and plain start/end cuts.
            segs = c.get("segments") or ([{"start": c.get("start"), "end": c.get("end")}])
            cut_dir = work_root / f"cut_{i:02d}"
            cut_dir.mkdir(parents=True, exist_ok=True)
            part_paths = []
            for j, seg in enumerate(segs, 1):
                s = float(seg["start"]); e = float(seg["end"])
                dur = max(0.05, e - s)
                part = cut_dir / f"part_{i:02d}_{j:02d}.mp4"
                # Low-res, fast re-encode: good enough to judge the edit.
                cmd = [
                    "ffmpeg", "-y",
                    "-ss", f"{s:.3f}", "-i", video_path,
                    "-t", f"{dur:.3f}",
                    "-vf", "scale=1280:-2",
                    "-c:v", "libx264", "-preset", "veryfast", "-crf", "28",
                    "-c:a", "aac", "-b:a", "96k",
                    str(part),
                ]
                subprocess.run(cmd, check=True)
                part_paths.append(part)
            # Stitch the segments of this cut (stream copy — same codecs).
            list_file = cut_dir / "concat_list.txt"
            with list_file.open("w", encoding="utf-8") as f:
                for p in part_paths:
                    f.write(f"file '{p.resolve()}'\n")
            cut_out = work_root / f"cut_{i:02d}.mp4"
            cmd2 = [
                "ffmpeg", "-y",
                "-f", "concat", "-safe", "0",
                "-i", str(list_file),
                "-c", "copy",
                str(cut_out),
            ]
            subprocess.run(cmd2, check=True)
            cut_outputs.append(cut_out)
        # Finally stitch all per-cut files into one preview.
        list_all = work_root / "all.txt"
        with list_all.open("w", encoding="utf-8") as f:
            for p in cut_outputs:
                f.write(f"file '{p.resolve()}'\n")
        out_path = f"PREVIEW_{base_name}.mp4"
        cmd3 = [
            "ffmpeg", "-y",
            "-f", "concat", "-safe", "0",
            "-i", str(list_all),
            "-c", "copy",
            out_path,
        ]
        subprocess.run(cmd3, check=True)
        print(f"Generated preview: {out_path}")
        return out_path
    except Exception as e:
        # Best-effort feature: never let a preview failure abort the main run.
        print(f"Failed to generate preview: {e}", file=sys.stderr)
        return None
|
| 582 |
+
|
| 583 |
+
|
| 584 |
+
def main():
    """CLI entry point: transcribe a video, propose cuts, and emit artifacts.

    Pipeline (all offline): extract audio with ffmpeg, transcribe with
    Whisper, then either build "react mode" cuts directly from the
    transcript or ask an Ollama model to propose cuts. Outputs a cuts
    JSON, an ffmpeg shell script, and optionally a preview MP4.
    Exits with status 0 on the early-exit paths (--only-transcribe,
    react-mode, empty react cuts) and 1 when --only-propose finds no
    transcript file.
    """
    ap = argparse.ArgumentParser(
        description="Video cuts offline tool with audio transcription and Ollama integration."
    )
    ap.add_argument("video", help="Input video file path")
    ap.add_argument("--lang", default="", help='Language code for transcription (empty for auto-detect)')
    ap.add_argument("--audio-stream", type=int, default=-1, help="Audio stream index (-1 = mix all)")
    ap.add_argument("--model", default="llama2", help="Ollama model for proposing cuts")
    ap.add_argument("--whisper-model", default="small", help="Whisper model (tiny, base, small, medium, large)")
    ap.add_argument("--only-transcribe", action="store_true", help="Only transcribe and save transcript.json")
    ap.add_argument("--only-propose", action="store_true", help="Only propose cuts from existing transcript.json")
    ap.add_argument("--reencode", action="store_true", help="Re-encode video cuts")
    ap.add_argument("--max-stories", type=int, default=8, help="Maximum number of cuts")
    ap.add_argument("--max-length", type=int, default=60, help="Max duration (seconds) per cut")
    ap.add_argument("--preview", action="store_true", help="Generate preview MP4")
    ap.add_argument("--persona", type=str, default="", help="Path to persona JSON/YAML (see persona.example.json)")
    ap.add_argument("--react-mode", action="store_true", help="React mode: PT comments with EN lead-in")
    ap.add_argument("--react-min", type=int, default=60, help="Min duration (s) per cut in react mode")
    ap.add_argument("--react-max", type=int, default=180, help="Max duration (s) per cut in react mode")
    ap.add_argument("--react-leadin", type=int, default=6, help="EN lead-in (s) before comment")
    ap.add_argument("--react-include-en", choices=["yes", "no"], default="yes", help="Include EN lead-in")
    ap.add_argument("--react-gap", type=float, default=1.5, help="Gap tolerance (s) between PT segments")
    ap.add_argument("--reuse-transcript", action="store_true", help="Reuse existing transcript.json")
    opts = ap.parse_args()

    # All derived artifact paths share the video's basename (sans extension).
    src_video = opts.video
    stem = os.path.splitext(os.path.basename(src_video))[0]
    wav_path = f"{stem}_audio.wav"
    transcript_file = f"{stem}_transcript.json"
    cuts_file = f"{stem}_cuts.json"
    script_file = f"{stem}_cuts.sh"

    transcript = None
    if opts.only_propose:
        # Propose-only mode: a transcript must already exist on disk.
        if not os.path.exists(transcript_file):
            print(f"Transcript file {transcript_file} not found.", file=sys.stderr)
            sys.exit(1)
        with open(transcript_file, "r", encoding="utf-8") as fh:
            transcript = json.load(fh)
    else:
        if opts.reuse_transcript and os.path.exists(transcript_file):
            print(f"Reusing existing transcript from {transcript_file}")
            with open(transcript_file, "r", encoding="utf-8") as fh:
                transcript = json.load(fh)
        else:
            # Skip audio extraction when a previous run already produced the WAV.
            if not os.path.exists(wav_path):
                extract_audio(src_video, wav_path, audio_stream=opts.audio_stream)
            transcript = transcribe_audio(wav_path, lang=opts.lang, model_size=opts.whisper_model)
            with open(transcript_file, "w", encoding="utf-8") as fh:
                json.dump(transcript, fh, ensure_ascii=False, indent=2)
            print(f"Saved transcript to {transcript_file}")
        if opts.only_transcribe:
            print("Only transcribe requested, exiting.")
            sys.exit(0)

    if opts.react_mode:
        # React mode derives cuts directly from the transcript (no LLM call).
        cuts = build_react_cuts(
            transcript,
            min_s=opts.react_min,
            max_s=opts.react_max,
            leadin_s=opts.react_leadin,
            include_en=(opts.react_include_en == "yes"),
            gap_s=opts.react_gap,
        )
        if not cuts:
            print("No PT blocks long enough for react-mode.")
            sys.exit(0)
        save_cuts_json(cuts, cuts_file)
        generate_ffmpeg_script(cuts, src_video, script_file, reencode=opts.reencode)
        if opts.preview:
            generate_preview(cuts, src_video, stem)
        print("Done (react-mode).")
        sys.exit(0)

    # Default path: let the Ollama model propose cuts, optionally persona-guided.
    persona = load_persona_text(opts.persona)
    cuts = propose_cuts(transcript, opts.model, opts.max_stories, opts.max_length, persona)
    save_cuts_json(cuts, cuts_file)
    generate_ffmpeg_script(cuts, src_video, script_file, reencode=opts.reencode)
    if opts.preview:
        generate_preview(cuts, src_video, stem)
    print("Done.")
|
| 665 |
+
|
| 666 |
+
|
| 667 |
+
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|