Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,16 @@ from dataclasses import dataclass
|
|
| 5 |
from typing import List, Tuple, Optional
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
USE_LLM_DEFAULT = True
|
| 10 |
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "").strip()
|
| 11 |
|
|
@@ -23,11 +32,10 @@ except Exception:
|
|
| 23 |
LLM = None
|
| 24 |
LLM_AVAILABLE = False
|
| 25 |
|
| 26 |
-
# Config
|
| 27 |
-
FPS = 24
|
| 28 |
-
OUTPUT_DIR = "./Output"
|
| 29 |
-
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
| 31 |
@dataclass
|
| 32 |
class Segment:
|
| 33 |
start_tc: str
|
|
@@ -37,272 +45,452 @@ class Segment:
|
|
| 37 |
text: str
|
| 38 |
score: float
|
| 39 |
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
raise ValueError(f"Timecode inválido: {tc}")
|
| 46 |
-
hh, mm, ss, ff = map(int, m.groups())
|
| 47 |
-
return hh*3600*fps + mm*60*fps + ss*fps + ff
|
| 48 |
|
| 49 |
def frames_to_timecode(frames: int, fps: int = FPS) -> str:
|
| 50 |
-
hh = frames // (3600*fps)
|
| 51 |
-
rem = frames % (3600*fps)
|
| 52 |
-
mm = rem // (60*fps)
|
| 53 |
-
rem = rem % (60*fps)
|
| 54 |
ss = rem // fps
|
| 55 |
ff = rem % fps
|
| 56 |
return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
|
| 57 |
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
| 59 |
def parse_transcript(txt: str) -> List[Segment]:
|
| 60 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
if not txt or not txt.strip():
|
| 62 |
-
print("⚠️ Transcrição vazia")
|
| 63 |
return []
|
| 64 |
-
|
| 65 |
-
lines =
|
| 66 |
-
results = []
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
r'
|
| 71 |
-
re.IGNORECASE
|
| 72 |
)
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
continue
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
if
|
| 83 |
-
start_tc, end_tc,
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
if
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
try:
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
if end_f > start_f:
|
| 94 |
results.append(Segment(
|
| 95 |
-
start_tc=
|
| 96 |
-
end_tc=
|
| 97 |
-
start_f=
|
| 98 |
-
end_f=
|
| 99 |
-
text=text,
|
| 100 |
score=0.0
|
| 101 |
))
|
| 102 |
-
except Exception
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
continue
|
| 105 |
-
|
| 106 |
-
|
|
|
|
| 107 |
return results
|
| 108 |
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
|
| 111 |
if not manual_input or not manual_input.strip():
|
| 112 |
return []
|
| 113 |
-
|
| 114 |
manual_ranges = []
|
| 115 |
lines = manual_input.replace(",", "\n").splitlines()
|
| 116 |
-
|
| 117 |
-
pattern = re.compile(r'(\d{2}:\d{2}:\d{2}[:;]\d{2})\s*[-–—]\s*(\d{2}:\d{2}:\d{2}[:;]\d{2})')
|
| 118 |
-
|
| 119 |
for line in lines:
|
| 120 |
m = pattern.search(line.strip())
|
| 121 |
if m:
|
| 122 |
manual_ranges.append((m.group(1), m.group(2)))
|
| 123 |
-
|
| 124 |
return manual_ranges
|
| 125 |
|
| 126 |
-
# ============ AI HELPERS ============
|
| 127 |
-
def extract_duration_minutes(text: str) -> Optional[float]:
|
| 128 |
-
"""Extrai duração em minutos"""
|
| 129 |
-
text_lower = text.lower()
|
| 130 |
-
|
| 131 |
-
patterns = [
|
| 132 |
-
r'(\d+)\s*minutos?',
|
| 133 |
-
r'(\d+)\s*min\b',
|
| 134 |
-
r'(\d+)m\b',
|
| 135 |
-
r'corte\s+de\s+(\d+)'
|
| 136 |
-
]
|
| 137 |
-
|
| 138 |
-
for pattern in patterns:
|
| 139 |
-
match = re.search(pattern, text_lower)
|
| 140 |
-
if match:
|
| 141 |
-
return float(match.group(1))
|
| 142 |
-
|
| 143 |
-
return None
|
| 144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
def find_keyword_in_segments(segs: List[Segment], keywords: List[str]) -> int:
|
| 146 |
-
|
| 147 |
-
if not keywords:
|
| 148 |
return 0
|
| 149 |
-
|
| 150 |
-
best_idx = 0
|
| 151 |
-
best_score = 0
|
| 152 |
-
|
| 153 |
for idx, seg in enumerate(segs):
|
| 154 |
text_lower = seg.text.lower()
|
| 155 |
score = sum(1 for kw in keywords if kw.lower() in text_lower)
|
| 156 |
-
|
| 157 |
if score > best_score:
|
| 158 |
-
best_score = score
|
| 159 |
-
best_idx = idx
|
| 160 |
-
|
| 161 |
return best_idx
|
| 162 |
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
start_seg = segs[start_idx]
|
| 169 |
-
start_frame = start_seg.start_f
|
| 170 |
-
duration_frames = int(duration_min * 60 * FPS)
|
| 171 |
-
end_frame = start_frame + duration_frames
|
| 172 |
-
|
| 173 |
-
# Pega texto dos primeiros segmentos
|
| 174 |
text_parts = []
|
| 175 |
-
for seg in
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
return Segment(
|
| 181 |
start_tc=frames_to_timecode(start_frame),
|
| 182 |
end_tc=frames_to_timecode(end_frame),
|
| 183 |
start_f=start_frame,
|
| 184 |
end_f=end_frame,
|
| 185 |
-
text=
|
| 186 |
score=100.0
|
| 187 |
)
|
| 188 |
|
| 189 |
-
def process_with_ai(segs: List[Segment], instructions: str) -> List[Segment]:
|
| 190 |
-
"""Processa com IA"""
|
| 191 |
-
|
| 192 |
-
# Extrai duração
|
| 193 |
-
duration = extract_duration_minutes(instructions)
|
| 194 |
-
|
| 195 |
-
# Identifica palavras-chave importantes
|
| 196 |
-
keywords = []
|
| 197 |
-
text_lower = instructions.lower()
|
| 198 |
-
|
| 199 |
-
keyword_map = {
|
| 200 |
-
'tenista': ['tenista', 'tênis', 'tenis', 'jogador', 'kinguios'],
|
| 201 |
-
'maria': ['maria', 'josé', 'jose', 'casal', 'seguro'],
|
| 202 |
-
'protocolo': ['protocolo', 'rodar', 'dependência', 'dependencia'],
|
| 203 |
-
}
|
| 204 |
-
|
| 205 |
-
for key, terms in keyword_map.items():
|
| 206 |
-
if any(term in text_lower for term in terms):
|
| 207 |
-
keywords.extend(terms)
|
| 208 |
-
|
| 209 |
-
print(f"📊 Duração: {duration}min | Keywords: {keywords[:3]}")
|
| 210 |
-
|
| 211 |
-
# Encontra ponto de início
|
| 212 |
-
start_idx = 0
|
| 213 |
-
|
| 214 |
-
if LLM_AVAILABLE and keywords:
|
| 215 |
-
try:
|
| 216 |
-
# Cria preview dos segmentos
|
| 217 |
-
preview = []
|
| 218 |
-
for i, s in enumerate(segs[:100]):
|
| 219 |
-
preview.append(f"{i}|{s.start_tc}|{s.text[:60]}")
|
| 220 |
-
|
| 221 |
-
preview_text = "\n".join(preview[:80])
|
| 222 |
-
|
| 223 |
-
prompt = f"""Encontre o índice onde começa o assunto solicitado.
|
| 224 |
-
|
| 225 |
-
BUSCAR: {' '.join(keywords[:3])}
|
| 226 |
-
|
| 227 |
-
SEGMENTOS (formato: índice|timecode|texto):
|
| 228 |
-
{preview_text}
|
| 229 |
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
return [result]
|
| 259 |
else:
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
#
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
|
|
|
|
|
|
|
|
|
| 275 |
try:
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
pass
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
if not segs:
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
for s in segs:
|
| 303 |
-
score = 0
|
| 304 |
-
text = s.text.lower()
|
| 305 |
-
|
| 306 |
if "medo" in text or "coragem" in text:
|
| 307 |
score += weight_emotion
|
| 308 |
if "nunca" in text or "de repente" in text:
|
|
@@ -311,18 +499,21 @@ def select_segments(transcript_txt: str, use_llm: bool, num_segments: int,
|
|
| 311 |
score += weight_learn
|
| 312 |
if "segredo" in text or "verdade" in text:
|
| 313 |
score += weight_viral
|
| 314 |
-
|
| 315 |
if custom_keywords:
|
| 316 |
for kw in custom_keywords.split(","):
|
| 317 |
if kw.strip().lower() in text:
|
| 318 |
score += 3.0
|
| 319 |
-
|
| 320 |
s.score = score
|
| 321 |
-
|
| 322 |
segs.sort(key=lambda x: x.score, reverse=True)
|
| 323 |
return segs[:num_segments]
|
| 324 |
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
| 326 |
def deep_copy_element(elem: ET.Element) -> ET.Element:
|
| 327 |
new = ET.Element(elem.tag, attrib=dict(elem.attrib))
|
| 328 |
new.text = elem.text
|
|
@@ -331,42 +522,42 @@ def deep_copy_element(elem: ET.Element) -> ET.Element:
|
|
| 331 |
new.append(deep_copy_element(child))
|
| 332 |
return new
|
| 333 |
|
|
|
|
| 334 |
def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
|
| 335 |
root = tree.getroot()
|
| 336 |
seq = root.find(".//sequence")
|
| 337 |
-
|
| 338 |
if seq is None:
|
| 339 |
raise ValueError("Sequence não encontrada no XML")
|
| 340 |
-
|
| 341 |
v_track = seq.find(".//media/video/track")
|
| 342 |
a_track = seq.find(".//media/audio/track")
|
| 343 |
-
|
| 344 |
if not v_track or not a_track:
|
| 345 |
raise ValueError("Trilhas de vídeo/áudio não encontradas")
|
| 346 |
-
|
| 347 |
v_template = v_track.find("./clipitem")
|
| 348 |
a_template = a_track.find("./clipitem")
|
| 349 |
-
|
| 350 |
# Limpa clips existentes
|
| 351 |
for clip in list(v_track.findall("./clipitem")):
|
| 352 |
v_track.remove(clip)
|
| 353 |
for clip in list(a_track.findall("./clipitem")):
|
| 354 |
a_track.remove(clip)
|
| 355 |
-
|
| 356 |
# Adiciona novos clips
|
| 357 |
timeline_pos = 0
|
| 358 |
-
|
| 359 |
for i, seg in enumerate(segs, 1):
|
| 360 |
duration = seg.end_f - seg.start_f
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
| 363 |
v_clip = ET.Element("clipitem", {"id": f"clip-v{i}"})
|
| 364 |
ET.SubElement(v_clip, "name").text = f"Clip {i}"
|
| 365 |
ET.SubElement(v_clip, "start").text = str(timeline_pos)
|
| 366 |
ET.SubElement(v_clip, "end").text = str(timeline_pos + duration)
|
| 367 |
ET.SubElement(v_clip, "in").text = str(seg.start_f)
|
| 368 |
ET.SubElement(v_clip, "out").text = str(seg.end_f)
|
| 369 |
-
|
| 370 |
if v_template is not None:
|
| 371 |
rate = v_template.find("rate")
|
| 372 |
if rate is not None:
|
|
@@ -374,15 +565,15 @@ def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
|
|
| 374 |
file_elem = v_template.find("file")
|
| 375 |
if file_elem is not None:
|
| 376 |
v_clip.append(deep_copy_element(file_elem))
|
| 377 |
-
|
| 378 |
-
# Áudio
|
| 379 |
a_clip = ET.Element("clipitem", {"id": f"clip-a{i}"})
|
| 380 |
ET.SubElement(a_clip, "name").text = f"Clip {i}"
|
| 381 |
ET.SubElement(a_clip, "start").text = str(timeline_pos)
|
| 382 |
ET.SubElement(a_clip, "end").text = str(timeline_pos + duration)
|
| 383 |
ET.SubElement(a_clip, "in").text = str(seg.start_f)
|
| 384 |
ET.SubElement(a_clip, "out").text = str(seg.end_f)
|
| 385 |
-
|
| 386 |
if a_template is not None:
|
| 387 |
rate = a_template.find("rate")
|
| 388 |
if rate is not None:
|
|
@@ -390,128 +581,180 @@ def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
|
|
| 390 |
file_elem = a_template.find("file")
|
| 391 |
if file_elem is not None:
|
| 392 |
a_clip.append(deep_copy_element(file_elem))
|
| 393 |
-
|
| 394 |
v_track.append(v_clip)
|
| 395 |
a_track.append(a_clip)
|
| 396 |
-
|
| 397 |
timeline_pos += duration
|
| 398 |
-
|
| 399 |
return tree
|
| 400 |
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
if not xml_file:
|
| 407 |
-
return "
|
| 408 |
-
|
| 409 |
try:
|
| 410 |
-
# Lê transcrição se necessário
|
| 411 |
transcript = ""
|
| 412 |
manual = parse_manual_timecodes(manual_timecodes)
|
| 413 |
-
|
| 414 |
-
if not manual:
|
| 415 |
-
|
| 416 |
-
return "❌ Envie a transcrição (.txt)", None, f"LLM: {LLM_AVAILABLE}"
|
| 417 |
-
|
| 418 |
-
with open(txt_file.name, "r", encoding="utf-8") as f:
|
| 419 |
transcript = f.read()
|
| 420 |
-
|
| 421 |
# Seleciona segmentos
|
| 422 |
segments = select_segments(
|
| 423 |
transcript, use_llm and LLM_AVAILABLE, num_segments,
|
| 424 |
custom_keywords, manual_timecodes, natural_instructions,
|
| 425 |
weight_emotion, weight_break, weight_learn, weight_viral
|
| 426 |
)
|
| 427 |
-
|
| 428 |
if not segments:
|
| 429 |
-
return "
|
| 430 |
-
|
| 431 |
# Edita XML
|
| 432 |
tree = ET.parse(xml_file.name)
|
| 433 |
tree = edit_xml(tree, segments)
|
| 434 |
-
|
| 435 |
# Salva
|
| 436 |
basename = os.path.splitext(os.path.basename(xml_file.name))[0]
|
| 437 |
output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
|
| 438 |
tree.write(output, encoding="utf-8", xml_declaration=True)
|
| 439 |
-
|
| 440 |
# Resumo
|
| 441 |
total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
|
| 442 |
-
total_min = total_sec / 60
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
summary = f"✅ {len(segments)} corte(s) | {total_min:.1f} min total | Modo: {mode}\n\n"
|
| 447 |
-
|
| 448 |
for i, seg in enumerate(segments, 1):
|
| 449 |
dur_sec = (seg.end_f - seg.start_f) / FPS
|
| 450 |
-
|
| 451 |
if seg.text and len(seg.text) > 50:
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
return summary, output, status
|
| 458 |
-
|
| 459 |
except Exception as e:
|
| 460 |
import traceback
|
| 461 |
traceback.print_exc()
|
| 462 |
-
return f"
|
|
|
|
| 463 |
|
| 464 |
-
#
|
|
|
|
|
|
|
| 465 |
with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
|
| 466 |
-
gr.Markdown("#
|
| 467 |
-
gr.Markdown("Cortes
|
| 468 |
-
|
| 469 |
with gr.Row():
|
| 470 |
-
xml_in = gr.File(label="
|
| 471 |
-
txt_in = gr.File(label="
|
| 472 |
-
|
| 473 |
with gr.Row():
|
| 474 |
-
use_llm = gr.Checkbox(label="
|
| 475 |
-
num_segments = gr.Slider(2, 20, 5, 1, label="Segmentos (automático)")
|
| 476 |
-
|
| 477 |
-
with gr.Accordion("
|
| 478 |
gr.Markdown("""
|
| 479 |
-
|
| 480 |
-
-
|
| 481 |
-
-
|
| 482 |
-
-
|
|
|
|
| 483 |
""")
|
| 484 |
natural_instructions = gr.Textbox(
|
| 485 |
-
label="
|
| 486 |
-
placeholder='Ex: "
|
| 487 |
lines=2
|
| 488 |
)
|
| 489 |
-
|
| 490 |
-
with gr.Accordion("
|
| 491 |
manual_timecodes = gr.Textbox(
|
| 492 |
label="Timecodes (um por linha)",
|
| 493 |
placeholder="00:21:18:09 - 00:31:18:09",
|
| 494 |
lines=3
|
| 495 |
)
|
| 496 |
-
|
| 497 |
-
with gr.Accordion("
|
| 498 |
-
custom_keywords = gr.Textbox(label="Palavras-chave")
|
| 499 |
with gr.Row():
|
| 500 |
-
weight_emotion = gr.Slider(0, 5, 2.0, 0.1, label="
|
| 501 |
-
weight_break = gr.Slider(0, 5, 1.5, 0.1, label="
|
| 502 |
with gr.Row():
|
| 503 |
-
weight_learn = gr.Slider(0, 5, 1.2, 0.1, label="
|
| 504 |
-
weight_viral = gr.Slider(0, 5, 1.0, 0.1, label="
|
| 505 |
-
|
| 506 |
-
btn = gr.Button("
|
| 507 |
-
|
| 508 |
with gr.Row():
|
| 509 |
with gr.Column(scale=2):
|
| 510 |
-
summary_out = gr.Textbox(label="
|
| 511 |
with gr.Column(scale=1):
|
| 512 |
status_out = gr.Textbox(label="Status")
|
| 513 |
-
file_out = gr.File(label="
|
| 514 |
-
|
| 515 |
btn.click(
|
| 516 |
process_files,
|
| 517 |
[xml_in, txt_in, use_llm, num_segments, custom_keywords,
|
|
@@ -521,4 +764,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
|
|
| 521 |
)
|
| 522 |
|
| 523 |
if __name__ == "__main__":
|
| 524 |
-
demo.launch()
|
|
|
|
| 5 |
from typing import List, Tuple, Optional
|
| 6 |
import gradio as gr
|
| 7 |
|
| 8 |
+
# -------------------------------------------------------------------
# General settings
# -------------------------------------------------------------------
FPS = 24  # frame rate used as the default for every timecode <-> frame conversion
OUTPUT_DIR = "./Output"  # generated XML files are written here
os.makedirs(OUTPUT_DIR, exist_ok=True)  # created at import time; no error if present

# -------------------------------------------------------------------
# LLM (optional - Gemini)
# -------------------------------------------------------------------
USE_LLM_DEFAULT = True
# Empty string when the environment variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()
|
| 20 |
|
|
|
|
| 32 |
LLM = None
|
| 33 |
LLM_AVAILABLE = False
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
# =========================
|
| 37 |
+
# Modelos
|
| 38 |
+
# =========================
|
| 39 |
@dataclass
|
| 40 |
class Segment:
|
| 41 |
start_tc: str
|
|
|
|
| 45 |
text: str
|
| 46 |
score: float
|
| 47 |
|
| 48 |
+
|
| 49 |
+
# =========================
|
| 50 |
+
# Funções de Timecode
|
| 51 |
+
# =========================
|
| 52 |
+
def _tc_to_hmsf(tc: str, fps: int = FPS) -> Tuple[int, int, int, int]:
    """Split a timecode string into ``(hours, minutes, seconds, frames)``.

    Accepted layouts (surrounding whitespace is ignored):

    - ``HH:MM:SS:FF`` or ``HH:MM:SS;FF`` -- explicit frame number
    - ``HH:MM:SS.fff`` or ``HH:MM:SS,fff`` -- fractional seconds, 1-3 digits
    - ``H:MM:SS`` -- no frame part; frames are reported as 0

    Args:
        tc: Timecode text.
        fps: Frame rate used to turn fractional seconds into a frame number.

    Returns:
        ``(hh, mm, ss, ff)`` as ints. Field ranges are not validated, and
        ``ss`` can come back one higher than written when the fractional part
        rounds up to a full second; callers convert the tuple linearly to
        frames, so the carry needs no further normalisation.

    Raises:
        ValueError: If ``tc`` matches none of the accepted layouts.
    """
    s = tc.strip()

    # Explicit frame field. ';' is read exactly like ':' (no drop-frame math).
    m = re.fullmatch(r'(\d{1,2}):(\d{2}):(\d{2})[:;](\d{2})', s)
    if m:
        hh, mm, ss, ff = map(int, m.groups())
        return hh, mm, ss, ff

    # Fractional seconds.
    m = re.fullmatch(r'(\d{1,2}):(\d{2}):(\d{2})[.,](\d{1,3})', s)
    if m:
        hh, mm, ss = (int(part) for part in m.group(1, 2, 3))
        # The digits are a decimal fraction of a second: ".5" is 500 ms, not
        # 5 ms. Right-padding to three digits yields milliseconds and leaves
        # the usual three-digit SRT/VTT values untouched.
        ms = int(m.group(4).ljust(3, "0"))
        ff = int(round((ms / 1000.0) * fps))
        if ff >= fps:
            # Rounded up to a whole second: carry into the seconds field.
            ss += 1
            ff = 0
        return hh, mm, ss, ff

    # Hours, minutes and seconds only.
    m = re.fullmatch(r'(\d{1,2}):(\d{2}):(\d{2})', s)
    if m:
        hh, mm, ss = map(int, m.groups())
        return hh, mm, ss, 0

    raise ValueError(f"Timecode inválido: {tc}")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
def parse_timecode_to_frames(tc: str, fps: int = FPS) -> int:
    """Convert a timecode string into an absolute frame count at ``fps``.

    The string is split by ``_tc_to_hmsf``; any ``ValueError`` it raises for
    an unrecognised timecode propagates to the caller.
    """
    hours, minutes, seconds, frames = _tc_to_hmsf(tc, fps)
    whole_seconds = hours * 3600 + minutes * 60 + seconds
    return whole_seconds * fps + frames
|
| 89 |
+
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
def frames_to_timecode(frames: int, fps: int = FPS) -> str:
    """Format an absolute frame count as ``HH:MM:SS:FF`` at ``fps``.

    Each field is zero-padded to at least two digits; the hours field is not
    wrapped or capped.
    """
    frames_per_minute = 60 * fps
    frames_per_hour = 60 * frames_per_minute

    hh, remainder = divmod(frames, frames_per_hour)
    mm, remainder = divmod(remainder, frames_per_minute)
    ss, ff = divmod(remainder, fps)
    return f"{hh:02d}:{mm:02d}:{ss:02d}:{ff:02d}"
|
| 99 |
|
| 100 |
+
|
| 101 |
+
# =========================
|
| 102 |
+
# Parser de Transcrição
|
| 103 |
+
# =========================
|
| 104 |
def parse_transcript(txt: str) -> List[Segment]:
    """Parse a timestamped transcript into ``Segment`` objects.

    Supported layouts:

    A) One line:    ``00:00:00:00 - 00:00:10:00 Text...``
    B) Several lines: ``00:00:00:00 - 00:00:10:00`` followed by text lines
    C) SRT/VTT cues, optionally preceded by a numeric index::

           1
           00:00:05,120 --> 00:00:08,300
           Text line 1
           Text line 2
           [blank line]

    Blank lines and lines equal to "desconhecido" (case-insensitive) are
    ignored. A range is dropped when a timecode cannot be parsed or when its
    end is not after its start.

    Args:
        txt: Full transcript text.

    Returns:
        Segments in order of appearance, each with ``score`` 0.0 and with
        ``start_tc``/``end_tc`` re-rendered through ``frames_to_timecode``.
        Returns an empty list for empty or whitespace-only input.
    """
    if not txt or not txt.strip():
        return []

    lines = [l.rstrip() for l in txt.splitlines()]
    total = len(lines)
    results: List[Segment] = []

    line_range = re.compile(
        r'^\s*\[?\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*[-—–]\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)\s*\]?\s*(.*)$'
    )
    arrow = re.compile(
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)\s*-->\s*'
        r'(\d{1,2}:\d{2}:\d{2}(?:[.,]\d{1,3}|[:;]\d{2})?)'
    )
    index_line = re.compile(r'^\d+\s*$')  # bare SRT cue number

    def _add(start_tc: str, end_tc: str, text: str) -> None:
        """Append a segment unless the range is unparseable or not increasing."""
        try:
            sf = parse_timecode_to_frames(start_tc)
            ef = parse_timecode_to_frames(end_tc)
        except ValueError:
            # Best effort: a malformed range is skipped, not fatal.
            return
        if ef > sf:
            results.append(Segment(
                start_tc=frames_to_timecode(sf),
                end_tc=frames_to_timecode(ef),
                start_f=sf,
                end_f=ef,
                text=text,
                score=0.0
            ))

    i = 0
    while i < total:
        raw = lines[i].strip()
        if not raw or raw.lower() == "desconhecido":
            i += 1
            continue

        # Layouts A and B (dash-separated range).
        m = line_range.match(raw)
        if m:
            start_tc, end_tc, trailing_text = m.groups()
            trailing_text = trailing_text.strip()
            nxt_i = i + 1
            if trailing_text:
                text = trailing_text
            else:
                # Text is on the following lines, up to a blank line or the
                # start of the next block.
                parts = []
                while nxt_i < total:
                    nxt = lines[nxt_i].strip()
                    if (
                        not nxt
                        or line_range.match(nxt)
                        or index_line.match(nxt)
                        or arrow.search(nxt)
                    ):
                        break
                    parts.append(nxt)
                    nxt_i += 1
                text = " ".join(parts).strip()

            # A range without text is labelled with its own timecodes.
            _add(start_tc, end_tc, text if text else f"{start_tc} - {end_tc}")
            i = nxt_i
            continue

        # Layout C (SRT/VTT). The '-->' line is either this line or the next
        # one (many SRT files put a numeric index first).
        cue = arrow.search(raw)
        j = i + 1
        if cue is None and i + 1 < total:
            cue = arrow.search(lines[i + 1])
            j = i + 2
        if cue:
            start_tc, end_tc = cue.groups()
            parts = []
            while j < total:
                nxt = lines[j].strip()
                if not nxt:
                    break
                # Next block: an index followed by a timecode line...
                if index_line.match(nxt) and j + 1 < total and arrow.search(lines[j + 1]):
                    break
                # ...or a timecode line directly.
                if arrow.search(nxt):
                    break
                parts.append(nxt)
                j += 1

            _add(start_tc, end_tc, " ".join(parts).strip())

            # Resume AT the line that ended the block. Resuming one past it
            # would skip a cue whose '-->' line directly follows the previous
            # cue's text; a blank line here is skipped by the loop head.
            i = j
            continue

        i += 1

    return results
|
| 225 |
|
| 226 |
+
|
| 227 |
+
# =========================
|
| 228 |
+
# Minutagens Manuais
|
| 229 |
+
# =========================
|
| 230 |
def parse_manual_timecodes(manual_input: str) -> List[Tuple[str, str]]:
    """Extract ``(start, end)`` timecode pairs from free-form user input.

    Ranges are written ``START - END`` (hyphen, en dash or em dash) and may be
    separated by newlines or commas. Each timecode is ``H:MM:SS`` optionally
    followed by ``:FF``/``;FF`` frames or ``.fff``/``,fff`` fractional seconds.

    Args:
        manual_input: Raw text typed by the user; may be empty or ``None``.

    Returns:
        The timecode strings exactly as written, in order of appearance.
        Returns an empty list when the input is empty or holds no range.
    """
    if not manual_input or not manual_input.strip():
        return []

    # Commas are NOT pre-split into lines: doing so would break timecodes that
    # use a comma before the milliseconds ("00:00:05,120"). Instead, the
    # lookahead rejects a fractional part that is followed by a digit or ':',
    # which is how a comma acting as a range separator looks
    # ("...00:00:08,00:00:10 - ...").
    timecode = r'\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3}(?![\d:]))?'
    pattern = re.compile('(' + timecode + r')\s*[-–—]\s*(' + timecode + ')')

    # Matching line by line keeps a range from spanning a line break.
    return [
        (m.group(1), m.group(2))
        for line in manual_input.splitlines()
        for m in pattern.finditer(line)
    ]
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
+
# =========================
|
| 245 |
+
# Interpretação do Comando (NLP simples)
|
| 246 |
+
# =========================
|
| 247 |
+
@dataclass
class CommandSpec:
    """Structured result of parsing a natural-language cutting command."""

    total_segments: int  # number of cuts requested (at least 1)
    per_segment_seconds: Optional[int]  # duration of each cut in seconds, if found
    total_minutes: Optional[float]  # overall duration in minutes, if found
    start_timecode: Optional[str]  # explicit start timecode, as written (lower-cased)
    keywords: List[str]  # terms used to locate where the cut should begin
    use_best_moments: bool  # True when the text contains "melhores momentos"


def parse_natural_command(text: str) -> CommandSpec:
    """Extract cutting parameters from a (Portuguese) natural-language command.

    Recognised pieces (matching is done on the stripped, lower-cased text):

    - number of cuts: "3 cortes", "2 clipes", "crie 2"
    - per-cut duration: "cortes de 30s", "30 segundos"; when no seconds are
      mentioned, the first "N min"/"N minutos" is used instead (times 60)
    - total duration: the first "N min"/"N minutos" after a verb such as
      "corte", "faça", "crie"; otherwise the first such mention anywhere
    - start timecode: "começando em 00:02:10:00", "a partir de 00:02:10,500",
      "starting at ...", "start at ..."
    - keywords: everything after "sobre", "da parte do", "tema" or "assunto",
      split on ``,`` ``.`` ``;`` ``/``
    - best moments: presence of "melhores momentos"

    Note that a minutes mention fills ``total_minutes`` and, when no seconds
    are given, ``per_segment_seconds`` as well; consumers decide which one
    takes priority.

    Args:
        text: The user's instruction; ``None`` or empty yields the defaults.

    Returns:
        A ``CommandSpec``. Defaults: one segment, no durations, no start
        timecode, no keywords, ``use_best_moments`` False.
    """
    s = (text or "").strip().lower()

    # Number of cuts: "N cortes/clipes" wins over "crie N".
    count = 1
    m = (re.search(r'(\d+)\s*(?:cortes?|clipes?)\b', s)
         or re.search(r'\bcrie\s+(\d+)\b', s))
    if m:
        count = max(1, int(m.group(1)))

    # Per-cut duration: seconds first, then minutes converted to seconds.
    # (The former "de N s" / "de N min" fallbacks could never fire: anything
    # they match is already matched by these broader patterns.)
    per_seg_sec = None
    m = re.search(r'(\d+)\s*(?:segundos?|s)\b', s)
    if m:
        per_seg_sec = int(m.group(1))
    else:
        m = re.search(r'(\d+)\s*(?:minutos?|min)\b', s)
        if m:
            per_seg_sec = int(m.group(1)) * 60

    # Total duration in minutes: prefer the mention that follows a verb.
    total_min = None
    m = (re.search(r'\b(?:corte|faça|faca|crie|criar|gerar|make|montar)\b.*?(\d+)\s*(?:minutos?|min)\b', s)
         or re.search(r'\b(\d+)\s*(?:minutos?|min)\b', s))
    if m:
        total_min = float(m.group(1))

    # Explicit start timecode. "começando" may be followed by "em", as in the
    # documented example "começando em 00:02:10:00".
    m = re.search(
        r'(?:(?:começando|comecando)(?:\s+em)?|a partir de|starting at|start at)'
        r'\s*(\d{1,2}:\d{2}:\d{2}(?:[:;]\d{2}|[.,]\d{1,3})?)',
        s,
    )
    start_tc = m.group(1) if m else None

    # Keywords: the rest of the sentence after the marker, split on separators.
    kw = []
    kw_match = re.search(r'(?:sobre|da parte do|tema|assunto)\s+(.+)', s)
    if kw_match:
        tail = kw_match.group(1)
        kw = [t.strip() for t in re.split(r'[,\.;/]', tail) if t.strip()]

    best = bool(re.search(r'melhores momentos', s))

    return CommandSpec(
        total_segments=count,
        per_segment_seconds=per_seg_sec,
        total_minutes=total_min,
        start_timecode=start_tc,
        keywords=kw,
        use_best_moments=best
    )
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
# =========================
|
| 341 |
+
# Utilidades de seleção
|
| 342 |
+
# =========================
|
| 343 |
def find_keyword_in_segments(segs: List[Segment], keywords: List[str]) -> int:
    """Return the index of the segment whose text contains the most keywords.

    Matching is a case-insensitive substring test; each keyword counts at
    most once per segment. Ties go to the earliest segment.

    Args:
        segs: Segments to search.
        keywords: Terms to look for.

    Returns:
        Index into ``segs``. Returns 0 when ``segs`` or ``keywords`` is empty,
        and also when no keyword matches anywhere, so a result of 0 does not
        by itself prove a match.
    """
    if not segs or not keywords:
        return 0

    # Lower-case the keywords once instead of once per segment.
    needles = [kw.lower() for kw in keywords]

    best_idx, best_score = 0, -1
    for idx, seg in enumerate(segs):
        haystack = seg.text.lower()
        score = sum(1 for needle in needles if needle in haystack)
        # Strict '>' keeps the first segment among equal scores.
        if score > best_score:
            best_idx, best_score = idx, score
    return best_idx
|
| 353 |
|
| 354 |
+
|
| 355 |
+
def create_continuous_segment_from(start_frame: int, duration_frames: int, segs_preview: List[Segment]) -> Segment:
    """Build one continuous ``Segment`` of ``duration_frames`` from ``start_frame``.

    The segment is always at least one frame long. Its text is a preview made
    from up to the first 10 entries of ``segs_preview`` (80 characters each,
    300 characters overall), and its score is fixed at 100.0.
    """
    # Guarantee a strictly positive length even for zero/negative durations.
    last_frame = max(start_frame + duration_frames, start_frame + 1)

    # Optional text preview.
    snippets = [item.text[:80] for item in segs_preview[:10] if item.text]
    preview = " ".join(snippets)[:300]
    if preview:
        label = "Corte contínuo: " + preview
    else:
        label = "Corte contínuo"

    return Segment(
        start_tc=frames_to_timecode(start_frame),
        end_tc=frames_to_timecode(last_frame),
        start_f=start_frame,
        end_f=last_frame,
        text=label,
        score=100.0
    )
|
| 371 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
+
def _resolve_cut_plan(spec) -> Tuple[float, int]:
    """Turn the duration fields of a parsed command into (seconds per cut, number of cuts)."""
    # Treat an unset/zero count as "not specified" so max() never sees None.
    requested = spec.total_segments or 0

    if spec.per_segment_seconds:
        return spec.per_segment_seconds, max(1, requested)
    if spec.total_minutes and requested > 1:
        # Split the total duration evenly, never below one second per cut.
        total_seconds = int(spec.total_minutes * 60)
        return max(1, total_seconds // requested), requested
    if spec.total_minutes:
        return int(spec.total_minutes * 60), 1
    return 60, max(1, requested)


def _ask_llm_for_start_index(segs: List[Segment], keywords) -> Optional[int]:
    """Ask the LLM which segment index starts the requested subject; None if it gives no usable answer."""
    try:
        # Lightweight preview of the first 80 segments (index|timecode|text).
        preview_text = "\n".join(
            f"{i}|{s.start_tc}|{(s.text or '')[:60]}"
            for i, s in enumerate(segs[:80])
        )
        wanted = ' '.join(keywords[:5]) or '(sem keywords)'
        prompt = (
            "Encontre o índice inicial do assunto solicitado, retornando apenas o número (ex: 42).\n"
            "\n"
            f"BUSCAR: {wanted}\n"
            "\n"
            "SEGMENTOS (índice|timecode|texto):\n"
            f"{preview_text}\n"
        )
        response = LLM.generate_content(
            prompt,
            generation_config={"temperature": 0.1, "max_output_tokens": 20}
        )
        answer = (response.text or "").strip()
        found = re.search(r'\b(\d+)\b', answer)
        if found:
            idx = int(found.group(1))
            if 0 <= idx < len(segs):
                return idx
    except Exception:
        # The LLM lookup is best-effort: record the failure and let the
        # caller fall back to the keyword/timecode start.
        import traceback
        traceback.print_exc()
    return None


def process_with_command(
    segs: List[Segment],
    command: str,
    use_llm: bool
) -> List[Segment]:
    """Build continuous cuts from a natural-language command.

    Works with or without a transcript:
    - without transcript: cuts start at the timecode given in the command
      (or frame 0) and follow each other back to back;
    - with transcript: the start is the segment chosen by keyword match,
      overridden by the LLM's answer when the LLM is enabled and returns a
      valid index; otherwise the command's timecode (or frame 0) is used.

    Duration rules:
    - per-cut seconds given -> applied to every cut;
    - otherwise total minutes with more than one cut -> split evenly;
    - only total minutes -> a single cut of that length;
    - nothing specified -> cuts of 60 seconds.

    Args:
        segs: Parsed transcript segments (may be empty).
        command: The user's instruction text.
        use_llm: Whether the LLM may be consulted (also requires LLM_AVAILABLE).

    Returns:
        The generated cuts, in timeline order.
    """
    spec = parse_natural_command(command)

    per_seg_seconds, total_segments = _resolve_cut_plan(spec)
    # Same length for every cut, so convert to frames once.
    duration_frames = int(per_seg_seconds * FPS)

    # Starting frame from an explicit timecode, falling back to 0 if it
    # cannot be parsed.
    start_frame = 0
    if spec.start_timecode:
        try:
            start_frame = parse_timecode_to_frames(spec.start_timecode)
        except Exception:
            start_frame = 0

    seg_preview: List[Segment] = []
    if segs:
        start_idx = None
        if spec.keywords:
            start_idx = find_keyword_in_segments(segs, spec.keywords)

        if use_llm and LLM_AVAILABLE:
            llm_idx = _ask_llm_for_start_index(segs, spec.keywords)
            if llm_idx is not None:
                start_idx = llm_idx

        # A located segment takes precedence over the command's timecode.
        if start_idx is not None and 0 <= start_idx < len(segs):
            start_frame = segs[start_idx].start_f
        start_frame = max(0, start_frame)

        # Text used to describe the cuts (identical for every cut).
        seg_preview = segs[start_idx:start_idx + 10] if start_idx is not None else segs[:10]

    # Chain the cuts: each one starts where the previous one ended.
    segments_out: List[Segment] = []
    base_frame = start_frame
    for _ in range(total_segments):
        seg = create_continuous_segment_from(base_frame, duration_frames, seg_preview)
        segments_out.append(seg)
        base_frame = seg.end_f

    return segments_out
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
# =========================
|
| 479 |
+
# Modo Automático (score simples)
|
| 480 |
+
# =========================
|
| 481 |
+
def auto_score_segments(
|
| 482 |
+
segs: List[Segment],
|
| 483 |
+
num_segments: int,
|
| 484 |
+
custom_keywords: str,
|
| 485 |
+
weight_emotion: float,
|
| 486 |
+
weight_break: float,
|
| 487 |
+
weight_learn: float,
|
| 488 |
+
weight_viral: float
|
| 489 |
+
) -> List[Segment]:
|
| 490 |
for s in segs:
|
| 491 |
+
score = 0.0
|
| 492 |
+
text = (s.text or "").lower()
|
| 493 |
+
|
| 494 |
if "medo" in text or "coragem" in text:
|
| 495 |
score += weight_emotion
|
| 496 |
if "nunca" in text or "de repente" in text:
|
|
|
|
| 499 |
score += weight_learn
|
| 500 |
if "segredo" in text or "verdade" in text:
|
| 501 |
score += weight_viral
|
| 502 |
+
|
| 503 |
if custom_keywords:
|
| 504 |
for kw in custom_keywords.split(","):
|
| 505 |
if kw.strip().lower() in text:
|
| 506 |
score += 3.0
|
| 507 |
+
|
| 508 |
s.score = score
|
| 509 |
+
|
| 510 |
segs.sort(key=lambda x: x.score, reverse=True)
|
| 511 |
return segs[:num_segments]
|
| 512 |
|
| 513 |
+
|
| 514 |
+
# =========================
|
| 515 |
+
# Edição de XML (Premiere)
|
| 516 |
+
# =========================
|
| 517 |
def deep_copy_element(elem: ET.Element) -> ET.Element:
    """Return an independent deep copy of ``elem`` and its whole subtree.

    The copy shares no nodes with the original, so it can be attached to a
    different parent or modified without affecting the source element.
    """
    # Imported here so the function does not depend on the file's import block.
    import copy

    # ElementTree elements support deepcopy (tag, attributes, text, tail and
    # all descendants), which replaces the hand-written recursion.
    return copy.deepcopy(elem)
|
| 524 |
|
| 525 |
+
|
| 526 |
def _build_clipitem(clip_id: str, name: str, timeline_start: int, seg: Segment,
                    template: Optional[ET.Element]) -> ET.Element:
    """Create one <clipitem> for ``seg`` placed at ``timeline_start`` on the timeline."""
    duration = seg.end_f - seg.start_f
    clip = ET.Element("clipitem", {"id": clip_id})
    for tag, value in (
        ("name", name),
        ("start", timeline_start),             # position on the timeline
        ("end", timeline_start + duration),
        ("in", seg.start_f),                   # range inside the source media
        ("out", seg.end_f),
    ):
        ET.SubElement(clip, tag).text = str(value)

    # Carry over the rate and file description of the track's original clip.
    if template is not None:
        for tag in ("rate", "file"):
            inherited = template.find(tag)
            if inherited is not None:
                clip.append(deep_copy_element(inherited))
    return clip


def edit_xml(tree: ET.ElementTree, segs: List[Segment]) -> ET.ElementTree:
    """Replace the clips of the sequence's first video/audio tracks with the given cuts.

    The first <clipitem> of each track (if any) serves as a template for the
    <rate> and <file> children; all existing clip items are then removed and
    one video plus one audio clip is appended per segment, laid out back to
    back from timeline position 0. Segments with a non-positive duration are
    skipped (their number is still consumed, so clip ids may have gaps).

    Args:
        tree: Parsed Premiere XML; modified in place.
        segs: Cuts to place; ``start_f`` and ``end_f`` are read.

    Returns:
        The same ``tree`` object.

    Raises:
        ValueError: If no <sequence> or no video/audio track is found.
    """
    root = tree.getroot()
    seq = root.find(".//sequence")
    if seq is None:
        raise ValueError("Sequence não encontrada no XML")

    v_track = seq.find(".//media/video/track")
    a_track = seq.find(".//media/audio/track")
    # Compare with None explicitly: an Element without children is falsy, so
    # a truthiness test would reject an existing but empty track.
    if v_track is None or a_track is None:
        raise ValueError("Trilhas de vídeo/áudio não encontradas")

    v_template = v_track.find("./clipitem")
    a_template = a_track.find("./clipitem")

    # Remove existing clips (findall returns a list, so removal is safe).
    for track in (v_track, a_track):
        for clip in track.findall("./clipitem"):
            track.remove(clip)

    # Append the new clips.
    timeline_pos = 0
    for i, seg in enumerate(segs, 1):
        duration = seg.end_f - seg.start_f
        if duration <= 0:
            continue

        v_track.append(_build_clipitem(f"clip-v{i}", f"Clip {i}", timeline_pos, seg, v_template))
        a_track.append(_build_clipitem(f"clip-a{i}", f"Clip {i}", timeline_pos, seg, a_template))
        timeline_pos += duration

    return tree
|
| 590 |
|
| 591 |
+
|
| 592 |
+
# =========================
|
| 593 |
+
# Seleção (orquestração)
|
| 594 |
+
# =========================
|
| 595 |
+
def select_segments(
    transcript_txt: str,
    use_llm: bool,
    num_segments: int,
    custom_keywords: str,
    manual_timecodes: str,
    natural_instructions: str,
    weight_emotion: float,
    weight_break: float,
    weight_learn: float,
    weight_viral: float
) -> List[Segment]:
    """Choose the cuts to export, trying each selection mode in priority order.

    1. Manual timecodes: if any are parsed, only they are used.
    2. Natural-language command: works with or without a transcript.
    3. Automatic scoring: requires a transcript.

    Returns:
        The selected segments. In manual mode the list can be empty when no
        pair could be converted to frames.

    Raises:
        ValueError: In automatic mode when the transcript yields no segments.
    """
    # 1) Manual
    manual = parse_manual_timecodes(manual_timecodes)
    if manual:
        result = []
        for start_tc, end_tc in manual:
            # Convert each timecode once; the frame counts feed both the
            # numeric fields and the normalised timecode strings.
            try:
                start_f = parse_timecode_to_frames(start_tc)
                end_f = parse_timecode_to_frames(end_tc)
            except Exception:
                # Best-effort: an unparseable pair is skipped so the
                # remaining manual cuts are still produced.
                continue
            result.append(Segment(
                start_tc=frames_to_timecode(start_f),
                end_tc=frames_to_timecode(end_f),
                start_f=start_f,
                end_f=end_f,
                text=f"Manual: {start_tc} - {end_tc}",
                score=100.0
            ))
        return result

    # 2) Transcript parsing (when a transcript was provided)
    segs = parse_transcript(transcript_txt) if transcript_txt else []

    # 3) Natural language (always allowed; works with or without transcript)
    if (natural_instructions or "").strip():
        return process_with_command(segs, natural_instructions, use_llm and LLM_AVAILABLE)

    # 4) Automatic
    if not segs:
        raise ValueError("Nenhum segmento encontrado. Forneça uma transcrição, minutagens ou um comando em linguagem natural.")
    return auto_score_segments(
        segs, num_segments, custom_keywords,
        weight_emotion, weight_break, weight_learn, weight_viral
    )
|
| 640 |
+
|
| 641 |
+
|
| 642 |
+
# =========================
|
| 643 |
+
# Pipeline principal
|
| 644 |
+
# =========================
|
| 645 |
+
def process_files(
    xml_file, txt_file, use_llm, num_segments,
    custom_keywords, manual_timecodes, natural_instructions,
    weight_emotion, weight_break, weight_learn, weight_viral
):
    """Run the whole pipeline for one request: select cuts, rewrite the XML, summarise.

    Args:
        xml_file: Uploaded Premiere XML; its ``name`` attribute is used as the path.
        txt_file: Optional uploaded transcript; read only when no manual
            timecodes were parsed.
        use_llm: Whether the LLM may be used (also requires LLM_AVAILABLE).
        num_segments, custom_keywords, weight_*: Automatic-mode settings,
            forwarded to ``select_segments``.
        manual_timecodes: Manual timecode text.
        natural_instructions: Natural-language command text.

    Returns:
        A ``(summary, output_path, status)`` tuple. ``output_path`` is None
        when nothing was written; failures are reported in ``summary``
        instead of being raised.
    """
    llm_status = f"LLM: {LLM_AVAILABLE}"
    if not xml_file:
        return "Envie o XML", None, llm_status

    # Guard against None so the .strip() calls below (and in the selection
    # step) cannot raise.
    natural_instructions = natural_instructions or ""

    try:
        # Read the transcript only when it is needed.
        transcript = ""
        manual = parse_manual_timecodes(manual_timecodes)

        if not manual and txt_file:
            with open(txt_file.name, "r", encoding="utf-8-sig") as f:
                transcript = f.read()

        # Select the segments
        segments = select_segments(
            transcript, use_llm and LLM_AVAILABLE, num_segments,
            custom_keywords, manual_timecodes, natural_instructions,
            weight_emotion, weight_break, weight_learn, weight_viral
        )

        if not segments:
            return "Nenhum segmento selecionado", None, llm_status

        # Edit the XML
        tree = ET.parse(xml_file.name)
        tree = edit_xml(tree, segments)

        # Save (make sure the output directory exists first)
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        basename = os.path.splitext(os.path.basename(xml_file.name))[0]
        output = os.path.join(OUTPUT_DIR, f"{basename}_EDITADO.xml")
        tree.write(output, encoding="utf-8", xml_declaration=True)

        # Summary
        total_sec = sum((s.end_f - s.start_f) / FPS for s in segments)
        total_min = total_sec / 60.0
        mode = "MANUAL" if manual else ("IA/NATURAL" if natural_instructions.strip() else "AUTOMÁTICO")

        summary_lines = [f"{len(segments)} corte(s) | {total_min:.1f} min total | Modo: {mode}"]
        for i, seg in enumerate(segments, 1):
            dur_sec = (seg.end_f - seg.start_f) / FPS
            line = f"{i}. {seg.start_tc} → {seg.end_tc} ({dur_sec/60:.1f} min)"
            # Only texts longer than 50 characters are shown as a preview.
            if seg.text and len(seg.text) > 50:
                excerpt = seg.text[:120]
                # Mark the preview as shortened only when it really was.
                if len(seg.text) > 120:
                    excerpt += "..."
                line += f"\n {excerpt}"
            summary_lines.append(line)
        summary = "\n".join(summary_lines)

        status = f"Sucesso | {mode} | {total_min:.1f} min | LLM: {LLM_AVAILABLE}"
        return summary, output, status

    except Exception as e:
        # UI boundary: log the traceback and report the error to the user
        # instead of letting the callback fail.
        import traceback
        traceback.print_exc()
        return f"Erro: {str(e)}", None, llm_status
|
| 702 |
+
|
| 703 |
|
| 704 |
+
# =========================
|
| 705 |
+
# Interface (Gradio)
|
| 706 |
+
# =========================
|
| 707 |
# Gradio layout: inputs at the top, one accordion per selection mode, then
# the action button and the result widgets.
with gr.Blocks(theme=gr.themes.Soft(), title="Editor XML Premiere") as demo:
    gr.Markdown("# Editor XML Premiere - IA")
    gr.Markdown("Cortes com transcrição, minutagens ou comando em linguagem natural.")

    with gr.Row():
        xml_in = gr.File(label="XML do Premiere", file_types=[".xml"])
        txt_in = gr.File(label="Transcrição (.txt) - opcional", file_types=[".txt"])

    with gr.Row():
        use_llm = gr.Checkbox(label="Usar IA (Gemini) quando útil", value=USE_LLM_DEFAULT and LLM_AVAILABLE)
        # NOTE(review): slider arguments are passed by keyword because
        # Slider's `step` appears to be keyword-only in Gradio's signature;
        # confirm against the installed version.
        num_segments = gr.Slider(minimum=2, maximum=20, value=5, step=1, label="Segmentos (modo automático)")

    with gr.Accordion("Comando em linguagem natural", open=True):
        gr.Markdown("""
Exemplos:
- "Crie 1 corte de 10 minutos começando da parte do tenista"
- "Quero 3 cortes de 30s sobre Maria e José"
- "Faça 2 cortes de 45s começando em 00:02:10:00"
Se não fornecer transcrição, os cortes serão contínuos a partir do timecode indicado (ou 00:00:00:00).
""")
        natural_instructions = gr.Textbox(
            label="Seu comando",
            placeholder='Ex: "Crie 2 cortes de 45s sobre coragem e disciplina, começando em 00:01:00:00"',
            lines=2
        )

    with gr.Accordion("Minutagens manuais", open=False):
        manual_timecodes = gr.Textbox(
            label="Timecodes (um por linha)",
            placeholder="00:21:18:09 - 00:31:18:09",
            lines=3
        )

    with gr.Accordion("Modo automático (com transcrição)", open=False):
        custom_keywords = gr.Textbox(label="Palavras-chave (separadas por vírgula)")
        with gr.Row():
            weight_emotion = gr.Slider(minimum=0, maximum=5, value=2.0, step=0.1, label="Peso: emoção")
            weight_break = gr.Slider(minimum=0, maximum=5, value=1.5, step=0.1, label="Peso: quebra")
        with gr.Row():
            weight_learn = gr.Slider(minimum=0, maximum=5, value=1.2, step=0.1, label="Peso: aprendizado")
            weight_viral = gr.Slider(minimum=0, maximum=5, value=1.0, step=0.1, label="Peso: viral")

    btn = gr.Button("Processar", variant="primary", size="lg")

    with gr.Row():
        with gr.Column(scale=2):
            summary_out = gr.Textbox(label="Resumo", lines=12)
        with gr.Column(scale=1):
            status_out = gr.Textbox(label="Status")
            file_out = gr.File(label="Download")

    # Inputs follow the parameter order of process_files; outputs follow its
    # (summary, output file, status) return order.
    btn.click(
        process_files,
        [xml_in, txt_in, use_llm, num_segments, custom_keywords,
         manual_timecodes, natural_instructions,
         weight_emotion, weight_break, weight_learn, weight_viral],
        [summary_out, file_out, status_out]
    )

if __name__ == "__main__":
    demo.launch()
|