#!/usr/bin/env python3 """ Gera títulos e descrições para redes sociais a partir dos cortes (transcript + cuts). Uso: python generate_post_texts_from_cuts.py [--persona "sua persona"] [--hashtags #tag1 #tag2] python generate_post_texts_from_cuts.py --ollama-model llama3.1:8b # usa IA local """ import argparse, json, os, re, requests from pathlib import Path from typing import List, Dict, Any def load_json(path): with open(path, "r", encoding="utf-8") as f: return json.load(f) def cap(s: str, n: int) -> str: s = s.strip() return (s[:n-1] + "…") if len(s) > n else s def normalize_whitespace(s: str) -> str: return re.sub(r"\s+", " ", s).strip() def overlap(a1, a2, b1, b2): return max(0.0, min(a2, b2) - max(a1, b1)) def collect_text_for_segments(transcript: List[Dict[str, Any]], segments: List[Dict[str, float]]) -> str: buf = [] for seg in segments: s, e = float(seg["start"]), float(seg["end"]) for t in transcript: ts, te = float(t["start"]), float(t["end"]) if overlap(s, e, ts, te) > 0.01: buf.append(t.get("text","").strip()) txt = " ".join(x for x in buf if x) return normalize_whitespace(txt) def first_sentence(s: str, max_len=140) -> str: s = normalize_whitespace(s) m = re.split(r"(?<=[\.\!\?])\s+", s) cand = (m[0] if m else s) or s return cap(cand, max_len) def build_titles_and_descs(text: str, persona: str, hashtags: List[str], yt_len=70, ig_len=140, tt_len=120, max_ig_tags=5, max_tt_tags=8) -> Dict[str,str]: txt = text or "" title = cap(first_sentence(txt, yt_len), yt_len) core_ig = first_sentence(txt, ig_len) ig = f"{core_ig}\nAssiste até o fim e comenta 👇" tags_ig = " ".join(hashtags[:max_ig_tags]) if hashtags else "" if tags_ig: ig = f"{ig}\n{tags_ig}" core_tt = first_sentence(txt, tt_len) tt = f"{core_tt}\nCurte e segue p/ mais 🔔" tags_tt = " ".join(hashtags[:max_tt_tags]) if hashtags else "" if tags_tt: tt = f"{tt}\n{tags_tt}" return {"yt_title": title, "ig_desc": ig.strip(), "tt_desc": tt.strip()} def call_ollama(model: str, prompt: str, url: str) -> str: payload = { "model": model, "prompt": prompt, "temperature": 0.4, "stream": False, "format": "json", "options": {"num_ctx": 8192, "num_predict": 384} } r = requests.post(url.rstrip("/") + "/api/generate", json=payload, timeout=120) r.raise_for_status() return r.json().get("response", "") def _coerce_json(raw: str) -> Dict[str, str]: txt = (raw or "").strip() try: return json.loads(txt) except Exception: pass m = re.search(r"\{[\s\S]*\}", txt) if not m: raise ValueError("no-json-object") jtxt = m.group(0) jtxt = jtxt.replace("\u201c", '"').replace("\u201d", '"').replace("\u2018", "'").replace("\u2019", "'") jtxt = re.sub(r",\s*(\}|\])", r"\1", jtxt) if '"' not in jtxt and "'" in jtxt: jtxt = jtxt.replace("'", '"') return json.loads(jtxt) def with_ollama(text: str, persona: str, hashtags: List[str], model: str, server_url: str) -> Dict[str,str]: prompt = f''' Responda ESTRITAMENTE em JSON válido (sem texto extra, sem markdown, sem explicações). Gere campos: - yt_title: string (<= 70 chars, chamativo, sem hashtags) - ig_desc: string (≈120–150 chars, termina com linha de hashtags IG) - tt_desc: string (≈100–140 chars, termina com linha de hashtags TikTok) PERSONA: {persona or '-'} HASHTAGS_IG: {' '.join(hashtags[:5])} HASHTAGS_TT: {' '.join(hashtags[:8])} TEXTO_DO_CORTE (transcrição bruta, use para inspirar o copy): """{text.strip()[:2000]}""" Retorne APENAS um objeto JSON com exatamente estas chaves: {{ "yt_title": "...", "ig_desc": "...\n{' '.join(hashtags[:5])}", "tt_desc": "...\n{' '.join(hashtags[:8])}" }} ''' try: raw = call_ollama(model, prompt, server_url) data = _coerce_json(raw) data["yt_title"] = cap(data.get("yt_title",""), 70) data["ig_desc"] = cap(data.get("ig_desc",""), 300) data["tt_desc"] = cap(data.get("tt_desc",""), 220) return data except Exception as e: print(f"[warn] Ollama retornou JSON inválido: {e}. Usando heurística.") return build_titles_and_descs(text, persona, hashtags) def main(): ap = argparse.ArgumentParser("Gera títulos/descrições para redes a partir dos cortes.") ap.add_argument("base", help="Base do arquivo (ex.: 'meu_video' sem sufixos)") ap.add_argument("--persona", default="criador(a) de conteúdo", help="Breve dica de persona para compor textos") ap.add_argument("--hashtags", nargs="*", default=["#criacaodeconteudo","#video","#shorts"], help="Hashtags prioritárias") ap.add_argument("--ollama-model", default="", help="Modelo Ollama para copy (ex.: llama3.1:8b)") ap.add_argument("--ollama-url", default="http://localhost:11434", help="URL do Ollama") ap.add_argument("--out", default="", help="Arquivo de saída (default: _posts.txt)") args = ap.parse_args() base = args.base cuts_path = f"{base}_cuts.json" transcript_path = f"{base}_transcript.json" if not os.path.exists(cuts_path) or not os.path.exists(transcript_path): print(f"ERRO: não achei '{cuts_path}' ou '{transcript_path}'. Rode na pasta correta.") raise SystemExit(1) cuts = load_json(cuts_path) transcript = load_json(transcript_path) out_path = args.out or f"{base}_posts.txt" lines = [] for i, c in enumerate(cuts, 1): segs = c.get("segments") or [] if not segs and "start" in c and "end" in c: segs = [{"start": c["start"], "end": c["end"]}] text = collect_text_for_segments(transcript, segs) if args.ollama_model: results = with_ollama(text, args.persona, args.hashtags, args.ollama_model, args.ollama_url) else: results = build_titles_and_descs(text, args.persona, args.hashtags) lines.append(f"Corte {i}") lines.append("YouTube Shorts — Título:") lines.append("👉 " + results["yt_title"]) lines.append("") lines.append("Instagram Reels — Descrição:") lines.append(results["ig_desc"]) lines.append("") lines.append("TikTok — Descrição:") lines.append(results["tt_desc"]) lines.append("\n" + "-"*60 + "\n") Path(out_path).write_text("\n".join(lines).rstrip()+"\n", encoding="utf-8") print(f"✅ Gerado: {out_path}") if __name__ == "__main__": main()