File size: 2,883 Bytes
afb5eee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face model id used for planning; the MODEL_NAME environment variable
# overrides the built-in default.
MODEL_NAME = os.environ.get("MODEL_NAME", "google/flan-t5-base")

# Module-level tokenizer/model singletons. Both start out as None and are
# filled in by _load() the first time generation is requested.
_model = None
_tok = None

def _load():
    """Populate the module-level tokenizer and model on first use.

    Returns immediately when both singletons are already set. If either one
    is still None, both are loaded again from ``MODEL_NAME``.
    """
    global _tok, _model
    if _tok is not None and _model is not None:
        return
    _tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

def _gen(text: str) -> str:
    """Feed ``text`` to the seq2seq model and return its decoded reply.

    The prompt is tokenized with truncation enabled, generation runs with
    sampling disabled and a cap of 350 new tokens, and the first generated
    sequence is decoded without special tokens and stripped of surrounding
    whitespace.
    """
    _load()
    # NOTE(review): truncation=True presumably clips the prompt to the
    # tokenizer's maximum input length, so the tail of a long prompt may be
    # dropped before the model sees it -- confirm this is acceptable.
    encoded = _tok(text, truncation=True, return_tensors="pt")
    sequences = _model.generate(max_new_tokens=350, do_sample=False, **encoded)
    first_sequence = sequences[0]
    reply = _tok.decode(first_sequence, skip_special_tokens=True)
    return reply.strip()

# Prompt fragment embedded at the end of both the plan() and repair() prompts.
# It tells the model to answer with strict JSON only, sketches the expected
# plan shape (output extension, trim/resize/crop/fps, overlays, subtitles,
# audio mix, filter lists, notes) and adds two rules about input files.
# The text is sent to the model verbatim, so any edit here changes the prompt.
SCHEMA = r"""
Return STRICT JSON only (no markdown, no commentary). Shape:

{
  "output_ext": "mp4|mov|mkv|mp3|wav|gif",
  "trim": {"start": "0", "end": "10"} | null,
  "resize": "1280:720" | "1920:1080" | "1080:1920" | null,
  "crop": "w:h:x:y" | null,
  "fps": 30 | null,

  "text_overlays": [
    {"text":"...", "pos":"top-left|top-right|bottom-left|bottom-right|center|bottom-center",
     "start":"0"|null, "end":"10"|null}
  ],

  "logo_overlay": {"file":"input/..png", "pos":"top-right|top-left|bottom-right|bottom-left|center",
                   "scale":"180:180"|null, "opacity":0.7|null} | null,

  "subtitles": {"file":"input/..srt|input/..vtt"} | null,

  "audio_mix": [
    {"file":"input/..mp3|input/..wav|input/..m4a", "volume":0.2, "loop":true|false}
  ],

  "effects": {
    "video_filters": ["hue=s=0", "eq=contrast=1.1", "boxblur=2:1"] ,
    "audio_filters": ["afade=t=in:st=0:d=1", "loudnorm"]
  },

  "notes":"short"
}

Rules:
- The FIRST downloaded file (input0) is the main video/audio source.
- Only reference provided local paths exactly.
"""

def _bullet_list(local_files: list[str]) -> str:
    """Render paths as one '- path' line each, or '- (none)' when empty."""
    return "\n".join(f"- {f}" for f in local_files) or "- (none)"


def _parse_json_reply(raw: str, who: str) -> dict:
    """Decode the JSON object embedded in a model reply.

    The candidate span runs from the first "{" to the last "}" in ``raw``,
    which lets the model wrap the object in stray text.

    Args:
        raw: Text returned by the model.
        who: Label used in error messages ("Planner" or "Repair").

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If ``raw`` holds no "{...}" span at all.
        json.JSONDecodeError: If the span is not valid JSON. This is a
            ValueError subclass; the message names the stage and quotes the
            start of the reply.
    """
    start = raw.find("{")
    end = raw.rfind("}")
    # `end < start` covers a missing "}" (end == -1) as well as a stray "}"
    # that only appears before the first "{"; the latter would otherwise
    # slice an empty string and fail with an uninformative decode error.
    if start == -1 or end < start:
        raise ValueError(f"{who} did not return JSON. Got: {raw[:220]}")
    try:
        return json.loads(raw[start:end + 1])
    except json.JSONDecodeError as err:
        # Re-raise as the same exception type so existing handlers still
        # match, but say which stage failed and what the model produced.
        raise json.JSONDecodeError(
            f"{who} returned malformed JSON ({err.msg}). Got: {raw[:220]}",
            err.doc,
            err.pos,
        ) from err


def plan(local_files: list[str], probes: dict, user_prompt: str) -> dict:
    """Ask the model for an FFmpeg edit plan and return it as a dict.

    Args:
        local_files: Paths of the available local files, listed in the prompt.
        probes: ffprobe metadata; serialized to JSON and cut to 6500 chars.
        user_prompt: The user's editing request, inserted verbatim.

    Returns:
        The JSON object decoded from the model reply.

    Raises:
        ValueError: If the reply contains no JSON object, or (as
            json.JSONDecodeError) if the object cannot be decoded.
    """
    files_list = _bullet_list(local_files)
    text = f"""
You are a media editing planner for FFmpeg.

Available local files:
{files_list}

ffprobe metadata (JSON, may be partial):
{json.dumps(probes)[:6500]}

User request:
{user_prompt}

{SCHEMA}
"""
    return _parse_json_reply(_gen(text), "Planner")


def repair(local_files: list[str], probes: dict, user_prompt: str, last_cmd: str, stderr_tail: str) -> dict:
    """Ask the model for a corrected plan after an FFmpeg command failed.

    Args:
        local_files: Paths of the available local files, listed in the prompt.
        probes: ffprobe metadata; serialized to JSON and cut to 6500 chars.
        user_prompt: The user's editing request, inserted verbatim.
        last_cmd: The command that failed, inserted verbatim.
        stderr_tail: FFmpeg stderr; only the last 2400 characters are sent.

    Returns:
        The JSON object decoded from the model reply.

    Raises:
        ValueError: If the reply contains no JSON object, or (as
            json.JSONDecodeError) if the object cannot be decoded.
    """
    files_list = _bullet_list(local_files)
    text = f"""
You are fixing a failed FFmpeg plan.

Files:
{files_list}

Metadata:
{json.dumps(probes)[:6500]}

User request:
{user_prompt}

Last command:
{last_cmd}

FFmpeg stderr tail:
{stderr_tail[-2400:]}

Return corrected JSON ONLY.

{SCHEMA}
"""
    return _parse_json_reply(_gen(text), "Repair")