# Page residue from the file viewer this source was captured from
# (Spaces: Sleeping; file size: 2,883 bytes; revision afb5eee).
import os
import json
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Name handed to from_pretrained() for both the tokenizer and the model;
# the MODEL_NAME environment variable overrides the default.
MODEL_NAME = os.getenv("MODEL_NAME", "google/flan-t5-base")
# Module-level cache: both stay None until _load() assigns them.
_tok = None
_model = None
def _load():
    """Populate the module-level tokenizer/model cache if it is incomplete.

    Does nothing when both ``_tok`` and ``_model`` are already set.
    Otherwise both are (re)created from ``MODEL_NAME``, tokenizer first.
    """
    global _tok, _model
    if _tok is not None and _model is not None:
        # Cache is complete; nothing to load.
        return
    _tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
def _gen(text: str) -> str:
    """Run *text* through the cached seq2seq model and return its reply.

    The prompt is tokenized with truncation enabled, generation is capped
    at 350 new tokens with sampling disabled, and the first returned
    sequence is decoded without special tokens and stripped of
    surrounding whitespace.
    """
    _load()
    encoded = _tok(text, return_tensors="pt", truncation=True)
    generated = _model.generate(**encoded, max_new_tokens=350, do_sample=False)
    first_sequence = generated[0]
    reply = _tok.decode(first_sequence, skip_special_tokens=True)
    return reply.strip()
# Prompt fragment embedded at the end of the prompts built by plan() and
# repair(). It tells the model to answer with a single JSON object, sketches
# the expected keys, and states the rules for referencing input files.
# The text is sent to the model verbatim, so edits here change model behavior.
SCHEMA = r"""
Return STRICT JSON only (no markdown, no commentary). Shape:
{
"output_ext": "mp4|mov|mkv|mp3|wav|gif",
"trim": {"start": "0", "end": "10"} | null,
"resize": "1280:720" | "1920:1080" | "1080:1920" | null,
"crop": "w:h:x:y" | null,
"fps": 30 | null,
"text_overlays": [
{"text":"...", "pos":"top-left|top-right|bottom-left|bottom-right|center|bottom-center",
"start":"0"|null, "end":"10"|null}
],
"logo_overlay": {"file":"input/..png", "pos":"top-right|top-left|bottom-right|bottom-left|center",
"scale":"180:180"|null, "opacity":0.7|null} | null,
"subtitles": {"file":"input/..srt|input/..vtt"} | null,
"audio_mix": [
{"file":"input/..mp3|input/..wav|input/..m4a", "volume":0.2, "loop":true|false}
],
"effects": {
"video_filters": ["hue=s=0", "eq=contrast=1.1", "boxblur=2:1"] ,
"audio_filters": ["afade=t=in:st=0:d=1", "loudnorm"]
},
"notes":"short"
}
Rules:
- The FIRST downloaded file (input0) is the main video/audio source.
- Only reference provided local paths exactly.
"""
def plan(local_files: list[str], probes: dict, user_prompt: str) -> dict:
    """Ask the model for an FFmpeg edit plan and return it as a dict.

    The prompt lists the available local files, the ffprobe metadata
    (``json.dumps`` output cut to 6500 characters), the user request and
    ``SCHEMA``. The reply from ``_gen`` is then searched for a JSON object.

    Args:
        local_files: Paths offered to the model, one bullet per entry;
            an empty list is rendered as ``- (none)``.
        probes: ffprobe metadata; must be JSON-serializable.
        user_prompt: The user's editing request, inserted verbatim.

    Returns:
        The JSON object that starts at the first ``{`` of the reply.

    Raises:
        ValueError: If the reply contains no ``{`` or no ``}``.
        json.JSONDecodeError: (a ``ValueError`` subclass) If braces are
            present but no valid JSON object starts at the first ``{``.
    """
    files_list = "\n".join(f"- {f}" for f in local_files) or "- (none)"
    text = f"""
You are a media editing planner for FFmpeg.
Available local files:
{files_list}
ffprobe metadata (JSON, may be partial):
{json.dumps(probes)[:6500]}
User request:
{user_prompt}
{SCHEMA}
"""
    raw = _gen(text)

    # NOTE(review): T5 SentencePiece vocabularies reportedly have no '{' / '}'
    # tokens, so with the default flan-t5 model this check may always fail.
    # Confirm against the tokenizer actually deployed.
    start = raw.find("{")
    if start == -1 or raw.rfind("}") == -1:
        raise ValueError(f"Planner did not return JSON. Got: {raw[:220]}")

    # Decode exactly one object beginning at the first "{". Unlike slicing up
    # to the last "}", this tolerates trailing commentary that itself contains
    # braces, while yielding the same dict whenever the old slice was valid.
    try:
        parsed, _ = json.JSONDecoder().raw_decode(raw[start:])
    except json.JSONDecodeError as err:
        # Same exception type as before, but with the reply excerpt attached
        # and the position mapped back onto the full reply.
        raise json.JSONDecodeError(
            f"Planner returned malformed JSON ({err.msg}). Got: {raw[:220]}",
            raw,
            start + err.pos,
        ) from err
    return parsed
def repair(local_files: list[str], probes: dict, user_prompt: str, last_cmd: str, stderr_tail: str) -> dict:
    """Ask the model for a corrected plan after an FFmpeg run failed.

    The prompt repeats the file list, the ffprobe metadata (``json.dumps``
    output cut to 6500 characters) and the user request, then adds the
    failed command, the last 2400 characters of *stderr_tail* and
    ``SCHEMA``. The reply from ``_gen`` is then searched for a JSON object.

    Args:
        local_files: Paths offered to the model, one bullet per entry;
            an empty list is rendered as ``- (none)``.
        probes: ffprobe metadata; must be JSON-serializable.
        user_prompt: The user's editing request, inserted verbatim.
        last_cmd: The command shown to the model under "Last command".
        stderr_tail: FFmpeg stderr text; only its final 2400 characters
            are included in the prompt.

    Returns:
        The JSON object that starts at the first ``{`` of the reply.

    Raises:
        ValueError: If the reply contains no ``{`` or no ``}``.
        json.JSONDecodeError: (a ``ValueError`` subclass) If braces are
            present but no valid JSON object starts at the first ``{``.
    """
    files_list = "\n".join(f"- {f}" for f in local_files) or "- (none)"
    text = f"""
You are fixing a failed FFmpeg plan.
Files:
{files_list}
Metadata:
{json.dumps(probes)[:6500]}
User request:
{user_prompt}
Last command:
{last_cmd}
FFmpeg stderr tail:
{stderr_tail[-2400:]}
Return corrected JSON ONLY.
{SCHEMA}
"""
    raw = _gen(text)

    # NOTE(review): T5 SentencePiece vocabularies reportedly have no '{' / '}'
    # tokens, so with the default flan-t5 model this check may always fail.
    # Confirm against the tokenizer actually deployed.
    start = raw.find("{")
    if start == -1 or raw.rfind("}") == -1:
        raise ValueError(f"Repair did not return JSON. Got: {raw[:220]}")

    # Decode exactly one object beginning at the first "{". Unlike slicing up
    # to the last "}", this tolerates trailing commentary that itself contains
    # braces, while yielding the same dict whenever the old slice was valid.
    try:
        parsed, _ = json.JSONDecoder().raw_decode(raw[start:])
    except json.JSONDecodeError as err:
        # Same exception type as before, but with the reply excerpt attached
        # and the position mapped back onto the full reply.
        raise json.JSONDecodeError(
            f"Repair returned malformed JSON ({err.msg}). Got: {raw[:220]}",
            raw,
            start + err.pos,
        ) from err
    return parsed