Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Model identifier handed to from_pretrained(); the MODEL_NAME environment
# variable overrides the built-in default.
MODEL_NAME = os.environ.get("MODEL_NAME", "google/flan-t5-base")

# Module-level cache for the tokenizer and model. Both start out unset and are
# populated by _load() the first time generation is requested.
_tok = _model = None
def _load():
    """Populate the module-level tokenizer and model caches if needed.

    Does nothing when both ``_tok`` and ``_model`` are already set; otherwise
    (re)loads both from ``MODEL_NAME``.
    """
    global _tok, _model
    if _tok is not None and _model is not None:
        return
    _tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    _model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
def _gen(text: str) -> str:
    """Run the cached seq2seq model on *text* and return its decoded output.

    Sampling is disabled and generation is capped at 350 new tokens. Special
    tokens are dropped and surrounding whitespace is stripped from the result.
    """
    _load()
    # NOTE(review): truncation=True lets the tokenizer cut over-long prompts,
    # presumably at the model's maximum input length - confirm that the prompts
    # built by the callers actually fit.
    encoded = _tok(text, return_tensors="pt", truncation=True)
    generated = _model.generate(**encoded, max_new_tokens=350, do_sample=False)
    first_sequence = generated[0]
    decoded = _tok.decode(first_sequence, skip_special_tokens=True)
    return decoded.strip()
# Prompt fragment describing the JSON plan the model is asked to produce.
# plan() and repair() append it verbatim to their prompts, so every character
# below is runtime text sent to the model. The "a" | "b" | null notation is a
# description of allowed values for the model, not literal JSON.
SCHEMA = r"""
Return STRICT JSON only (no markdown, no commentary). Shape:
{
"output_ext": "mp4|mov|mkv|mp3|wav|gif",
"trim": {"start": "0", "end": "10"} | null,
"resize": "1280:720" | "1920:1080" | "1080:1920" | null,
"crop": "w:h:x:y" | null,
"fps": 30 | null,
"text_overlays": [
{"text":"...", "pos":"top-left|top-right|bottom-left|bottom-right|center|bottom-center",
"start":"0"|null, "end":"10"|null}
],
"logo_overlay": {"file":"input/..png", "pos":"top-right|top-left|bottom-right|bottom-left|center",
"scale":"180:180"|null, "opacity":0.7|null} | null,
"subtitles": {"file":"input/..srt|input/..vtt"} | null,
"audio_mix": [
{"file":"input/..mp3|input/..wav|input/..m4a", "volume":0.2, "loop":true|false}
],
"effects": {
"video_filters": ["hue=s=0", "eq=contrast=1.1", "boxblur=2:1"] ,
"audio_filters": ["afade=t=in:st=0:d=1", "loudnorm"]
},
"notes":"short"
}
Rules:
- The FIRST downloaded file (input0) is the main video/audio source.
- Only reference provided local paths exactly.
"""
def _bullet_list(local_files: list[str]) -> str:
    """Render *local_files* as one ``- path`` line each, or ``- (none)`` if empty."""
    return "\n".join(f"- {path}" for path in local_files) or "- (none)"


def _extract_json(raw: str, who: str) -> dict:
    """Parse the JSON object that starts at the first ``{`` in *raw*.

    Args:
        raw: Free-form text produced by the model.
        who: Label used in error messages (e.g. ``"Planner"``).

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: If *raw* contains no ``{`` or no ``}`` at all.
        json.JSONDecodeError: If the text starting at the first ``{`` is not a
            valid JSON object. The message includes the head of *raw*.
    """
    start = raw.find("{")
    if start == -1 or raw.rfind("}") == -1:
        raise ValueError(f"{who} did not return JSON. Got: {raw[:220]}")
    try:
        # raw_decode stops at the end of the first complete JSON value, so
        # trailing chatter (even chatter containing braces) cannot corrupt the
        # parse the way a first-"{"-to-last-"}" slice can.
        obj, _end = json.JSONDecoder().raw_decode(raw[start:])
    except json.JSONDecodeError as err:
        # Same exception type as a plain json.loads failure, but with the
        # model output attached so the failure is diagnosable from the log.
        raise json.JSONDecodeError(
            f"{who} returned malformed JSON ({err.msg}). Got: {raw[:220]}",
            err.doc,
            err.pos,
        ) from err
    return obj


def plan(local_files: list[str], probes: dict, user_prompt: str) -> dict:
    """Ask the model for an edit plan and return it as a parsed JSON object.

    Args:
        local_files: Paths listed in the prompt as the available inputs.
        probes: ffprobe metadata; serialized with ``json.dumps`` and cut to
            the first 6500 characters before being placed in the prompt.
        user_prompt: The user's editing request, inserted verbatim.

    Returns:
        The JSON object decoded from the model's reply.

    Raises:
        ValueError: If the reply contains no braces at all.
        json.JSONDecodeError: If the reply's JSON object is malformed.
    """
    files_list = _bullet_list(local_files)
    probes_json = json.dumps(probes)[:6500]
    text = f"""
You are a media editing planner for FFmpeg.
Available local files:
{files_list}
ffprobe metadata (JSON, may be partial):
{probes_json}
User request:
{user_prompt}
{SCHEMA}
"""
    return _extract_json(_gen(text), "Planner")


def repair(local_files: list[str], probes: dict, user_prompt: str, last_cmd: str, stderr_tail: str) -> dict:
    """Ask the model for a corrected plan after a failed FFmpeg run.

    Args:
        local_files: Paths listed in the prompt as the available inputs.
        probes: ffprobe metadata; serialized with ``json.dumps`` and cut to
            the first 6500 characters before being placed in the prompt.
        user_prompt: The user's editing request, inserted verbatim.
        last_cmd: The command that failed, inserted verbatim.
        stderr_tail: FFmpeg stderr; only its last 2400 characters are sent.

    Returns:
        The JSON object decoded from the model's reply.

    Raises:
        ValueError: If the reply contains no braces at all.
        json.JSONDecodeError: If the reply's JSON object is malformed.
    """
    files_list = _bullet_list(local_files)
    probes_json = json.dumps(probes)[:6500]
    stderr_excerpt = stderr_tail[-2400:]
    text = f"""
You are fixing a failed FFmpeg plan.
Files:
{files_list}
Metadata:
{probes_json}
User request:
{user_prompt}
Last command:
{last_cmd}
FFmpeg stderr tail:
{stderr_excerpt}
Return corrected JSON ONLY.
{SCHEMA}
"""
    return _extract_json(_gen(text), "Repair")