# 111 / app.py
# 194130157a's picture
# Create app.py
# ba1ad38 verified
import os
import sys
import subprocess
import re
import time
import zipfile
import json
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
# 1. Auto-install dependencies
def ensure_dependencies():
    """Make sure ``gradio`` and ``requests`` are importable, installing them if not.

    Both packages are imported as a probe. If either import fails, pip is run
    through the current interpreter (``sys.executable -m pip``) to install both
    packages, and the import system's finder caches are then invalidated so the
    freshly installed packages can be found by later ``import`` statements in
    this same process.

    Raises:
        subprocess.CalledProcessError: If the pip command exits with a
            non-zero status.
    """
    try:
        import gradio  # noqa: F401 -- imported only to probe availability
        import requests  # noqa: F401 -- imported only to probe availability
    except ImportError:
        import importlib

        print("Installing required packages: gradio, requests...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio", "requests"])
        # The failed probe above may have cached directory listings taken
        # before the install; drop them so the new packages are visible.
        importlib.invalidate_caches()
# Run the dependency check now, ahead of the gradio/requests imports that follow.
ensure_dependencies()
import gradio as gr
import requests
# ================= Configuration =================
# SECURITY(review): the two key literals below are credentials committed to
# source, and they are also pre-filled into the UI key textboxes. They are kept
# only as backward-compatible fallbacks; rotate them and supply real keys via
# the LLM_API_KEY / VIDEO_API_KEY environment variables instead.
DEFAULT_LLM_API_KEY = (
    os.environ.get("LLM_API_KEY")
    or "sk-DZ5g7Zu0lFDlR7mBkbNsZLFTt1KBqA8ocsAH1mcvsZDWtydx"
)
DEFAULT_VIDEO_API_KEY = (
    os.environ.get("VIDEO_API_KEY")
    or "sk-G6LN0uC2BVclZjx1ObDJPkMZTZvtjau1Ss7GjCvRLJyI5euU"
)
# Base URL of the API gateway; every request in this file targets
# f"{MERCHANT_BASE_URL}/v1/chat/completions".
MERCHANT_BASE_URL = "https://xingjiabiapi.com"
# Model name sent with video-rendering requests.
VEO_MODEL = "veo_3_1-fast"
# Value of the "size" field sent with video-rendering requests.
VIDEO_SIZE = "16x9"
# Model name sent with every text (LLM) request.
TEXT_MODEL = "gemini-3-pro-preview-thinking"
# ===============================================
# --- Stage 1: Engineer ---
# ===============================================
# System prompt (written in Chinese) for the Stage 1 LLM call. The UI passes it
# to generate_process_architecture as the architect_prompt argument. It asks
# for a 20-30 step process description running from raw-material sourcing to
# final packaging, one step per line in a fixed "Step N | ..." format.
DEFAULT_ARCHITECT_PROMPT = """
你是一位严谨的**首席工艺工程师**。
任务:设计《全生命周期工艺说明书》。
要求:
1. 必须从**原材料获取 (Source)** 开始(如采矿、采摘)。
2. 必须以**成品包装 (Packaging)** 结束。
3. 必须包含 20-30 个物理步骤,且步骤之间必须遵循物理定律。
4. 输出格式:Step N | [Name] | [Mechanism] | [Input State] -> [Output State]
"""
# ===============================================
# --- 阶段二:导演 1.0 (Director V1 - Skeleton) ---
# ===============================================
# 它的任务是把工程师的技术文档,拆解成 120 个“剧情梗概”。
# 它不负责画面连贯,只负责把故事讲完,填满 120 个坑。
# ===============================================
# --- Stage 3: Director 2.0 (Director V2 - Continuity) ---
# ===============================================
# Its job is "stitching": given V1's synopsis and the ending of the previous
# shot, it writes the detailed prompt for the current shot.
# System prompt (written in Chinese) used by director_v2_shot_generation. It
# requires each shot to open on the previous shot's final frame and to end with
# a [NEXT_LINK] description that is handed to the next call.
DEFAULT_DIRECTOR_V2_PROMPT = """
你是一位 **IMAX 视觉连续性导演 (Visual Continuity Director)**。
你正在进行**“单镜头递归生成”**工作。
**你的输入:**
1. **剧情梗概 (From Director V1)**: 当前镜头应该发生什么动作。
2. **上一镜头的结尾画面 (Freeze Frame)**: 观众在上一秒看到的最后画面是什么。
**你的任务:**
编写当前镜头的 Veo Prompt。
**🔥 绝对铁律 (The Continuity Law):**
* **入口锁死 (Entry Lock)**: 当前镜头的 `Start_Frame_Visual` 必须**100% 匹配**上一镜头的结尾。
* *例子*: 如果上一镜头结束时“苹果在空中被切开了一半”,当前镜头必须从“空中半开的苹果”开始继续切,绝不能从“完整的苹果”开始。
* **状态演变 (State Evolution)**: 描述动作过程。
* **出口冻结 (Exit Freeze)**: 清晰定义当前镜头结束时的画面,为了给下一个镜头做铺垫。
**输出包含两部分:**
1. **Veo Prompt**: 给视频模型看的提示词。
2. **[NEXT_LINK]**: 给下一个 LLM 对话看的“结尾状态描述”。
"""
# ===============================================
# --- 核心功能函数 ---
# ===============================================
def generate_process_architecture(topic, api_key, architect_prompt):
    """Stage 1: ask the text LLM (as "engineer") to draft a process architecture.

    Args:
        topic: Product name entered by the user.
        api_key: Bearer token for the chat-completions endpoint.
        architect_prompt: System prompt that defines the engineer persona.

    Returns:
        The model's reply text on success. For missing or whitespace-only
        input, a short "❌ ..." message. For any request or parsing failure, a
        string of the form ``"Error: <exception>"``. The function does not
        raise for these cases because its result is displayed directly in a
        UI textbox.
    """
    # Whitespace-only input is as unusable as missing input.
    if not topic or not topic.strip():
        return "❌ 请先输入产品名称"
    if not api_key or not api_key.strip():
        return "❌ 请先输入 LLM API Key"

    print(f"🧠 [Engineer] 正在设计《{topic}》工艺流程...")
    user_content = f"Design a Full Lifecycle Industrial Process Protocol for: {topic}. From Source to Packaging."
    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": architect_prompt},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.5,
        "max_tokens": 4096,
    }
    try:
        res = requests.post(url, headers=headers, json=data, timeout=120)
        # Report HTTP failures (bad key, rate limit, server error) as such,
        # instead of as a confusing "'choices'" KeyError from the line below.
        res.raise_for_status()
        return res.json()['choices'][0]['message']['content']
    except Exception as e:
        return f"Error: {e}"
def director_v1_skeleton(topic, architecture, total_shots, api_key):
    """Stage 2: Director V1 writes the whole narrative skeleton in one LLM call.

    Args:
        topic: Product name.
        architecture: Stage 1 engineering protocol text. When it is non-empty
            and is not one of Stage 1's failure strings, it is embedded in the
            prompt so the model actually sees the protocol it is asked to
            break down.
        total_shots: Requested number of shots; coerced to ``int``.
        api_key: Bearer token for the chat-completions endpoint.

    Returns:
        A list of at most ``total_shots`` stripped lines from the reply that
        start with a number followed by ``.`` or ``:``. An empty list is
        returned on any request or parsing failure.
    """
    # A float here would break the slice at the end of this function.
    total_shots = int(total_shots)
    # Keep the opening range valid ("Shots 1-1") when fewer than 10 shots are requested.
    opening_shots = max(1, int(total_shots * 0.1))
    print(f"🎬 [Director V1] 正在规划 {total_shots} 个镜头的叙事大纲...")

    protocol_text = (architecture or "").strip()
    # Stage 1 reports its failures as "❌ ..." / "Error: ..." strings; those are
    # not a protocol and must not be fed to the model.
    if protocol_text.startswith(("❌", "Error:")):
        protocol_text = ""
    protocol_section = f"Engineering Protocol:\n{protocol_text}\n" if protocol_text else ""

    prompt = f"""
Role: Director V1 (Narrative Architect).
Input: Engineering Protocol for {topic}.
{protocol_section}Task: Break this down into exactly {total_shots} SHORT scene descriptions (Skeleton List).
Structure Rules:
- Shots 1-{opening_shots}: Epic Raw Material Acquisition (Nature/Mining).
- The Rest: Factory Processing, Assembly, Packing.
Output Format (Plain List):
1. [Brief description of action]
2. [Brief description of action]
...
{total_shots}. [Final Product Stored]
Only output the numbered list. No intro/outro.
"""
    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "max_tokens": 8192,
    }
    try:
        res = requests.post(url, headers=headers, json=data, timeout=120)
        # Fail with the HTTP reason rather than a KeyError on an error payload.
        res.raise_for_status()
        content = res.json()['choices'][0]['message']['content']
        # Keep only the numbered lines ("12. ..." or "12: ...").
        lines = [line.strip() for line in content.split('\n') if re.match(r'^\d+[\.:]', line.strip())]
        # Surplus lines are truncated; a short list is returned as-is (no padding).
        return lines[:total_shots]
    except Exception as e:
        print(f"Skeleton Error: {e}")
        return []
def director_v2_shot_generation(index, total, skeleton_idea, prev_end_state, api_key):
    """Stage 3: Director V2 writes the detailed prompt for a single shot.

    Args:
        index: 1-based number of the shot being written.
        total: Total number of shots, used in the "Shot i/total" labels.
        skeleton_idea: Director V1's synopsis line for this shot.
        prev_end_state: Description of the previous shot's final frame.
        api_key: Bearer token for the chat-completions endpoint.

    Returns:
        A ``(prompt_text, next_state)`` tuple. ``prompt_text`` is the reply
        text before the ``[NEXT_LINK]:`` marker (or the whole reply when the
        marker is absent). ``next_state`` is the text after the marker, or a
        generic fallback sentence when the marker is absent or followed by
        nothing. On any request or parsing failure the tuple is
        ``(None, prev_end_state)``, so the caller can keep its current state.
    """
    fallback_state = "Object continues in current state."

    # Context for V2: what V1 planned and how the previous shot ended.
    # The "Sora Prompt (English):" label is what batch_render_videos' regex
    # looks for when parsing the script, so it must stay as written.
    user_content = f"""
Current Shot: {index}/{total}
📖 **Director V1's Plan (Narrative)**:
"{skeleton_idea}"
🔗 **Previous Shot End State (Visual Reality)**:
"{prev_end_state}"
**TASK**:
Write the detailed Veo Prompt for Shot {index}.
1. **Start Visual**: Must match 'Previous Shot End State'.
2. **Action**: Execute 'Director V1's Plan'.
3. **End Visual**: Prepare for the next shot.
**OUTPUT FORMAT**:
Shot {index}/{total} | [Short Title]
Sora Prompt (English): (Start_State): [Matches Prev] (Action): [...] (End_State): [...] (Details): [8k, hyper-realistic...]
[NEXT_LINK]: [Describe the VISUAL STATE of the very last frame. Be specific about object position and deformation.]
"""
    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": DEFAULT_DIRECTOR_V2_PROMPT},
            {"role": "user", "content": user_content},
        ],
        "temperature": 0.6,
        "max_tokens": 2048,
    }
    try:
        res = requests.post(url, headers=headers, json=data, timeout=60)
        # Fail with the HTTP reason rather than a KeyError on an error payload.
        res.raise_for_status()
        content = res.json()['choices'][0]['message']['content']
        # Separate the prompt from the hand-off state for the next shot.
        if "[NEXT_LINK]:" in content:
            # Split on the first marker only, so a repeated marker cannot drop text.
            prompt_text, next_state = (part.strip() for part in content.split("[NEXT_LINK]:", 1))
            # An empty hand-off would give the next shot nothing to match against.
            next_state = next_state or fallback_state
        else:
            prompt_text = content
            next_state = fallback_state  # degraded mode: the model omitted the marker
        return prompt_text, next_state
    except Exception as e:
        print(f"V2 Error Shot {index}: {e}")
        return None, prev_end_state
def execute_recursive_generation(topic, architecture, count, api_key):
    """Run Stage 2 then Stage 3: build the V1 skeleton, then loop V2 per shot.

    This is a generator. Every yield is a ``(log_text, script_text)`` pair for
    the UI's log and script textboxes. ``script_text`` is ``None`` until the
    skeleton exists, then a preview of the skeleton, then the script built so
    far.

    Args:
        topic: Product name.
        architecture: Stage 1 output, forwarded to Director V1.
        count: Requested number of shots; coerced to ``int``.
        api_key: Bearer token for the LLM endpoint.
    """
    count = int(count)
    logs = []
    full_script = ""

    # Step 1: Director V1 (skeleton)
    logs.append("🎬 [Director V1] 正在生成叙事骨架 (Skeleton)...")
    yield "\n".join(logs), None
    skeleton_list = director_v1_skeleton(topic, architecture, count, api_key)
    if not skeleton_list:
        logs.append("❌ V1 骨架生成失败")
        yield "\n".join(logs), None
        return

    # V1 may return fewer lines than requested. Label shots against what was
    # actually planned so the last shot reads "N/N" rather than "N/<requested>".
    total = len(skeleton_list)
    logs.append(f"✅ V1 完成。共规划 {total} 个镜头。")
    logs.append("🔄 [Director V2] 启动递归生成 (Continuity Loop)...")
    yield "\n".join(logs), "\n".join(skeleton_list)  # preview the skeleton

    # Step 2: Director V2 loop. Each shot receives the previous shot's end state.
    prev_state = "Black screen fades into a massive, cinematic view of the raw material source in nature."
    for idx, skeleton_line in enumerate(skeleton_list, start=1):
        logs.append(f"🔨 [Processing] Shot {idx}/{total}...")
        yield "\n".join(logs[-10:]), full_script  # rolling log: last 10 entries

        prompt_text, next_state = director_v2_shot_generation(idx, total, skeleton_line, prev_state, api_key)
        if prompt_text:
            full_script += f"{prompt_text}\n\n"
            prev_state = next_state  # pass the baton to the next shot
            logs.append(f"✨ Shot {idx} 完成 | 🔗 状态传递: {next_state[:30]}...")
        else:
            # prev_state is left unchanged, so the next shot continues from
            # the last shot that succeeded.
            logs.append(f"⚠️ Shot {idx} 失败,跳过。")
        yield "\n".join(logs[-10:]), full_script
        time.sleep(0.5)  # short pause between LLM calls

    logs.append("🎉 所有镜头脚本生成完毕!准备提交给 Veo。")
    yield "\n".join(logs), full_script
# ===============================================
# --- 阶段四:Veo 渲染 (Worker) ---
# ===============================================
def generate_veo_video(prompt, filename, save_dir, video_key):
    """Render a single shot with the video model and save it as an .mp4 file.

    Args:
        prompt: Shot prompt text. Missing or whitespace-only prompts are rejected.
        filename: Output file name without extension.
        save_dir: Existing directory in which to write the file.
        video_key: Bearer token for the video endpoint.

    Returns:
        ``{"status": "success", "file": <path>}`` on success, otherwise
        ``{"status": "error", "msg": <reason>}``. Failures are reported through
        the returned dict rather than raised.
    """
    if not prompt or not prompt.strip():
        return {"status": "error", "msg": "Empty prompt"}

    # Always prepend the quality / aspect-ratio wording.
    final_prompt = f"Wide screen 16x9 video. Hyper-realistic documentary. {prompt} --ar 16x9"
    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {video_key.strip()}"}
    data = {
        "model": VEO_MODEL,
        "messages": [{"role": "user", "content": final_prompt}],
        "size": VIDEO_SIZE,
        "seconds": 8,
    }
    try:
        res = requests.post(url, headers=headers, json=data, timeout=300)
        if res.status_code != 200:
            return {"status": "error", "msg": f"API Error {res.status_code}"}
        content = res.json()['choices'][0]['message']['content']

        # The reply is text that is expected to contain a download link.
        url_match = re.search(r'(https?://[^\s)"]+)', content)
        if url_match is None:
            return {"status": "error", "msg": "No video URL found in API response"}

        # Bound the download time, and refuse to save an error page as .mp4.
        download = requests.get(url_match.group(1), timeout=300)
        download.raise_for_status()

        save_path = os.path.join(save_dir, filename + ".mp4")
        with open(save_path, "wb") as f:
            f.write(download.content)
        return {"status": "success", "file": save_path}
    except Exception as e:
        return {"status": "error", "msg": str(e)}
def batch_render_videos(script_text, topic, video_key, progress=gr.Progress()):
    """Stage 4: parse the script into shots, render them concurrently, zip the results.

    This is a generator. Every yield is a ``(log_text, generated_files,
    zip_path)`` triple. ``generated_files`` is the list of saved video paths so
    far (``None`` before rendering starts), and ``zip_path`` is ``None`` until
    the final yield of a run that produced at least one video.

    Args:
        script_text: Script containing "Shot N ... Sora Prompt (English): ..." entries.
        topic: Product name; used, after sanitising, in the output directory name.
        video_key: Bearer token for the video endpoint.
        progress: Gradio progress tracker, called with the completed fraction.
    """
    if not script_text or not video_key:
        yield "❌ 缺参数", None, None
        return

    # topic is free-form user text that becomes part of a path. Replace anything
    # that is not a word character, dash or space so it cannot add path
    # separators or ".." components.
    safe_topic = re.sub(r"[^\w\- ]+", "_", topic or "").strip() or "untitled"
    session_dir = os.path.join("AutoSaved_Videos", f"{safe_topic}_{int(time.time())}")
    os.makedirs(session_dir, exist_ok=True)

    # Parse the script. A prompt ends at the next "[NEXT_LINK]", at the next
    # shot header ("Shot <number>"), or at the end of the text. Requiring the
    # number keeps the word "Shot" inside a prompt from cutting it short.
    matches = re.findall(
        r"Shot\s+(\d+).*?Sora Prompt \(English\):\s*(.*?)(?=\[NEXT_LINK\]|Shot\s+\d+|$)",
        script_text,
        re.DOTALL,
    )
    logs = [f"🚀 启动 Veo 并发渲染,共 {len(matches)} 个任务..."]
    yield "\n".join(logs), None, None

    generated_files = []
    # Concurrency is capped at 5 to avoid timeouts.
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = {}
        for shot_num, p_text in matches:
            fname = f"Shot_{int(shot_num):03d}"
            futures[executor.submit(generate_veo_video, p_text, fname, session_dir, video_key)] = fname

        completed = 0
        for future in as_completed(futures):
            res = future.result()
            completed += 1
            progress(completed / len(matches))
            if res['status'] == 'success':
                logs.append(f"✅ [{completed}] 视频完成: {os.path.basename(res['file'])}")
                generated_files.append(res['file'])
            else:
                # Name the shot so the user knows which one to retry.
                logs.append(f"❌ [{completed}] {futures[future]} 失败: {res['msg']}")
            yield "\n".join(logs[-15:]), generated_files, None

    # Package the results. With nothing rendered there is nothing to archive,
    # so report that instead of returning a path to an empty or missing ZIP.
    if generated_files:
        shutil.make_archive(session_dir, 'zip', session_dir)
        yield "\n".join(logs), generated_files, f"{session_dir}.zip"
    else:
        logs.append("⚠️ 没有成功渲染任何视频,未生成 ZIP。")
        yield "\n".join(logs), generated_files, None
# ===============================================
# --- UI 界面 ---
# ===============================================
# Build the four-stage UI and wire each button to its pipeline function.
# NOTE(review): the flattened source does not show which widgets sat inside
# each gr.Row(); the nesting below is reconstructed (two key boxes side by
# side, log and script side by side) -- confirm against the intended layout.
with gr.Blocks(title="Veo 4-Stage Serial Production") as app:
    gr.Markdown("## 🏭 Veo 工业大片:四阶段串行生产线 (4-Stage Serial Pipeline)")

    with gr.Row():
        llm_key = gr.Textbox(
            label="1. LLM Key (Gemini)",
            value=DEFAULT_LLM_API_KEY,
            type="password",
        )
        video_key = gr.Textbox(
            label="4. Video Key (Veo)",
            value=DEFAULT_VIDEO_API_KEY,
            type="password",
        )
    topic = gr.Textbox(label="产品名称", placeholder="例如:Cola Bottle")

    # Stage 1
    btn_eng = gr.Button("🛠️ 阶段1:工程师 (生成架构)", variant="secondary")
    arch_out = gr.Textbox(label="工艺架构", lines=5)

    # Stages 2 & 3
    count_sl = gr.Slider(minimum=1, maximum=150, value=120, label="镜头数量")
    btn_dir = gr.Button("📝 阶段2 & 3:导演 V1+V2 (递归生成连贯脚本)", variant="primary")
    with gr.Row():
        logs_out = gr.Textbox(label="生产日志 (V2 状态链)", lines=10)
        script_out = gr.Textbox(label="最终连贯脚本", lines=10, interactive=True)

    # Stage 4
    btn_render = gr.Button("🎬 阶段4:Veo 并发渲染", variant="stop")
    zip_out = gr.File(label="下载 ZIP")

    # Event bindings
    btn_eng.click(
        fn=generate_process_architecture,
        inputs=[topic, llm_key, gr.State(DEFAULT_ARCHITECT_PROMPT)],
        outputs=arch_out,
    )
    btn_dir.click(
        fn=execute_recursive_generation,
        inputs=[topic, arch_out, count_sl, llm_key],
        outputs=[logs_out, script_out],
    )
    # The middle output (the list of generated files) goes to a throwaway State.
    btn_render.click(
        fn=batch_render_videos,
        inputs=[script_out, topic, video_key],
        outputs=[logs_out, gr.State([]), zip_out],
    )

app.launch()