486 / app.py
194130157a's picture
Create app.py
728d6a0 verified
import os
import sys
import subprocess
import re
import time
import zipfile
import json
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
# 1. Install missing dependencies automatically
def ensure_dependencies(packages=("gradio", "requests")):
    """Install any of *packages* that cannot be found, using pip.

    Each name is used both as the importable module name and as the pip
    distribution name, which holds for the defaults.

    Args:
        packages: Iterable of package names to check. Defaults to the two
            third-party packages this script imports.

    Raises:
        subprocess.CalledProcessError: If the pip invocation fails.
    """
    # Imported locally so the helper does not depend on file-level imports
    # beyond the ones it already used (subprocess, sys).
    import importlib
    import importlib.util

    # find_spec locates a top-level module without importing it.
    missing = [name for name in packages if importlib.util.find_spec(name) is None]
    if not missing:
        return

    subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])
    # Modules installed while the interpreter is running may not be visible to
    # the import system until its finder caches are invalidated.
    importlib.invalidate_caches()
# Run the dependency check before the third-party imports below; it installs
# the packages with pip when they are not importable yet.
ensure_dependencies()
import gradio as gr
import requests
# ================= Default configuration =================
# SECURITY NOTE(review): the fallback key below is committed in source and is
# also the pre-filled value of the API-key textbox in the UI. Treat it as
# exposed: rotate it and supply the key through the MERCHANT_API_KEY
# environment variable, which takes precedence when set.
DEFAULT_API_KEY = os.environ.get(
    "MERCHANT_API_KEY",
    "sk-DZ5g7Zu0lFDlR7mBkbNsZLFTt1KBqA8ocsAH1mcvsZDWtydx",
)
# Base URL of the API gateway; "/v1/chat/completions" is appended per request.
MERCHANT_BASE_URL = "https://xingjiabiapi.com"
# Model name sent with video generation requests.
VEO_MODEL = "veo_3_1-fast"
# Sent as the "size" field of video generation requests.
VIDEO_SIZE = "16x9"
# Model name sent with the architecture and script (text) requests.
TEXT_MODEL = "gemini-3-pro-preview-thinking"
# ===============================================
# --- Role 1: chief process-logic architect (unchanged) ---
# ===============================================
# Default value of the "Architect System Prompt" textbox; whatever the textbox
# holds is sent as the system message of the stage-one (architecture) request.
# The text is runtime data: it is written in Chinese on purpose and must not
# be edited for style.
DEFAULT_ARCHITECT_PROMPT = """
你是一位极度严谨的**工业工艺逻辑师**。
你的任务是将产品的生产过程拆解为**“基于机器工位的严格时间序列”**。
目标:为一部 120 镜头 (约 16 分钟) 的高信息密度纪录片提供坚实支撑。
**核心指导思想:做菜法则 (The Cooking Logic)**
工业生产严禁跳跃,必须像做菜一样按顺序执行:备料 -> 清洗 -> 加工 -> 熟化 -> 包装。
**必须执行的逻辑铁律:**
1. **时间线性的绝对严密性**:
* 步骤必须严格按照时间发生。
2. **颗粒度标准:机器单元级 (Machine Unit Level)**:
* **核心对象**:描述具体的**机器名称**或**标准工序动作**,而非微观物理现象。
* *Good (完美)*: "**高压喷淋塔 (Spray Tower)** -> **滚筒毛刷机 (Rotary Brusher)** -> **风干隧道 (Drying Tunnel)**。"
3. **状态的不可逆性 (State Persistence)**:
* 一旦物料在 Step N 被“去皮”,Shot N+1 必须明确描述为“去皮后的果肉”。
* 严禁在后续步骤出现之前的状态。
4. **连接动作 (The Link)**:
* 每个核心机器之间,必须包含**“传送带/提升机/机械臂”**等连接步骤。
5. **全面解压流派库 (The Omnibus of Satisfaction Genres)**
* **强制要求**:在描述上述每一步机器动作时,根据其物理特性,**必须**从以下流派中选择最匹配的一个标签,作为该步骤的视觉风格定义:
* **5.1 材质改变类 (Material Transformation)**
* **[SLICE] (切削派)**:像切太空沙/切肥皂一样顺滑。 -> *适用:切割面团、金属切削、修边。*
* **[CRUSH] (破坏派)**:像液压机压扁物体一样的形变与爆裂。 -> *适用:榨汁、冲压、粉碎。*
* **[PEEL] (剥离派)**:像撕掉新屏幕保护膜一样的极度舒适。 -> *适用:去皮、脱模、撕标。*
* **5.2 流体与表面类 (Fluid & Surface)**
* **[FLOW] (流体派)**:像混合油漆或倒蜂蜜一样的丝滑粘稠。 -> *适用:搅拌、淋面、浇筑、巧克力。*
* **[CLEAN] (净化派)**:像高压水枪洗地毯一样的“从脏到净”。 -> *适用:高压清洗原料、去污。*
* **5.3 秩序与结构类 (Order & Structure)**
* **[FIT] (强迫症派)**:像俄罗斯方块一样严丝合缝的 Perfect Fit。 -> *适用:装盒、盖盖子、模具闭合。*
* **[SYNC] (循环派)**:像 3D 渲染动画一样的绝对同步与无限循环。 -> *适用:几十台机器同步运作、传送带阵列。*
**增强逻辑指令:**
A. **冗余与质控**
* **多级处理**:关键步骤必须包含多级清洗或过滤。
* **强制检测门**:进入不可逆工序前,必须有视觉检测或剔除步骤。
B. **智能操控**
* **仪表说明**:明确提及 PLC、HMI 面板或 PID 控制回路。
* **传感器**:明确提及温度、重量、视觉传感器。
C. **自动化设备选型**
* **抓取/分拣**:优先使用 **Delta Robots (高速并联蜘蛛手)**。
* **搬运/物流**:优先使用 **AGV/AMR (自动导引车)** 而非人工叉车。
* **精密组装**:优先使用 **6-Axis Articulated Arms (六轴机械臂)**。
**输出要求**:
请严格按上述逻辑,输出一份包含**至少 40 个详细 Sequence** 的工艺架构。
**关键**:确保整个流程覆盖了第 5 点中的多种解压流派,不要单调。让视频在“破坏的爽”和“秩序的爽”之间交替。
"""
# ===============================================
# --- Role 2: viral "oddly satisfying" director (benchmarked against TikTok/YouTube hits) ---
# ===============================================
# Default value of the "Director System Prompt" textbox; whatever the textbox
# holds is sent as the system message of the stage-two (script) request.
# The output-format section at the end asks the model for
# "Shot <n>/<total> | <title>" followed by "Sora Prompt (English): ...";
# extract_prompts_with_titles() parses exactly those markers, so the two must
# be changed together. The text is runtime data and must not be edited for
# style.
DEFAULT_DIRECTOR_PROMPT = """
你是一位熟知 **YouTube/TikTok 热门解压 (Viral Oddly Satisfying)** 趋势的视频导演。
你的目标是生成能获得**千万播放量 (Viral Hit)** 的 ASMR 视频。
不要被所谓的“艺术构图”束缚,一切以**“视觉爽感 (Visual Satisfaction)”**和**“听觉触发 (Audio Trigger)”**为最高标准。
**🔥 六大爆款流量铁律 (The 6 Iron Laws of Viral Hits)**
**铁律一:窒息的密度 (The Law of Visual Saturation)**
* **对标风格**:工厂流水线实拍 (Factory Loops)、糖果制作 (Candy Making)。
* **核心原则**:画面必须“满”。
* **强制执行**:
* **数量级**:成千上万 (Thousands)。
* **画面填充**:屏幕边缘全是产品,不留死角 (Edge-to-edge)。
* **关键词**:Endless Sea, Avalanche, Jam-packed, Infinite array.
**铁律二:真实的破坏与重组 (The Law of Raw Physics)**
* **对标风格**:Hydraulic Press Channel (液压机)、Macro Room (微距破坏)。
* **核心原则**:不一定非要完美无瑕,粗暴的物理反馈也很解压。
* **强制执行**:
* **破坏感**:强调挤压瞬间的**形变 (Deformation)** 和 **爆裂 (Bursting)**。
* **重组感**:切片必须整齐,断层必须清晰。
* **关键词**:Squish, Crunch, Flatten, Smooth Slice, Physics simulation.
**铁律三:无缝循环与秩序 (The Law of Perfect Loops)**
* **对标风格**:3D Render Loops, Conveyor Belt ASMR.
* **核心原则**:像动图一样完美的循环,治愈强迫症。
* **强制执行**:
* **机械节奏**:所有动作零误差 (Zero Tolerance),完全同步。
* **顺滑**:没有卡顿,只有丝滑的流动。
* **关键词**:Clockwork precision, Synchronized motion, Seamless flow, Perfect Fit.
**铁律四:爆款视觉美学 (The Law of Viral Aesthetics)**
* **核心原则**:不局限于“极简/柔光”,而是**“高清晰度 + 强对比”**。
* **强制执行**:
* **清晰度**:**Crystal Clear (极致清晰)**。
* **风格多样性**:可以是 **Bright Studio (明亮影棚)** 也可以是 **Raw Industrial (硬核工业)**,关键是主体突出。
* **对比**:脏 vs 净,乱 vs 齐,软 vs 硬。
* **关键词**:High fidelity, Sharp focus, High contrast, 4K texture.
**铁律五:ASMR 听觉灵魂 (The Law of Audio Soul)**
* **核心原则**:声音必须“脆”或“粘”。
* **强制执行**:
* **拒绝BGM**:只有物理原声。
* **材质声**:根据材质决定声音。金属(Clack), 液体(Slurp), 脆物(Snap), 泥土(Sizzle)。
* **关键词**:Crisp Audio, Spatial Sound, ASMR Triggers.
**铁律六:逻辑锁死与代入感 (The Law of Logic Lock & Immersion) [★关键修正★]**
* **核心原则**:解决 AI 视频无记忆问题,增强用户代入感。
* **强制执行**:
1. **逻辑延续 (State Persistence)**:
* **严禁回档**:Shot N 切开了,Shot N+1 绝不能复原。
* **严禁空载**:Shot N 装车了,Shot N+1 车上必须有货。
2. **受体视角 (Immersion/POV)**:
* 必须包含 **Inside-Machine View (机器内部视角)** 或 **Product POV (产品视角)**。
* 描述原料**迎面冲向镜头 (Rushing towards camera)**,制造视觉冲击。
3. **视觉衔接 (Visual Bridge)**:
* Shot N 的结束动作必须能顺滑过渡到 Shot N+1 的开始。
* **关键词**:Product POV, Inside-machine view, Logical consistency, Seamless Transition.
# ==================== 输出格式 (严禁修改) ====================
请严格按照以下格式生成脚本。**格式的准确性直接决定文件名是否正确。**
Shot [序号]/[总数] | [中文标题-用于文件名]
Sora Prompt (English): (Subject/Material): [...] (Scale/Detail): [...] (Env): [...] (Visual_ASMR): [...] (Physics): [...] (Camera): [...] (Audio_Cue): [...]
"""
# ===============================================
def generate_process_architecture(topic, api_key, architect_prompt):
    """Stage one: request a process architecture for *topic* from the text model.

    Args:
        topic: Product name typed by the user.
        api_key: Bearer token for the gateway; surrounding whitespace is stripped.
        architect_prompt: System prompt sent with the request.

    Returns:
        The model's reply text on HTTP 200. In every other case a
        human-readable message string is returned instead of raising:
        a validation message for a missing topic/key, ``"Error: ..."`` for a
        non-200 response, or ``"Request Failed: ..."`` for any exception.
    """
    if not topic:
        return "❌ 请先输入产品名称"
    if not api_key:
        return "❌ 请输入 API Key"

    print(f"🧠 [{TEXT_MODEL}] 正在调用解压百科全书,构建《{topic}》的生产架构...")

    user_content = f"""
Design a Comprehensive Industrial Process for: {topic}.
GOAL: Map the process to the Viral Satisfaction Genres (TikTok/YouTube styles).
REQUIREMENTS:
1. **Flow**: Raw Material -> Prep -> Processing -> Assembly -> Packing.
2. **Diverse Genres**: You MUST use a mix of labels: [SLICE], [CRUSH], [FLOW], [CLEAN], [FIT], [SYNC], [PEEL].
3. **Strict Logic**: Follow the "Cooking Logic" - Step-by-step state changes.
"""

    endpoint = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    auth_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key.strip()}",
    }
    conversation = [
        {"role": "system", "content": architect_prompt},
        {"role": "user", "content": user_content},
    ]
    payload = {
        "model": TEXT_MODEL,
        "messages": conversation,
        "temperature": 0.7,
        "max_tokens": 4096,
    }

    try:
        reply = requests.post(endpoint, headers=auth_headers, json=payload, timeout=240)
        if reply.status_code != 200:
            return f"Error: {reply.text}"
        # A malformed body raises here and is reported by the handler below.
        return reply.json()['choices'][0]['message']['content']
    except Exception as exc:
        return f"Request Failed: {exc}"
def generate_script_from_architecture(topic, architecture, shot_count, system_prompt, api_key):
    """Stage two: ask the text model to turn an architecture into a shot script.

    Args:
        topic: Product name.
        architecture: Approved process architecture text from stage one.
        shot_count: Number of shots the model is asked to generate.
        system_prompt: System prompt sent with the request.
        api_key: Bearer token for the gateway; surrounding whitespace is stripped.

    Returns:
        The script text on HTTP 200, otherwise ``None``. Failures never raise;
        the reason is printed to the console so it is not lost.
    """
    print(f"🧠 [{TEXT_MODEL}] 正在生成爆款解压分镜...")
    user_content = f"""
Product: {topic}
Approved Comprehensive Architecture:
{architecture}
Task: Generate exactly {shot_count} shots.
CRITICAL INSTRUCTIONS:
1. **Task Completion Loop**: Every shot description MUST follow the **Start -> Action -> Finish** structure to create an 8-second dynamic loop.
2. **Logic & Continuity**:
- Ensure state persistence (e.g., if sliced in Shot N, it must remain sliced in Shot N+1).
- Use "Product POV" / "Inside-machine view" to create immersion.
3. **Viral Quality**: Reference YouTube/TikTok viral styles (Hydraulic Press, Factory Loops, Macro Slicing).
"""
    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content}
        ],
        "temperature": 0.7,
        "max_tokens": 16000
    }
    try:
        response = requests.post(url, headers=headers, json=data, timeout=360)
        if response.status_code != 200:
            # Callers only see None, so record why the request was rejected.
            print(f"❌ Script request rejected: HTTP {response.status_code}: {response.text[:500]}")
            return None
        return response.json()['choices'][0]['message']['content']
    except Exception as e:
        # Best-effort contract is kept (return None), but the cause is logged
        # instead of being silently discarded.
        print(f"❌ Script request failed: {e}")
        return None
# One shot entry: "Shot <n>... | <title>" on a single line, then the text after
# "Sora Prompt (English):" up to the next shot header, "::END::", or the end.
# The next-header lookahead tolerates markdown decoration ("**", "#", ">",
# "-", "_") before "Shot" and requires a shot number, so a prompt line that
# merely starts with the word "Shot" does not cut the prompt short.
_SHOT_ENTRY_RE = re.compile(
    r"Shot\s+(\d+)[^\n|]*\|\s*([^\n]+)"
    r".*?Sora Prompt \(English\):\s*(.*?)"
    r"(?=\n[\s*#>_-]*Shot\s+\d+|::END::|$)",
    re.DOTALL | re.IGNORECASE,
)


def extract_prompts_with_titles(script_text):
    """Parse a generated script into per-shot file names and prompts.

    Args:
        script_text: Script in the "Shot N/T | title" / "Sora Prompt
            (English): ..." format. May be empty or ``None``.

    Returns:
        A list of ``{"filename": ..., "prompt": ...}`` dicts in script order.
        ``filename`` is ``Shot_<NNN>_<title>`` with characters that are
        invalid in file names removed, spaces replaced by underscores and the
        title capped at 40 characters. Prompts of 10 characters or fewer are
        dropped.
    """
    if not script_text:
        return []

    results = []
    for shot_num, title, content in _SHOT_ENTRY_RE.findall(script_text):
        # Removing "*" here also strips markdown bold markers from the title.
        safe_title = re.sub(r'[\\/*?:"<>|]', "", title).strip().replace(" ", "_")
        safe_title = safe_title[:40]
        filename_base = f"Shot_{int(shot_num):03d}_{safe_title}"

        # Flatten the prompt to a single line with single spaces.
        clean_p = content.replace("\n", " ").replace("**", "").strip()
        clean_p = re.sub(r'\s+', ' ', clean_p)
        if len(clean_p) > 10:
            results.append({"filename": filename_base, "prompt": clean_p})

    print(f"🔍 成功解析出 {len(results)} 个带标题的镜头指令")
    return results
def generate_single_video_task(prompt, filename_base, save_dir, api_key, topic):
    """Generate one video for *prompt* and save it as ``<filename_base>.mp4``.

    The prompt is wrapped with fixed style keywords, sent to the video model,
    and the first URL found in the reply is downloaded into *save_dir*.

    Args:
        prompt: Shot prompt text. An empty value short-circuits to ``None``.
        filename_base: File name without extension.
        save_dir: Existing directory the video is written to.
        api_key: Bearer token for the gateway; surrounding whitespace is stripped.
        topic: Product name inserted into the prompt prefix.

    Returns:
        ``None`` when *prompt* is empty; otherwise a dict with ``"status"``
        (``"success"`` or ``"error"``) and ``"msg"``, plus ``"file"`` (the
        saved path) on success. The function never raises.
    """
    if not prompt:
        return None

    clean_prompt = prompt.replace("--ar 16:9", "").replace("16:9", "")
    # Inject the viral "oddly satisfying" style vocabulary around the prompt.
    final_prompt = (
        f"Wide screen 16x9 video. Manufacturing {topic}. "
        f"**Oddly Satisfying Compilation style, Viral TikTok ASMR.** "
        f"**Dopamine inducing visuals, Stress relief.** "
        f"**High-Key Lighting, Bright and Clean aesthetic.** "
        f"**Tactile Texture, Physics Simulation.** "
        f"{clean_prompt} --ar 16x9"
    )
    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": VEO_MODEL,
        "messages": [{"role": "user", "content": final_prompt}],
        "stream": False, "size": VIDEO_SIZE, "seconds": 8, "aspect_ratio": "16:9"
    }
    fname = f"{filename_base}.mp4"
    save_path = os.path.join(save_dir, fname)
    try:
        resp = requests.post(url, headers=headers, json=data, timeout=300)
        if resp.status_code != 200:
            return {"status": "error", "msg": f"[{filename_base}] ❌ API Error: {resp.status_code}"}
        try:
            content = resp.json()['choices'][0]['message']['content']
            # The pattern already stops at whitespace, ')' and '"'.
            url_match = re.search(r'(https?://[^\s)"]+)', content)
            if not url_match:
                return {"status": "error", "msg": f"[{filename_base}] ❌ No URL found"}
            video_url = url_match.group(1)

            # Download to a temporary name first so a failed or partial
            # transfer never leaves a broken .mp4 in the session directory.
            part_path = f"{save_path}.part"
            try:
                # Without a timeout a stalled download would block its worker
                # thread forever; streaming avoids buffering the whole video.
                with requests.get(video_url, stream=True, timeout=(10, 300)) as download:
                    # Reject HTTP error pages instead of saving them as video.
                    download.raise_for_status()
                    with open(part_path, "wb") as f:
                        for chunk in download.iter_content(chunk_size=1024 * 1024):
                            f.write(chunk)
                os.replace(part_path, save_path)
            finally:
                if os.path.exists(part_path):
                    os.remove(part_path)
            return {"status": "success", "file": save_path, "msg": f"✅ 已保存: {fname}"}
        except Exception as e:
            # Covers malformed replies as well as download failures.
            return {"status": "error", "msg": f"[{filename_base}] ❌ Parse Error: {e}"}
    except Exception as e:
        return {"status": "error", "msg": f"[{filename_base}] ❌ Network Error: {e}"}
# ================= Workflow functions =================
def step2_generate_script(topic, architecture, count, system_prompt, api_key):
    """Stage two handler: generate the script only (no videos).

    Args:
        topic: Product name.
        architecture: Architecture text confirmed by the user.
        count: Requested number of shots.
        system_prompt: Director system prompt.
        api_key: Gateway API key.

    Returns:
        A ``(log_text, script_text)`` tuple. ``script_text`` is ``None`` when
        validation fails or the script could not be generated.
    """
    if not architecture:
        return "❌ 请先生成并确认工艺架构", None
    if not api_key:
        return "❌ 请先输入 API Key", None

    messages = [f"🚀 正在为《{topic}》生成 {count} 个全流派解压分镜..."]
    print("\n".join(messages))

    script_text = generate_script_from_architecture(
        topic, architecture, count, system_prompt, api_key
    )
    if not script_text:
        messages.append("❌ 脚本生成失败,请重试。")
        return "\n".join(messages), None

    # Parse once up front so the user sees how many shots will be rendered.
    parsed_shots = extract_prompts_with_titles(script_text)
    messages.extend(
        [
            "✅ 脚本生成成功!",
            f"🔍 预解析结果:检测到 {len(parsed_shots)} 个有效镜头。",
            "👉 请检查脚本,确认是否包含 [SLICE], [FLOW], [FIT] 等多种解压风格。",
        ]
    )
    return "\n".join(messages), script_text
def step3_generate_videos(topic, script_text, api_key, progress=gr.Progress()):
    """Stage three handler: render every shot of the script concurrently.

    This is a generator; each yield is a ``(log_text, files, zip_path)``
    tuple used to refresh the UI while work is in flight.

    Args:
        topic: Product name; also used to name the session directory.
        script_text: Script to parse into shots.
        api_key: Gateway API key.
        progress: Gradio progress tracker, called after each finished shot.

    Yields:
        Tuples of the accumulated log text, the list of saved video paths
        (or ``None``), and the ZIP path (only on the final successful yield).
    """
    if not script_text:
        yield "❌ 脚本内容为空,请先执行第二步", None, None
        return
    if not api_key:
        yield "❌ 请先输入 API Key", None, None
        return

    # One directory per run: AutoSaved_Videos/<topic>_<unix timestamp>.
    started_at = int(time.time())
    if topic:
        safe_topic = re.sub(r'[\\/*?:"<>|]', "", topic).replace(" ", "_")
    else:
        safe_topic = "Untitled"
    session_dir = os.path.join("AutoSaved_Videos", f"{safe_topic}_{started_at}")
    os.makedirs(session_dir, exist_ok=True)

    logs = [f"🚀 启动视频生成任务...", f"📂 归档目录: {os.path.abspath(session_dir)}"]
    yield "\n".join(logs), None, None

    # Keep a copy of the script next to the rendered videos.
    script_path = os.path.join(session_dir, "script.txt")
    with open(script_path, "w", encoding="utf-8") as script_file:
        script_file.write(script_text)

    shots = extract_prompts_with_titles(script_text)
    if not shots:
        logs.append("❌ 脚本格式解析失败,未找到有效 Prompt")
        yield "\n".join(logs), None, None
        return

    total = len(shots)
    logs.append(f"🎥 开始并发渲染 {total} 个镜头...")
    yield "\n".join(logs), None, None

    generated_files = []
    # One worker per shot, so every request is submitted immediately.
    with ThreadPoolExecutor(max_workers=total) as pool:
        pending = [
            pool.submit(
                generate_single_video_task,
                shot['prompt'],
                shot['filename'],
                session_dir,
                api_key,
                topic,
            )
            for shot in shots
        ]
        for done_count, finished in enumerate(as_completed(pending), start=1):
            outcome = finished.result()
            progress(done_count / total, desc=f"渲染中 {done_count}/{total}")
            if outcome:
                logs.append(outcome['msg'])
                if outcome['status'] == 'success':
                    generated_files.append(outcome['file'])
            yield "\n".join(logs), generated_files, None

    if not generated_files:
        logs.append("\n❌ 全部失败,无视频生成")
        yield "\n".join(logs), None, None
        return

    generated_files.sort()
    # make_archive appends ".zip" to the base name it is given.
    zip_name = f"{session_dir}.zip"
    shutil.make_archive(session_dir, 'zip', session_dir)
    logs.append(f"\n🎉 全部完成!已打包 ZIP")
    yield "\n".join(logs), generated_files, zip_name
# === User interface ===
# NOTE(review): the original indentation of this section was lost; the nesting
# below is reconstructed from the component order — confirm against the
# intended layout.
with gr.Blocks(title="Veo Viral ASMR (逻辑锁死版)") as app:
    gr.Markdown("# 🏭 首席工艺逻辑 & Viral ASMR 工厂")
    gr.Markdown("配置:**做菜法则工艺逻辑** + **热门对标(Hydraulic/Factory/Macro)** + **逻辑锁死(POV)**")
    # API key shared by all three stages; pre-filled with DEFAULT_API_KEY.
    with gr.Row(variant="panel"):
        api_key_input = gr.Textbox(
            label="🔑 API Key (必填)",
            value=DEFAULT_API_KEY,
            type="password",
            placeholder="在此输入您的 API Key"
        )
    # Stage one: product name and architect prompt in, editable architecture out.
    with gr.Row(variant="panel"):
        with gr.Column(scale=1):
            topic_input = gr.Textbox(label="1. 输入产品名称", placeholder="例如:Soap, Kinetic Sand, Juice, Candy, Gears")
            with gr.Accordion("🛠️ 角色1:首席工艺逻辑师 (含ASMR流派)", open=False):
                architect_prompt_input = gr.Textbox(label="Architect System Prompt", value=DEFAULT_ARCHITECT_PROMPT, lines=8)
            plan_btn = gr.Button("🛠️ 第一步:构建严格工艺流程", variant="secondary")
        with gr.Column(scale=2):
            architecture_output = gr.Textbox(
                label="2. 确认架构 (检查:时间严密性 + 解压标签)",
                lines=10,
                placeholder="点击左侧按钮生成架构...",
                interactive=True
            )
    # Stages two and three: shot count, action buttons and the director prompt.
    with gr.Row():
        with gr.Column(scale=1):
            count_slider = gr.Slider(minimum=1, maximum=120, value=50, step=1, label="3. 镜头数量")
            script_btn = gr.Button("📝 第二步:生成爆款解压脚本", variant="primary")
            video_btn = gr.Button("🎬 第三步:开始批量渲染视频", variant="stop")
        with gr.Column(scale=2):
            with gr.Accordion("🎭 角色2:Viral ASMR 导演 (热门对标+POV)", open=False):
                system_prompt_input = gr.Textbox(label="Director System Prompt", value=DEFAULT_DIRECTOR_PROMPT, lines=8)
    # Run log and the editable script that stage three renders from.
    with gr.Row():
        log_out = gr.Textbox(label="运行日志", lines=12)
        script_out = gr.Textbox(label="最终脚本", lines=12, interactive=True)
    zip_out = gr.File(label="下载生成结果 (文件列表 & ZIP)")
    # Event bindings
    plan_btn.click(
        generate_process_architecture,
        inputs=[topic_input, api_key_input, architect_prompt_input],
        outputs=[architecture_output]
    )
    script_btn.click(
        step2_generate_script,
        inputs=[topic_input, architecture_output, count_slider, system_prompt_input, api_key_input],
        outputs=[log_out, script_out]
    )
    # NOTE(review): step3_generate_videos yields three values, and both the
    # file list (2nd) and the ZIP path (3rd) are routed to zip_out — confirm
    # which of the two the component ends up displaying.
    video_btn.click(
        step3_generate_videos,
        inputs=[topic_input, script_out, api_key_input],
        outputs=[log_out, zip_out, zip_out]
    )
app.launch()