Spaces:

194130157a
/

486

Sleeping

App Files Files Community

486 / app.py

194130157a

Create app.py

728d6a0 verified 3 months ago

raw

history blame contribute delete

20.9 kB

	import os
	import sys
	import subprocess
	import re
	import time
	import zipfile
	import json
	import shutil
	from concurrent.futures import ThreadPoolExecutor, as_completed

	# 1. 自动安装依赖
	def ensure_dependencies():
	    """Install gradio and requests with pip when either one cannot be imported."""
	    required = ("gradio", "requests")
	    try:
	        for module_name in required:
	            __import__(module_name)
	    except ImportError:
	        # Install into the interpreter that is running this script.
	        subprocess.check_call([sys.executable, "-m", "pip", "install", *required])

	# Run the dependency check before the gradio / requests imports that follow.
	ensure_dependencies()

	import gradio as gr
	import requests

	# ================= Default configuration =================
	# Security: the API key must not be committed to source control. It is read
	# from the MERCHANT_API_KEY environment variable and falls back to an empty
	# string, so the UI field starts blank unless the deployment supplies a key.
	# NOTE(review): the key that used to be hard-coded here should be revoked.
	DEFAULT_API_KEY = os.environ.get("MERCHANT_API_KEY", "").strip()
	# Base URL that every "/v1/chat/completions" request in this file is sent to.
	MERCHANT_BASE_URL = "https://xingjiabiapi.com"
	# Model name sent with video-generation requests.
	VEO_MODEL = "veo_3_1-fast"
	# Value sent as the "size" field of video-generation requests.
	VIDEO_SIZE = "16x9"
	# Model name sent with the architecture and script (text) requests.
	TEXT_MODEL = "gemini-3-pro-preview-thinking"

	# ===============================================
	# --- Role 1: chief process-logic architect (unchanged) ---
	# ===============================================
	# Default system prompt (written in Chinese) for the architecture stage. It is
	# the initial value of the "Architect System Prompt" textbox in the UI and is
	# passed as the system message by generate_process_architecture.
	DEFAULT_ARCHITECT_PROMPT = """
	你是一位极度严谨的工业工艺逻辑师。
	你的任务是将产品的生产过程拆解为“基于机器工位的严格时间序列”。
	目标：为一部 120 镜头 (约 16 分钟) 的高信息密度纪录片提供坚实支撑。

	核心指导思想：做菜法则 (The Cooking Logic)
	工业生产严禁跳跃，必须像做菜一样按顺序执行：备料 -> 清洗 -> 加工 -> 熟化 -> 包装。

	必须执行的逻辑铁律：

	1. 时间线性的绝对严密性：
	* 步骤必须严格按照时间发生。

	2. 颗粒度标准：机器单元级 (Machine Unit Level)：
	* 核心对象：描述具体的机器名称或标准工序动作，而非微观物理现象。
	* Good (完美): "高压喷淋塔 (Spray Tower) -> 滚筒毛刷机 (Rotary Brusher) -> 风干隧道 (Drying Tunnel)。"

	3. 状态的不可逆性 (State Persistence)：
	* 一旦物料在 Step N 被“去皮”，Shot N+1 必须明确描述为“去皮后的果肉”。
	* 严禁在后续步骤出现之前的状态。

	4. 连接动作 (The Link)：
	* 每个核心机器之间，必须包含“传送带/提升机/机械臂”等连接步骤。

	5. 全面解压流派库 (The Omnibus of Satisfaction Genres)
	* 强制要求：在描述上述每一步机器动作时，根据其物理特性，必须从以下流派中选择最匹配的一个标签，作为该步骤的视觉风格定义：

	* 5.1 材质改变类 (Material Transformation)
	* [SLICE] (切削派)：像切太空沙/切肥皂一样顺滑。 -> 适用：切割面团、金属切削、修边。
	* [CRUSH] (破坏派)：像液压机压扁物体一样的形变与爆裂。 -> 适用：榨汁、冲压、粉碎。
	* [PEEL] (剥离派)：像撕掉新屏幕保护膜一样的极度舒适。 -> 适用：去皮、脱模、撕标。

	* 5.2 流体与表面类 (Fluid & Surface)
	* [FLOW] (流体派)：像混合油漆或倒蜂蜜一样的丝滑粘稠。 -> 适用：搅拌、淋面、浇筑、巧克力。
	* [CLEAN] (净化派)：像高压水枪洗地毯一样的“从脏到净”。 -> 适用：高压清洗原料、去污。

	* 5.3 秩序与结构类 (Order & Structure)
	* [FIT] (强迫症派)：像俄罗斯方块一样严丝合缝的 Perfect Fit。 -> 适用：装盒、盖盖子、模具闭合。
	* [SYNC] (循环派)：像 3D 渲染动画一样的绝对同步与无限循环。 -> 适用：几十台机器同步运作、传送带阵列。

	增强逻辑指令：

	A. 冗余与质控
	* 多级处理：关键步骤必须包含多级清洗或过滤。
	* 强制检测门：进入不可逆工序前，必须有视觉检测或剔除步骤。

	B. 智能操控
	* 仪表说明：明确提及 PLC、HMI 面板或 PID 控制回路。
	* 传感器：明确提及温度、重量、视觉传感器。

	C. 自动化设备选型
	* 抓取/分拣：优先使用 Delta Robots (高速并联蜘蛛手)。
	* 搬运/物流：优先使用 AGV/AMR (自动导引车) 而非人工叉车。
	* 精密组装：优先使用 6-Axis Articulated Arms (六轴机械臂)。

	输出要求：
	请严格按上述逻辑，输出一份包含至少 40 个详细 Sequence 的工艺架构。
	关键：确保整个流程覆盖了第 5 点中的多种解压流派，不要单调。让视频在“破坏的爽”和“秩序的爽”之间交替。
	"""

	# ===============================================
	# --- Role 2: viral stress-relief director (benchmarked against TikTok/YouTube hits) ---
	# ===============================================
	# Default system prompt (written in Chinese) for the script stage. It is the
	# initial value of the "Director System Prompt" textbox in the UI.
	# The "Shot N/Total | title" line uses a plain "|" separator: the previous
	# "\|" was an invalid escape sequence in a non-raw string literal and told the
	# model to emit a stray backslash before the separator.
	DEFAULT_DIRECTOR_PROMPT = """
	你是一位熟知 YouTube/TikTok 热门解压 (Viral Oddly Satisfying) 趋势的视频导演。
	你的目标是生成能获得千万播放量 (Viral Hit) 的 ASMR 视频。
	不要被所谓的“艺术构图”束缚，一切以“视觉爽感 (Visual Satisfaction)”和“听觉触发 (Audio Trigger)”为最高标准。

	🔥 六大爆款流量铁律 (The 6 Iron Laws of Viral Hits)

	铁律一：窒息的密度 (The Law of Visual Saturation)
	* 对标风格：工厂流水线实拍 (Factory Loops)、糖果制作 (Candy Making)。
	* 核心原则：画面必须“满”。
	* 强制执行：
	* 数量级：成千上万 (Thousands)。
	* 画面填充：屏幕边缘全是产品，不留死角 (Edge-to-edge)。
	* 关键词：Endless Sea, Avalanche, Jam-packed, Infinite array.

	铁律二：真实的破坏与重组 (The Law of Raw Physics)
	* 对标风格：Hydraulic Press Channel (液压机)、Macro Room (微距破坏)。
	* 核心原则：不一定非要完美无瑕，粗暴的物理反馈也很解压。
	* 强制执行：
	* 破坏感：强调挤压瞬间的形变 (Deformation) 和爆裂 (Bursting)。
	* 重组感：切片必须整齐，断层必须清晰。
	* 关键词：Squish, Crunch, Flatten, Smooth Slice, Physics simulation.

	铁律三：无缝循环与秩序 (The Law of Perfect Loops)
	* 对标风格：3D Render Loops, Conveyor Belt ASMR.
	* 核心原则：像动图一样完美的循环，治愈强迫症。
	* 强制执行：
	* 机械节奏：所有动作零误差 (Zero Tolerance)，完全同步。
	* 顺滑：没有卡顿，只有丝滑的流动。
	* 关键词：Clockwork precision, Synchronized motion, Seamless flow, Perfect Fit.

	铁律四：爆款视觉美学 (The Law of Viral Aesthetics)
	* 核心原则：不局限于“极简/柔光”，而是“高清晰度 + 强对比”。
	* 强制执行：
	* 清晰度：Crystal Clear (极致清晰)。
	* 风格多样性：可以是 Bright Studio (明亮影棚) 也可以是 Raw Industrial (硬核工业)，关键是主体突出。
	* 对比：脏 vs 净，乱 vs 齐，软 vs 硬。
	* 关键词：High fidelity, Sharp focus, High contrast, 4K texture.

	铁律五：ASMR 听觉灵魂 (The Law of Audio Soul)
	* 核心原则：声音必须“脆”或“粘”。
	* 强制执行：
	* 拒绝BGM：只有物理原声。
	* 材质声：根据材质决定声音。金属(Clack), 液体(Slurp), 脆物(Snap), 泥土(Sizzle)。
	* 关键词：Crisp Audio, Spatial Sound, ASMR Triggers.

	铁律六：逻辑锁死与代入感 (The Law of Logic Lock & Immersion) [★关键修正★]
	* 核心原则：解决 AI 视频无记忆问题，增强用户代入感。
	* 强制执行：
	1. 逻辑延续 (State Persistence)：
	* 严禁回档：Shot N 切开了，Shot N+1 绝不能复原。
	* 严禁空载：Shot N 装车了，Shot N+1 车上必须有货。
	2. 受体视角 (Immersion/POV)：
	* 必须包含 Inside-Machine View (机器内部视角) 或 Product POV (产品视角)。
	* 描述原料迎面冲向镜头 (Rushing towards camera)，制造视觉冲击。
	3. 视觉衔接 (Visual Bridge)：
	* Shot N 的结束动作必须能顺滑过渡到 Shot N+1 的开始。
	* 关键词：Product POV, Inside-machine view, Logical consistency, Seamless Transition.

	# ==================== 输出格式 (严禁修改) ====================
	请严格按照以下格式生成脚本。格式的准确性直接决定文件名是否正确。

	Shot [序号]/[总数] | [中文标题-用于文件名]
	Sora Prompt (English): (Subject/Material): [...] (Scale/Detail): [...] (Env): [...] (Visual_ASMR): [...] (Physics): [...] (Camera): [...] (Audio_Cue): [...]
	"""

	# ===============================================

	def generate_process_architecture(topic, api_key, architect_prompt):
	    """Stage 1: request a process architecture for ``topic`` from the text model.

	    Args:
	        topic: Product name typed by the user.
	        api_key: Bearer token for the chat-completions endpoint.
	        architect_prompt: System prompt sent along with the request.

	    Returns:
	        The model's reply text on HTTP 200; otherwise a string describing the
	        missing input, the error response body, or the raised exception.
	    """
	    if not topic:
	        return "❌ 请先输入产品名称"
	    if not api_key:
	        return "❌ 请输入 API Key"

	    print(f"🧠 [{TEXT_MODEL}] 正在调用解压百科全书，构建《{topic}》的生产架构...")

	    user_message = f"""
	Design a Comprehensive Industrial Process for: {topic}.

	GOAL: Map the process to the Viral Satisfaction Genres (TikTok/YouTube styles).

	REQUIREMENTS:
	1. Flow: Raw Material -> Prep -> Processing -> Assembly -> Packing.
	2. Diverse Genres: You MUST use a mix of labels: [SLICE], [CRUSH], [FLOW], [CLEAN], [FIT], [SYNC], [PEEL].
	3. Strict Logic: Follow the "Cooking Logic" - Step-by-step state changes.
	"""

	    endpoint = f"{MERCHANT_BASE_URL}/v1/chat/completions"
	    request_headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key.strip()}",
	    }
	    payload = {
	        "model": TEXT_MODEL,
	        "messages": [
	            {"role": "system", "content": architect_prompt},
	            {"role": "user", "content": user_message},
	        ],
	        "temperature": 0.7,
	        "max_tokens": 4096,
	    }

	    try:
	        reply = requests.post(endpoint, headers=request_headers, json=payload, timeout=240)
	        if reply.status_code != 200:
	            return f"Error: {reply.text}"
	        return reply.json()['choices'][0]['message']['content']
	    except Exception as exc:
	        # The result is shown in a textbox, so failures are returned as text.
	        return f"Request Failed: {exc}"

	def generate_script_from_architecture(topic, architecture, shot_count, system_prompt, api_key):
	    """Stage 2: request a shot-by-shot script based on the approved architecture.

	    Args:
	        topic: Product name.
	        architecture: Architecture text produced (and possibly edited) in stage 1.
	        shot_count: Number of shots the model is asked to generate.
	        system_prompt: Director system prompt sent with the request.
	        api_key: Bearer token for the chat-completions endpoint.

	    Returns:
	        The script text on HTTP 200, or None on any failure. The failure
	        reason is printed so it is not lost.
	    """
	    print(f"🧠 [{TEXT_MODEL}] 正在生成爆款解压分镜...")

	    user_content = f"""
	Product: {topic}
	Approved Comprehensive Architecture:
	{architecture}

	Task: Generate exactly {shot_count} shots.

	CRITICAL INSTRUCTIONS:
	1. Task Completion Loop: Every shot description MUST follow the Start -> Action -> Finish structure to create an 8-second dynamic loop.
	2. Logic & Continuity:
	- Ensure state persistence (e.g., if sliced in Shot N, it must remain sliced in Shot N+1).
	- Use "Product POV" / "Inside-machine view" to create immersion.
	3. Viral Quality: Reference YouTube/TikTok viral styles (Hydraulic Press, Factory Loops, Macro Slicing).
	"""

	    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
	    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
	    data = {
	        "model": TEXT_MODEL,
	        "messages": [
	            {"role": "system", "content": system_prompt},
	            {"role": "user", "content": user_content}
	        ],
	        "temperature": 0.7,
	        "max_tokens": 16000
	    }

	    try:
	        response = requests.post(url, headers=headers, json=data, timeout=360)
	        if response.status_code != 200:
	            # Log the status and a bounded slice of the body instead of dropping them.
	            print(f"❌ 脚本生成请求失败 (HTTP {response.status_code}): {response.text[:500]}")
	            return None
	        return response.json()['choices'][0]['message']['content']
	    except Exception as e:
	        # Callers treat None as "failed"; keep that contract but record the cause.
	        print(f"❌ 脚本生成请求异常: {e}")
	        return None

	def extract_prompts_with_titles(script_text):
	    """Parse a generated script into per-shot file names and prompts.

	    Each shot is expected to look like::

	        Shot <n>/<total> | <title>
	        Sora Prompt (English): <prompt text>

	    A shot ends at the next line starting with "Shot", at "::END::", or at the
	    end of the text.

	    Args:
	        script_text: Script text returned by the model; may be empty or None.

	    Returns:
	        A list of ``{"filename": "Shot_NNN_<title>", "prompt": <text>}`` dicts,
	        in script order. Prompts of 10 characters or fewer are skipped.
	    """
	    if not script_text:
	        return []

	    # Repaired pattern: lazy ".*?" gaps, an escaped literal "|" after the shot
	    # counter, escaped parentheses around "English", and a real alternation in
	    # the lookahead that ends each prompt.
	    pattern = (
	        r"Shot\s+(\d+).*?\|\s*([^\n]+).*?"
	        r"Sora Prompt \(English\):\s*(.*?)"
	        r"(?=\n\s*Shot|::END::|$)"
	    )
	    matches = re.findall(pattern, script_text, re.DOTALL | re.IGNORECASE)
	    results = []

	    for shot_num, title, content in matches:
	        # Drop characters that are unsafe in file names, then cap the length.
	        safe_title = re.sub(r'[\\/*?:"<>|]', "", title).strip().replace(" ", "_")
	        if len(safe_title) > 40:
	            safe_title = safe_title[:40]
	        filename_base = f"Shot_{int(shot_num):03d}_{safe_title}"

	        # Flatten the prompt to one line and strip markdown bold markers.
	        clean_p = content.replace("\n", " ").replace("**", "").strip()
	        clean_p = re.sub(r'\s+', ' ', clean_p)

	        if len(clean_p) > 10:
	            results.append({"filename": filename_base, "prompt": clean_p})

	    print(f"🔍 成功解析出 {len(results)} 个带标题的镜头指令")
	    return results

	def generate_single_video_task(prompt, filename_base, save_dir, api_key, topic):
	    """Generate one video for ``prompt`` and save it as ``<filename_base>.mp4``.

	    Args:
	        prompt: Shot prompt text; an empty value skips the task.
	        filename_base: File name without extension, also used as the log tag.
	        save_dir: Directory the .mp4 file is written to.
	        api_key: Bearer token for the chat-completions endpoint.
	        topic: Product name injected into the final prompt.

	    Returns:
	        None when ``prompt`` is empty; otherwise a dict with "status"
	        ("success" or "error") and "msg", plus "file" (saved path) on success.
	        Exceptions are converted into error dicts rather than raised.
	    """
	    if not prompt:
	        return None
	    clean_prompt = prompt.replace("--ar 16:9", "").replace("16:9", "")

	    # Inject TikTok/YouTube viral stress-relief style keywords.
	    final_prompt = (
	        f"Wide screen 16x9 video. Manufacturing {topic}. "
	        f"Oddly Satisfying Compilation style, Viral TikTok ASMR. "
	        f"Dopamine inducing visuals, Stress relief. "
	        f"High-Key Lighting, Bright and Clean aesthetic. "  # this style is usually bright and clean
	        f"Tactile Texture, Physics Simulation. "
	        f"{clean_prompt} --ar 16x9"
	    )

	    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
	    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
	    data = {
	        "model": VEO_MODEL,
	        "messages": [{"role": "user", "content": final_prompt}],
	        "stream": False, "size": VIDEO_SIZE, "seconds": 8, "aspect_ratio": "16:9"
	    }

	    fname = f"{filename_base}.mp4"
	    save_path = os.path.join(save_dir, fname)

	    # Step 1: submit the generation request.
	    try:
	        resp = requests.post(url, headers=headers, json=data, timeout=300)
	    except Exception as e:
	        return {"status": "error", "msg": f"[{filename_base}] ❌ Network Error: {e}"}
	    if resp.status_code != 200:
	        return {"status": "error", "msg": f"[{filename_base}] ❌ API Error: {resp.status_code}"}

	    # Step 2: pull the first URL out of the reply text. The character class
	    # already stops at ")", so no extra trimming is needed.
	    try:
	        content = resp.json()['choices'][0]['message']['content']
	        url_match = re.search(r'(https?://[^\s)"]+)', content)
	    except Exception as e:
	        return {"status": "error", "msg": f"[{filename_base}] ❌ Parse Error: {e}"}
	    if not url_match:
	        return {"status": "error", "msg": f"[{filename_base}] ❌ No URL found"}

	    # Step 3: download and store the video. A timeout keeps a stalled download
	    # from blocking the worker forever, and a non-200 reply is reported instead
	    # of being written to disk as if it were a video.
	    try:
	        video_resp = requests.get(url_match.group(1), timeout=300)
	        if video_resp.status_code != 200:
	            return {"status": "error", "msg": f"[{filename_base}] ❌ Download Error: {video_resp.status_code}"}
	        with open(save_path, "wb") as f:
	            f.write(video_resp.content)
	    except Exception as e:
	        return {"status": "error", "msg": f"[{filename_base}] ❌ Download Error: {e}"}

	    return {"status": "success", "file": save_path, "msg": f"✅ 已保存: {fname}"}

	# ================= 流程函数 =================

	def step2_generate_script(topic, architecture, count, system_prompt, api_key):
	    """Stage 2: generate the script only, without rendering any video.

	    Returns:
	        A ``(log_text, script_text)`` tuple; ``script_text`` is None when an
	        input is missing or script generation fails.
	    """
	    if not architecture:
	        return "❌ 请先生成并确认工艺架构", None
	    if not api_key:
	        return "❌ 请先输入 API Key", None

	    log_lines = [f"🚀 正在为《{topic}》生成 {count} 个全流派解压分镜..."]
	    print("\n".join(log_lines))

	    script_text = generate_script_from_architecture(topic, architecture, count, system_prompt, api_key)
	    if not script_text:
	        log_lines.append("❌ 脚本生成失败，请重试。")
	        return "\n".join(log_lines), None

	    # Parse once up front so the user sees how many shots will be rendered.
	    parsed_shots = extract_prompts_with_titles(script_text)
	    log_lines += [
	        "✅ 脚本生成成功！",
	        f"🔍 预解析结果：检测到 {len(parsed_shots)} 个有效镜头。",
	        "👉 请检查脚本，确认是否包含 [SLICE], [FLOW], [FIT] 等多种解压风格。",
	    ]
	    return "\n".join(log_lines), script_text

	def step3_generate_videos(topic, script_text, api_key, progress=gr.Progress()):
	    """Stage 3: render every shot of the script concurrently and zip the results.

	    Generator yielding ``(log_text, generated_files, zip_path)`` tuples: a log
	    update after each step and each finished shot, and the archive path in the
	    final tuple when at least one video was saved.
	    """
	    if not script_text:
	        yield "❌ 脚本内容为空，请先执行第二步", None, None
	        return
	    if not api_key:
	        yield "❌ 请先输入 API Key", None, None
	        return

	    started_at = int(time.time())
	    if topic:
	        safe_topic = re.sub(r'[\\/*?:"<>\|]', "", topic).replace(" ", "_")
	    else:
	        safe_topic = "Untitled"
	    session_dir = os.path.join("AutoSaved_Videos", f"{safe_topic}_{started_at}")
	    os.makedirs(session_dir, exist_ok=True)

	    log_lines = ["🚀 启动视频生成任务...", f"📂 归档目录: {os.path.abspath(session_dir)}"]
	    yield "\n".join(log_lines), None, None

	    # Keep a copy of the script next to the rendered videos.
	    script_path = os.path.join(session_dir, "script.txt")
	    with open(script_path, "w", encoding="utf-8") as script_file:
	        script_file.write(script_text)

	    shots = extract_prompts_with_titles(script_text)
	    if not shots:
	        log_lines.append("❌ 脚本格式解析失败，未找到有效 Prompt")
	        yield "\n".join(log_lines), None, None
	        return

	    total = len(shots)
	    log_lines.append(f"🎥 开始并发渲染 {total} 个镜头...")
	    yield "\n".join(log_lines), None, None

	    saved_videos = []

	    # One worker per shot, as in the original: all requests are submitted at once.
	    with ThreadPoolExecutor(max_workers=total) as pool:
	        pending = [
	            pool.submit(generate_single_video_task, shot['prompt'], shot['filename'], session_dir, api_key, topic)
	            for shot in shots
	        ]

	        for done_count, finished in enumerate(as_completed(pending), start=1):
	            outcome = finished.result()
	            progress(done_count / total, desc=f"渲染中 {done_count}/{total}")
	            if outcome:
	                log_lines.append(outcome['msg'])
	                if outcome['status'] == 'success':
	                    saved_videos.append(outcome['file'])
	            yield "\n".join(log_lines), saved_videos, None

	    if not saved_videos:
	        log_lines.append("\n❌ 全部失败，无视频生成")
	        yield "\n".join(log_lines), None, None
	    else:
	        saved_videos.sort()
	        zip_path = f"{session_dir}.zip"
	        shutil.make_archive(session_dir, 'zip', session_dir)
	        log_lines.append("\n🎉 全部完成！已打包 ZIP")
	        yield "\n".join(log_lines), saved_videos, zip_path

	# === UI ===
	# Builds the Gradio interface, wires the three buttons to the stage functions
	# and launches the app.
	# NOTE(review): indentation was lost in this listing, so the nesting of the
	# Row / Column / Accordion containers cannot be read from it — restore it from
	# the original file before running.
	with gr.Blocks(title="Veo Viral ASMR (逻辑锁死版)") as app:
	gr.Markdown("# 🏭 首席工艺逻辑 & Viral ASMR 工厂")
	gr.Markdown("配置：做菜法则工艺逻辑 + 热门对标(Hydraulic/Factory/Macro) + 逻辑锁死(POV)")

	# API key field, pre-filled with DEFAULT_API_KEY and masked as a password.
	with gr.Row(variant="panel"):
	api_key_input = gr.Textbox(
	label="🔑 API Key (必填)",
	value=DEFAULT_API_KEY,
	type="password",
	placeholder="在此输入您的 API Key"
	)

	# Step 1 controls: product name, editable architect prompt, and the button
	# that requests the process architecture.
	with gr.Row(variant="panel"):
	with gr.Column(scale=1):
	topic_input = gr.Textbox(label="1. 输入产品名称", placeholder="例如：Soap, Kinetic Sand, Juice, Candy, Gears")
	with gr.Accordion("🛠️ 角色1：首席工艺逻辑师 (含ASMR流派)", open=False):
	architect_prompt_input = gr.Textbox(label="Architect System Prompt", value=DEFAULT_ARCHITECT_PROMPT, lines=8)
	plan_btn = gr.Button("🛠️ 第一步：构建严格工艺流程", variant="secondary")

	# Editable architecture text; its content is an input to the script step.
	with gr.Column(scale=2):
	architecture_output = gr.Textbox(
	label="2. 确认架构 (检查：时间严密性 + 解压标签)",
	lines=10,
	placeholder="点击左侧按钮生成架构...",
	interactive=True
	)

	# Step 2 / step 3 controls: shot count slider and the two action buttons.
	with gr.Row():
	with gr.Column(scale=1):
	count_slider = gr.Slider(minimum=1, maximum=120, value=50, step=1, label="3. 镜头数量")

	script_btn = gr.Button("📝 第二步：生成爆款解压脚本", variant="primary")
	video_btn = gr.Button("🎬 第三步：开始批量渲染视频", variant="stop")

	with gr.Column(scale=2):
	with gr.Accordion("🎭 角色2：Viral ASMR 导演 (热门对标+POV)", open=False):
	system_prompt_input = gr.Textbox(label="Director System Prompt", value=DEFAULT_DIRECTOR_PROMPT, lines=8)

	# Outputs: run log, editable final script, and the download component.
	with gr.Row():
	log_out = gr.Textbox(label="运行日志", lines=12)
	script_out = gr.Textbox(label="最终脚本", lines=12, interactive=True)
	zip_out = gr.File(label="下载生成结果 (文件列表 & ZIP)")

	# Event bindings
	plan_btn.click(
	generate_process_architecture,
	inputs=[topic_input, api_key_input, architect_prompt_input],
	outputs=[architecture_output]
	)

	script_btn.click(
	step2_generate_script,
	inputs=[topic_input, architecture_output, count_slider, system_prompt_input, api_key_input],
	outputs=[log_out, script_out]
	)

	# NOTE(review): zip_out is listed twice, so both the file list and the ZIP
	# path yielded by step3_generate_videos target the same component — confirm
	# whether a separate file-list component was intended.
	video_btn.click(
	step3_generate_videos,
	inputs=[topic_input, script_out, api_key_input],
	outputs=[log_out, zip_out, zip_out]
	)

	app.launch()