Spaces:

194130157a
/

486

Running

File size: 20,938 Bytes

728d6a0

import os
import sys 
import subprocess
import re
import time
import zipfile
import json
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed

# 1. Auto-install dependencies
def ensure_dependencies():
    """Install gradio and requests with pip if either cannot be imported.

    Each package is imported as an availability probe. If any probe raises
    ImportError, ``pip install gradio requests`` is run with the current
    interpreter; ``subprocess.check_call`` raises ``CalledProcessError`` when
    pip exits with a non-zero status.
    """
    required = ("gradio", "requests")
    try:
        for module_name in required:
            __import__(module_name)
    except ImportError:
        pip_command = [sys.executable, "-m", "pip", "install", *required]
        subprocess.check_call(pip_command)

ensure_dependencies()

import gradio as gr
import requests

# ================= Default configuration =================
# SECURITY: a secret API key used to be hard-coded on the next line, which
# leaks it to anyone who can read this file and to every visitor of the UI
# (the value pre-fills the API key textbox). The default is now taken from the
# MERCHANT_API_KEY environment variable and is empty when that is unset, so
# users must supply their own key. Any key that was ever committed here should
# be considered compromised and rotated.
DEFAULT_API_KEY = os.environ.get("MERCHANT_API_KEY", "")
# Base URL of the OpenAI-compatible gateway used for both text and video calls.
MERCHANT_BASE_URL = "https://xingjiabiapi.com"
# Model name sent for video generation requests.
VEO_MODEL = "veo_3_1-fast"
# Value sent as the "size" field of video generation requests.
VIDEO_SIZE = "16x9"
# Model name sent for architecture and script generation requests.
TEXT_MODEL = "gemini-3-pro-preview-thinking"

# ===============================================
# --- Role 1: chief process-logic architect (kept unchanged) ---
# ===============================================
DEFAULT_ARCHITECT_PROMPT = """
你是一位极度严谨的**工业工艺逻辑师**。
你的任务是将产品的生产过程拆解为**“基于机器工位的严格时间序列”**。
目标：为一部 120 镜头 (约 16 分钟) 的高信息密度纪录片提供坚实支撑。

**核心指导思想：做菜法则 (The Cooking Logic)**
工业生产严禁跳跃，必须像做菜一样按顺序执行：备料 -> 清洗 -> 加工 -> 熟化 -> 包装。

**必须执行的逻辑铁律：**

1.  **时间线性的绝对严密性**：
    * 步骤必须严格按照时间发生。

2.  **颗粒度标准：机器单元级 (Machine Unit Level)**：
    * **核心对象**：描述具体的**机器名称**或**标准工序动作**，而非微观物理现象。
    * *Good (完美)*: "**高压喷淋塔 (Spray Tower)** -> **滚筒毛刷机 (Rotary Brusher)** -> **风干隧道 (Drying Tunnel)**。"

3.  **状态的不可逆性 (State Persistence)**：
    * 一旦物料在 Step N 被“去皮”，Shot N+1 必须明确描述为“去皮后的果肉”。
    * 严禁在后续步骤出现之前的状态。

4.  **连接动作 (The Link)**：
    * 每个核心机器之间，必须包含**“传送带/提升机/机械臂”**等连接步骤。

5.  **全面解压流派库 (The Omnibus of Satisfaction Genres)**
    * **强制要求**：在描述上述每一步机器动作时，根据其物理特性，**必须**从以下流派中选择最匹配的一个标签，作为该步骤的视觉风格定义：

    * **5.1 材质改变类 (Material Transformation)**
        * **[SLICE] (切削派)**：像切太空沙/切肥皂一样顺滑。 -> *适用：切割面团、金属切削、修边。*
        * **[CRUSH] (破坏派)**：像液压机压扁物体一样的形变与爆裂。 -> *适用：榨汁、冲压、粉碎。*
        * **[PEEL] (剥离派)**：像撕掉新屏幕保护膜一样的极度舒适。 -> *适用：去皮、脱模、撕标。*

    * **5.2 流体与表面类 (Fluid & Surface)**
        * **[FLOW] (流体派)**：像混合油漆或倒蜂蜜一样的丝滑粘稠。 -> *适用：搅拌、淋面、浇筑、巧克力。*
        * **[CLEAN] (净化派)**：像高压水枪洗地毯一样的“从脏到净”。 -> *适用：高压清洗原料、去污。*

    * **5.3 秩序与结构类 (Order & Structure)**
        * **[FIT] (强迫症派)**：像俄罗斯方块一样严丝合缝的 Perfect Fit。 -> *适用：装盒、盖盖子、模具闭合。*
        * **[SYNC] (循环派)**：像 3D 渲染动画一样的绝对同步与无限循环。 -> *适用：几十台机器同步运作、传送带阵列。*

**增强逻辑指令：**

A. **冗余与质控**
* **多级处理**：关键步骤必须包含多级清洗或过滤。
* **强制检测门**：进入不可逆工序前，必须有视觉检测或剔除步骤。

B. **智能操控**
* **仪表说明**：明确提及 PLC、HMI 面板或 PID 控制回路。
* **传感器**：明确提及温度、重量、视觉传感器。

C. **自动化设备选型**
* **抓取/分拣**：优先使用 **Delta Robots (高速并联蜘蛛手)**。
* **搬运/物流**：优先使用 **AGV/AMR (自动导引车)** 而非人工叉车。
* **精密组装**：优先使用 **6-Axis Articulated Arms (六轴机械臂)**。

**输出要求**：
请严格按上述逻辑，输出一份包含**至少 40 个详细 Sequence** 的工艺架构。
**关键**：确保整个流程覆盖了第 5 点中的多种解压流派，不要单调。让视频在“破坏的爽”和“秩序的爽”之间交替。
"""

# ===============================================
# --- Role 2: viral oddly-satisfying director (benchmarked against trending TikTok/YouTube content) ---
# ===============================================
DEFAULT_DIRECTOR_PROMPT = """
你是一位熟知 **YouTube/TikTok 热门解压 (Viral Oddly Satisfying)** 趋势的视频导演。
你的目标是生成能获得**千万播放量 (Viral Hit)** 的 ASMR 视频。
不要被所谓的“艺术构图”束缚，一切以**“视觉爽感 (Visual Satisfaction)”**和**“听觉触发 (Audio Trigger)”**为最高标准。

**🔥 六大爆款流量铁律 (The 6 Iron Laws of Viral Hits)**

**铁律一：窒息的密度 (The Law of Visual Saturation)**
* **对标风格**：工厂流水线实拍 (Factory Loops)、糖果制作 (Candy Making)。
* **核心原则**：画面必须“满”。
* **强制执行**：
    * **数量级**：成千上万 (Thousands)。
    * **画面填充**：屏幕边缘全是产品，不留死角 (Edge-to-edge)。
    * **关键词**：Endless Sea, Avalanche, Jam-packed, Infinite array.

**铁律二：真实的破坏与重组 (The Law of Raw Physics)**
* **对标风格**：Hydraulic Press Channel (液压机)、Macro Room (微距破坏)。
* **核心原则**：不一定非要完美无瑕，粗暴的物理反馈也很解压。
* **强制执行**：
    * **破坏感**：强调挤压瞬间的**形变 (Deformation)** 和 **爆裂 (Bursting)**。
    * **重组感**：切片必须整齐，断层必须清晰。
    * **关键词**：Squish, Crunch, Flatten, Smooth Slice, Physics simulation.

**铁律三：无缝循环与秩序 (The Law of Perfect Loops)**
* **对标风格**：3D Render Loops, Conveyor Belt ASMR.
* **核心原则**：像动图一样完美的循环，治愈强迫症。
* **强制执行**：
    * **机械节奏**：所有动作零误差 (Zero Tolerance)，完全同步。
    * **顺滑**：没有卡顿，只有丝滑的流动。
    * **关键词**：Clockwork precision, Synchronized motion, Seamless flow, Perfect Fit.

**铁律四：爆款视觉美学 (The Law of Viral Aesthetics)**
* **核心原则**：不局限于“极简/柔光”，而是**“高清晰度 + 强对比”**。
* **强制执行**：
    * **清晰度**：**Crystal Clear (极致清晰)**。
    * **风格多样性**：可以是 **Bright Studio (明亮影棚)** 也可以是 **Raw Industrial (硬核工业)**，关键是主体突出。
    * **对比**：脏 vs 净，乱 vs 齐，软 vs 硬。
    * **关键词**：High fidelity, Sharp focus, High contrast, 4K texture.

**铁律五：ASMR 听觉灵魂 (The Law of Audio Soul)**
* **核心原则**：声音必须“脆”或“粘”。
* **强制执行**：
    * **拒绝BGM**：只有物理原声。
    * **材质声**：根据材质决定声音。金属(Clack), 液体(Slurp), 脆物(Snap), 泥土(Sizzle)。
    * **关键词**：Crisp Audio, Spatial Sound, ASMR Triggers.

**铁律六：逻辑锁死与代入感 (The Law of Logic Lock & Immersion) [★关键修正★]**
* **核心原则**：解决 AI 视频无记忆问题，增强用户代入感。
* **强制执行**：
    1.  **逻辑延续 (State Persistence)**：
        * **严禁回档**：Shot N 切开了，Shot N+1 绝不能复原。
        * **严禁空载**：Shot N 装车了，Shot N+1 车上必须有货。
    2.  **受体视角 (Immersion/POV)**：
        * 必须包含 **Inside-Machine View (机器内部视角)** 或 **Product POV (产品视角)**。
        * 描述原料**迎面冲向镜头 (Rushing towards camera)**，制造视觉冲击。
    3.  **视觉衔接 (Visual Bridge)**：
        * Shot N 的结束动作必须能顺滑过渡到 Shot N+1 的开始。
    * **关键词**：Product POV, Inside-machine view, Logical consistency, Seamless Transition.

# ==================== 输出格式 (严禁修改) ====================
请严格按照以下格式生成脚本。**格式的准确性直接决定文件名是否正确。**

Shot [序号]/[总数] | [中文标题-用于文件名]
Sora Prompt (English): (Subject/Material): [...] (Scale/Detail): [...] (Env): [...] (Visual_ASMR): [...] (Physics): [...] (Camera): [...] (Audio_Cue): [...]
"""

# ===============================================

def generate_process_architecture(topic, api_key, architect_prompt):
    """Phase 1: ask the text model for a production-process architecture.

    Args:
        topic: Product name entered by the user.
        api_key: Bearer token for the API gateway.
        architect_prompt: System prompt sent with the request.

    Returns:
        The model's reply text on success. On any failure a human-readable
        string is returned instead of raising: a validation message for a
        missing topic/key, ``"Error: ..."`` for a non-200 or malformed
        response, or ``"Request Failed: ..."`` when the request itself fails.
    """
    # Normalise first: whitespace-only input is as unusable as empty input and
    # would otherwise be sent to the API (with an empty "Bearer " header).
    topic = (topic or "").strip()
    api_key = (api_key or "").strip()
    if not topic:
        return "❌ 请先输入产品名称"
    if not api_key:
        return "❌ 请输入 API Key"

    print(f"🧠 [{TEXT_MODEL}] 正在调用解压百科全书，构建《{topic}》的生产架构...")

    user_content = f"""
    Design a Comprehensive Industrial Process for: {topic}.
     
    GOAL: Map the process to the Viral Satisfaction Genres (TikTok/YouTube styles).
     
    REQUIREMENTS:
    1.  **Flow**: Raw Material -> Prep -> Processing -> Assembly -> Packing.
    2.  **Diverse Genres**: You MUST use a mix of labels: [SLICE], [CRUSH], [FLOW], [CLEAN], [FIT], [SYNC], [PEEL].
    3.  **Strict Logic**: Follow the "Cooking Logic" - Step-by-step state changes.
    """

    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": architect_prompt},
            {"role": "user", "content": user_content}
        ],
        "temperature": 0.7,
        "max_tokens": 4096
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=240)
    except Exception as e:
        # UI boundary: the return value is shown in a textbox, so report the
        # failure as text rather than letting the exception escape.
        return f"Request Failed: {e}"

    if response.status_code != 200:
        return f"Error: {response.text}"

    try:
        return response.json()['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A 200 reply without the expected shape is a response problem, not a
        # network problem; include the body so the user can see what came back.
        return f"Error: unexpected response format ({e!r}): {response.text}"

def generate_script_from_architecture(topic, architecture, shot_count, system_prompt, api_key):
    """Phase 2: ask the text model to turn an architecture into a shot script.

    Args:
        topic: Product name, embedded in the user message.
        architecture: Approved process architecture text, embedded verbatim.
        shot_count: Number of shots the model is asked to generate.
        system_prompt: Director system prompt sent with the request.
        api_key: Bearer token for the API gateway (surrounding whitespace is
            stripped).

    Returns:
        The model's reply text, or ``None`` on any failure. The failure reason
        is printed to the console so it is not lost.
    """
    print(f"🧠 [{TEXT_MODEL}] 正在生成爆款解压分镜...")

    user_content = f"""
    Product: {topic}
    Approved Comprehensive Architecture:
    {architecture}
        
    Task: Generate exactly {shot_count} shots.
     
    CRITICAL INSTRUCTIONS:
    1. **Task Completion Loop**: Every shot description MUST follow the **Start -> Action -> Finish** structure to create an 8-second dynamic loop.
    2. **Logic & Continuity**: 
       - Ensure state persistence (e.g., if sliced in Shot N, it must remain sliced in Shot N+1).
       - Use "Product POV" / "Inside-machine view" to create immersion.
    3. **Viral Quality**: Reference YouTube/TikTok viral styles (Hydraulic Press, Factory Loops, Macro Slicing).
    """

    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": TEXT_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content}
        ],
        "temperature": 0.7,
        "max_tokens": 16000
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=360)
        if response.status_code != 200:
            # Callers only see None, so surface the reason on the console.
            print(f"❌ 脚本请求失败 (HTTP {response.status_code}): {response.text[:500]}")
            return None
        return response.json()['choices'][0]['message']['content']
    except Exception as e:
        # Best-effort contract: never raise into the UI handler, but do not
        # swallow the cause silently either.
        print(f"❌ 脚本请求异常: {e!r}")
        return None

def extract_prompts_with_titles(script_text):
    """Parse a generated script into per-shot filename/prompt records.

    Each shot is expected as a ``Shot <n>... | <title>`` header line followed
    by a ``Sora Prompt (English):`` body that runs until the next line starting
    with ``Shot``, an ``::END::`` marker, or the end of the text.

    Args:
        script_text: Raw script text; a falsy value yields an empty list.

    Returns:
        A list of ``{"filename": ..., "prompt": ...}`` dicts in script order.
        ``filename`` is ``Shot_<NNN>_<title>`` with filesystem-hostile
        characters removed, spaces turned into underscores and the title
        capped at 40 characters. Shots whose cleaned prompt is 10 characters
        or shorter are dropped.
    """
    if not script_text:
        return []

    shot_regex = re.compile(
        r"Shot\s+(\d+).*?\|\s*([^\n]+).*?Sora Prompt \(English\):\s*(.*?)(?=\n\s*Shot|::END::|$)",
        re.DOTALL | re.IGNORECASE,
    )

    parsed_shots = []
    for found in shot_regex.finditer(script_text):
        number, raw_title, raw_prompt = found.groups()

        # Build a filesystem-safe file stem from the shot number and title.
        title_slug = re.sub(r'[\\/*?:"<>|]', "", raw_title).strip().replace(" ", "_")[:40]
        file_stem = f"Shot_{int(number):03d}_{title_slug}"

        # Drop markdown bold markers and flatten all whitespace to single spaces.
        prompt = re.sub(r'\s+', ' ', raw_prompt.replace("**", "")).strip()

        if len(prompt) <= 10:
            continue
        parsed_shots.append({"filename": file_stem, "prompt": prompt})

    print(f"🔍 成功解析出 {len(parsed_shots)} 个带标题的镜头指令")
    return parsed_shots

def generate_single_video_task(prompt, filename_base, save_dir, api_key, topic):
    """Generate one video for ``prompt`` and save it as ``<filename_base>.mp4``.

    The prompt is wrapped with fixed style keywords, sent to the video model,
    and the first URL found in the reply is downloaded into ``save_dir``.

    Args:
        prompt: Shot prompt text; a falsy value makes the function return None.
        filename_base: File stem (no extension) for the saved video and for
            log messages.
        save_dir: Existing directory the video is written to.
        api_key: Bearer token for the API gateway (surrounding whitespace is
            stripped).
        topic: Product name, embedded in the final prompt.

    Returns:
        ``None`` for an empty prompt; otherwise a dict with ``"status"``
        (``"success"`` or ``"error"``) and ``"msg"``, plus ``"file"`` (the
        saved path) on success. This function never raises: it runs inside a
        thread pool and the caller reads its result directly.
    """
    if not prompt:
        return None
    clean_prompt = prompt.replace("--ar 16:9", "").replace("16:9", "")

    # Inject TikTok/YouTube viral "oddly satisfying" style keywords
    final_prompt = (
        f"Wide screen 16x9 video. Manufacturing {topic}. "
        f"**Oddly Satisfying Compilation style, Viral TikTok ASMR.** "
        f"**Dopamine inducing visuals, Stress relief.** "
        f"**High-Key Lighting, Bright and Clean aesthetic.** "  # this style is usually bright and clean
        f"**Tactile Texture, Physics Simulation.** "
        f"{clean_prompt} --ar 16x9"
    )

    url = f"{MERCHANT_BASE_URL}/v1/chat/completions"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key.strip()}"}
    data = {
        "model": VEO_MODEL,
        "messages": [{"role": "user", "content": final_prompt}],
        "stream": False, "size": VIDEO_SIZE, "seconds": 8, "aspect_ratio": "16:9"
    }

    fname = f"{filename_base}.mp4"
    save_path = os.path.join(save_dir, fname)

    # 1) Ask the model to render the video.
    try:
        resp = requests.post(url, headers=headers, json=data, timeout=300)
    except Exception as e:
        return {"status": "error", "msg": f"[{filename_base}] ❌ Network Error: {e}"}
    if resp.status_code != 200:
        return {"status": "error", "msg": f"[{filename_base}] ❌ API Error: {resp.status_code}"}

    # 2) Pull the video URL out of the chat-style reply.
    try:
        content = resp.json()['choices'][0]['message']['content']
        url_match = re.search(r'(https?://[^\s)"]+)', content)
    except Exception as e:
        return {"status": "error", "msg": f"[{filename_base}] ❌ Parse Error: {e}"}
    if not url_match:
        return {"status": "error", "msg": f"[{filename_base}] ❌ No URL found"}
    video_url = url_match.group(1)

    # 3) Download the file. A timeout keeps a stalled server from hanging this
    #    worker forever, the status check avoids saving an error page as .mp4,
    #    and streaming avoids holding the whole video in memory.
    try:
        with requests.get(video_url, stream=True, timeout=300) as video_resp:
            video_resp.raise_for_status()
            with open(save_path, "wb") as f:
                for chunk in video_resp.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
    except Exception as e:
        # Do not leave a truncated file behind for the later ZIP step.
        try:
            if os.path.exists(save_path):
                os.remove(save_path)
        except OSError:
            pass
        return {"status": "error", "msg": f"[{filename_base}] ❌ Download Error: {e}"}

    return {"status": "success", "file": save_path, "msg": f"✅ 已保存: {fname}"}

# ================= Pipeline step functions =================

def step2_generate_script(topic, architecture, count, system_prompt, api_key):
    """Phase 2 UI handler: generate the shot script without rendering videos.

    Args:
        topic: Product name shown in the log and sent to the model.
        architecture: Confirmed process architecture text (required).
        count: Number of shots to request.
        system_prompt: Director system prompt.
        api_key: API key (required).

    Returns:
        A ``(log_text, script_text)`` tuple; ``script_text`` is ``None`` when
        validation or generation fails.
    """
    # Validate required inputs, in the same order as the messages are defined.
    required_inputs = (
        (architecture, "❌ 请先生成并确认工艺架构"),
        (api_key, "❌ 请先输入 API Key"),
    )
    for value, complaint in required_inputs:
        if not value:
            return complaint, None

    banner = f"🚀 正在为《{topic}》生成 {count} 个全流派解压分镜..."
    print(banner)

    script = generate_script_from_architecture(topic, architecture, count, system_prompt, api_key)
    if not script:
        return "\n".join([banner, "❌ 脚本生成失败，请重试。"]), None

    # Dry-run the parser so the user sees how many shots will be rendered.
    shots = extract_prompts_with_titles(script)
    report = [
        banner,
        f"✅ 脚本生成成功！",
        f"🔍 预解析结果：检测到 {len(shots)} 个有效镜头。",
        f"👉 请检查脚本，确认是否包含 [SLICE], [FLOW], [FIT] 等多种解压风格。",
    ]
    return "\n".join(report), script

def step3_generate_videos(topic, script_text, api_key, progress=gr.Progress()):
    """Phase 3 UI handler: render every shot of the script concurrently.

    This is a generator: each yield is a ``(log_text, files, zip_path)`` tuple
    where ``files`` is the list of videos saved so far (or ``None``) and
    ``zip_path`` is only set on the final successful yield.

    Args:
        topic: Product name; used for the session directory name and passed to
            each video task.
        script_text: Script to parse into shots (required).
        api_key: API key (required).
        progress: Gradio progress tracker, called after each finished shot.
    """
    if not script_text:
        yield "❌ 脚本内容为空，请先执行第二步", None, None
        return
    if not api_key:
        yield "❌ 请先输入 API Key", None, None
        return

    # One directory per run: <sanitised topic>_<unix timestamp>.
    started_at = int(time.time())
    if topic:
        topic_slug = re.sub(r'[\\/*?:"<>|]', "", topic).replace(" ", "_")
    else:
        topic_slug = "Untitled"
    session_dir = os.path.join("AutoSaved_Videos", f"{topic_slug}_{started_at}")
    os.makedirs(session_dir, exist_ok=True)

    log_lines = [f"🚀 启动视频生成任务...", f"📂 归档目录: {os.path.abspath(session_dir)}"]
    yield "\n".join(log_lines), None, None

    # Keep a copy of the script next to the videos it produced.
    script_path = os.path.join(session_dir, "script.txt")
    with open(script_path, "w", encoding="utf-8") as script_file:
        script_file.write(script_text)

    shots = extract_prompts_with_titles(script_text)
    if not shots:
        log_lines.append("❌ 脚本格式解析失败，未找到有效 Prompt")
        yield "\n".join(log_lines), None, None
        return

    total = len(shots)
    log_lines.append(f"🎥 开始并发渲染 {total} 个镜头...")
    yield "\n".join(log_lines), None, None

    saved_paths = []

    # One worker per shot, so all shots are submitted at once.
    with ThreadPoolExecutor(max_workers=total) as pool:
        pending = [
            pool.submit(
                generate_single_video_task,
                shot['prompt'], shot['filename'], session_dir, api_key, topic,
            )
            for shot in shots
        ]

        for done_count, finished in enumerate(as_completed(pending), start=1):
            outcome = finished.result()
            progress(done_count / total, desc=f"渲染中 {done_count}/{total}")
            if not outcome:
                continue
            log_lines.append(outcome['msg'])
            if outcome['status'] == 'success':
                saved_paths.append(outcome['file'])
            yield "\n".join(log_lines), saved_paths, None

    if not saved_paths:
        log_lines.append("\n❌ 全部失败，无视频生成")
        yield "\n".join(log_lines), None, None
        return

    saved_paths.sort()
    # make_archive appends ".zip" to the base name, giving <session_dir>.zip.
    shutil.make_archive(session_dir, 'zip', session_dir)
    log_lines.append(f"\n🎉 全部完成！已打包 ZIP")
    yield "\n".join(log_lines), saved_paths, f"{session_dir}.zip"

# === UI ===
# Builds the Gradio Blocks app: an API key row, an architecture-planning row,
# a script/render control row and an output row, then wires the three buttons
# to the three pipeline phases and starts the server.
with gr.Blocks(title="Veo Viral ASMR (逻辑锁死版)") as app:
    gr.Markdown("# 🏭 首席工艺逻辑 & Viral ASMR 工厂")
    gr.Markdown("配置：**做菜法则工艺逻辑** + **热门对标(Hydraulic/Factory/Macro)** + **逻辑锁死(POV)**")

    # API key entry; pre-filled with DEFAULT_API_KEY and masked as a password field.
    with gr.Row(variant="panel"):
        api_key_input = gr.Textbox(
            label="🔑 API Key (必填)", 
            value=DEFAULT_API_KEY, 
            type="password",
            placeholder="在此输入您的 API Key"
        )

    # Phase 1: product name + editable architect prompt on the left,
    # generated (and user-editable) architecture on the right.
    with gr.Row(variant="panel"):
        with gr.Column(scale=1):
            topic_input = gr.Textbox(label="1. 输入产品名称", placeholder="例如：Soap, Kinetic Sand, Juice, Candy, Gears")
            with gr.Accordion("🛠️ 角色1：首席工艺逻辑师 (含ASMR流派)", open=False):
                architect_prompt_input = gr.Textbox(label="Architect System Prompt", value=DEFAULT_ARCHITECT_PROMPT, lines=8)
            plan_btn = gr.Button("🛠️ 第一步：构建严格工艺流程", variant="secondary")

        with gr.Column(scale=2):
            architecture_output = gr.Textbox(
                label="2. 确认架构 (检查：时间严密性 + 解压标签)", 
                lines=10, 
                placeholder="点击左侧按钮生成架构...",
                interactive=True
            )

    # Phases 2 and 3: shot count and action buttons on the left,
    # editable director prompt on the right.
    with gr.Row():
        with gr.Column(scale=1):
            count_slider = gr.Slider(minimum=1, maximum=120, value=50, step=1, label="3. 镜头数量")

            script_btn = gr.Button("📝 第二步：生成爆款解压脚本", variant="primary")
            video_btn = gr.Button("🎬 第三步：开始批量渲染视频", variant="stop")

        with gr.Column(scale=2):
            with gr.Accordion("🎭 角色2：Viral ASMR 导演 (热门对标+POV)", open=False):
                system_prompt_input = gr.Textbox(label="Director System Prompt", value=DEFAULT_DIRECTOR_PROMPT, lines=8)

    # Outputs: run log, the generated script (editable before rendering), and downloads.
    with gr.Row():
        log_out = gr.Textbox(label="运行日志", lines=12)
        script_out = gr.Textbox(label="最终脚本", lines=12, interactive=True)
        zip_out = gr.File(label="下载生成结果 (文件列表 & ZIP)")

    # Wire up events
    # Step 1: the returned text (architecture or error message) goes into the architecture box.
    plan_btn.click(
        generate_process_architecture,
        inputs=[topic_input, api_key_input, architect_prompt_input],
        outputs=[architecture_output]
    )

    # Step 2: returns (log text, script text).
    script_btn.click(
        step2_generate_script,
        inputs=[topic_input, architecture_output, count_slider, system_prompt_input, api_key_input],
        outputs=[log_out, script_out]
    )

    # Step 3: generator yielding (log text, file list, zip path).
    # NOTE(review): zip_out is listed twice, so both the file list and the zip
    # path yielded by step3_generate_videos target the same File component —
    # confirm which of the two values Gradio ends up displaying.
    video_btn.click(
        step3_generate_videos,
        inputs=[topic_input, script_out, api_key_input],
        outputs=[log_out, zip_out, zip_out]
    )

# Starts the Gradio server at import time (there is no __main__ guard).
app.launch()