Spaces:
Running
Running
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from transformers.models.qwen2_5_omni import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor | |
| import warnings | |
| import os | |
| import time | |
| import re | |
| import base64 | |
| import datetime | |
| import uuid | |
| import logging | |
| from typing import List, Dict, Tuple, Optional | |
| from PIL import Image | |
| from huggingface_hub import snapshot_download | |
| from swift.llm import PtEngine, RequestConfig, InferRequest | |
# --- Optional dependency check ---
# Each third-party name is bound to None when its import fails so that
# later code can probe availability with simple `is None` checks.
try:
    import cv2
    from moviepy.editor import VideoFileClip, concatenate_videoclips
    from openai import OpenAI
    from google import genai
    from google.genai import types
except ImportError as e:
    print(f"❌ 缺少必要库: {e}")
    print("请运行: pip install opencv-python moviepy openai google-genai")
    # Bug fix: previously only cv2/VideoFileClip/OpenAI/genai were reset,
    # leaving `concatenate_videoclips` and `types` undefined on failure, so
    # the `is None` guards in stitch_videos()/GoogleVeoAPI raised NameError
    # instead of their intended, user-friendly errors.
    cv2 = None
    VideoFileClip = None
    concatenate_videoclips = None
    OpenAI = None
    genai = None
    types = None
# --- Environment setup ---
# NOTE(review): these env vars look like knobs for the multimodal model
# stack (audio output, pixel budgets); their consumers are outside this
# file — confirm before changing values.
os.environ['ENABLE_AUDIO_OUTPUT'] = '0'
os.environ['VIDEO_TOTAL_PIXELS'] = '0'
os.environ['IMAGE_FACTOR'] = '1'
os.environ['MAX_PIXELS'] = '1024'  # lowered to the minimum
warnings.filterwarnings("ignore")
os.environ['PYTHONWARNINGS'] = 'ignore'
# Module-wide logger used by the video API wrappers below.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
LOGGER = logging.getLogger(__name__)
# ==========================================
# PART 0: Configuration constants & demo data
# ==========================================
# 1. Per-model generation parameters: allowed output sizes plus the
#    Gradio slider range/label for a single shot's duration.
MODEL_CONFIGS = {
    "sora-2": {
        "sizes": ["1792x1024", "1024x1792", "1280x720", "720x1280"],
        "seconds_range": {"minimum": 4, "maximum": 12, "step": 4, "value": 4},
        "seconds_label": "单镜时长 (Sora: 4/8/12秒)"
    },
    "sora-2-pro": {
        "sizes": ["1792x1024", "1024x1792", "1280x720", "720x1280"],
        "seconds_range": {"minimum": 4, "maximum": 12, "step": 4, "value": 4},
        "seconds_label": "单镜时长 (Sora Pro: 4/8/12秒)"
    },
    "veo-3.1": {
        "sizes": ["1080p", "720p"],
        "seconds_range": {"minimum": 4, "maximum": 8, "step": 2, "value": 4},
        "seconds_label": "单镜时长 (Veo: 4/6/8秒)"
    }
}
# 2. Prompts and styles.
# CONTINUITY_PROMPT is prepended to every shot prompt to keep characters,
# scene and camera continuity consistent across generated segments.
CONTINUITY_PROMPT = (
    "保持统一的视觉风格与世界观,场景与光影保持稳定,角色服装、发型、体型与表情连贯,仅根据剧情调整动作;"
    "如果有参考图片,请严格保持人物形象与参考图一致,人物站位不得变化,镜头衔接需流畅自然。"
    "旁白不需要朗读或配音,仅作为剧情提示使用。要求视频生成的最后一帧要展示所有人物的正面形象和此时的站位。"
)
# Maps the UI style choice to a style directive appended to each prompt.
STYLE_PROMPTS = {
    "Anime (二次元)": "整体画面要求:高质量二次元动漫渲染风格,角色为手绘动漫人物,肤色与材质为动画质感,背景为虚构的动画场景;禁止出现写实/真人或真实摄影元素。",
    "Realistic (写实)": "整体画面要求:高写实摄影风格,人物与环境光影细节丰富,材质与质感贴近真实世界,禁止出现卡通或夸张笔触,确保色彩与光线符合真实物理规律。",
    "Animated (动画/3D)": "整体画面要求:动画/卡通风格,支持二维或三维渲染,人物线条与轮廓清晰,色彩饱和且富有层次,可适当夸张动作与表情。",
    "Painterly (艺术/绘画)": "整体画面要求:艺术绘画风格,可呈现厚重笔触或水彩晕染质感,允许保留艺术性的纹理与笔法痕迹,整体色彩与构图需统一。",
    "Abstract (抽象/实验)": "整体画面要求:抽象/实验风格,鼓励运用超现实、故障艺术或非传统构图手法,可打破写实规律,突出视觉冲击力与创意表现。"
}
# Ordered list of style names, used to populate the UI dropdown.
STYLE_KEYS = list(STYLE_PROMPTS.keys())
| # 3. Demo 案例数据(保持不变) | |
| DEMO_DATA = [ | |
| { | |
| "file": "demo1.mp4", | |
| "title": "案例 1", | |
| "script": "[Dialogue]: 1. [0-9 seconds] (Camera movement: Handheld + long shot - medium close-up - long shot) In a forest clearing, Su Luo paces anxiously, the protagonist stands still; a medium close-up shot with a camera in hand, Su Luo scratches his hair and kicks a stone: "I really shouldn't have agreed to Qin Fei, how can I find all three time boxes in such a short time!" He stomps his foot, then cuts back to a long shot. 2. [9-18 seconds] (Shallow depth of field + long shot - medium shot - long shot) The protagonist approaches, a medium shot over the shoulder in out of focus asks: "What is a time box?" Su Luo turns around and professionally gestures: "A cloud silk control group countdown mechanism box." The two stand facing each other in a long shot. 3. [18-26 seconds] (Tilt + long shot - close-up - long shot) The long shot continues, then a close-up shot of Su Luo in a scholarly manner: "It can only be opened at a specified time, so it's called a time box." The camera pans down to the gesture and then back to the face, then the long shot ends. 4. [26-36 seconds] (Arc + long shot - medium close-up - long shot) Su Luo exaggerates in the long shot, surrounded by a medium close-up: "Qin Fei buried three boxes saying he couldn't find them, I patted my chest on the spot: Su Luo, the expert in local customs, will get them all done in an hour!" Chin raised, chest patted, still proud in the long shot. 5. [36-44 seconds] (Shaky + long shot - medium shot - long shot) The protagonist in the medium shot teases: "Seeing how anxious you are, you haven't finished, have you?" Su Luo in the background instantly deflates, shoulders droop, slightly swaying to indicate guilt, long shot lowers head. 6. [44-53 seconds] (Pan + long shot - close-up - long shot) Long shot lowers head, close-up Su Luo rubs the corner of his clothes and whispers: "Who would have thought he would cheat with the boxes... help me?" He looks up and moves to make eye contact, long shot meets eyes. 7. 
[53-62 seconds] (Crash Zoom + Long Shot - Medium Shot - Long Shot) The protagonist nods and smiles in the long shot, then a quick push to the medium shot closes up. Su Luo exclaims with delight, "Great! I'll leave those two directions to you, I'll handle the rest!" She pounds her chest and points into the distance, the protagonist in the long shot exhaling and setting off. 【Character Design】 Su Luo: Medium height and light build, large eyes, flaxen brown hair, messy ponytail, short jacket, tools, belt, emotions instantly revealed. Protagonist: Medium height and upright, soft dark hair, simple dark-colored clothing, reserved and calm, nodding slightly. 【Scene】Afternoon dappled sunlight in a forest clearing, pebbles, fallen leaves, quiet trees, transitioning from anxiety to relaxed cooperation. 【Positioning】 1. Su Luo walks back and forth in the center of the clearing, the protagonist at the edge, at a distance. 2. The protagonist takes two or three steps closer to face her. 3-5. Maintaining close distance, Su Luo faces the protagonist/turns sideways and lowers her head. 6. They look at each other again. 7. Su Luo points to the distance." | |
| }, | |
| { | |
| "file": "demo2.mp4", | |
| "title": "案例 2", | |
| "script": "【对话】:1. [0-8秒](运镜类型:Handheld Camera Effect+全景镜头、中景镜头)\n故事情节:远景镜头,幽暗的遗迹室内,蒋风正俯身在一块散发着微光的古代石碑(指引图)前。主角从阴影中走出,站定在他身后几步远。镜头切换为手持拍摄的中景镜头,跟随主角的视线,画面有轻微晃动,聚焦在主角锐利的眼神上。主角双臂环抱,带着审视的口吻质问:“你在做什么?指引图是不能随便篡改的。”声音打破了室内的寂静。结尾回到远景,主角保持质问的姿态,蒋风的背影僵住。\n\n2. [8-16秒](运镜类型:Arc Shot+全景镜头、中景镜头)\n故事情节:远景镜头,蒋风缓缓转过身。镜头以一个平滑的弧度围绕蒋风移动,切换为中景镜头。他看到主角时明显一愣,双手下意识地抬起,掌心向前,做出一个无辜且防御的姿态,眼神慌乱地解释:“篡改指引图?不不不,你误会了。”他的表情诚恳又急切。结尾远景,两人对峙,气氛紧张。\n\n3. [16-24秒](运'运镜类型:Shallow Depth of Field+全景镜头、中近景镜头)\n故事情节:远景镜头,蒋风放下了手,姿态变得谦卑。镜头切换为中近景,焦点落在蒋风身上,他略带窘迫地笑了笑,背景中的主角身影变得模糊。他一边说一边用手比划着自己:“我才加入风物家没多久,哪有这个本事能篡改它。”结尾回到远景,蒋风仍在解释,主角静静地听着,没有打断。\n\n4. [24-33秒](运镜类型:Tilt Shot+全景镜头、特写镜头)\n故事情节:远景镜头,蒋风再次转向指引图。镜头给到蒋风的中近景,他伸出手指,小心翼翼地指向石碑上的一个发光符文,但并未触碰:“我只是想查看指引图上的身份印鉴。”镜头向下倾斜,给到他手指所指之处的符文一个特写,符文复杂而古老。主角的声音从画外传来,带着一丝疑惑:“身份印鉴?”结尾回到远景,主角微微探身,视线也落在了那个符文上。\n\n5. [33-42秒](运镜类型:Panning Shot+全景镜头、近景镜头)\n故事情节:远景镜头,两人都注视着指引图。镜头切换为近景,从蒋风的侧脸开始,他温和地解释着:“嗯,就是一种类似签名的东西。”镜头缓缓横移,扫过石碑上更多类似签名的印鉴,光芒流转。他的声音变得低沉而充满怀念:“在考古界,早期开荒的人员有权在指引图上留下自己的名字,我们称之为身份印鉴。”镜头移回,定格在他充满希冀的眼神上:“我想看看这些指引图上有没有我父亲的名字。”结尾回到远景,整个房间的氛围因这番话而悄然改变。\n\n6. [42-51秒](运镜类型:Lens Flare+全景镜头、中景镜头)\n故事情节:远景镜头,蒋风垂下目光。镜头切换为中景,他背对着石碑,仿佛陷入了久远的回忆,一道柔和的镜头光晕扫过画面,他眼神飘向远方,带着一丝不易察觉的落寞:“我父亲是主攻考古的风物家,但他常年在外勘察……我已经很久很久没见到他了。”结尾回到远景,主角的注意力已经完全从石碑转移到了蒋风身上。\n\n7. [51-59秒](运镜类型:Deep Depth of Field+全景镜头、中近景镜头)\n故事情节:远景镜头,蒋风转过头,重新看向主角。镜头切为中近景,景深拉远,我们能清晰看到前景中蒋风努力挤出一个微笑,眼神却流露着不确定,以及背景里主角严肃倾听的轮廓。蒋风说:“母亲说稷下不少的开荒考古是他完成的,我想看看是不是他真的来过。”结尾回到远景,蒋风的微笑显得有些无力。\n\n8. [59-67秒](运镜类型:Shallow Depth of Field+全景镜头、特写镜头)\n故事情节:远景镜头,室内一片沉寂。镜头推进到蒋风脸部的特写,极浅的景深模糊了周围的一切,只剩下他复杂的表情。他的笑容消失了,嘴唇微微颤抖:“虽然我相信母亲不会骗我,但……”他停顿了一下,低下头,用几不可闻的声音说出心底的委屈,“哪有人经常在外不回家的。”结尾远景,蒋风低着头,肩膀微微垮下。\n\n9. [67-74秒](运镜类型:Shaky Cam+全景镜头、中近景镜头)\n故事情节:远景镜头,主角打破了沉默。镜头切换为中近景,聚焦在主角身上,轻微的镜头晃动暗示着他内心的触动。他原本锐利的眼神已经完全柔和下来,取而代之的是理解与同情。他轻声问道:“那你找到答案了吗?”结尾远景,听到问话,蒋风缓缓抬起头。\n\n10. 
[74-82秒](运镜类型:Arc Shot+全景镜头、中景镜头)\n故事情节:远景镜头,两人视线交汇。镜头给到蒋风的中景,他轻轻摇头,眼中闪过一丝失望:“目前还没有,我想多找几个地方再下定论。”随即,他深吸一口气,鼓起勇气向前迈了一小步,镜头以一个微小的弧度跟随着他,增加了请求的郑重感。他恳切地问:“那个……可以拜托你帮我深入遗迹内部看看吗?”结尾远景,两人间的距离缩短了。\n\n11. [82-90秒](运镜类型:Shallow Depth of Field+全景镜头、近景镜头)\n故事情节:远景镜头,主角静待下文。镜头切到蒋风的近景,他有些难为情地低下头,看了看自己无力的双手,再抬头望向主角时,眼神充满了坦诚的无助:“以我的实力,里面的机关人我实在无法应付……更别说接近指引图了。”背景中的主角被虚化,突出了蒋风此刻的窘迫与孤立。结尾回到远景,蒋风的姿态显得格外渺小。\n\n12. [90-98秒](运镜类型:Handheld Camera Effect+全景镜头、中近景镜头)\n故事情节:远景镜头,蒋风等待着判决。镜头切为中近景,手持拍摄的画面极度稳定,仿佛连摄影师都屏住了呼吸。蒋风微微躬身,这是一个郑重的请求:“我想最后再确认一下……”他抬起眼,目光灼灼地直视主角,声音里带着颤音,“能请你帮忙完成我这个心愿吗?”结尾回到远景,空气仿佛凝固了,主角一动不动。\n\n13. [98-102秒](运镜类型:Deep Depth of Field+全景镜头、特写镜头)\n故事情节:远景镜头,主角终于有了动作。镜头给到主角面部特写,他沉默地审视着蒋风的眼睛,几秒钟的权衡之后,嘴角无奈地向上一撇,随即发出一声轻不可闻的叹息。景深拉开,我们能看到他身后不远处,蒋风紧张等待的模糊身影。主角终于开口,语气平淡却掷地有声:“行吧。”结尾回到远景,听到回答的蒋风如释重负地松了口气,紧绷的身体瞬间放松下来。\n【人物形象】:主角:身形挺拔,体态匀称有力,面部轮廓分明,眼神锐利如鹰。留着一头便于打理的深色短发,发丝间或夹杂风霜痕迹。身着深色调、材质耐磨的探险服,肩部和肘部有皮革补丁,腰间挂着若干实用工具包。气质沉稳老练,初期动作多为双臂环抱的审视姿态,后期眼神转为柔和,流露同情与无奈,是一位经验丰富、外冷内热的行动派。\n蒋风:身高略低于主角,体态偏瘦,书生气较重,面部线条柔和,眼神清澈但时常流露慌乱与不确定。发型是略显蓬乱的黑色中短发,似乎无暇打理。穿着一身崭新的“风物家”制服,款式简洁但略显宽大,与身形不甚贴合。气质真诚而笨拙,常有抬手、低头、窘迫微笑等下意识动作,在提及父亲时,会从紧张转为充满希冀与感伤的脆弱,是一位涉世未深的年轻后辈。\n【场景描述】:幽暗的古代遗迹室内,唯一的稳定光源来自一块散发着微光的石碑指引图,石壁上刻有古老符文。场景氛围从初始的紧张对峙,随着角色对话的深入,逐渐转变为充满感伤与理解的静谧与私密。\n【站位】:1:主角站在蒋风身后几步远处,蒋风俯身于石碑前。\n2:蒋风完全转过身,与主角正面相对,形成对峙。\n3:两人保持面对面的站位,距离不变。\n4:蒋风转身面向石碑,主角在其侧后方,视线投向石碑。\n5:两人大致并排,共同注视着石碑。\n6:蒋风背对石碑,面向空旷处;主角从侧面注视着蒋风。\n7:蒋风转身,再次与主角面对面站立。\n8:两人位置不变,蒋风低头,避开主角视线。\n9:蒋风抬头,与主角视线交汇,维持原有距离。\n10:蒋风向前迈出一小步,缩短了与主角的距离。\n11:两人位置不变,蒋风抬头直视主角。\n12:蒋风微微躬身,更显谦卑地仰视着主角。\n13:主角与蒋风保持着略近的面对面距离,主角站姿笔直,蒋风躬身等待。\n" | |
| }, | |
| { | |
| "file": "demo3.mp4", | |
| "title": "案例 3", | |
| "script": "【Dialogue】: 1. [0-9 seconds] (Handheld Camera Effect + panoramic shot, medium close-up shot) A long shot shows the desolate scene after the fierce battle. The protagonist and Meng Ya stand on the right side of the screen, their postures wary; on the left side of the screen, Yuan Xue and Zhao Dongxu lean against a burning wooden stake, panting weakly. The camera moves forward and focuses on Yuan Xue, who is trembling and looking at the protagonist with fear on her face. Her voice trembles: "Thank you so much! Luckily you arrived in time! Otherwise, we would have become a barbecue feast by now..." She finishes speaking and gasps for breath. The camera moves to the protagonist's face. He frowns slightly, his gaze shifts from Yuan Xue to Zhao Dongxu, and asks with concern and professional assessment: "How badly are you injured? Can you still stand up?" The last second returns to the long shot, confirming the four people's positions. 2. [9-17 seconds] (Shallow Depth of Field + panoramic shot, medium shot) In the long shot, Zhao Dongxu maintains a crouching posture. The camera cuts to a medium shot focusing on Zhao Dongxu, blurring the background of the protagonist and the distant firelight. Tears stream down Zhao Dongxu's pale face, a result of his pain. He avoids looking directly at the protagonist, instead looking down at his numb legs, stammering, "I can't feel my legs at all..." His voice is filled with despair and helplessness. The final second returns to a long shot, contrasting Zhao Dongxu's despair with the protagonist's seriousness. 3. [17-25 seconds] (Panning Shot + wide shot, medium shot) In the long shot, Meng Ya steps forward from behind the protagonist. The camera focuses on Meng Ya in a medium shot. He has his arms crossed, a serious expression on his face, and says to Zhao Dongxu and Yuan Xue in an unquestionable tone, "If you're not strong enough, don't try to take on a top-tier commission! 
Come with us, back to the camp to get treatment from Bai Cao's family." After saying that, he turns to the side and gestures with his chin towards the protagonist. The camera then pans across the protagonist, who is also on guard. Meng Ya continues, "What's wrong with you? Why are you spacing out?" In the last second, the camera returns to a wide shot, showing Meng Ya standing in the middle of the path leading to the protagonist. 4. [25-34 seconds] (Arc Shot + wide shot, close-up shot) In the wide shot, the protagonist ignores Meng Ya and stares ahead. The camera moves slowly in an arc around the protagonist, then zooms in on his face for a close-up. His eyes are sharp and focused, his brow furrowed as he nods and examines his surroundings: “Something’s not right here.” He first senses the temperature of the air, then looks at the remnants of the extinguished flames on the ground. “The temperature is still high, and the flames haven’t died down… Could there be any survivors?” The final second returns to a wide shot, showing the protagonist standing alone at the front of the group, forming a silhouette. 5. [34-41 seconds] (Crash Zoom + wide shot, extreme close-up) In a wide shot, the protagonist’s gaze abruptly shifts to a diagonal angle ahead. Plot: The silhouette of a small aircraft suddenly sweeps past the wreckage. The camera zooms in on the protagonist for an extreme close-up, capturing the violent contraction of his pupils. He gasps, realizing: “And… I’m worried there are still students trapped ahead.” His eyes change from shock to determination. The final second returns to a wide shot, showing the protagonist now fully turned towards the road ahead, taking a step forward without hesitation. 6. [41-49 seconds] (Deep Depth of Field + panoramic shot, medium-long shot) In the long shot, the protagonist walks forward, and Meng Ya stops. The shot switches to a medium-long shot, the depth of field widens, and the foreground shows Meng Ya's back as he turns away. 
He decisively waves to the protagonist's back and says loudly, "Understood. Let's split up then. I'll escort the wounded to the camp first, and I'll come back to find you later." His walking figure is still clear in the depth of field, showing his determination to do as he says. In the last second, the shot returns to a long shot, and Meng Ya has already walked some distance. 7. [49-57 seconds] (Whip Pan + panoramic shot, medium-long shot) In the long shot, Meng Ya stops after taking a few steps. The camera begins from a distance, slightly behind Meng Ya. He's about to turn around, but gestures with his eyes to the protagonist in front of him. Turning back again, his voice tinged with concern, he says, "Hey, don't go too far! Big Eye Owl will contact you later!" Before he finishes speaking, the camera abruptly pans from his face to the protagonist standing motionless a few steps behind him. The final second returns to a wide shot, showing the two figures, one in front of the other, maintaining a distance suitable for communication, forming the frame. [Character Description]: Yuan Xue: Approximately 165cm tall/slender build/pale, sweaty, and fearful facial features/long, dark hair disheveled/wearing simple, light-colored clothing soaked in sweat/a timid and helpless demeanor, her body language showing trembling and an inability to look directly at others. Main Character: Approximately 180cm tall / Well-proportioned and agile / Distinct facial features, often with a furrowed brow / Short, dark hair, clean and neat / Wears dark tactical uniform, durable and easy to move in / Calm, sharp, and professional demeanor, habitually vigilant, his habitual action of nodding in scrutiny when observing his environment. 
Meng Ya: Approximately 182cm tall / Sturdy build, confident stance / Strong facial features, serious expression, sharp eyes / Short, spiky black hair / Wears heavily armored combat uniform with numbers, appearing experienced / Demeanor of a young commander, decisive in action, direct in speech, habitually using gestures such as crossing his arms and gesturing with his chin to express his attitude. [Scene Description]: A desolate battlefield wreckage under a clear afternoon sky, the air hot and dry, traces of recently extinguished flames on the ground, scattered burning logs emitting an apocalyptic glow. [Positioning]: 1: The main character and Meng Ya are on the right side of the screen, Yuan Xue and Zhao Dongxu are on the left, slightly behind, leaning against the burning logs. 2. Zhao Dongxu remains crouched on the left, with the protagonist and Meng Ya behind him, remaining alert.3. Meng Ya steps forward from behind the protagonist, standing between the protagonist and Zhao Dongxu, turning sideways towards the protagonist.4. The protagonist stands alone at the front of the group, facing forward; Meng Ya, Yuan Xue, and Zhao Dongxu are behind him.5. The protagonist, who was facing forward, suddenly turns to face diagonally forward.6. The protagonist walks forward, while Meng Ya stops, turns his back to the protagonist, and walks in the opposite direction.7. Meng Ya is in front, and the protagonist is behind, maintaining a distance of several steps, forming a front-and-back formation." | |
| }, | |
| { | |
| "file": "demo4.mp4", | |
| "title": "案例 4", | |
| "script": "【对话】:1. [0-8秒] (Shallow Depth of Field+全景镜头、中近景镜头) 故事情节:远景展示公园小径上,白衫和主角并肩站立,一只胖乎乎的宠物“呱呱”趴在他们脚边的草地上气喘吁吁。镜头随即以浅景深推向白衫的中近景,他低头看着呱呱,脸上交织着无奈与宠溺,接着他抬眼望向主角,问道:“怎么样?呱呱有好好锻炼吗?” 结尾回到远景,主角正准备回答。\n2. [8-17秒] (Handheld Camera Effect+全景镜头、中景镜头) 故事情节:远景中,三人位置不变。镜头切换为手持效果下的中景,画面随着白衫的动作有轻微晃动。他无奈地摊开手,叹了口气:“没办法,之前太惯着它了。” 他的视线落在懒洋洋翻了个身的呱呱身上,语气里满是无可奈何:“现在没有吃的半步也不挪。” 结尾远景,白衫轻轻摇头,主角在一旁安静地听着。\n3. [17-26秒] (Arc Shot+全景镜头、中景镜头) 故事情节:远景展现整个场景。镜头开始围绕白衫进行弧线运动,他蹲下身,温柔地抚摸着呱呱的后背,语气放缓:“不过它今天起码完成了一点运动,晚上允许它多吃一点。” 镜头继续沿弧线转向一旁的主角,他看着这温情的一幕,微笑着点头附和:“今天运动的还不错。” 结尾远景,白衫蹲着,主角站着,形成一高一低的构图。\n4. [26-34秒] (Shallow Depth of Field+全景镜头、近景镜头) 故事情节:远景中,白衫依旧蹲在呱呱身边,主角静立一旁。镜头切入白衫的近景,背景完全虚化。他抚摸呱呱的动作没停,但眼神中浮现出真切的忧虑,声音也低沉下来,充满了担忧:“我知道运动很辛苦,但呱呱真的太胖了,我很怕它胖得生病。” 结尾远景,能看到主角脸上的笑容也收敛了,神情变得严肃。\n5. [34-42秒] (Tilt Shot+全景镜头、中景镜头) 故事情节:远景确认场景站位。镜头从中景开始,从地上心满意足地摇着尾巴的呱呱缓缓向上抬升,最终定格在刚站起身的白衫脸上。他脸上的忧虑一扫而空,转为一种故作爽朗的兴奋,对着呱呱大声宣布:“表现真不错。今晚它可以多吃一点!” 结尾远景,白衫高兴地拍了拍手。\n6. [42-51秒] (Shaky Cam+全景镜头、中近景镜头) 故事情节:远景中,主角看着兴奋的白衫。镜头切换为对准主角的中近景,轻微的摇晃反映出他内心的无语。他看着白衫,嘴角微微抽动,眼神里是哭笑不得的怀疑,几乎是对自己低语:“这么吃还能瘦么……” 随即他像是突然想起了正事,表情一正,视线重新聚焦在白衫身上,问道:“对了,你知道刘叶的情况怎么样了吗?” 结尾远景,主角向前迈了半步,成功转移了话题。\n7. [51-58秒] (Deep Depth of Field+全景镜头、中景镜头) 故事情节:远景中,两人相对而立。镜头切到白衫的中景,听到“刘叶”的名字,他先是愣了一下,随即眼神投向远方,景深变大,背景中的公园路径和行人都变得清晰。他皱眉思索片刻,然后略带歉意地摇了摇头,收回目光:“刘叶?看着是没长高,具体我就不知道了。” 结尾远景,白衫看着主角,摊了摊手表示不知情。\n8. 
[58-67秒] (Panning Shot+全景镜头、中远景镜头) 故事情节:远景中,三人保持着最后的站位。镜头给到白衫的中远景,他侧过身,抬手指向西边的方向,语气变得热心:“同学要是想知道,不如去找他问问吧。” 镜头随着他的手臂平滑地向西边摇摄,画面中出现一条通往远处开阔广场的小径。“你往西走,他就在那边的广场。” 结尾远景,镜头停下,主角顺着白衫所指的方向望去,若有所思。\n【人物形象】:白衫:身高约180cm,体态匀称修长,略带少年感。面部线条柔和,眉眼清秀,笑起来时眼角有细微纹路。发型是自然的黑色短发,刘海稍长,显得随性。身穿一件干净的白色棉麻衬衫和浅色休闲裤,脚踩白色运动鞋。气质温和亲切,与人交谈时真诚,对待宠物时眼神宠溺,会用摊手、挠头等小动作表达无奈,是个内心细腻的暖男。\n主角:身高与白衫相仿,身形挺拔,站姿稳重。面部轮廓分明,眼神锐利但内敛,表情变化细微,善于观察。发型为深色利落短发,显得干练。穿着深色系的休闲夹克,内搭纯色T恤,下身是工装裤,整体风格偏向实用和低调。气质沉稳,话不多,习惯通过嘴角抽动、眼神聚焦等微表情传递内心活动,行动果断且有目的性。\n呱呱:一只体型极度肥胖的宠物,身躯圆滚滚,四肢短小,趴在地上像个肉球。拥有一双憨态可掬的大眼睛,表情总是懒洋洋的。毛发短而顺滑,脖子上戴着一个简单的项圈。动作迟缓,极度懒散,没有食物的诱惑便不愿动弹,对主人的抚摸会表现出心满意足的样子,是一只被宠坏了的“吃货”。\n【场景描述】:午后阳光明媚的公园草坪,氛围从轻松宠溺的日常,转为对宠物健康的真切担忧,最终变为热心指路的平实交流。主要视觉元素是茵茵绿草、蜿蜒的小径,以及趴在地上一动不动的胖宠物。\n【站位】:1. 白衫与主角并肩站立,呱呱在他们脚边的草地上。\n2. 三人位置不变,白衫面向主角和呱呱。\n3. 白衫蹲在呱呱旁边,主角站在他身侧,形成高低位。\n4. 白衫维持蹲姿,主角站在一旁注视。\n5. 白衫从呱呱身边站起,转身面对呱呱。\n6. 主角面向白衫,两人相对而立,主角向前半步拉近距离。\n7. 两人保持相对站立,白衫短暂望向远方后,目光回到主角身上。\n8. 白衫侧身指向西边,主角随其指向望向同一方向。" | |
| } | |
| ] | |
| # ========================================== | |
| # PART 1: 剧本生成模型 (ScriptAgent) | |
| # ========================================== | |
| from swift.llm import PtEngine, RequestConfig, InferRequest | |
| from swift.plugin import InferStats | |
| import torch | |
# Global state for the script-generation model.
MODEL_NAME = "XD-MU/ScriptAgent"  # HuggingFace Hub repo id
LOCAL_MODEL_PATH = "./downloaded_models/ScriptAgent"  # local snapshot directory
engine = None  # lazily-initialized swift InferEngine (see load_llm_model)
# Ensure the download directory exists before any snapshot is fetched.
os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)
def load_llm_model():
    """Lazily initialize the module-level swift PtEngine (4-bit bnb quantized).

    Downloads the model snapshot from HuggingFace on first use; subsequent
    calls are no-ops once `engine` is set. Failures are printed with a
    traceback and leave `engine` as None.
    """
    global engine
    if engine is not None:
        return  # already initialized
    try:
        config_marker = os.path.join(LOCAL_MODEL_PATH, "config.json")
        if os.path.exists(config_marker):
            print(f"✅ 模型已存在: {LOCAL_MODEL_PATH}")
        else:
            print(f"正在从 HuggingFace 下载模型到 {LOCAL_MODEL_PATH}...")
            snapshot_download(
                repo_id=MODEL_NAME,
                local_dir=LOCAL_MODEL_PATH,
                local_dir_use_symlinks=False,
                resume_download=True
            )
            print(f"✅ 模型已下载到: {LOCAL_MODEL_PATH}")
        print("正在加载文本模式(禁用多模态)...")
        extra_model_kwargs = {
            'low_cpu_mem_usage': True,
            'max_memory': {'cpu': '10GB'},
            'offload_folder': './offload',  # spill weights to disk when RAM runs out
        }
        engine = PtEngine(
            model_id_or_path=LOCAL_MODEL_PATH,
            torch_dtype=torch.bfloat16,  # bfloat16 keeps the memory footprint down
            max_batch_size=1,
            device_map='cpu',
            quant_method='bnb',
            quantization_bit=4,
            model_kwargs=extra_model_kwargs,
        )
        print("✅ 文本模式加载完成")
    except Exception as e:
        print(f"❌ 模型加载失败: {e}")
        import traceback
        traceback.print_exc()
def chat_with_scriptagent(user_input: str):
    """Generate a script from user input via the ms-swift InferEngine.

    Lazily loads the model on first call. Returns the generated script text,
    or a user-facing error string when loading/generation fails or the
    input is empty.
    """
    global engine
    if engine is None:
        load_llm_model()
        if engine is None:
            return "❌ 模型加载失败,请检查后台日志。"
    user_input = user_input.strip()
    if not user_input:
        return "请输入内容"
    try:
        print("🤖 正在使用 ms-swift InferEngine 推理剧本...")
        # 1. Build the chat-style message payload.
        messages = [{'role': 'user', 'content': user_input}]
        infer_request = InferRequest(messages=messages)
        # 2. Sampling configuration.
        request_config = RequestConfig(
            max_tokens=4096,        # max generated tokens
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            stream=False,           # single-shot, no streaming
        )
        # 3. Run inference.
        metric = InferStats()
        resp_list = engine.infer([infer_request], request_config, metrics=[metric])
        # 4. Extract the generated text.
        response = resp_list[0].choices[0].message.content
        print(f"✅ 生成完成 | 指标: {metric.compute()}")
        # Bug fix: len() was previously called before the None/empty check,
        # so a None content raised TypeError and was reported as a failure.
        if not response:
            return "⚠️ 生成为空,请重试"
        print(f"✅ 生成结果长度: {len(response)} 字符")
        return response
    except Exception as e:
        print(f"❌ 生成出错: {e}")
        import traceback
        traceback.print_exc()
        return f"生成失败: {str(e)}"
| # ========================================== | |
| # PART 2: 视频生成 API 封装 | |
| # ========================================== | |
class OpenAISoraAPI:
    """OpenAI Sora API wrapper: job creation, polling, and download."""

    def __init__(self, api_key: str):
        """Raise RuntimeError when the optional `openai` dependency is missing."""
        if OpenAI is None:
            raise RuntimeError("未安装 openai 库,请运行: pip install openai")
        self.client = OpenAI(api_key=api_key)

    def generate_video(
        self,
        prompt: str,
        output_path: str,
        model: str,
        size: str,
        seconds: int,
        ref_img_path: str = None
    ) -> Optional[str]:
        """Generate one clip and save it to output_path.

        Returns None on success, or an error-message string on failure.
        """
        ref_file = None
        try:
            LOGGER.info(f"🎬 Sora API 调用: {model} | {size} | {seconds}秒")
            # Request payload; the API expects `seconds` as a string.
            kwargs = {
                "model": model,
                "prompt": prompt,
                "size": size,
                "seconds": str(seconds),
            }
            try:
                if ref_img_path and os.path.exists(ref_img_path):
                    # Bug fix: the handle must stay open while the request is
                    # sent; the previous `with open(...)` block closed it
                    # before videos.create() could read it.
                    ref_file = open(ref_img_path, 'rb')
                    kwargs["input_reference"] = ref_file
                video_job = self.client.videos.create(**kwargs)
            finally:
                if ref_file is not None:
                    ref_file.close()
            # Poll until the job leaves the queued/processing states.
            while video_job.status in ["queued", "processing"]:
                LOGGER.info(f"⏳ 视频生成中... 进度: {video_job.progress}%")
                time.sleep(10)
                video_job = self.client.videos.retrieve(video_job.id)
            if video_job.status == "completed":
                # NOTE(review): assumes the job object exposes a direct `url`;
                # some SDK versions require videos.download_content() — confirm.
                video_url = video_job.url
                import requests
                video_data = requests.get(video_url).content
                with open(output_path, 'wb') as f:
                    f.write(video_data)
                LOGGER.info(f"✅ 视频已保存: {output_path}")
                return None
            error_msg = f"视频生成失败,状态: {video_job.status}"
            LOGGER.error(error_msg)
            return error_msg
        except Exception as e:
            error_msg = f"Sora API 错误: {str(e)}"
            LOGGER.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg
class GoogleVeoAPI:
    """Wrapper around the Google GenAI (Veo 3.1) video-generation endpoint."""

    def __init__(self, api_key: str):
        """Raise RuntimeError when the optional `google-genai` dependency is missing."""
        if genai is None:
            raise RuntimeError("未安装 google-genai 库,请运行: pip install google-genai")
        self.client = genai.Client(api_key=api_key)

    def generate_video(
        self,
        prompt: str,
        output_path: str,
        size: str,
        seconds: int,
        ref_img_path: str = None
    ) -> Optional[str]:
        """Generate one clip and write it to output_path.

        Returns None on success, or an error-message string on failure.
        """
        try:
            LOGGER.info(f"🎬 Veo API 调用: {size} | {seconds}秒")
            extra_config = {}
            if ref_img_path and os.path.exists(ref_img_path):
                # Pass the previous segment's last frame as an asset
                # reference so characters stay consistent between shots.
                extra_config["reference_images"] = [
                    types.VideoGenerationReferenceImage(
                        image=Image.open(ref_img_path),
                        reference_type="asset"
                    )
                ]
            # Normalize the UI size choice to a supported resolution.
            resolution = {"1080p": "1080p", "720p": "720p"}.get(size, "720p")
            # Kick off the long-running generation operation.
            operation = self.client.models.generate_videos(
                model="veo-3.1-generate-preview",
                prompt=prompt,
                config=types.GenerateVideosConfig(
                    duration_seconds=seconds,
                    resolution=resolution,
                    aspect_ratio="16:9",
                    **extra_config
                ),
            )
            # Poll until the operation completes.
            while not operation.done:
                LOGGER.info("⏳ 视频生成中...")
                time.sleep(10)
                operation = self.client.operations.get(operation)
            # Download the first generated clip.
            first_video = operation.response.generated_videos[0]
            self.client.files.download(file=first_video.video, output_path=output_path)
            LOGGER.info(f"✅ 视频已保存: {output_path}")
            return None
        except Exception as e:
            error_msg = f"Veo API 错误: {str(e)}"
            LOGGER.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg
| # ========================================== | |
| # PART 3: 视频处理工具函数 | |
| # ========================================== | |
def parse_script_nodes(script_text: str) -> List[str]:
    """Split a script into per-shot segments keyed by "1.", "2.", ... markers.

    Falls back to the script's non-empty lines when no numbered markers
    are present. Text before the first marker is dropped.
    """
    normalized = script_text.replace("\r\n", "\n").strip()
    marker_re = re.compile(r"\s*(\d+)\.\s*")
    markers = list(marker_re.finditer(normalized))
    if not markers:
        return [piece.strip() for piece in normalized.split('\n') if piece.strip()]
    # Each segment runs from the end of its marker to the start of the next.
    starts = [m.end() for m in markers]
    stops = [m.start() for m in markers[1:]] + [len(normalized)]
    segments: List[str] = []
    for begin, stop in zip(starts, stops):
        chunk = normalized[begin:stop].strip()
        if chunk:
            segments.append(chunk)
    return segments
def extract_last_frame(video_path: str, output_path: str) -> Optional[str]:
    """Write the final frame of video_path to output_path as an image.

    Returns output_path on success; None when OpenCV is unavailable, the
    video cannot be opened, or the frame cannot be read.
    """
    if cv2 is None:
        return None
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        return None
    # Seek to the last frame (clamped to 0 for empty/unknown counts).
    frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.set(cv2.CAP_PROP_POS_FRAMES, max(frame_count - 1, 0))
    ok, frame = capture.read()
    capture.release()
    if not ok:
        return None
    cv2.imwrite(output_path, frame)
    return output_path
def stitch_videos(video_paths: List[str], output_path: str):
    """Concatenate the given clips into one movie at output_path.

    Missing input paths are skipped. Raises ValueError when no usable
    clips remain and RuntimeError when moviepy is not installed. All
    opened clips are closed even if encoding fails.
    """
    if not video_paths:
        raise ValueError("未提供可拼接的视频文件。")
    if VideoFileClip is None or concatenate_videoclips is None:
        raise RuntimeError("未找到 moviepy,请安装依赖。")
    opened = []
    try:
        for source in video_paths:
            if os.path.exists(source):
                opened.append(VideoFileClip(source))
        if not opened:
            raise ValueError("没有有效的视频片段")
        movie = concatenate_videoclips(opened, method="compose")
        movie.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            verbose=False,
            logger=None,
            remove_temp=True,
        )
    finally:
        # Release every clip that was successfully opened.
        for clip in opened:
            clip.close()
| # ========================================== | |
| # PART 4: 视频生成流水线 | |
| # ========================================== | |
def run_video_generation_pipeline(
    script_text: str,
    api_key: str,
    model_name: str,
    style_choice: str,
    size: str,
    seconds: int
):
    """Generator pipeline: one API call per script node, then stitch.

    Yields:
        (list of generated segment paths, final video path or None, status message)
    """
    # --- Input validation ---
    if not script_text:
        yield [], None, "❌ 请输入剧本!"
        return
    if not api_key or api_key == "Your API Key":
        yield [], None, "❌ 请输入有效的 API Key!"
        return

    # --- Parse script & prepare output directory ---
    nodes = parse_script_nodes(script_text)
    run_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join("output_videos", run_id)
    os.makedirs(output_dir, exist_ok=True)

    # --- Select API client by model family ---
    is_sora = model_name.startswith("sora")
    try:
        if is_sora:
            api_client = OpenAISoraAPI(api_key)
        elif model_name.startswith("veo"):
            api_client = GoogleVeoAPI(api_key)
        else:
            yield [], None, f"❌ 不支持的模型: {model_name}"
            return
    except Exception as e:
        yield [], None, f"❌ API 初始化失败: {str(e)}"
        return

    segments = []
    prev_frame = None  # last frame of previous shot, used as reference image
    style_prompt = STYLE_PROMPTS.get(style_choice, "")
    yield [], None, f"🚀 开始任务,共 {len(nodes)} 个分镜。模型: {model_name}"

    # --- Generate each shot ---
    for idx, node_text in enumerate(nodes, start=1):
        segment_path = os.path.join(output_dir, f"segment_{idx:02d}.mp4")
        full_prompt = (
            f"{CONTINUITY_PROMPT}\n{style_prompt}\n"
            f"镜头编号:{idx}/{len(nodes)}。\n镜头脚本:{node_text}"
        )
        yield segments, None, f"🎥 生成中: 分镜 {idx}/{len(nodes)}..."

        call_kwargs = dict(
            prompt=full_prompt,
            output_path=segment_path,
            size=size,
            seconds=seconds,
            ref_img_path=prev_frame,
        )
        if is_sora:
            call_kwargs["model"] = model_name  # only the Sora wrapper takes `model`
        err = api_client.generate_video(**call_kwargs)

        if err:
            yield segments, None, f"❌ 分镜 {idx} 失败: {err}"
            return
        segments.append(segment_path)

        # Carry this shot's last frame forward as the next shot's reference.
        if idx < len(nodes):
            ref_path = os.path.join(output_dir, f"ref_{idx:02d}.png")
            prev_frame = extract_last_frame(segment_path, ref_path)
        yield segments, None, f"✅ 分镜 {idx} 完成"

    # --- Stitch all segments into the final movie ---
    yield segments, None, "🎬 正在拼接..."
    final_path = os.path.join(output_dir, "final_movie.mp4")
    try:
        stitch_videos(segments, final_path)
        yield segments, final_path, "🎉 任务完成!"
    except Exception as e:
        yield segments, None, f"❌ 拼接失败: {str(e)}"
| # ========================================== | |
| # PART 5: Gradio 界面 | |
| # ========================================== | |
def update_model_params(model_name):
    """Build refreshed resolution Dropdown and duration Slider for model_name.

    Unknown model names fall back to the "sora-2" configuration.
    """
    cfg = MODEL_CONFIGS.get(model_name, MODEL_CONFIGS["sora-2"])
    sizes = cfg["sizes"]
    duration = cfg["seconds_range"]
    resolution_dropdown = gr.Dropdown(
        choices=sizes,
        value=sizes[0],
        label=f"分辨率 ({model_name})"
    )
    duration_slider = gr.Slider(
        minimum=duration["minimum"],
        maximum=duration["maximum"],
        step=duration["step"],
        value=duration["value"],
        label=cfg["seconds_label"]
    )
    return resolution_dropdown, duration_slider
def get_demo_path(filename):
    """Return filename when it exists on disk, otherwise None."""
    if os.path.exists(filename):
        return filename
    return None
| # # 构建 Gradio 界面 | |
| # with gr.Blocks(title="AI 剧本视频工厂") as demo: | |
| # gr.Markdown("# 🎬 ScriptAgent & Sora/Veo 视频生成工坊") | |
| # with gr.Tabs(): | |
| # # --- TAB 1: 剧本创作 --- | |
| # with gr.Tab("📝 第一步:剧本创作"): | |
| # with gr.Row(): | |
| # with gr.Column(): | |
| # llm_input = gr.Textbox( | |
| # label="剧情输入", | |
| # placeholder="主角:你在做什么?...", | |
| # lines=6 | |
| # ) | |
| # llm_btn = gr.Button("生成/续写剧本", variant="primary") | |
| # with gr.Column(): | |
| # llm_output = gr.Textbox( | |
| # label="生成的剧本", | |
| # lines=10, | |
| # interactive=True | |
| # ) | |
| # to_video_btn = gr.Button("⬇️ 发送到视频生成", variant="secondary") | |
| # gr.Examples( | |
| # [[ | |
| # "主角:你在做什么?指引图是不能随便篡改的。\n" | |
| # "蒋前:篡改指引图?不不不,你误会了。\n" | |
| # "蒋前:我才加入风物家没多久,哪有这个本事能篡改它..." | |
| # ]], | |
| # inputs=llm_input | |
| # ) | |
# Build the Gradio UI. This block only assembles components and wires events;
# the heavy lifting happens in callbacks defined earlier in the file
# (chat_with_scriptagent, run_video_generation_pipeline, update_model_params).
# User-visible strings are intentionally Chinese and must stay unchanged.
with gr.Blocks(title="AI 剧本视频工厂") as demo:
    gr.Markdown("# 🎬 ScriptAgent & Sora/Veo 视频生成工坊 ")
    # Tabs is bound to a variable so the "send to video generation" button
    # can programmatically switch the selected tab (see event wiring below).
    with gr.Tabs() as tabs:
        # --- TAB 1: script creation ---
        with gr.Tab("📝 第一步:剧本创作", id=0):
            with gr.Row():
                with gr.Column():
                    llm_input = gr.Textbox(
                        label="剧情输入",
                        placeholder="主角:你在做什么?...",
                        lines=6
                    )
                    llm_btn = gr.Button("生成/续写剧本", variant="primary")
                with gr.Column():
                    llm_output = gr.Textbox(
                        label="生成的剧本",
                        lines=10,
                        interactive=True
                    )
                    to_video_btn = gr.Button("⬇️ 发送到视频生成", variant="secondary")
            gr.Examples(
                [[
                    "主角:你在做什么?指引图是不能随便篡改的。\n"
                    "蒋前:篡改指引图?不不不,你误会了。\n"
                    "蒋前:我才加入风物家没多久,哪有这个本事能篡改它..."
                ]],
                inputs=llm_input
            )
            # Local-deployment code showcase (collapsed by default)
            gr.Markdown("---")
            with gr.Accordion("💻 本地部署完整代码(点击展开查看)", open=False):
                gr.Markdown("""
                ### 📦 完整部署步骤
                以下代码可在本地完整运行,获得最佳性能和输出质量:
                """)
                # The embedded script below is display-only runtime text
                # (rendered inside gr.Code); keep it byte-for-byte as shipped.
                deployment_code = '''import os
from huggingface_hub import snapshot_download
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
model_name = "XD-MU/ScriptAgent"
local_path = "./models/ScriptAgent"
# 下载整个仓库的所有文件
print("下载模型所有文件...")
snapshot_download(
    repo_id=model_name,
    local_dir=local_path,
    local_dir_use_symlinks=False, # 直接复制文件,不使用符号链接
    resume_download=True # 支持断点续传
)
print(f"模型已完整下载到: {local_path}")
# 然后使用 SWIFT 加载
from swift.llm import PtEngine, RequestConfig, InferRequest
engine = PtEngine(local_path, max_batch_size=1)
request_config = RequestConfig(max_tokens=8192, temperature=0.7)
infer_request = InferRequest(messages=[
    {"role": "user", "content": "你的对话上下文(Your Dialogue)"}
])
response = engine.infer([infer_request], request_config)[0]
print(response.choices[0].message.content)'''
                gr.Code(
                    value=deployment_code,
                    language="python",
                    label="deploy_scriptagent.py",
                    lines=35,
                    interactive=False
                )
                gr.Markdown("""
                ### 📌 环境要求
                ```bash
                # 安装依赖
                pip install ms-swift[llm] transformers torch huggingface_hub
                # GPU 推荐配置
                - CUDA 11.8+
                - 显存: 16GB+ (推荐 24GB)
                - 内存: 32GB+
                ```
                """)
        # --- TAB 2: video generation ---
        with gr.Tab("🎥 第二步:视频生成", id=1):
            with gr.Row():
                # Left column: configuration controls
                with gr.Column(scale=1):
                    with gr.Accordion("⚙️ API 设置", open=True):
                        # NOTE(review): "Your API Key" is a real default *value*,
                        # not a placeholder — if the user forgets to replace it,
                        # this literal string is sent as the API key. Consider
                        # placeholder= instead; confirm before changing.
                        api_key_input = gr.Textbox(
                            label="API Key",
                            type="password",
                            value="Your API Key",
                            info="根据选择的模型输入 OpenAI 或 Google API Key"
                        )
                    gr.Markdown("### 🎨 风格与模型配置")
                    style_radio = gr.Radio(
                        choices=STYLE_KEYS,
                        value=STYLE_KEYS[0],
                        label="画风"
                    )
                    model_sel = gr.Dropdown(
                        choices=["sora-2", "sora-2-pro", "veo-3.1"],
                        value="sora-2",
                        label="选择模型",
                        info="Sora 使用 OpenAI Key,Veo 使用 Google Key"
                    )
                    # Resolution/duration defaults match the "sora-2" preset;
                    # model_sel.change (wired below) keeps them in sync.
                    with gr.Row():
                        size_sel = gr.Dropdown(
                            choices=MODEL_CONFIGS["sora-2"]["sizes"],
                            value=MODEL_CONFIGS["sora-2"]["sizes"][0],
                            label="分辨率"
                        )
                        sec_slider = gr.Slider(
                            minimum=4,
                            maximum=12,
                            step=4,
                            value=4,
                            label="单镜时长"
                        )
                    video_script_input = gr.TextArea(
                        label="分镜脚本",
                        lines=8,
                        placeholder="1. [0-8秒] ..."
                    )
                    gen_btn = gr.Button("🚀 开始生成", variant="primary")
                    status_log = gr.Textbox(label="日志", interactive=False)
                # Right column: preview / final output
                with gr.Column(scale=2):
                    gr.Markdown("### 🎞️ 分镜预览")
                    gallery = gr.Gallery(
                        label="分镜序列",
                        columns=3,
                        height="auto"
                    )
                    gr.Markdown("### 🎬 最终成片")
                    final_video = gr.Video(label="成片输出")
            # Static demo showcase (read-only)
            gr.Markdown("---")
            gr.Markdown("### 🌟 精选成片案例 (Demo Showcase)")
            # Lay out up to 4 demos in a 2x2 grid
            for i in range(0, 4, 2):
                with gr.Row():
                    for j in range(2):
                        idx = i + j
                        if idx < len(DEMO_DATA):
                            item = DEMO_DATA[idx]
                            with gr.Column():
                                # Group gives a card-like visual frame
                                with gr.Group():
                                    gr.Video(value=get_demo_path(item["file"]), label=item["title"], interactive=False)
                                    # Script text collapsed behind an Accordion
                                    with gr.Accordion(f"📄 查看剧本: {item['title']}", open=False):
                                        gr.Textbox(
                                            value=item["script"],
                                            show_label=False,
                                            lines=6,
                                            max_lines=6,
                                            interactive=False
                                        )
    # Page-footer warning (rendered in red): hosted instance is quantized /
    # memory-limited; self-host for best quality.
    gr.HTML('<p style="color: red; font-weight: bold; text-align: center; margin-top: 20px; font-size: 16px;">⚠️ 注意:仅供简单测试,由于成本问题在线平台内存只有18G,我们量化了模型,性能效果并不能保证,如果需要最准确的输出请自行部署即可</p>')
    # --- Event wiring ---
    llm_btn.click(chat_with_scriptagent, llm_input, llm_output)
    # Returns (script text, tab index); writing the index into `tabs`
    # switches the UI to the video-generation tab.
    to_video_btn.click(
        lambda x: (x, 1), # 1 selects the second tab (indices are 0-based)
        inputs=llm_output,
        outputs=[video_script_input, tabs] # script textbox + tab selector
    )
    model_sel.change(
        fn=update_model_params,
        inputs=model_sel,
        outputs=[size_sel, sec_slider]
    )
    # Generator callback: streams (gallery, final video, status) updates.
    gen_btn.click(
        fn=run_video_generation_pipeline,
        inputs=[
            video_script_input,
            api_key_input,
            model_sel,
            style_radio,
            size_sel,
            sec_slider
        ],
        outputs=[gallery, final_video, status_log]
    )
# Script entry point: enable the request queue (needed for the streaming
# generator callback bound to gen_btn) and serve on all interfaces, port 7860.
if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)