File size: 55,026 Bytes
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8cbffa
 
 
 
 
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ff707e
d00e784
 
 
 
 
 
 
 
 
1ff707e
d00e784
 
 
 
 
 
 
 
 
 
 
74a7ccc
 
d00e784
 
 
 
 
74a7ccc
d00e784
 
 
 
 
985454b
74a7ccc
 
d00e784
 
 
985454b
74a7ccc
d00e784
 
 
 
 
 
 
 
 
 
 
b8cbffa
 
d00e784
74a7ccc
d00e784
b8cbffa
985454b
 
b8cbffa
 
d00e784
985454b
b8cbffa
 
74a7ccc
d00e784
 
b8cbffa
d00e784
 
 
 
 
 
 
74a7ccc
 
d00e784
74a7ccc
d00e784
74a7ccc
d00e784
 
 
 
 
 
 
74a7ccc
 
 
 
 
 
d00e784
74a7ccc
 
 
 
 
 
 
d00e784
 
74a7ccc
 
 
 
 
 
 
 
 
d00e784
74a7ccc
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ff707e
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23a3e3f
 
e81e4b1
23a3e3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00e784
42316e1
d00e784
1ff707e
 
d00e784
1ff707e
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23a3e3f
 
 
 
 
 
1ff707e
23a3e3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00e784
23a3e3f
 
 
 
 
 
 
 
 
68cf8a9
23a3e3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ff707e
d00e784
1ff707e
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ff707e
e81e4b1
 
d00e784
 
 
 
1ff707e
d00e784
1ff707e
d00e784
1ff707e
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.models.qwen2_5_omni import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
import warnings
import os
import time
import re
import base64
import datetime
import uuid
import logging
from typing import List, Dict, Tuple, Optional
from PIL import Image
from huggingface_hub import snapshot_download
from swift.llm import PtEngine, RequestConfig, InferRequest

# --- Optional third-party dependency check ---
# Each imported name is nulled out on failure so the rest of the module can
# degrade gracefully: callers test `x is None` before using a backend.
try:
    import cv2
    from moviepy.editor import VideoFileClip, concatenate_videoclips
    from openai import OpenAI
    from google import genai
    from google.genai import types
except ImportError as e:
    print(f"❌ 缺少必要库: {e}")
    print("请运行: pip install opencv-python moviepy openai google-genai")
    cv2 = None
    VideoFileClip = None
    # Fix: concatenate_videoclips and types were previously left undefined
    # here, so stitch_videos() / GoogleVeoAPI raised NameError instead of
    # reporting the missing dependency cleanly.
    concatenate_videoclips = None
    OpenAI = None
    genai = None
    types = None

# --- Environment setup ---
# Knobs consumed by the multimodal stack; os.environ stores strings only.
os.environ.update({
    'ENABLE_AUDIO_OUTPUT': '0',
    'VIDEO_TOTAL_PIXELS': '0',
    'IMAGE_FACTOR': '1',
    'MAX_PIXELS': '1024',  # keep the pixel budget at its minimum
})

# Silence warnings in-process and for any child interpreters.
warnings.filterwarnings("ignore")
os.environ['PYTHONWARNINGS'] = 'ignore'

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
LOGGER = logging.getLogger(__name__)

# ==========================================
# PART 0: Configuration constants & demo data
# ==========================================

# 1. Per-backend generation parameters.
# The two Sora variants share identical sizes/duration constraints, so they
# are built from one private template (fresh copies per entry).
_SORA_SIZES = ["1792x1024", "1024x1792", "1280x720", "720x1280"]
_SORA_SECONDS = {"minimum": 4, "maximum": 12, "step": 4, "value": 4}

MODEL_CONFIGS = {
    "sora-2": {
        "sizes": list(_SORA_SIZES),
        "seconds_range": dict(_SORA_SECONDS),
        "seconds_label": "单镜时长 (Sora: 4/8/12秒)"
    },
    "sora-2-pro": {
        "sizes": list(_SORA_SIZES),
        "seconds_range": dict(_SORA_SECONDS),
        "seconds_label": "单镜时长 (Sora Pro: 4/8/12秒)"
    },
    "veo-3.1": {
        "sizes": ["1080p", "720p"],
        "seconds_range": {"minimum": 4, "maximum": 8, "step": 2, "value": 4},
        "seconds_label": "单镜时长 (Veo: 4/6/8秒)"
    }
}

# 2. Prompts and visual-style presets.
# Appended to every shot prompt to keep characters/scenes consistent across
# generated clips.
CONTINUITY_PROMPT = (
    "保持统一的视觉风格与世界观,场景与光影保持稳定,角色服装、发型、体型与表情连贯,仅根据剧情调整动作;"
    "如果有参考图片,请严格保持人物形象与参考图一致,人物站位不得变化,镜头衔接需流畅自然。"
    "旁白不需要朗读或配音,仅作为剧情提示使用。要求视频生成的最后一帧要展示所有人物的正面形象和此时的站位。"
)

# Display label -> style-constraint prompt injected into generation requests.
STYLE_PROMPTS = {
    "Anime (二次元)": "整体画面要求:高质量二次元动漫渲染风格,角色为手绘动漫人物,肤色与材质为动画质感,背景为虚构的动画场景;禁止出现写实/真人或真实摄影元素。",
    "Realistic (写实)": "整体画面要求:高写实摄影风格,人物与环境光影细节丰富,材质与质感贴近真实世界,禁止出现卡通或夸张笔触,确保色彩与光线符合真实物理规律。",
    "Animated (动画/3D)": "整体画面要求:动画/卡通风格,支持二维或三维渲染,人物线条与轮廓清晰,色彩饱和且富有层次,可适当夸张动作与表情。",
    "Painterly (艺术/绘画)": "整体画面要求:艺术绘画风格,可呈现厚重笔触或水彩晕染质感,允许保留艺术性的纹理与笔法痕迹,整体色彩与构图需统一。",
    "Abstract (抽象/实验)": "整体画面要求:抽象/实验风格,鼓励运用超现实、故障艺术或非传统构图手法,可打破写实规律,突出视觉冲击力与创意表现。"
}
STYLE_KEYS = [*STYLE_PROMPTS]  # insertion-ordered labels for the UI dropdown

# 3. Demo 案例数据(保持不变)
DEMO_DATA = [
    {
        "file": "demo1.mp4",
        "title": "案例 1",
        "script": "[Dialogue]: 1. [0-9 seconds] (Camera movement: Handheld + long shot - medium close-up - long shot) In a forest clearing, Su Luo paces anxiously, the protagonist stands still; a medium close-up shot with a camera in hand, Su Luo scratches his hair and kicks a stone: "I really shouldn't have agreed to Qin Fei, how can I find all three time boxes in such a short time!" He stomps his foot, then cuts back to a long shot. 2. [9-18 seconds] (Shallow depth of field + long shot - medium shot - long shot) The protagonist approaches, a medium shot over the shoulder in out of focus asks: "What is a time box?" Su Luo turns around and professionally gestures: "A cloud silk control group countdown mechanism box." The two stand facing each other in a long shot. 3. [18-26 seconds] (Tilt + long shot - close-up - long shot) The long shot continues, then a close-up shot of Su Luo in a scholarly manner: "It can only be opened at a specified time, so it's called a time box." The camera pans down to the gesture and then back to the face, then the long shot ends. 4. [26-36 seconds] (Arc + long shot - medium close-up - long shot) Su Luo exaggerates in the long shot, surrounded by a medium close-up: "Qin Fei buried three boxes saying he couldn't find them, I patted my chest on the spot: Su Luo, the expert in local customs, will get them all done in an hour!" Chin raised, chest patted, still proud in the long shot. 5. [36-44 seconds] (Shaky + long shot - medium shot - long shot) The protagonist in the medium shot teases: "Seeing how anxious you are, you haven't finished, have you?" Su Luo in the background instantly deflates, shoulders droop, slightly swaying to indicate guilt, long shot lowers head. 6. [44-53 seconds] (Pan + long shot - close-up - long shot) Long shot lowers head, close-up Su Luo rubs the corner of his clothes and whispers: "Who would have thought he would cheat with the boxes... help me?" He looks up and moves to make eye contact, long shot meets eyes. 
7. [53-62 seconds] (Crash Zoom + Long Shot - Medium Shot - Long Shot) The protagonist nods and smiles in the long shot, then a quick push to the medium shot closes up. Su Luo exclaims with delight, "Great! I'll leave those two directions to you, I'll handle the rest!" She pounds her chest and points into the distance, the protagonist in the long shot exhaling and setting off. 【Character Design】 Su Luo: Medium height and light build, large eyes, flaxen brown hair, messy ponytail, short jacket, tools, belt, emotions instantly revealed. Protagonist: Medium height and upright, soft dark hair, simple dark-colored clothing, reserved and calm, nodding slightly. 【Scene】Afternoon dappled sunlight in a forest clearing, pebbles, fallen leaves, quiet trees, transitioning from anxiety to relaxed cooperation. 【Positioning】 1. Su Luo walks back and forth in the center of the clearing, the protagonist at the edge, at a distance. 2. The protagonist takes two or three steps closer to face her. 3-5. Maintaining close distance, Su Luo faces the protagonist/turns sideways and lowers her head. 6. They look at each other again. 7. Su Luo points to the distance."
    },
    {
        "file": "demo2.mp4",
        "title": "案例 2",
        "script": "【对话】:1. [0-8秒](运镜类型:Handheld Camera Effect+全景镜头、中景镜头)\n故事情节:远景镜头,幽暗的遗迹室内,蒋风正俯身在一块散发着微光的古代石碑(指引图)前。主角从阴影中走出,站定在他身后几步远。镜头切换为手持拍摄的中景镜头,跟随主角的视线,画面有轻微晃动,聚焦在主角锐利的眼神上。主角双臂环抱,带着审视的口吻质问:“你在做什么?指引图是不能随便篡改的。”声音打破了室内的寂静。结尾回到远景,主角保持质问的姿态,蒋风的背影僵住。\n\n2. [8-16秒](运镜类型:Arc Shot+全景镜头、中景镜头)\n故事情节:远景镜头,蒋风缓缓转过身。镜头以一个平滑的弧度围绕蒋风移动,切换为中景镜头。他看到主角时明显一愣,双手下意识地抬起,掌心向前,做出一个无辜且防御的姿态,眼神慌乱地解释:“篡改指引图?不不不,你误会了。”他的表情诚恳又急切。结尾远景,两人对峙,气氛紧张。\n\n3. [16-24秒](运'运镜类型:Shallow Depth of Field+全景镜头、中近景镜头)\n故事情节:远景镜头,蒋风放下了手,姿态变得谦卑。镜头切换为中近景,焦点落在蒋风身上,他略带窘迫地笑了笑,背景中的主角身影变得模糊。他一边说一边用手比划着自己:“我才加入风物家没多久,哪有这个本事能篡改它。”结尾回到远景,蒋风仍在解释,主角静静地听着,没有打断。\n\n4. [24-33秒](运镜类型:Tilt Shot+全景镜头、特写镜头)\n故事情节:远景镜头,蒋风再次转向指引图。镜头给到蒋风的中近景,他伸出手指,小心翼翼地指向石碑上的一个发光符文,但并未触碰:“我只是想查看指引图上的身份印鉴。”镜头向下倾斜,给到他手指所指之处的符文一个特写,符文复杂而古老。主角的声音从画外传来,带着一丝疑惑:“身份印鉴?”结尾回到远景,主角微微探身,视线也落在了那个符文上。\n\n5. [33-42秒](运镜类型:Panning Shot+全景镜头、近景镜头)\n故事情节:远景镜头,两人都注视着指引图。镜头切换为近景,从蒋风的侧脸开始,他温和地解释着:“嗯,就是一种类似签名的东西。”镜头缓缓横移,扫过石碑上更多类似签名的印鉴,光芒流转。他的声音变得低沉而充满怀念:“在考古界,早期开荒的人员有权在指引图上留下自己的名字,我们称之为身份印鉴。”镜头移回,定格在他充满希冀的眼神上:“我想看看这些指引图上有没有我父亲的名字。”结尾回到远景,整个房间的氛围因这番话而悄然改变。\n\n6. [42-51秒](运镜类型:Lens Flare+全景镜头、中景镜头)\n故事情节:远景镜头,蒋风垂下目光。镜头切换为中景,他背对着石碑,仿佛陷入了久远的回忆,一道柔和的镜头光晕扫过画面,他眼神飘向远方,带着一丝不易察觉的落寞:“我父亲是主攻考古的风物家,但他常年在外勘察……我已经很久很久没见到他了。”结尾回到远景,主角的注意力已经完全从石碑转移到了蒋风身上。\n\n7. [51-59秒](运镜类型:Deep Depth of Field+全景镜头、中近景镜头)\n故事情节:远景镜头,蒋风转过头,重新看向主角。镜头切为中近景,景深拉远,我们能清晰看到前景中蒋风努力挤出一个微笑,眼神却流露着不确定,以及背景里主角严肃倾听的轮廓。蒋风说:“母亲说稷下不少的开荒考古是他完成的,我想看看是不是他真的来过。”结尾回到远景,蒋风的微笑显得有些无力。\n\n8. [59-67秒](运镜类型:Shallow Depth of Field+全景镜头、特写镜头)\n故事情节:远景镜头,室内一片沉寂。镜头推进到蒋风脸部的特写,极浅的景深模糊了周围的一切,只剩下他复杂的表情。他的笑容消失了,嘴唇微微颤抖:“虽然我相信母亲不会骗我,但……”他停顿了一下,低下头,用几不可闻的声音说出心底的委屈,“哪有人经常在外不回家的。”结尾远景,蒋风低着头,肩膀微微垮下。\n\n9. [67-74秒](运镜类型:Shaky Cam+全景镜头、中近景镜头)\n故事情节:远景镜头,主角打破了沉默。镜头切换为中近景,聚焦在主角身上,轻微的镜头晃动暗示着他内心的触动。他原本锐利的眼神已经完全柔和下来,取而代之的是理解与同情。他轻声问道:“那你找到答案了吗?”结尾远景,听到问话,蒋风缓缓抬起头。\n\n10. 
[74-82秒](运镜类型:Arc Shot+全景镜头、中景镜头)\n故事情节:远景镜头,两人视线交汇。镜头给到蒋风的中景,他轻轻摇头,眼中闪过一丝失望:“目前还没有,我想多找几个地方再下定论。”随即,他深吸一口气,鼓起勇气向前迈了一小步,镜头以一个微小的弧度跟随着他,增加了请求的郑重感。他恳切地问:“那个……可以拜托你帮我深入遗迹内部看看吗?”结尾远景,两人间的距离缩短了。\n\n11. [82-90秒](运镜类型:Shallow Depth of Field+全景镜头、近景镜头)\n故事情节:远景镜头,主角静待下文。镜头切到蒋风的近景,他有些难为情地低下头,看了看自己无力的双手,再抬头望向主角时,眼神充满了坦诚的无助:“以我的实力,里面的机关人我实在无法应付……更别说接近指引图了。”背景中的主角被虚化,突出了蒋风此刻的窘迫与孤立。结尾回到远景,蒋风的姿态显得格外渺小。\n\n12. [90-98秒](运镜类型:Handheld Camera Effect+全景镜头、中近景镜头)\n故事情节:远景镜头,蒋风等待着判决。镜头切为中近景,手持拍摄的画面极度稳定,仿佛连摄影师都屏住了呼吸。蒋风微微躬身,这是一个郑重的请求:“我想最后再确认一下……”他抬起眼,目光灼灼地直视主角,声音里带着颤音,“能请你帮忙完成我这个心愿吗?”结尾回到远景,空气仿佛凝固了,主角一动不动。\n\n13. [98-102秒](运镜类型:Deep Depth of Field+全景镜头、特写镜头)\n故事情节:远景镜头,主角终于有了动作。镜头给到主角面部特写,他沉默地审视着蒋风的眼睛,几秒钟的权衡之后,嘴角无奈地向上一撇,随即发出一声轻不可闻的叹息。景深拉开,我们能看到他身后不远处,蒋风紧张等待的模糊身影。主角终于开口,语气平淡却掷地有声:“行吧。”结尾回到远景,听到回答的蒋风如释重负地松了口气,紧绷的身体瞬间放松下来。\n【人物形象】:主角:身形挺拔,体态匀称有力,面部轮廓分明,眼神锐利如鹰。留着一头便于打理的深色短发,发丝间或夹杂风霜痕迹。身着深色调、材质耐磨的探险服,肩部和肘部有皮革补丁,腰间挂着若干实用工具包。气质沉稳老练,初期动作多为双臂环抱的审视姿态,后期眼神转为柔和,流露同情与无奈,是一位经验丰富、外冷内热的行动派。\n蒋风:身高略低于主角,体态偏瘦,书生气较重,面部线条柔和,眼神清澈但时常流露慌乱与不确定。发型是略显蓬乱的黑色中短发,似乎无暇打理。穿着一身崭新的“风物家”制服,款式简洁但略显宽大,与身形不甚贴合。气质真诚而笨拙,常有抬手、低头、窘迫微笑等下意识动作,在提及父亲时,会从紧张转为充满希冀与感伤的脆弱,是一位涉世未深的年轻后辈。\n【场景描述】:幽暗的古代遗迹室内,唯一的稳定光源来自一块散发着微光的石碑指引图,石壁上刻有古老符文。场景氛围从初始的紧张对峙,随着角色对话的深入,逐渐转变为充满感伤与理解的静谧与私密。\n【站位】:1:主角站在蒋风身后几步远处,蒋风俯身于石碑前。\n2:蒋风完全转过身,与主角正面相对,形成对峙。\n3:两人保持面对面的站位,距离不变。\n4:蒋风转身面向石碑,主角在其侧后方,视线投向石碑。\n5:两人大致并排,共同注视着石碑。\n6:蒋风背对石碑,面向空旷处;主角从侧面注视着蒋风。\n7:蒋风转身,再次与主角面对面站立。\n8:两人位置不变,蒋风低头,避开主角视线。\n9:蒋风抬头,与主角视线交汇,维持原有距离。\n10:蒋风向前迈出一小步,缩短了与主角的距离。\n11:两人位置不变,蒋风抬头直视主角。\n12:蒋风微微躬身,更显谦卑地仰视着主角。\n13:主角与蒋风保持着略近的面对面距离,主角站姿笔直,蒋风躬身等待。\n"
    },
    {
        "file": "demo3.mp4",
        "title": "案例 3",
        "script": "【Dialogue】: 1. [0-9 seconds] (Handheld Camera Effect + panoramic shot, medium close-up shot) A long shot shows the desolate scene after the fierce battle. The protagonist and Meng Ya stand on the right side of the screen, their postures wary; on the left side of the screen, Yuan Xue and Zhao Dongxu lean against a burning wooden stake, panting weakly. The camera moves forward and focuses on Yuan Xue, who is trembling and looking at the protagonist with fear on her face. Her voice trembles: "Thank you so much! Luckily you arrived in time! Otherwise, we would have become a barbecue feast by now..." She finishes speaking and gasps for breath. The camera moves to the protagonist's face. He frowns slightly, his gaze shifts from Yuan Xue to Zhao Dongxu, and asks with concern and professional assessment: "How badly are you injured? Can you still stand up?" The last second returns to the long shot, confirming the four people's positions. 2. [9-17 seconds] (Shallow Depth of Field + panoramic shot, medium shot) In the long shot, Zhao Dongxu maintains a crouching posture. The camera cuts to a medium shot focusing on Zhao Dongxu, blurring the background of the protagonist and the distant firelight. Tears stream down Zhao Dongxu's pale face, a result of his pain. He avoids looking directly at the protagonist, instead looking down at his numb legs, stammering, "I can't feel my legs at all..." His voice is filled with despair and helplessness. The final second returns to a long shot, contrasting Zhao Dongxu's despair with the protagonist's seriousness. 3. [17-25 seconds] (Panning Shot + wide shot, medium shot) In the long shot, Meng Ya steps forward from behind the protagonist. The camera focuses on Meng Ya in a medium shot. He has his arms crossed, a serious expression on his face, and says to Zhao Dongxu and Yuan Xue in an unquestionable tone, "If you're not strong enough, don't try to take on a top-tier commission! 
Come with us, back to the camp to get treatment from Bai Cao's family." After saying that, he turns to the side and gestures with his chin towards the protagonist. The camera then pans across the protagonist, who is also on guard. Meng Ya continues, "What's wrong with you? Why are you spacing out?" In the last second, the camera returns to a wide shot, showing Meng Ya standing in the middle of the path leading to the protagonist. 4. [25-34 seconds] (Arc Shot + wide shot, close-up shot) In the wide shot, the protagonist ignores Meng Ya and stares ahead. The camera moves slowly in an arc around the protagonist, then zooms in on his face for a close-up. His eyes are sharp and focused, his brow furrowed as he nods and examines his surroundings: “Something’s not right here.” He first senses the temperature of the air, then looks at the remnants of the extinguished flames on the ground. “The temperature is still high, and the flames haven’t died down… Could there be any survivors?” The final second returns to a wide shot, showing the protagonist standing alone at the front of the group, forming a silhouette. 5. [34-41 seconds] (Crash Zoom + wide shot, extreme close-up) In a wide shot, the protagonist’s gaze abruptly shifts to a diagonal angle ahead. Plot: The silhouette of a small aircraft suddenly sweeps past the wreckage. The camera zooms in on the protagonist for an extreme close-up, capturing the violent contraction of his pupils. He gasps, realizing: “And… I’m worried there are still students trapped ahead.” His eyes change from shock to determination. The final second returns to a wide shot, showing the protagonist now fully turned towards the road ahead, taking a step forward without hesitation. 6. [41-49 seconds] (Deep Depth of Field + panoramic shot, medium-long shot) In the long shot, the protagonist walks forward, and Meng Ya stops. The shot switches to a medium-long shot, the depth of field widens, and the foreground shows Meng Ya's back as he turns away. 
He decisively waves to the protagonist's back and says loudly, "Understood. Let's split up then. I'll escort the wounded to the camp first, and I'll come back to find you later." His walking figure is still clear in the depth of field, showing his determination to do as he says. In the last second, the shot returns to a long shot, and Meng Ya has already walked some distance. 7. [49-57 seconds] (Whip Pan + panoramic shot, medium-long shot) In the long shot, Meng Ya stops after taking a few steps. The camera begins from a distance, slightly behind Meng Ya. He's about to turn around, but gestures with his eyes to the protagonist in front of him. Turning back again, his voice tinged with concern, he says, "Hey, don't go too far! Big Eye Owl will contact you later!" Before he finishes speaking, the camera abruptly pans from his face to the protagonist standing motionless a few steps behind him. The final second returns to a wide shot, showing the two figures, one in front of the other, maintaining a distance suitable for communication, forming the frame. [Character Description]: Yuan Xue: Approximately 165cm tall/slender build/pale, sweaty, and fearful facial features/long, dark hair disheveled/wearing simple, light-colored clothing soaked in sweat/a timid and helpless demeanor, her body language showing trembling and an inability to look directly at others. Main Character: Approximately 180cm tall / Well-proportioned and agile / Distinct facial features, often with a furrowed brow / Short, dark hair, clean and neat / Wears dark tactical uniform, durable and easy to move in / Calm, sharp, and professional demeanor, habitually vigilant, his habitual action of nodding in scrutiny when observing his environment. 
Meng Ya: Approximately 182cm tall / Sturdy build, confident stance / Strong facial features, serious expression, sharp eyes / Short, spiky black hair / Wears heavily armored combat uniform with numbers, appearing experienced / Demeanor of a young commander, decisive in action, direct in speech, habitually using gestures such as crossing his arms and gesturing with his chin to express his attitude. [Scene Description]: A desolate battlefield wreckage under a clear afternoon sky, the air hot and dry, traces of recently extinguished flames on the ground, scattered burning logs emitting an apocalyptic glow. [Positioning]: 1: The main character and Meng Ya are on the right side of the screen, Yuan Xue and Zhao Dongxu are on the left, slightly behind, leaning against the burning logs. 2. Zhao Dongxu remains crouched on the left, with the protagonist and Meng Ya behind him, remaining alert.3. Meng Ya steps forward from behind the protagonist, standing between the protagonist and Zhao Dongxu, turning sideways towards the protagonist.4. The protagonist stands alone at the front of the group, facing forward; Meng Ya, Yuan Xue, and Zhao Dongxu are behind him.5. The protagonist, who was facing forward, suddenly turns to face diagonally forward.6. The protagonist walks forward, while Meng Ya stops, turns his back to the protagonist, and walks in the opposite direction.7. Meng Ya is in front, and the protagonist is behind, maintaining a distance of several steps, forming a front-and-back formation."
    },
    {
        "file": "demo4.mp4",
        "title": "案例 4",
        "script": "【对话】:1. [0-8秒] (Shallow Depth of Field+全景镜头、中近景镜头) 故事情节:远景展示公园小径上,白衫和主角并肩站立,一只胖乎乎的宠物“呱呱”趴在他们脚边的草地上气喘吁吁。镜头随即以浅景深推向白衫的中近景,他低头看着呱呱,脸上交织着无奈与宠溺,接着他抬眼望向主角,问道:“怎么样?呱呱有好好锻炼吗?” 结尾回到远景,主角正准备回答。\n2. [8-17秒] (Handheld Camera Effect+全景镜头、中景镜头) 故事情节:远景中,三人位置不变。镜头切换为手持效果下的中景,画面随着白衫的动作有轻微晃动。他无奈地摊开手,叹了口气:“没办法,之前太惯着它了。” 他的视线落在懒洋洋翻了个身的呱呱身上,语气里满是无可奈何:“现在没有吃的半步也不挪。” 结尾远景,白衫轻轻摇头,主角在一旁安静地听着。\n3. [17-26秒] (Arc Shot+全景镜头、中景镜头) 故事情节:远景展现整个场景。镜头开始围绕白衫进行弧线运动,他蹲下身,温柔地抚摸着呱呱的后背,语气放缓:“不过它今天起码完成了一点运动,晚上允许它多吃一点。” 镜头继续沿弧线转向一旁的主角,他看着这温情的一幕,微笑着点头附和:“今天运动的还不错。” 结尾远景,白衫蹲着,主角站着,形成一高一低的构图。\n4. [26-34秒] (Shallow Depth of Field+全景镜头、近景镜头) 故事情节:远景中,白衫依旧蹲在呱呱身边,主角静立一旁。镜头切入白衫的近景,背景完全虚化。他抚摸呱呱的动作没停,但眼神中浮现出真切的忧虑,声音也低沉下来,充满了担忧:“我知道运动很辛苦,但呱呱真的太胖了,我很怕它胖得生病。” 结尾远景,能看到主角脸上的笑容也收敛了,神情变得严肃。\n5. [34-42秒] (Tilt Shot+全景镜头、中景镜头) 故事情节:远景确认场景站位。镜头从中景开始,从地上心满意足地摇着尾巴的呱呱缓缓向上抬升,最终定格在刚站起身的白衫脸上。他脸上的忧虑一扫而空,转为一种故作爽朗的兴奋,对着呱呱大声宣布:“表现真不错。今晚它可以多吃一点!” 结尾远景,白衫高兴地拍了拍手。\n6. [42-51秒] (Shaky Cam+全景镜头、中近景镜头) 故事情节:远景中,主角看着兴奋的白衫。镜头切换为对准主角的中近景,轻微的摇晃反映出他内心的无语。他看着白衫,嘴角微微抽动,眼神里是哭笑不得的怀疑,几乎是对自己低语:“这么吃还能瘦么……” 随即他像是突然想起了正事,表情一正,视线重新聚焦在白衫身上,问道:“对了,你知道刘叶的情况怎么样了吗?” 结尾远景,主角向前迈了半步,成功转移了话题。\n7. [51-58秒] (Deep Depth of Field+全景镜头、中景镜头) 故事情节:远景中,两人相对而立。镜头切到白衫的中景,听到“刘叶”的名字,他先是愣了一下,随即眼神投向远方,景深变大,背景中的公园路径和行人都变得清晰。他皱眉思索片刻,然后略带歉意地摇了摇头,收回目光:“刘叶?看着是没长高,具体我就不知道了。” 结尾远景,白衫看着主角,摊了摊手表示不知情。\n8. 
[58-67秒] (Panning Shot+全景镜头、中远景镜头) 故事情节:远景中,三人保持着最后的站位。镜头给到白衫的中远景,他侧过身,抬手指向西边的方向,语气变得热心:“同学要是想知道,不如去找他问问吧。” 镜头随着他的手臂平滑地向西边摇摄,画面中出现一条通往远处开阔广场的小径。“你往西走,他就在那边的广场。” 结尾远景,镜头停下,主角顺着白衫所指的方向望去,若有所思。\n【人物形象】:白衫:身高约180cm,体态匀称修长,略带少年感。面部线条柔和,眉眼清秀,笑起来时眼角有细微纹路。发型是自然的黑色短发,刘海稍长,显得随性。身穿一件干净的白色棉麻衬衫和浅色休闲裤,脚踩白色运动鞋。气质温和亲切,与人交谈时真诚,对待宠物时眼神宠溺,会用摊手、挠头等小动作表达无奈,是个内心细腻的暖男。\n主角:身高与白衫相仿,身形挺拔,站姿稳重。面部轮廓分明,眼神锐利但内敛,表情变化细微,善于观察。发型为深色利落短发,显得干练。穿着深色系的休闲夹克,内搭纯色T恤,下身是工装裤,整体风格偏向实用和低调。气质沉稳,话不多,习惯通过嘴角抽动、眼神聚焦等微表情传递内心活动,行动果断且有目的性。\n呱呱:一只体型极度肥胖的宠物,身躯圆滚滚,四肢短小,趴在地上像个肉球。拥有一双憨态可掬的大眼睛,表情总是懒洋洋的。毛发短而顺滑,脖子上戴着一个简单的项圈。动作迟缓,极度懒散,没有食物的诱惑便不愿动弹,对主人的抚摸会表现出心满意足的样子,是一只被宠坏了的“吃货”。\n【场景描述】:午后阳光明媚的公园草坪,氛围从轻松宠溺的日常,转为对宠物健康的真切担忧,最终变为热心指路的平实交流。主要视觉元素是茵茵绿草、蜿蜒的小径,以及趴在地上一动不动的胖宠物。\n【站位】:1. 白衫与主角并肩站立,呱呱在他们脚边的草地上。\n2. 三人位置不变,白衫面向主角和呱呱。\n3. 白衫蹲在呱呱旁边,主角站在他身侧,形成高低位。\n4. 白衫维持蹲姿,主角站在一旁注视。\n5. 白衫从呱呱身边站起,转身面对呱呱。\n6. 主角面向白衫,两人相对而立,主角向前半步拉近距离。\n7. 两人保持相对站立,白衫短暂望向远方后,目光回到主角身上。\n8. 白衫侧身指向西边,主角随其指向望向同一方向。"
    }
]

# ==========================================
# PART 1: Script-generation model (ScriptAgent)
# ==========================================
# NOTE: PtEngine / RequestConfig / InferRequest and torch are already imported
# at the top of the file; only InferStats is new in this section.
from swift.plugin import InferStats

# Globals for the lazily-constructed inference engine.
MODEL_NAME = "XD-MU/ScriptAgent"                       # HuggingFace repo id
LOCAL_MODEL_PATH = "./downloaded_models/ScriptAgent"   # local snapshot dir
engine = None  # PtEngine instance, created on first use by load_llm_model()

# Make sure the download target exists up front.
os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

def load_llm_model():
    """Lazily load the ScriptAgent model via ms-swift's PtEngine (4-bit bnb).

    Idempotent: returns immediately when the global ``engine`` already exists.
    Downloads the HuggingFace snapshot on first use. On any failure the error
    is logged and ``engine`` stays ``None``; callers (chat_with_scriptagent)
    treat that as "load failed".
    """
    global engine
    if engine is not None:
        return

    try:
        # Download only when the snapshot is missing locally; config.json is
        # used as the "model is present" marker.
        if not os.path.exists(os.path.join(LOCAL_MODEL_PATH, "config.json")):
            LOGGER.info("正在从 HuggingFace 下载模型到 %s...", LOCAL_MODEL_PATH)
            snapshot_download(
                repo_id=MODEL_NAME,
                local_dir=LOCAL_MODEL_PATH,
                local_dir_use_symlinks=False,
                resume_download=True
            )
            LOGGER.info("✅ 模型已下载到: %s", LOCAL_MODEL_PATH)
        else:
            LOGGER.info("✅ 模型已存在: %s", LOCAL_MODEL_PATH)

        LOGGER.info("正在加载文本模式(禁用多模态)...")

        engine = PtEngine(
            model_id_or_path=LOCAL_MODEL_PATH,
            torch_dtype=torch.bfloat16,      # bfloat16 halves memory vs fp32
            max_batch_size=1,
            device_map='cpu',
            quant_method='bnb',
            quantization_bit=4,
            model_kwargs={
                'low_cpu_mem_usage': True,
                'max_memory': {'cpu': '10GB'},
                'offload_folder': './offload',  # spill to disk when RAM is short
            }
        )

        LOGGER.info("✅ 文本模式加载完成")

    except Exception:
        # Consistency fix: use the module LOGGER (with traceback) instead of
        # bare print + traceback.print_exc(); engine remains None on failure.
        LOGGER.exception("❌ 模型加载失败")

def chat_with_scriptagent(user_input: str):
    """Generate a shooting script from *user_input* with the ScriptAgent model.

    Loads the engine on demand. Returns the generated text, or a
    human-readable error/help string (the UI displays whatever comes back).
    """
    global engine

    if engine is None:
        load_llm_model()
        if engine is None:
            return "❌ 模型加载失败,请检查后台日志。"

    user_input = user_input.strip()
    if not user_input:
        return "请输入内容"

    try:
        print("🤖 正在使用 ms-swift InferEngine 推理剧本...")

        # 1. Build the single-turn chat request for ms-swift.
        messages = [{'role': 'user', 'content': user_input}]
        infer_request = InferRequest(messages=messages)

        # 2. Sampling / decoding configuration.
        request_config = RequestConfig(
            max_tokens=4096,           # generation budget
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            stream=False,              # blocking, non-streaming call
        )

        # 3. Run inference (batch of one).
        metric = InferStats()
        resp_list = engine.infer([infer_request], request_config, metrics=[metric])

        # 4. Extract the generated text.
        response = resp_list[0].choices[0].message.content

        # 5. Report performance metrics.
        print(f"✅ 生成完成 | 指标: {metric.compute()}")
        # Fix: guard len() — `response` may be None, which previously raised
        # TypeError here before the emptiness check below could run.
        print(f"✅ 生成结果长度: {len(response) if response else 0} 字符")

        return response if response else "⚠️ 生成为空,请重试"

    except Exception as e:
        print(f"❌ 生成出错: {e}")
        import traceback
        traceback.print_exc()
        return f"生成失败: {str(e)}"
# ==========================================
# PART 2: 视频生成 API 封装
# ==========================================

class OpenAISoraAPI:
    """Thin wrapper around the OpenAI Sora video-generation API."""

    def __init__(self, api_key: str):
        if OpenAI is None:
            raise RuntimeError("未安装 openai 库,请运行: pip install openai")
        self.client = OpenAI(api_key=api_key)

    def generate_video(
        self, 
        prompt: str, 
        output_path: str, 
        model: str, 
        size: str, 
        seconds: int, 
        ref_img_path: str = None
    ) -> Optional[str]:
        """Create a video and save it to *output_path*.

        Returns ``None`` on success, otherwise an error-message string.
        Blocks while polling the remote job every 10 s.
        """
        ref_file = None
        try:
            LOGGER.info(f"🎬 Sora API 调用: {model} | {size} | {seconds}秒")

            kwargs = {
                "model": model,
                "prompt": prompt,
                "size": size,
                "seconds": str(seconds),  # API expects the duration as a string
            }

            # Attach the optional reference image.
            # Fix: the handle must stay open until videos.create() consumes
            # it — the previous `with open(...)` closed the file before the
            # API call, handing a closed file object to the SDK.
            if ref_img_path and os.path.exists(ref_img_path):
                ref_file = open(ref_img_path, 'rb')
                kwargs["input_reference"] = ref_file

            # Create the video job, then poll until it leaves the
            # queued/processing states.
            video_job = self.client.videos.create(**kwargs)

            while video_job.status in ["queued", "processing"]:
                LOGGER.info(f"⏳ 视频生成中... 进度: {video_job.progress}%")
                time.sleep(10)
                video_job = self.client.videos.retrieve(video_job.id)

            if video_job.status == "completed":
                # Download the finished clip.
                # NOTE(review): assumes the job object exposes a direct `.url`
                # — confirm against the installed openai SDK version.
                video_url = video_job.url
                import requests
                resp = requests.get(video_url, timeout=300)
                resp.raise_for_status()  # fail loudly on a bad download
                with open(output_path, 'wb') as f:
                    f.write(resp.content)
                LOGGER.info(f"✅ 视频已保存: {output_path}")
                return None
            else:
                error_msg = f"视频生成失败,状态: {video_job.status}"
                LOGGER.error(error_msg)
                return error_msg

        except Exception as e:
            error_msg = f"Sora API 错误: {str(e)}"
            LOGGER.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg
        finally:
            # Release the reference-image handle regardless of outcome.
            if ref_file is not None:
                ref_file.close()

class GoogleVeoAPI:
    """Wrapper for the Google Veo 3.1 video-generation API."""

    def __init__(self, api_key: str):
        if genai is None:
            raise RuntimeError("未安装 google-genai 库,请运行: pip install google-genai")
        self.client = genai.Client(api_key=api_key)

    def generate_video(
        self, 
        prompt: str, 
        output_path: str, 
        size: str, 
        seconds: int, 
        ref_img_path: str = None
    ) -> Optional[str]:
        """Generate a clip and write it to *output_path*.

        Returns ``None`` when the video was saved, otherwise an error string.
        Polls the long-running operation every 10 s until it completes.
        """
        try:
            LOGGER.info(f"🎬 Veo API 调用: {size} | {seconds}秒")

            extra_config = {}

            # Optional reference image, passed through as an "asset" reference.
            if ref_img_path and os.path.exists(ref_img_path):
                asset = types.VideoGenerationReferenceImage(
                    image=Image.open(ref_img_path),
                    reference_type="asset"
                )
                extra_config["reference_images"] = [asset]

            # Normalize the requested size, defaulting to 720p.
            resolution = {"1080p": "1080p", "720p": "720p"}.get(size, "720p")

            # Kick off the long-running generation operation.
            operation = self.client.models.generate_videos(
                model="veo-3.1-generate-preview",
                prompt=prompt,
                config=types.GenerateVideosConfig(
                    duration_seconds=seconds,
                    resolution=resolution,
                    aspect_ratio="16:9",
                    **extra_config
                ),
            )

            # Wait for completion.
            while not operation.done:
                LOGGER.info("⏳ 视频生成中...")
                time.sleep(10)
                operation = self.client.operations.get(operation)

            # Persist the first generated video to disk.
            first_video = operation.response.generated_videos[0]
            self.client.files.download(file=first_video.video, output_path=output_path)

            LOGGER.info(f"✅ 视频已保存: {output_path}")
            return None

        except Exception as e:
            error_msg = f"Veo API 错误: {str(e)}"
            LOGGER.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg


# ==========================================
# PART 3: 视频处理工具函数
# ==========================================

def parse_script_nodes(script_text: str) -> List[str]:
    """Split a script into shot segments keyed by "N." numbering.

    Text before the first numbered marker is dropped. When no markers are
    present, falls back to returning the non-empty lines of the script.
    """
    text = script_text.replace("\r\n", "\n").strip()
    markers = list(re.finditer(r"\s*(\d+)\.\s*", text))
    if not markers:
        return [ln.strip() for ln in text.split('\n') if ln.strip()]
    # Each segment runs from the end of one marker to the start of the next
    # (or to the end of the text for the final marker).
    starts = [m.end() for m in markers]
    ends = [m.start() for m in markers[1:]] + [len(text)]
    segments = (text[a:b].strip() for a, b in zip(starts, ends))
    return [seg for seg in segments if seg]


def extract_last_frame(video_path: str, output_path: str) -> Optional[str]:
    """Grab the final frame of a video and save it as an image.

    Returns the output path on success, or None when OpenCV is unavailable,
    the video cannot be opened, or the frame read fails.
    """
    if cv2 is None:
        return None

    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        return None

    # Seek to the last frame index (clamped to 0 for empty/odd files).
    frame_total = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    capture.set(cv2.CAP_PROP_POS_FRAMES, max(frame_total - 1, 0))
    ok, last_frame = capture.read()
    capture.release()

    if not ok:
        return None
    cv2.imwrite(output_path, last_frame)
    return output_path


def stitch_videos(video_paths: List[str], output_path: str):
    """Concatenate segment videos into one final movie file.

    Args:
        video_paths: Ordered list of segment file paths; paths that do not
            exist on disk are silently skipped.
        output_path: Destination path for the stitched mp4.

    Raises:
        ValueError: If no paths are given, or none of them exist.
        RuntimeError: If moviepy is not installed.
    """
    if not video_paths:
        raise ValueError("未提供可拼接的视频文件。")

    if VideoFileClip is None or concatenate_videoclips is None:
        raise RuntimeError("未找到 moviepy,请安装依赖。")

    clips = []
    final_clip = None
    try:
        for path in video_paths:
            if not os.path.exists(path):
                # Tolerate a missing segment rather than failing the whole stitch.
                continue
            clips.append(VideoFileClip(path))

        if not clips:
            raise ValueError("没有有效的视频片段")

        final_clip = concatenate_videoclips(clips, method="compose")
        # NOTE(review): verbose/remove_temp were removed in moviepy>=2.0 —
        # confirm the pinned moviepy version supports these kwargs.
        final_clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            verbose=False,
            logger=None,
            remove_temp=True
        )
    finally:
        # Release every clip handle — including the composed result, which the
        # original code leaked — to avoid dangling ffmpeg reader processes
        # and open file descriptors.
        if final_clip is not None:
            final_clip.close()
        for clip in clips:
            clip.close()


# ==========================================
# PART 4: 视频生成流水线
# ==========================================

def run_video_generation_pipeline(
    script_text: str,
    api_key: str,
    model_name: str,
    style_choice: str,
    size: str,
    seconds: int
):
    """
    Video generation pipeline.

    Yields: (segment video list, final video path, status log message)
    """
    # Reject empty script or placeholder API key before doing any work.
    if not script_text:
        yield [], None, "❌ 请输入剧本!"
        return

    if not api_key or api_key == "Your API Key":
        yield [], None, "❌ 请输入有效的 API Key!"
        return

    # Parse the script into shots and prepare a timestamped output directory.
    shots = parse_script_nodes(script_text)
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    work_dir = os.path.join("output_videos", stamp)
    os.makedirs(work_dir, exist_ok=True)

    # Pick the backend client from the model-name prefix.
    try:
        if model_name.startswith("sora"):
            backend = OpenAISoraAPI(api_key)
        elif model_name.startswith("veo"):
            backend = GoogleVeoAPI(api_key)
        else:
            yield [], None, f"❌ 不支持的模型: {model_name}"
            return
    except Exception as e:
        yield [], None, f"❌ API 初始化失败: {str(e)}"
        return

    segment_paths = []
    prev_frame = None
    style_prompt = STYLE_PROMPTS.get(style_choice, "")
    total = len(shots)

    yield [], None, f"🚀 开始任务,共 {total} 个分镜。模型: {model_name}"

    # Generate each shot in order, feeding the previous shot's last frame
    # back in as a reference image for visual continuity.
    for idx, shot_text in enumerate(shots, start=1):
        segment_file = os.path.join(work_dir, f"segment_{idx:02d}.mp4")
        full_prompt = f"{CONTINUITY_PROMPT}\n{style_prompt}\n镜头编号:{idx}/{total}。\n镜头脚本:{shot_text}"

        yield segment_paths, None, f"🎥 生成中: 分镜 {idx}/{total}..."

        # Sora additionally needs the model name; Veo's model is fixed internally.
        if model_name.startswith("sora"):
            err = backend.generate_video(
                prompt=full_prompt,
                output_path=segment_file,
                model=model_name,
                size=size,
                seconds=seconds,
                ref_img_path=prev_frame
            )
        else:  # veo
            err = backend.generate_video(
                prompt=full_prompt,
                output_path=segment_file,
                size=size,
                seconds=seconds,
                ref_img_path=prev_frame
            )

        if err:
            yield segment_paths, None, f"❌ 分镜 {idx} 失败: {err}"
            return

        segment_paths.append(segment_file)

        # No reference frame needed after the final shot.
        if idx < total:
            ref_file = os.path.join(work_dir, f"ref_{idx:02d}.png")
            prev_frame = extract_last_frame(segment_file, ref_file)

        yield segment_paths, None, f"✅ 分镜 {idx} 完成"

    # Concatenate all segments into the final movie.
    yield segment_paths, None, "🎬 正在拼接..."
    final_path = os.path.join(work_dir, "final_movie.mp4")

    try:
        stitch_videos(segment_paths, final_path)
        yield segment_paths, final_path, "🎉 任务完成!"
    except Exception as e:
        yield segment_paths, None, f"❌ 拼接失败: {str(e)}"


# ==========================================
# PART 5: Gradio 界面
# ==========================================

def update_model_params(model_name):
    """Rebuild the resolution dropdown and duration slider for the chosen model.

    Unknown model names fall back to the "sora-2" configuration.
    """
    cfg = MODEL_CONFIGS.get(model_name, MODEL_CONFIGS["sora-2"])
    sizes = cfg["sizes"]
    sec_range = cfg["seconds_range"]
    size_dropdown = gr.Dropdown(
        choices=sizes,
        value=sizes[0],
        label=f"分辨率 ({model_name})"
    )
    duration_slider = gr.Slider(
        minimum=sec_range["minimum"],
        maximum=sec_range["maximum"],
        step=sec_range["step"],
        value=sec_range["value"],
        label=cfg["seconds_label"]
    )
    return (size_dropdown, duration_slider)


def get_demo_path(filename):
    """Return the filename when it exists on disk, otherwise None."""
    if os.path.exists(filename):
        return filename
    return None


# # 构建 Gradio 界面
# with gr.Blocks(title="AI 剧本视频工厂") as demo:
#     gr.Markdown("# 🎬 ScriptAgent & Sora/Veo 视频生成工坊")
    
#     with gr.Tabs():
#         # --- TAB 1: 剧本创作 ---
#         with gr.Tab("📝 第一步:剧本创作"):
#             with gr.Row():
#                 with gr.Column():
#                     llm_input = gr.Textbox(
#                         label="剧情输入", 
#                         placeholder="主角:你在做什么?...", 
#                         lines=6
#                     )
#                     llm_btn = gr.Button("生成/续写剧本", variant="primary")
                
#                 with gr.Column():
#                     llm_output = gr.Textbox(
#                         label="生成的剧本", 
#                         lines=10, 
#                         interactive=True
#                     )
#                     to_video_btn = gr.Button("⬇️ 发送到视频生成", variant="secondary")
            
#             gr.Examples(
#                 [[
#                     "主角:你在做什么?指引图是不能随便篡改的。\n"
#                     "蒋前:篡改指引图?不不不,你误会了。\n"
#                     "蒋前:我才加入风物家没多久,哪有这个本事能篡改它..."
#                 ]], 
#                 inputs=llm_input
#             )
# Build the Gradio UI: two tabs (script writing -> video generation) plus a
# demo showcase, then wire up the event callbacks at the bottom.
with gr.Blocks(title="AI 剧本视频工厂") as demo:
    gr.Markdown("# 🎬 ScriptAgent & Sora/Veo 视频生成工坊 ")
    
    # 🔥 Key change: keep a handle on the Tabs container so a button callback
    # can switch the active tab programmatically.
    with gr.Tabs() as tabs:
        # --- TAB 1: script creation ---
        with gr.Tab("📝 第一步:剧本创作", id=0):
            with gr.Row():
                with gr.Column():
                    llm_input = gr.Textbox(
                        label="剧情输入", 
                        placeholder="主角:你在做什么?...", 
                        lines=6
                    )
                    llm_btn = gr.Button("生成/续写剧本", variant="primary")
                
                with gr.Column():
                    llm_output = gr.Textbox(
                        label="生成的剧本", 
                        lines=10, 
                        interactive=True
                    )
                    to_video_btn = gr.Button("⬇️ 发送到视频生成", variant="secondary")
            
            gr.Examples(
                [[
                    "主角:你在做什么?指引图是不能随便篡改的。\n"
                    "蒋前:篡改指引图?不不不,你误会了。\n"
                    "蒋前:我才加入风物家没多久,哪有这个本事能篡改它..."
                ]], 
                inputs=llm_input
            )
            
            # 🔥 New: collapsible section displaying the full local-deployment code.
            gr.Markdown("---")
            with gr.Accordion("💻 本地部署完整代码(点击展开查看)", open=False):
                gr.Markdown("""
                ### 📦 完整部署步骤
                以下代码可在本地完整运行,获得最佳性能和输出质量:
                """)
                
                # NOTE: this is a display-only string rendered in a gr.Code
                # widget below — it is never executed by this app.
                deployment_code = '''import os
from huggingface_hub import snapshot_download

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

model_name = "XD-MU/ScriptAgent"
local_path = "./models/ScriptAgent"

# 下载整个仓库的所有文件
print("下载模型所有文件...")
snapshot_download(
    repo_id=model_name,
    local_dir=local_path,
    local_dir_use_symlinks=False,  # 直接复制文件,不使用符号链接
    resume_download=True  # 支持断点续传
)

print(f"模型已完整下载到: {local_path}")

# 然后使用 SWIFT 加载
from swift.llm import PtEngine, RequestConfig, InferRequest

engine = PtEngine(local_path, max_batch_size=1)
request_config = RequestConfig(max_tokens=8192, temperature=0.7)

infer_request = InferRequest(messages=[
    {"role": "user", "content": "你的对话上下文(Your Dialogue)"}
])
response = engine.infer([infer_request], request_config)[0]

print(response.choices[0].message.content)'''
                
                gr.Code(
                    value=deployment_code,
                    language="python",
                    label="deploy_scriptagent.py",
                    lines=35,
                    interactive=False
                )
                
                gr.Markdown("""
                ### 📌 环境要求
                ```bash
                # 安装依赖
                pip install ms-swift[llm] transformers torch huggingface_hub
                
                # GPU 推荐配置
                - CUDA 11.8+
                - 显存: 16GB+ (推荐 24GB)
                - 内存: 32GB+
                ```
                """)
        
        # --- TAB 2: video generation ---
        with gr.Tab("🎥 第二步:视频生成", id=1):
            with gr.Row():
                # Left column: configuration controls.
                with gr.Column(scale=1):
                    with gr.Accordion("⚙️ API 设置", open=True):
                        api_key_input = gr.Textbox(
                            label="API Key", 
                            type="password", 
                            value="Your API Key",
                            info="根据选择的模型输入 OpenAI 或 Google API Key"
                        )
                    
                    gr.Markdown("### 🎨 风格与模型配置")
                    style_radio = gr.Radio(
                        choices=STYLE_KEYS, 
                        value=STYLE_KEYS[0], 
                        label="画风"
                    )
                    model_sel = gr.Dropdown(
                        choices=["sora-2", "sora-2-pro", "veo-3.1"], 
                        value="sora-2", 
                        label="选择模型",
                        info="Sora 使用 OpenAI Key,Veo 使用 Google Key"
                    )
                    
                    # Resolution/duration defaults match "sora-2"; they are
                    # swapped out by update_model_params on model change.
                    with gr.Row():
                        size_sel = gr.Dropdown(
                            choices=MODEL_CONFIGS["sora-2"]["sizes"], 
                            value=MODEL_CONFIGS["sora-2"]["sizes"][0], 
                            label="分辨率"
                        )
                        sec_slider = gr.Slider(
                            minimum=4, 
                            maximum=12, 
                            step=4, 
                            value=4, 
                            label="单镜时长"
                        )

                    video_script_input = gr.TextArea(
                        label="分镜脚本", 
                        lines=8, 
                        placeholder="1. [0-8秒] ..."
                    )
                    gen_btn = gr.Button("🚀 开始生成", variant="primary")
                    status_log = gr.Textbox(label="日志", interactive=False)

                # Right column: output/preview area.
                with gr.Column(scale=2):
                    gr.Markdown("### 🎞️ 分镜预览")
                    gallery = gr.Gallery(
                        label="分镜序列", 
                        columns=3, 
                        height="auto"
                    )
                    
                    gr.Markdown("### 🎬 最终成片")
                    final_video = gr.Video(label="成片输出")

                    # Demo showcase section (unchanged).
                    gr.Markdown("---")
                    gr.Markdown("### 🌟 精选成片案例 (Demo Showcase)")
                    
                    # Lay the showcase out as a 2x2 grid.
                    for i in range(0, 4, 2): 
                        with gr.Row():
                            for j in range(2): 
                                idx = i + j
                                if idx < len(DEMO_DATA):
                                    item = DEMO_DATA[idx]
                                    with gr.Column():
                                        # Group the video and its script into a card.
                                        with gr.Group():
                                            gr.Video(value=get_demo_path(item["file"]), label=item["title"], interactive=False)
                                            # Collapse each script behind an Accordion.
                                            with gr.Accordion(f"📄 查看剧本: {item['title']}", open=False):
                                                gr.Textbox(
                                                    value=item["script"], 
                                                    show_label=False, 
                                                    lines=6,       
                                                    max_lines=6,   
                                                    interactive=False
                                                )
    
    # 🔥 Page-footer warning banner (rendered in red).
    gr.HTML('<p style="color: red; font-weight: bold; text-align: center; margin-top: 20px; font-size: 16px;">⚠️ 注意:仅供简单测试,由于成本问题在线平台内存只有18G,我们量化了模型,性能效果并不能保证,如果需要最准确的输出请自行部署即可</p>')

    # --- Event wiring ---
    llm_btn.click(chat_with_scriptagent, llm_input, llm_output)
    
    # 🔥 Key change: return (script text, tab index) so clicking the button
    # both copies the script and switches the view.
    to_video_btn.click(
        lambda x: (x, 1),  # 1 selects the second tab (indices start at 0)
        inputs=llm_output, 
        outputs=[video_script_input, tabs]  # feed the textbox and the tab switcher
    )
    
    model_sel.change(
        fn=update_model_params, 
        inputs=model_sel, 
        outputs=[size_sel, sec_slider]
    )
    
    # Generator function: each yield streams a progress update to the UI.
    gen_btn.click(
        fn=run_video_generation_pipeline,
        inputs=[
            video_script_input, 
            api_key_input, 
            model_sel, 
            style_radio, 
            size_sel, 
            sec_slider
        ],
        outputs=[gallery, final_video, status_log]
    )

if __name__ == "__main__":
    # Enable request queuing (required for streaming generator outputs),
    # then serve on all interfaces at port 7860.
    demo.queue()
    demo.launch(server_name="0.0.0.0", server_port=7860)