File size: 62,924 Bytes
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8cbffa
 
 
 
 
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7cbe1b
d00e784
 
 
 
b1b8a33
5a6b719
b1b8a33
5a6b719
 
b1b8a33
5a6b719
 
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
 
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
b1b8a33
5a6b719
 
d00e784
 
 
 
a7cbe1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f729d44
d00e784
 
 
 
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
 
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
b1b8a33
0f2a1f2
 
d00e784
 
 
 
a7cbe1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00e784
 
 
 
 
 
74a7ccc
 
d00e784
 
 
 
 
74a7ccc
d00e784
 
 
 
 
985454b
74a7ccc
 
d00e784
 
 
985454b
74a7ccc
d00e784
 
 
 
 
 
 
 
 
 
 
b8cbffa
 
d00e784
74a7ccc
d00e784
b8cbffa
985454b
 
b8cbffa
 
d00e784
985454b
b8cbffa
 
74a7ccc
d00e784
 
b8cbffa
d00e784
 
 
 
 
 
 
74a7ccc
 
d00e784
74a7ccc
d00e784
74a7ccc
d00e784
 
 
 
 
 
 
74a7ccc
 
 
 
 
 
d00e784
74a7ccc
 
 
 
 
 
 
d00e784
 
74a7ccc
 
 
 
 
 
 
 
 
d00e784
74a7ccc
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2119db4
 
 
d00e784
2119db4
d00e784
 
2119db4
 
d00e784
 
2119db4
 
 
 
 
 
d00e784
2119db4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00e784
2119db4
 
 
 
 
 
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2119db4
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42316e1
d00e784
2595f75
d00e784
2595f75
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2595f75
 
 
 
 
 
 
 
d00e784
 
 
7026227
 
 
 
 
 
 
71bc2ba
 
 
 
 
 
 
 
 
 
d00e784
 
23a3e3f
2595f75
23a3e3f
 
 
 
96a8adc
23a3e3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2595f75
 
23a3e3f
d00e784
23a3e3f
 
2595f75
23a3e3f
 
 
 
 
 
68cf8a9
23a3e3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67cc2af
23a3e3f
 
 
 
 
 
96a8adc
67cc2af
96a8adc
 
 
 
23a3e3f
1ff707e
d00e784
2595f75
 
 
 
 
 
 
 
 
 
 
 
 
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2595f75
 
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96a8adc
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ff707e
2595f75
e81e4b1
d00e784
2595f75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00e784
 
 
2595f75
 
 
 
 
 
 
d00e784
2595f75
96a8adc
2595f75
d00e784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2119db4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.models.qwen2_5_omni import Qwen2_5OmniForConditionalGeneration, Qwen2_5OmniProcessor
import warnings
import os
import time
import re
import base64
import datetime
import uuid
import logging
from typing import List, Dict, Tuple, Optional
from PIL import Image
from huggingface_hub import snapshot_download
from swift.llm import PtEngine, RequestConfig, InferRequest

# --- Optional dependency check ---
# Every optional name starts out as None and is rebound only when its import
# succeeds. This guarantees that all six names always exist (so callers can
# test them with `is None` instead of hitting a NameError) and that one
# missing package does not disable the packages that did import correctly.
cv2 = None
VideoFileClip = None
concatenate_videoclips = None
OpenAI = None
genai = None
types = None

_import_errors = []

try:
    import cv2
except ImportError as e:
    _import_errors.append(e)

try:
    from moviepy.editor import VideoFileClip, concatenate_videoclips
except ImportError as e:
    # Reset both names so a partial import cannot leave the pair inconsistent.
    VideoFileClip = None
    concatenate_videoclips = None
    _import_errors.append(e)

try:
    from openai import OpenAI
except ImportError as e:
    _import_errors.append(e)

try:
    from google import genai
    from google.genai import types
except ImportError as e:
    # `types` is only meaningful together with `genai`; keep them in sync.
    genai = None
    types = None
    _import_errors.append(e)

# Report every failed import, then print the install hint once.
for _err in _import_errors:
    print(f"❌ 缺少必要库: {_err}")
if _import_errors:
    print("请运行: pip install opencv-python moviepy openai google-genai")

# --- Environment setup ---
# Settings exported through the process environment. Which library consumes
# the pixel/audio switches is not visible here — presumably the multimodal
# preprocessing stack; TODO confirm.
os.environ.update(
    {
        'ENABLE_AUDIO_OUTPUT': '0',
        'VIDEO_TOTAL_PIXELS': '0',
        'IMAGE_FACTOR': '1',
        'MAX_PIXELS': '1024',  # lowered to the minimum
        'PYTHONWARNINGS': 'ignore',
    }
)

# Silence every warning category inside this interpreter.
warnings.filterwarnings("ignore")

# Root logger at INFO with a timestamped format, plus a module-level logger.
_LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
LOGGER = logging.getLogger(__name__)

# ==========================================
# PART 0: 配置常量 & Demo 数据
# ==========================================

# 1. Model parameter configuration
# For each video model: the selectable output sizes, the numeric range of the
# per-shot duration control (minimum / maximum / step / initial value) and the
# label shown next to that control.
# Both Sora variants offer the same sizes and duration range; every entry
# still receives its own list/dict copy so that mutating one config can never
# leak into another.
_SORA_SIZES = ("1792x1024", "1024x1792", "1280x720", "720x1280")
_SORA_SECONDS = (("minimum", 4), ("maximum", 12), ("step", 4), ("value", 4))

MODEL_CONFIGS = {
    "sora-2": dict(
        sizes=list(_SORA_SIZES),
        seconds_range=dict(_SORA_SECONDS),
        seconds_label="单镜时长 (Sora: 4/8/12秒)",
    ),
    "sora-2-pro": dict(
        sizes=list(_SORA_SIZES),
        seconds_range=dict(_SORA_SECONDS),
        seconds_label="单镜时长 (Sora Pro: 4/8/12秒)",
    ),
    "veo-3.1": dict(
        sizes=["1080p", "720p"],
        seconds_range=dict(minimum=4, maximum=8, step=2, value=4),
        seconds_label="单镜时长 (Veo: 4/6/8秒)",
    ),
}

# 2. Prompts and styles
# CONTINUITY_PROMPT is a fixed Chinese instruction text (three adjacent string
# literals joined into one). It asks for a consistent visual style, stable
# scene/lighting, consistent character appearance and positions, treats the
# narration as a plot hint only (no voice-over), and requires the final frame
# to show every character from the front at their current position.
# NOTE(review): presumably appended to every shot prompt — confirm at the call site.
CONTINUITY_PROMPT = (
    "保持统一的视觉风格与世界观,场景与光影保持稳定,角色服装、发型、体型与表情连贯,仅根据剧情调整动作;"
    "如果有参考图片,请严格保持人物形象与参考图一致,人物站位不得变化,镜头衔接需流畅自然。"
    "旁白不需要朗读或配音,仅作为剧情提示使用。要求视频生成的最后一帧要展示所有人物的正面形象和此时的站位。"
)

# Maps a style label ("English (Chinese)") to the Chinese rendering
# instruction for that style.
STYLE_PROMPTS = {
    "Anime (二次元)": "整体画面要求:高质量二次元动漫渲染风格,角色为手绘动漫人物,肤色与材质为动画质感,背景为虚构的动画场景;禁止出现写实/真人或真实摄影元素。",
    "Realistic (写实)": "整体画面要求:高写实摄影风格,人物与环境光影细节丰富,材质与质感贴近真实世界,禁止出现卡通或夸张笔触,确保色彩与光线符合真实物理规律。",
    "Animated (动画/3D)": "整体画面要求:动画/卡通风格,支持二维或三维渲染,人物线条与轮廓清晰,色彩饱和且富有层次,可适当夸张动作与表情。",
    "Painterly (艺术/绘画)": "整体画面要求:艺术绘画风格,可呈现厚重笔触或水彩晕染质感,允许保留艺术性的纹理与笔法痕迹,整体色彩与构图需统一。",
    "Abstract (抽象/实验)": "整体画面要求:抽象/实验风格,鼓励运用超现实、故障艺术或非传统构图手法,可打破写实规律,突出视觉冲击力与创意表现。"
}
# Style labels in the insertion order of STYLE_PROMPTS.
STYLE_KEYS = list(STYLE_PROMPTS.keys())

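# 3. Demo case data: each entry holds a video file name ("file"), a display title ("title") and the storyboard script text ("script").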
DEMO_DATA = [
    {
        "file": "demo1.mp4",
        "title": "案例 1",
        "script": """
【Dialogue】:

1. [0 seconds - 9 seconds] (Camera Movement: Handheld Camera Effect + Shot Type: Long Shot - Medium Close-up - Long Shot)
Storyline: In a long shot, Su Luo paces anxiously back and forth in a forest clearing. The protagonist stands a short distance away, quietly watching her. The camera zooms in, switching to a handheld medium close-up, closely following Su Luo. She scratches her hair in frustration, stops, kicks a pebble at her feet, and mutters to herself, "I really shouldn't have agreed to Qin Fei's request. How can I find all three time capsules in such a short time?" She then stomps her foot in annoyance. The scene cuts back to a long shot, making the distance between the two obvious.

2. [9 seconds - 18 seconds] (Camera Movement: Shallow Depth of Field + Shot Type: Long Shot - Medium Shot - Long Shot)
Storyline: In a long shot, the protagonist takes a few steps closer to Su Luo. The camera switches to a medium shot over the protagonist's shoulder, focusing on Su Luo, with the background blurred. The protagonist asks with concern, "What is a time capsule?" Su Luo turns her head at the sound of her voice, her annoyance temporarily replaced by professionalism. She raises her hand to explain, "It's a time capsule controlled by cloud threads, a mechanism that counts down." The scene cuts back to a wide shot, where the protagonist is now standing in front of Su Luo.

3. [18 seconds - 26 seconds] (Camera movement: Tilt Shot + Shot type: Wide shot - Close-up - Wide shot) Plot: In the wide shot, Su Luo continues to explain to the protagonist. The camera cuts to a close-up of Su Luo, her eyes focused, her tone becoming scholarly: "Because it can only be opened at a designated time, it's called a time capsule." The camera tilts downwards, panning across her gestures simulating opening and closing the capsule, then returns to her serious face. The scene cuts back to a wide shot, Su Luo's explanation has ended.

4. [26 seconds - 36 seconds] (Camera Type: Arc Shot + Shot Type: Long Shot - Medium Close-up - Long Shot) Plot: In the long shot, Su Luo's body language becomes exaggerated. The camera switches to a medium close-up arc shot circling Su Luo. She puts one hand on her hip, recalling the scene, her tone shifting from explanation to a slightly smug complaint: "Ugh, that guy Qin Fei suddenly said he buried three time capsules containing precious treasures, thinking I definitely wouldn't find them." She raises her chin, mimicking her past self, proudly patting her chest: "As a connoisseur, I said on the spot, 'Hmph, what is there that I, Su Luo, can't find? I'll find them all for you in less than an hour.'" The scene cuts back to the long shot, Su Luo still maintaining her proud pose.

5. [36seconds - 44 seconds] (Camera Type: Shaky Cam + Shot Type: Long Shot - Medium Shot - Long Shot) Plot: In the long shot, the protagonist seems to say something, and Su Luo's posture instantly collapses. The camera shifts to a medium shot of the protagonist, who, with a slightly mocking smile, gently shakes their head: "Uh, judging by your eagerness, you haven't achieved your goal yet, have you?" The focus remains the same, but Su Luo can be seen in the background; her proud posture instantly crumbles, her shoulders slump like a punctured balloon. Slight camera shake hints at Su Luo's inner turmoil. The scene cuts back to a long shot, where Su Luo lowers her head.

6. [44 seconds - 53 seconds] (Camera movement: Panning Shot + Shot type: Long shot - Close-up - Long shot) Plot: In the long shot, Su Luo appears somewhat dejected. The camera cuts to a close-up of Su Luo, who awkwardly avoids the protagonist's gaze, fidgeting with the hem of her clothes as she whispers an explanation: "Who knew that guy would be so cunning this time? The boxes were so far apart! Even though he cheated, I don't want to lose face..." Before she can finish, she suddenly looks up, and the camera pans to capture her pleading gaze at the protagonist: "Can you help me?" The scene cuts back to a wide shot, their eyes meeting.

7. [53 seconds - 62 seconds] (Camera movement: Crash Zoom + Shot type: Wide shot - Medium shot - Wide shot) Plot: In the wide shot, the protagonist smiles and nods. The camera quickly zooms in on Su Luo, a rapid medium zoom close-up capturing the fleeting surprise and relief on her face. She immediately perked up, excitedly exclaiming, "Great!" Then, she pointed decisively in two different directions into the distance, "I'll leave those two to you," she said, then pounded her chest confidently, "I'll handle the last one myself." The scene cuts back to a distant view; Su Luo has regained her composure and is ready to set off.

【Character Profile】: Su Luo: Medium height/light and agile physique/clear facial features, with large eyes that can instantly switch emotions, sometimes focused and sharp, sometimes cunning and smug/wears a slightly messy high ponytail with vibrant ash brown hair/wears a short jacket and cotton shirt for ease of movement, a tool belt around her waist, and trousers tucked into sturdy hiking boots/extroverted temperament, with rapidly changing emotions, from pacing restlessly and proudly with hands on hips to pleading for help in embarrassment; her body language is rich, and she's an action-oriented person who can't hide her feelings.

Main Character: Above average height / Tall and composed posture / Soft facial features, gentle yet sharp eyes, often with a subtle smile / Short, neatly trimmed dark hair / Wears a dark long-sleeved shirt and durable travel trousers, well-fitting and easy to move in, with a simple and understated style / Reserved and calm demeanor, an excellent observer and listener, responding primarily with subtle gestures like nods and smiles, a stark contrast to Su Luo's liveliness.

【Scene Description】: A dappled sunlight-dappled clearing in the woods in the afternoon. The atmosphere gradually shifts from one person's anxiety and the other's calm observation to a relaxed, cooperative one. The scene is open, with pebbles and fallen leaves scattered on the ground, surrounded by tranquil woods.

【Positioning】:
1. Su Luo moves back and forth in the center of the clearing, while the main character stands at the edge, a considerable distance away.

2. The main character walks towards Su Luo, and eventually the two stand face-to-face, about two or three steps apart.

3. Maintaining this close proximity, Su Luo faces the main character and begins to explain. 4. The two remain in the same position, with Su Luo turning towards the protagonist in a slightly theatrical manner.

5. The two remain in the same position, with the protagonist facing Su Luo, while Su Luo lowers her head and turns to the side, avoiding the protagonist's gaze.

6. Su Luo changes her gaze from avoiding the protagonist to looking directly at her, and the two are face to face again, their eyes meeting.

7. The two remain in the same position, with Su Luo briefly facing the protagonist, then turning her body to the side and pointing into the distance.
         """
    },
    {
        "file": "demo2.mp4",
        "title": "案例 2",
        "script": """【对话】:1. [0-8秒](运镜类型:Handheld Camera Effect+全景镜头、中景镜头)
故事情节:远景镜头,幽暗的遗迹室内,蒋风正俯身在一块散发着微光的古代石碑(指引图)前。主角从阴影中走出,站定在他身后几步远。镜头切换为手持拍摄的中景镜头,跟随主角的视线,画面有轻微晃动,聚焦在主角锐利的眼神上。主角双臂环抱,带着审视的口吻质问:"你在做什么?指引图是不能随便篡改的。"声音打破了室内的寂静。结尾回到远景,主角保持质问的姿态,蒋风的背影僵住。

2. [8-16秒](运镜类型:Arc Shot+全景镜头、中景镜头)
故事情节:远景镜头,蒋风缓缓转过身。镜头以一个平滑的弧度围绕蒋风移动,切换为中景镜头。他看到主角时明显一愣,双手下意识地抬起,掌心向前,做出一个无辜且防御的姿态,眼神慌乱地解释:"篡改指引图?不不不,你误会了。"他的表情诚恳又急切。结尾远景,两人对峙,气氛紧张。

3. [16-24秒](运镜类型:Shallow Depth of Field+全景镜头、中近景镜头)
故事情节:远景镜头,蒋风放下了手,姿态变得谦卑。镜头切换为中近景,焦点落在蒋风身上,他略带窘迫地笑了笑,背景中的主角身影变得模糊。他一边说一边用手比划着自己:"我才加入风物家没多久,哪有这个本事能篡改它。"结尾回到远景,蒋风仍在解释,主角静静地听着,没有打断。

4. [24-33秒](运镜类型:Tilt Shot+全景镜头、特写镜头)
故事情节:远景镜头,蒋风再次转向指引图。镜头给到蒋风的中近景,他伸出手指,小心翼翼地指向石碑上的一个发光符文,但并未触碰:"我只是想查看指引图上的身份印鉴。"镜头向下倾斜,给到他手指所指之处的符文一个特写,符文复杂而古老。主角的声音从画外传来,带着一丝疑惑:"身份印鉴?"结尾回到远景,主角微微探身,视线也落在了那个符文上。

5. [33-42秒](运镜类型:Panning Shot+全景镜头、近景镜头)
故事情节:远景镜头,两人都注视着指引图。镜头切换为近景,从蒋风的侧脸开始,他温和地解释着:"嗯,就是一种类似签名的东西。"镜头缓缓横移,扫过石碑上更多类似签名的印鉴,光芒流转。他的声音变得低沉而充满怀念:"在考古界,早期开荒的人员有权在指引图上留下自己的名字,我们称之为身份印鉴。"镜头移回,定格在他充满希冀的眼神上:"我想看看这些指引图上有没有我父亲的名字。"结尾回到远景,整个房间的氛围因这番话而悄然改变。

6. [42-51秒](运镜类型:Lens Flare+全景镜头、中景镜头)
故事情节:远景镜头,蒋风垂下目光。镜头切换为中景,他背对着石碑,仿佛陷入了久远的回忆,一道柔和的镜头光晕扫过画面,他眼神飘向远方,带着一丝不易察觉的落寞:"我父亲是主攻考古的风物家,但他常年在外勘察……我已经很久很久没见到他了。"结尾回到远景,主角的注意力已经完全从石碑转移到了蒋风身上。

7. [51-59秒](运镜类型:Deep Depth of Field+全景镜头、中近景镜头)
故事情节:远景镜头,蒋风转过头,重新看向主角。镜头切为中近景,景深拉远,我们能清晰看到前景中蒋风努力挤出一个微笑,眼神却流露着不确定,以及背景里主角严肃倾听的轮廓。蒋风说:"母亲说稷下不少的开荒考古是他完成的,我想看看是不是他真的来过。"结尾回到远景,蒋风的微笑显得有些无力。

8. [59-67秒](运镜类型:Shallow Depth of Field+全景镜头、特写镜头)
故事情节:远景镜头,室内一片沉寂。镜头推进到蒋风脸部的特写,极浅的景深模糊了周围的一切,只剩下他复杂的表情。他的笑容消失了,嘴唇微微颤抖:"虽然我相信母亲不会骗我,但……"他停顿了一下,低下头,用几不可闻的声音说出心底的委屈,"哪有人经常在外不回家的。"结尾远景,蒋风低着头,肩膀微微垮下。

9. [67-74秒](运镜类型:Shaky Cam+全景镜头、中近景镜头)
故事情节:远景镜头,主角打破了沉默。镜头切换为中近景,聚焦在主角身上,轻微的镜头晃动暗示着他内心的触动。他原本锐利的眼神已经完全柔和下来,取而代之的是理解与同情。他轻声问道:"那你找到答案了吗?"结尾远景,听到问话,蒋风缓缓抬起头。

【人物形象】:主角:身形挺拔,体态匀称有力,面部轮廓分明,眼神锐利如鹰。留着一头便于打理的深色短发,发丝间或夹杂风霜痕迹。身着深色调、材质耐磨的探险服,肩部和肘部有皮革补丁,腰间挂着若干实用工具包。气质沉稳老练,初期动作多为双臂环抱的审视姿态,后期眼神转为柔和,流露同情与无奈,是一位经验丰富、外冷内热的行动派。

蒋风:身高略低于主角,体态偏瘦,书生气较重,面部线条柔和,眼神清澈但时常流露慌乱与不确定。发型是略显蓬乱的黑色中短发,似乎无暇打理。穿着一身崭新的"风物家"制服,款式简洁但略显宽大,与身形不甚贴合。气质真诚而笨拙,常有抬手、低头、窘迫微笑等下意识动作,在提及父亲时,会从紧张转为充满希冀与感伤的脆弱,是一位涉世未深的年轻后辈。

【场景描述】:幽暗的古代遗迹室内,唯一的稳定光源来自一块散发着微光的石碑指引图,石壁上刻有古老符文。场景氛围从初始的紧张对峙,随着角色对话的深入,逐渐转变为充满感伤与理解的静谧与私密。

【站位】:1:主角站在蒋风身后几步远处,蒋风俯身于石碑前。
2:蒋风完全转过身,与主角正面相对,形成对峙。
3:两人保持面对面的站位,距离不变。
4:蒋风转身面向石碑,主角在其侧后方,视线投向石碑。
5:两人大致并排,共同注视着石碑。
6:蒋风背对石碑,面向空旷处;主角从侧面注视着蒋风。
7:蒋风转身,再次与主角面对面站立。
8:两人位置不变,蒋风低头,避开主角视线。
9:蒋风抬头,与主角视线交汇,维持原有距离。
"""
    },
    {
        "file": "demo3.mp4",
        "title": "案例 3",
        "script": """
【Dialogue】: 1. [0-8 seconds] (Handheld Camera Effect + Wide Shot + Medium Close-up) A wide shot shows the protagonist and Meng Ya standing on a dusty open ground, while Yuan Xue and Zhao Dongxu lean against a tree not far away. The camera zooms in on Yuan Xue and the protagonist in a medium close-up. Yuan Xue is pale and ragged. She wipes the blood from her forehead with her sleeve, looking at the protagonist with fear and gratitude: "Thank you so much! Luckily you arrived in time! Otherwise, we would be a barbecue feast by now..." Upon hearing this, the protagonist's expression turns serious, his sharp eyes scanning the charred marks behind her. The camera pulls back to a wide shot, confirming the four people's positions.

2. [8-16 seconds] (Tilt Shot + Medium Shot) In a wide shot, the protagonist's gaze shifts from Yuan Xue, and he quickly walks to Zhao Dongxu's side. The camera shifts to a medium shot, starting with Zhao Dongxu's pained face. He grits his teeth, his face ashen, struggling to move his legs. The camera tilts downwards, finally focusing on his trembling right knee, which is supporting his body. He weakly whispers, "I can't feel my leg at all..." The camera pulls back to a long shot, showing the protagonist bending down to carefully examine Zhao Dongxu's injuries.

3. [16-24 seconds] (Panning Shot + Medium Shot) In the long shot, Meng Ya walks out from behind the protagonist. The camera cuts to a medium shot of Meng Ya, his arms crossed, brows furrowed, and he says impatiently to the protagonist, "What's wrong with you? What are you spacing out for?" He glances in the direction of Yuan Xue and Zhao Dongxu, then his gaze moves past the protagonist to the scorched earth, seemingly assessing the damage. The camera pulls back to a long shot, showing Meng Ya standing side-by-side with the protagonist, creating a sense of confrontation.

4. [24-33 seconds] (Shallow Depth of Field + Close-up) A long shot shows Meng Ya and the protagonist's positions. The camera quickly zooms in on Meng Ya, who becomes blurred in the background. He says with a hint of sarcasm, "If you're not strong enough, don't try to take on a top-tier commission! Come with us, back to the camp to get treatment from Bai Cao's family." Before he finishes speaking, the focus shifts to a close-up of the protagonist. He appears calm, but his gaze is unusually cool. He sniffs the air, confirming that Meng Ya's words have a hidden meaning. The camera pulls back to a long shot, and the protagonist slowly raises his head, looking directly at Meng Ya.

5. [33-41 seconds] (Arc Shot + Medium Close-up) A long shot creates a tense atmosphere. The camera begins a slow, arcing movement around the protagonist. Meng Ya's voice is like a ticking time bomb: "Understood. Let's split up then. I'll escort the wounded to the camp first, then come back to find you." In the moving shot, the protagonist's profile appears exceptionally resolute. He doesn't respond to Meng Ya, but instead warily scans his surroundings. Suddenly, his eyes sharpen, as if he's caught something unusual. The camera pulls back to a wide shot; the protagonist has turned completely in another direction.

6. [41-49 seconds] (Crash Zoom + Extreme Close-up) In the wide shot, the protagonist maintains a wary posture. He sniffs the air sharply, his pupils suddenly contracting. The camera instantly zooms in on his eyes, creating an extreme close-up, as if he's discerning the presence of danger: "And... I'm worried there might be other trapped students ahead. Perhaps we should go ahead and scout further." Plot: As soon as he finishes speaking, the camera quickly pulls back to a wide shot; we see him make up his mind and resolutely head towards the unknown danger ahead.

7. [49-57 seconds] (Shaky Cam + Medium-long Shot) In the long shot, Meng Ya is stunned. The camera switches to a handheld medium-long shot, the image shaking slightly with Meng Ya's reaction. He sighs helplessly, points towards the camp, as if to give instructions: "Understood. Let's split up then. I'll escort the wounded to the camp first, then come find you later." Although his tone is dissatisfied, his actions are decisive. The camera pulls back to the long shot, Meng Ya watches the protagonist turn around, then immediately turns and runs in the opposite direction.

8. [57-65 seconds] (Deep Depth of Field + Medium-long Shot) In the long shot, Meng Ya has already run dozens of meters. The camera is fixed on a mid-to-long shot with a large depth of field. In the foreground is a blurry silhouette of Meng Ya, who runs while turning back to shout, "Hey, don't go too far! Big Eye Owl will contact you later!" The background is very clear, with the protagonist continuing to walk forward without looking back, his expression unwavering. Plot: The two disappear rapidly into the distance in completely opposite directions. The camera eventually returns to the long shot, showing only...

a desolate land, no people left in the Frame. 
【Character Appearance】: Yuan Xue: Approximately 165cm tall / Slender build, appearing extremely weak due to shock and injuries / Pale face, tattered clothes with burn marks, wiping blood from her forehead with her sleeve / Long, dark hair disheveled, mixed with dust and sweat / Eyes filled with lingering fear and gratitude, movements weak due to swaying body.

Main Character: Approximately 180cm tall / Well-proportioned and agile build, efficient movements / Well-defined facial features, serious and sharp eyes, with a wary expression when calmly observing / Short, dark hair, clean and neat / Wearing dark, durable tactical clothing, in good condition / Calm and decisive temperament, strong action ability, habitually using her sense of smell to perceive her environment.

Zhao Dongxu: Approximately 178cm tall / Slender build, appearing extremely weak after being injured / Pale face, facial features tense due to pain, obvious... Scarred/Short black hair, greasy with sweat and dust/Expression filled with pain, moving his lower limbs with difficulty, his movements trembling due to the injury/A resilient personality, but now submissive due to severe injuries, full of gratitude towards his comrades but unable to respond due to his physical helplessness.

Meng Ya: Approximately 182cm tall/Strong build, exuding power/Hard facial features, often frowning, his expression showing impatience with a hint of mockery/Short red hair, messy and with burn marks/Wearing a light combat vest and work pants, his arm muscles are clearly defined/Direct and fiery temperament, acting faster than thinking, possessing leadership but lacking patience with his comrades, once a decision is made, he will execute it immediately.

【Scene Description】: A desolate clearing after a disaster, the air filled with the dust and acrid smell of burning, the sun blazing, and traces of unextinguished flames still remaining on the ground, the overall atmosphere tense, oppressive, and uneasy.

【Positioning】: 1: The protagonist and Meng Ya stand side by side. 1. Yuan Xue and Zhao Dongxu lean against a tree in the distance, behind and to the side of the protagonist.

2. The protagonist walks from his original position towards Zhao Dongxu, bending down to check on him; the two are very close.

3. Meng Ya emerges from behind the protagonist, a few steps away, forming a confrontational stance with the two injured people.

4. Meng Ya stands side-by-side with the protagonist, facing each other, about an arm's length apart.

5. The protagonist turns around, his back to Meng Ya, scanning his surroundings; Meng Ya remains in place, observing him.

6. The protagonist remains still, turning his body completely forward, isolating himself from Meng Ya and the others behind him.

7. Meng Ya runs towards the distant camp; the protagonist remains in place, watching his retreating figure.

8. The protagonist moves forward, while Meng Ya runs in the opposite direction (away from the protagonist). The distance between them rapidly increases.
 """
    },
    {
        "file": "demo4.mp4",
        "title": "案例 4",
        "script": """【对话】:1. [0-8秒] (Shallow Depth of Field+全景镜头、中近景镜头) 故事情节:远景展示公园小径上,白衫和主角并肩站立,一只胖乎乎的宠物"呱呱"趴在他们脚边的草地上气喘吁吁。镜头随即以浅景深推向白衫的中近景,他低头看着呱呱,脸上交织着无奈与宠溺,接着他抬眼望向主角,问道:"怎么样?呱呱有好好锻炼吗?" 结尾回到远景,主角正准备回答。

2. [8-17秒] (Handheld Camera Effect+全景镜头、中景镜头) 故事情节:远景中,三人位置不变。镜头切换为手持效果下的中景,画面随着白衫的动作有轻微晃动。他无奈地摊开手,叹了口气:"没办法,之前太惯着它了。" 他的视线落在懒洋洋翻了个身的呱呱身上,语气里满是无可奈何:"现在没有吃的半步也不挪。" 结尾远景,白衫轻轻摇头,主角在一旁安静地听着。

3. [17-26秒] (Arc Shot+全景镜头、中景镜头) 故事情节:远景展现整个场景。镜头开始围绕白衫进行弧线运动,他蹲下身,温柔地抚摸着呱呱的后背,语气放缓:"不过它今天起码完成了一点运动,晚上允许它多吃一点。" 镜头继续沿弧线转向一旁的主角,他看着这温情的一幕,微笑着点头附和:"今天运动的还不错。" 结尾远景,白衫蹲着,主角站着,形成一高一低的构图。

4. [26-34秒] (Shallow Depth of Field+全景镜头、近景镜头) 故事情节:远景中,白衫依旧蹲在呱呱身边,主角静立一旁。镜头切入白衫的近景,背景完全虚化。他抚摸呱呱的动作没停,但眼神中浮现出真切的忧虑,声音也低沉下来,充满了担忧:"我知道运动很辛苦,但呱呱真的太胖了,我很怕它胖得生病。" 结尾远景,能看到主角脸上的笑容也收敛了,神情变得严肃。

5. [34-42秒] (Tilt Shot+全景镜头、中景镜头) 故事情节:远景确认场景站位。镜头从中景开始,从地上心满意足地摇着尾巴的呱呱缓缓向上抬升,最终定格在刚站起身的白衫脸上。他脸上的忧虑一扫而空,转为一种故作爽朗的兴奋,对着呱呱大声宣布:"表现真不错。今晚它可以多吃一点!" 结尾远景,白衫高兴地拍了拍手。

6. [42-51秒] (Shaky Cam+全景镜头、中近景镜头) 故事情节:远景中,主角看着兴奋的白衫。镜头切换为对准主角的中近景,轻微的摇晃反映出他内心的无语。他看着白衫,嘴角微微抽动,眼神里是哭笑不得的怀疑,几乎是对自己低语:"这么吃还能瘦么……" 随即他像是突然想起了正事,表情一正,视线重新聚焦在白衫身上,问道:"对了,你知道刘叶的情况怎么样了吗?" 结尾远景,主角向前迈了半步,成功转移了话题。

7. [51-58秒] (Deep Depth of Field+全景镜头、中景镜头) 故事情节:远景中,两人相对而立。镜头切到白衫的中景,听到"刘叶"的名字,他先是愣了一下,随即眼神投向远方,景深变大,背景中的公园路径和行人都变得清晰。他皱眉思索片刻,然后略带歉意地摇了摇头,收回目光:"刘叶?看着是没长高,具体我就不知道了。" 结尾远景,白衫看着主角,摊了摊手表示不知情。

8. [58-67秒] (Panning Shot+全景镜头、中远景镜头) 故事情节:远景中,三人保持着最后的站位。镜头给到白衫的中远景,他侧过身,抬手指向西边的方向,语气变得热心:"同学要是想知道,不如去找他问问吧。" 镜头随着他的手臂平滑地向西边摇摄,画面中出现一条通往远处开阔广场的小径。"你往西走,他就在那边的广场。" 结尾远景,镜头停下,主角顺着白衫所指的方向望去,若有所思。

【人物形象】:白衫:身高约180cm,体态匀称修长,略带少年感。面部线条柔和,眉眼清秀,笑起来时眼角有细微纹路。发型是自然的黑色短发,刘海稍长,显得随性。身穿一件干净的白色棉麻衬衫和浅色休闲裤,脚踩白色运动鞋。气质温和亲切,与人交谈时真诚,对待宠物时眼神宠溺,会用摊手、挠头等小动作表达无奈,是个内心细腻的暖男。

主角:身高与白衫相仿,身形挺拔,站姿稳重。面部轮廓分明,眼神锐利但内敛,表情变化细微,善于观察。发型为深色利落短发,显得干练。穿着深色系的休闲夹克,内搭纯色T恤,下身是工装裤,整体风格偏向实用和低调。气质沉稳,话不多,习惯通过嘴角抽动、眼神聚焦等微表情传递内心活动,行动果断且有目的性。

呱呱:一只体型极度肥胖的宠物,身躯圆滚滚,四肢短小,趴在地上像个肉球。拥有一双憨态可掬的大眼睛,表情总是懒洋洋的。毛发短而顺滑,脖子上戴着一个简单的项圈。动作迟缓,极度懒散,没有食物的诱惑便不愿动弹,对主人的抚摸会表现出心满意足的样子,是一只被宠坏了的"吃货"。

【场景描述】:午后阳光明媚的公园草坪,氛围从轻松宠溺的日常,转为对宠物健康的真切担忧,最终变为热心指路的平实交流。主要视觉元素是茵茵绿草、蜿蜒的小径,以及趴在地上一动不动的胖宠物。

【站位】:1. 白衫与主角并肩站立,呱呱在他们脚边的草地上。
2. 三人位置不变,白衫面向主角和呱呱。
3. 白衫蹲在呱呱旁边,主角站在他身侧,形成高低位。
4. 白衫维持蹲姿,主角站在一旁注视。
5. 白衫从呱呱身边站起,转身面对呱呱。
6. 主角面向白衫,两人相对而立,主角向前半步拉近距离。
7. 两人保持相对站立,白衫短暂望向远方后,目光回到主角身上。
8. 白衫侧身指向西边,主角随其指向望向同一方向。"""
    }
]

# ==========================================
# PART 1: 剧本生成模型 (ScriptAgent)
# ==========================================
from swift.llm import PtEngine, RequestConfig, InferRequest
from swift.plugin import InferStats
import torch

# Module-level state for the script-generation model
MODEL_NAME = "XD-MU/ScriptAgent"  # repo id handed to snapshot_download()
LOCAL_MODEL_PATH = "./downloaded_models/ScriptAgent"  # directory the snapshot is downloaded to and loaded from
engine = None  # inference engine object; stays None until load_llm_model() succeeds

# Make sure the model directory exists (runs at import time)
os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)

def load_llm_model():
    """Populate the module-level ``engine`` with an ms-swift ``PtEngine``.

    Does nothing when ``engine`` is already set. Otherwise the model snapshot
    is downloaded into ``LOCAL_MODEL_PATH`` unless a ``config.json`` is already
    present there, and a ``PtEngine`` is built from that directory with
    CPU placement, bfloat16 and bnb 4-bit quantization arguments.

    Any exception is caught: the error and its traceback are printed and
    ``engine`` is left as ``None``. Callers detect failure by checking
    ``engine`` afterwards; nothing is returned or raised.

    NOTE(review): 4-bit bnb quantization is requested together with
    ``device_map='cpu'`` — confirm this combination is supported by the
    installed ms-swift / bitsandbytes versions.
    """
    global engine
    if engine is not None:
        return

    try:
        # The presence of config.json is used as the "already downloaded" marker.
        config_file = os.path.join(LOCAL_MODEL_PATH, "config.json")
        if os.path.exists(config_file):
            print(f"✅ 模型已存在: {LOCAL_MODEL_PATH}")
        else:
            print(f"正在从 HuggingFace 下载模型到 {LOCAL_MODEL_PATH}...")
            snapshot_download(
                repo_id=MODEL_NAME,
                local_dir=LOCAL_MODEL_PATH,
                local_dir_use_symlinks=False,
                resume_download=True,
            )
            print(f"✅ 模型已下载到: {LOCAL_MODEL_PATH}")

        print("正在加载文本模式(禁用多模态)...")

        # Extra loader options forwarded through model_kwargs.
        loader_options = {
            'low_cpu_mem_usage': True,
            'max_memory': {'cpu': '10GB'},
            'offload_folder': './offload',  # spill to disk when memory runs short
        }
        engine = PtEngine(
            model_id_or_path=LOCAL_MODEL_PATH,
            torch_dtype=torch.bfloat16,  # bfloat16 to reduce memory use
            max_batch_size=1,
            device_map='cpu',
            quant_method='bnb',
            quantization_bit=4,
            model_kwargs=loader_options,
        )

        print("✅ 文本模式加载完成")

    except Exception as exc:
        # Best effort: report the failure and leave `engine` as None.
        print(f"❌ 模型加载失败: {exc}")
        import traceback
        traceback.print_exc()

def chat_with_scriptagent(user_input: str):
    """Generate a script from dialogue text with the ScriptAgent engine.

    Args:
        user_input: Dialogue / plot text entered by the user. ``None`` and
            whitespace-only values are treated as empty input.

    Returns:
        The generated script text, or a user-facing message string when the
        input is empty, the model could not be loaded, the model produced no
        text, or inference raised an exception.
    """
    global engine

    # Validate first so an empty request never triggers the (expensive) model load.
    user_input = (user_input or "").strip()
    if not user_input:
        return "请输入内容"

    if engine is None:
        load_llm_model()
        if engine is None:
            return "❌ 模型加载失败,请检查后台日志。"

    try:
        print("🤖 正在使用 ms-swift InferEngine 推理剧本...")

        # 1. Wrap the prompt as a single-turn chat request.
        messages = [{'role': 'user', 'content': user_input}]
        infer_request = InferRequest(messages=messages)

        # 2. Sampling / generation parameters.
        request_config = RequestConfig(
            max_tokens=4096,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            stream=False,
        )

        # 3. Run inference while collecting statistics.
        metric = InferStats()
        resp_list = engine.infer([infer_request], request_config, metrics=[metric])

        # 4. Pull the text out of the first choice of the first response.
        response = resp_list[0].choices[0].message.content

        print(f"✅ 生成完成 | 指标: {metric.compute()}")

        # Check for an empty/None result BEFORE calling len() on it, otherwise a
        # None response would surface as a generic failure instead of this hint.
        if not response:
            return "⚠️ 生成为空,请重试"

        print(f"✅ 生成结果长度: {len(response)} 字符")
        return response

    except Exception as e:
        print(f"❌ 生成出错: {e}")
        import traceback
        traceback.print_exc()
        return f"生成失败: {str(e)}"
# ==========================================
# PART 2: 视频生成 API 封装
# ==========================================

class OpenAISoraAPI:
    """Wrapper around the OpenAI video (Sora) API used by the pipeline."""

    def __init__(self, api_key: str):
        """Create the OpenAI client.

        Raises:
            RuntimeError: If ``OpenAI`` is ``None`` (the openai package is
                not available).
        """
        if OpenAI is None:
            raise RuntimeError("未安装 openai 库,请运行: pip install openai")
        self.client = OpenAI(api_key=api_key)

    def generate_video(
        self,
        prompt: str,
        output_path: str,
        model: str,
        size: str,
        seconds: int,
        ref_img_path: Optional[str] = None
    ) -> Optional[str]:
        """Generate one video clip and save it to ``output_path``.

        Args:
            prompt: Text prompt for the clip.
            output_path: Destination file for the downloaded video.
            model: Model name passed to the API.
            size: Resolution string passed to the API.
            seconds: Clip duration; sent to the API as an integer string.
            ref_img_path: Optional reference image; ignored when the path
                does not exist.

        Returns:
            ``None`` on success, otherwise an error message string. Exceptions
            are caught, logged and converted into the returned message.
        """
        try:
            LOGGER.info(f"🎬 Sora API 调用: {model} | {size} | {seconds}秒")

            kwargs = {
                "model": model,
                "prompt": prompt,
                "size": size,
                # int() first so a float slider value (4.0) is sent as "4", not "4.0".
                "seconds": str(int(seconds)),
            }

            if ref_img_path and os.path.exists(ref_img_path):
                # The file handle must still be open while the SDK uploads it,
                # so the create call happens inside the ``with`` block.
                with open(ref_img_path, 'rb') as ref_file:
                    kwargs["input_reference"] = ref_file
                    video_job = self.client.videos.create(**kwargs)
            else:
                video_job = self.client.videos.create(**kwargs)

            # Poll until the job leaves the in-progress states.
            while video_job.status in ["queued", "processing"]:
                LOGGER.info(f"⏳ 视频生成中... 进度: {video_job.progress}%")
                time.sleep(10)
                video_job = self.client.videos.retrieve(video_job.id)

            if video_job.status != "completed":
                error_msg = f"视频生成失败,状态: {video_job.status}"
                LOGGER.error(error_msg)
                return error_msg

            # Prefer a direct URL when the job object exposes one; otherwise
            # fall back to the SDK's content download endpoint.
            video_url = getattr(video_job, "url", None)
            if video_url:
                import requests
                reply = requests.get(video_url, timeout=600)
                # Do not write an HTTP error page into the .mp4 file.
                reply.raise_for_status()
                with open(output_path, 'wb') as f:
                    f.write(reply.content)
            else:
                content = self.client.videos.download_content(video_job.id)
                content.write_to_file(output_path)

            LOGGER.info(f"✅ 视频已保存: {output_path}")
            return None

        except Exception as e:
            error_msg = f"Sora API 错误: {str(e)}"
            LOGGER.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg


class GoogleVeoAPI:
    """Wrapper around the Google Veo 3.1 video generation API."""

    def __init__(self, api_key: str):
        """Create the google-genai client.

        Raises:
            RuntimeError: If ``genai`` is ``None`` (the google-genai package
                is not available).
        """
        if genai is None:
            raise RuntimeError("未安装 google-genai 库,请运行: pip install google-genai")
        self.client = genai.Client(api_key=api_key)

    def generate_video(
        self,
        prompt: str,
        output_path: str,
        size: str,
        seconds: int,
        ref_img_path: Optional[str] = None
    ) -> Optional[str]:
        """Generate one video clip and save it to ``output_path``.

        Args:
            prompt: Text prompt for the clip.
            output_path: Destination file for the downloaded video.
            size: Requested resolution; anything other than "1080p"/"720p"
                falls back to "720p".
            seconds: Clip duration in seconds.
            ref_img_path: Optional reference image; ignored when the path
                does not exist.

        Returns:
            ``None`` on success, otherwise an error message string. Exceptions
            are caught, logged and converted into the returned message.
        """
        try:
            LOGGER.info(f"🎬 Veo API 调用: {size} | {seconds}秒")

            config_kwargs = {}

            if ref_img_path and os.path.exists(ref_img_path):
                # NOTE(review): this passes whatever ``Image.open`` returns as
                # the reference image; confirm the SDK accepts that type.
                ref_image = Image.open(ref_img_path)
                reference = types.VideoGenerationReferenceImage(
                    image=ref_image,
                    reference_type="asset"
                )
                config_kwargs["reference_images"] = [reference]

            # Only the two known resolutions are forwarded; default is 720p.
            resolution_map = {"1080p": "1080p", "720p": "720p"}
            resolution = resolution_map.get(size, "720p")

            operation = self.client.models.generate_videos(
                model="veo-3.1-generate-preview",
                prompt=prompt,
                config=types.GenerateVideosConfig(
                    # Slider values may arrive as floats; the API expects whole seconds.
                    duration_seconds=int(seconds),
                    resolution=resolution,
                    aspect_ratio="16:9",
                    **config_kwargs
                ),
            )

            # Poll the long-running operation until it finishes.
            while not operation.done:
                LOGGER.info("⏳ 视频生成中...")
                time.sleep(10)
                operation = self.client.operations.get(operation)

            # Report a failed operation explicitly instead of tripping over a
            # missing response further down.
            op_error = getattr(operation, "error", None)
            if op_error:
                error_msg = f"视频生成失败: {op_error}"
                LOGGER.error(error_msg)
                return error_msg

            response = operation.response
            videos = getattr(response, "generated_videos", None) if response is not None else None
            if not videos:
                error_msg = "视频生成失败: 未返回任何视频"
                LOGGER.error(error_msg)
                return error_msg

            # files.download() fetches the bytes into the video object; the
            # file is then written with Video.save(). The download call does
            # not take an output path.
            video = videos[0]
            self.client.files.download(file=video.video)
            video.video.save(output_path)

            LOGGER.info(f"✅ 视频已保存: {output_path}")
            return None

        except Exception as e:
            error_msg = f"Veo API 错误: {str(e)}"
            LOGGER.error(error_msg)
            import traceback
            traceback.print_exc()
            return error_msg


# ==========================================
# PART 3: 视频处理工具函数
# ==========================================

def parse_script_nodes(script_text: str) -> List[str]:
    """Split a script into a list of shot descriptions.

    Shots are delimited by numbered markers such as ``1.`` / ``2.``, which may
    sit on their own lines or inline. Text before the first marker is
    discarded, and empty shots are dropped. When the text contains no marker
    at all, every non-empty line becomes one shot.

    A marker is a run of digits followed by a dot that is NOT part of a
    decimal or dotted number: "2.5秒" or "v1.2" inside a shot description must
    not start a new shot.

    Args:
        script_text: Raw script text (``\\r\\n`` line endings are normalised).

    Returns:
        The stripped shot texts in order of appearance.
    """
    cleaned = script_text.replace("\r\n", "\n").strip()
    # (?<![\d.]) : the number is not the tail of another number ("12.5" / "1.2.")
    # (?!\d)     : the dot is not a decimal point ("2.5")
    pattern = re.compile(r"\s*(?<![\d.])(\d+)\.(?!\d)\s*")
    matches = list(pattern.finditer(cleaned))
    if not matches:
        return [line.strip() for line in cleaned.split('\n') if line.strip()]

    # Each shot runs from the end of its marker to the start of the next one
    # (or to the end of the text for the last shot).
    boundaries = [m.start() for m in matches[1:]] + [len(cleaned)]
    segments = (
        cleaned[m.end():stop].strip() for m, stop in zip(matches, boundaries)
    )
    return [segment for segment in segments if segment]


def _wait_until_file_stable(path, max_wait=30, check_interval=1.0, required_stable=3):
    """Poll ``path`` until its non-zero size stays unchanged for several checks.

    Returns True once the size was identical for ``required_stable``
    consecutive comparisons, False when ``max_wait`` seconds of polling elapse
    first.
    """
    import time

    stable_count = 0
    last_size = 0
    for _ in range(int(max_wait / check_interval)):
        try:
            current_size = os.path.getsize(path)
        except OSError:
            time.sleep(check_interval)
            continue

        # A zero-byte file is still being created; keep waiting.
        if current_size == 0:
            time.sleep(check_interval)
            continue

        if current_size == last_size:
            stable_count += 1
            if stable_count >= required_stable:
                LOGGER.info(f"✅ 文件稳定: {current_size / 1024 / 1024:.2f} MB")
                return True
        else:
            stable_count = 0

        last_size = current_size
        time.sleep(check_interval)
    return False


def _read_usable_frame(capture, total_frames, min_brightness=5):
    """Return ``(frame, index, brightness)`` for the first non-black frame near the end.

    Candidates are tried in order: last frame, 2nd-to-last, 5th-to-last and the
    95% position. Indices are clamped into range and each distinct index is
    read at most once. Returns ``(None, -1, 0.0)`` when no candidate qualifies.
    """
    candidates = [
        total_frames - 1,
        total_frames - 2,
        total_frames - 5,
        int(total_frames * 0.95),
    ]
    tried = set()
    for raw_index in candidates:
        index = max(0, min(raw_index, total_frames - 1))
        if index in tried:
            # Short videos clamp several candidates onto the same frame.
            continue
        tried.add(index)

        capture.set(cv2.CAP_PROP_POS_FRAMES, index)
        success, frame = capture.read()
        if not success or frame is None or frame.size == 0:
            continue

        # Mean grey level is used to reject black frames.
        brightness = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).mean()
        if brightness > min_brightness:
            return frame, index, brightness
    return None, -1, 0.0


def extract_last_frame(video_path: str, output_path: str) -> Optional[str]:
    """Save a usable frame from the end of a video as an image.

    The function waits for the video file size to stop changing, opens it with
    OpenCV, picks the first sufficiently bright frame among a few candidates
    near the end, and writes it to ``output_path``.

    Args:
        video_path: Path of the video to read.
        output_path: Path of the image to write. A bare filename (no directory
            part) is written to the current directory.

    Returns:
        ``output_path`` on success, ``None`` on any failure (OpenCV missing,
        file missing, video unreadable, no usable frame, write failure).
    """
    import time

    if cv2 is None:
        LOGGER.warning("OpenCV 不可用")
        return None

    if not os.path.exists(video_path):
        LOGGER.error(f"视频文件不存在: {video_path}")
        return None

    # Step 1: wait for the writer to finish. Reading proceeds even if the size
    # never settled, matching the best-effort behaviour of this function.
    LOGGER.info("⏳ 等待文件写入完成...")
    _wait_until_file_stable(video_path)

    # Extra grace period for the filesystem to sync.
    time.sleep(2.0)

    # Step 2: open the video.
    capture = cv2.VideoCapture(video_path)
    try:
        if not capture.isOpened():
            LOGGER.error("OpenCV 无法打开视频")
            return None

        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        fps = capture.get(cv2.CAP_PROP_FPS) or 30

        if total_frames <= 0:
            LOGGER.error("视频帧数为 0")
            return None

        LOGGER.info(f"📹 视频信息: {total_frames} 帧, {fps:.2f} FPS")

        # Step 3: pick a frame that is neither corrupt nor black.
        frame, used_index, brightness = _read_usable_frame(capture, total_frames)
        if frame is None:
            LOGGER.error("所有候选帧均无效")
            return None
        LOGGER.info(f"✅ 提取第 {used_index}/{total_frames} 帧(亮度: {brightness:.1f})")

        # Step 4: write the image. os.makedirs("") raises, so only create the
        # directory when the output path actually has a directory component.
        out_dir = os.path.dirname(output_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        if not cv2.imwrite(output_path, frame):
            LOGGER.error("保存图片失败")
            return None

        file_size = os.path.getsize(output_path)
        LOGGER.info(f"💾 参考帧已保存: {os.path.basename(output_path)} ({file_size / 1024:.1f} KB)")
        return output_path

    except Exception as e:
        LOGGER.error(f"提取帧时出错: {e}")
        return None
    finally:
        # Release the capture on every path, including the early returns.
        capture.release()


def stitch_videos(video_paths: List[str], output_path: str):
    """Concatenate several video files into one output file.

    Paths that do not exist on disk are skipped. The remaining clips are joined
    in order with ``method="compose"`` and encoded as H.264 video / AAC audio.

    Args:
        video_paths: Segment files in playback order.
        output_path: Destination of the stitched video.

    Raises:
        ValueError: If ``video_paths`` is empty or none of the paths exist.
        RuntimeError: If moviepy is not available.
    """
    if not video_paths:
        raise ValueError("未提供可拼接的视频文件。")

    if VideoFileClip is None or concatenate_videoclips is None:
        raise RuntimeError("未找到 moviepy,请安装依赖。")

    clips = []
    final_clip = None
    try:
        for path in video_paths:
            if not os.path.exists(path):
                continue
            clips.append(VideoFileClip(path))

        if not clips:
            raise ValueError("没有有效的视频片段")

        final_clip = concatenate_videoclips(clips, method="compose")
        final_clip.write_videofile(
            output_path,
            codec="libx264",
            audio_codec="aac",
            verbose=False,
            logger=None,
            remove_temp=True
        )
    finally:
        # Close the composite first, then its sources, so no reader or
        # subprocess handle outlives the call even when encoding fails.
        if final_clip is not None:
            final_clip.close()
        for clip in clips:
            clip.close()


# ==========================================
# PART 4: 视频生成流水线
# ==========================================

def run_video_generation_pipeline(
    script_text: str,
    api_key: str,
    model_name: str,
    style_choice: str,
    size: str,
    seconds: int
):
    """Generate one clip per shot of the script and stitch them together.

    This is a generator intended for a streaming UI handler: it yields a
    progress tuple after every step.

    Args:
        script_text: Shot script; parsed with ``parse_script_nodes``.
        api_key: API key for the selected provider.
        model_name: Names starting with "sora" use ``OpenAISoraAPI``, names
            starting with "veo" use ``GoogleVeoAPI``.
        style_choice: Key into ``STYLE_PROMPTS``; unknown keys add no style text.
        size: Resolution string forwarded to the API client.
        seconds: Per-shot duration; coerced to ``int`` before use.

    Yields:
        ``(segment_paths, final_video_path, log_message)`` where
        ``final_video_path`` is ``None`` until stitching has succeeded.
    """
    # --- input validation (nothing is created on disk before this passes) ---
    if not script_text or not script_text.strip():
        yield [], None, "❌ 请输入剧本!"
        return

    if not api_key or api_key == "Your API Key":
        yield [], None, "❌ 请输入有效的 API Key!"
        return

    nodes = parse_script_nodes(script_text)
    if not nodes:
        # e.g. a script consisting only of empty numbered markers.
        yield [], None, "❌ 未能从剧本中解析出分镜!"
        return

    # --- API client ---
    is_sora = model_name.startswith("sora")
    try:
        if is_sora:
            api_client = OpenAISoraAPI(api_key)
        elif model_name.startswith("veo"):
            api_client = GoogleVeoAPI(api_key)
        else:
            yield [], None, f"❌ 不支持的模型: {model_name}"
            return
    except Exception as e:
        yield [], None, f"❌ API 初始化失败: {str(e)}"
        return

    # Each run writes into its own timestamped directory.
    run_id = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = os.path.join("output_videos", run_id)
    os.makedirs(output_dir, exist_ok=True)

    # UI sliders may deliver floats; the clients expect whole seconds.
    seconds = int(seconds)

    generated_videos = []
    last_frame_path = None
    style_prompt = STYLE_PROMPTS.get(style_choice, "")

    yield [], None, f"🚀 开始任务,共 {len(nodes)} 个分镜。模型: {model_name}"

    # --- per-shot generation ---
    for i, node_text in enumerate(nodes):
        idx = i + 1
        video_filename = os.path.join(output_dir, f"segment_{idx:02d}.mp4")
        full_prompt = f"{CONTINUITY_PROMPT}\n{style_prompt}\n镜头编号:{idx}/{len(nodes)}。\n镜头脚本:{node_text}"

        yield generated_videos, None, f"🎥 生成中: 分镜 {idx}/{len(nodes)}..."

        call_kwargs = {
            "prompt": full_prompt,
            "output_path": video_filename,
            "size": size,
            "seconds": seconds,
            "ref_img_path": last_frame_path,
        }
        if is_sora:
            # Only the Sora client takes the model name per call.
            call_kwargs["model"] = model_name
        err = api_client.generate_video(**call_kwargs)

        if err:
            yield generated_videos, None, f"❌ 分镜 {idx} 失败: {err}"
            return

        generated_videos.append(video_filename)

        # The last frame of this shot becomes the reference image of the next
        # one; a failed extraction yields None and the next shot runs without it.
        if i < len(nodes) - 1:
            frame_path = os.path.join(output_dir, f"ref_{idx:02d}.png")
            last_frame_path = extract_last_frame(video_filename, frame_path)

        yield generated_videos, None, f"✅ 分镜 {idx} 完成"

    # --- stitching ---
    yield generated_videos, None, "🎬 正在拼接..."
    final_video_path = os.path.join(output_dir, "final_movie.mp4")

    try:
        stitch_videos(generated_videos, final_video_path)
        yield generated_videos, final_video_path, "🎉 任务完成!"
    except Exception as e:
        yield generated_videos, None, f"❌ 拼接失败: {str(e)}"


# ==========================================
# PART 5: Gradio 界面
# ==========================================

def update_model_params(model_name):
    """Build the resolution dropdown and duration slider for a model.

    The settings come from ``MODEL_CONFIGS[model_name]``; unknown model names
    use the "sora-2" entry.

    Returns:
        A ``(gr.Dropdown, gr.Slider)`` pair for the size and seconds controls.
    """
    fallback = MODEL_CONFIGS["sora-2"]
    config = MODEL_CONFIGS.get(model_name, fallback)

    # Resolution choices; the first entry is preselected.
    size_dropdown = gr.Dropdown(
        choices=config["sizes"],
        value=config["sizes"][0],
        label=f"分辨率 ({model_name})",
    )

    # Duration slider bounds are taken verbatim from the model config.
    seconds_slider = gr.Slider(
        minimum=config["seconds_range"]["minimum"],
        maximum=config["seconds_range"]["maximum"],
        step=config["seconds_range"]["step"],
        value=config["seconds_range"]["value"],
        label=config["seconds_label"],
    )

    return size_dropdown, seconds_slider


def get_demo_path(filename):
    """Return ``filename`` when it exists on disk, otherwise ``None``."""
    if os.path.exists(filename):
        return filename
    return None


with gr.Blocks(title="AI 剧本视频工厂") as demo:
    gr.Markdown("# 🎬 ScriptAgent & Sora/Veo 视频生成工坊 ")
    
    with gr.Tabs():
        # --- TAB 1: 剧本创作 ---
        with gr.Tab("📝 第一步:剧本创作"):
            with gr.Row():
                with gr.Column():
                    llm_input = gr.Textbox(
                        label="剧情输入", 
                        placeholder="主角:你在做什么?...", 
                        lines=6
                    )
                    llm_btn = gr.Button("生成/续写剧本", variant="primary")
                
                with gr.Column():
                    llm_output = gr.Textbox(
                        label="生成的剧本", 
                        lines=10, 
                        interactive=True
                    )
                    # 🔥 修改:添加提示信息组件
                    copy_status = gr.Textbox(
                        label="", 
                        value="", 
                        visible=False,
                        elem_classes="copy-status"
                    )
                    to_video_btn = gr.Button("⬇️ 复制到视频生成", variant="secondary")
            
            gr.Examples(
                [[
                    "艾蕾娜:……星辰的余烬?你竟敢在此地点燃旧日之光,流亡者。  \n"
                    "凯兰:光从不属于谁,骑士。它只记得……曾照过怎样的真相。倒是你,影子的囚徒,是来求我终结你的诅咒?还是……来替你的新王收割我的喉咙?  \n"
                    "艾蕾娜:终结?我的诅咒早已生根。每杀一人,他的灵魂便多一道裂痕……而你要的真相,不过是让裂痕更早崩裂。  \n"
                    "凯兰:那就让它崩裂。你以为影子吞噬的是敌人?不……它啃的是你自己的存在。当你彻底沦为影魔,王国会迎来真正的永夜——而那时,连诅咒都会因你的消失而……笑出声。  \n"
                    "艾蕾娜:若我消失……她会替我活下去。而王国……会记得我曾是守护者。但若你以星为誓,告诉我……当年你预言的毁灭,可有……一线逆转?  \n"
                    "凯兰:逆转的代价……是被遗忘。你愿用存在换王国的黎明?那么——以星辰与影子的名义,契约成立。  \n"
                    "艾蕾娜:……从此,弑王者的名字将被抹去。而星辰将记住……一个骑士用消失,为王国换来了……第零次日出。\n"
                ],
                [
                    "林照:最近读完《人类简史》,突然意识到我们所谓的“现代文明”不过是场集体幻觉,真有点虚无。\n"  
                    "陈放:幻觉才值钱啊,货币、国家、公司,哪个不是大家愿意信才存在?读书不是为了拆穿,而是为了看清游戏规则。  \n"
                    "许知:你俩别绕了,我昨晚刚把《斯通纳》看完,合上书就一个问题:如果注定平庸,还读个什么劲?  \n"
                    "林照:斯通纳的平庸恰恰反击了功利叙事,他的失败里藏着尊严,像黑暗中的一点磷火。  \n"
                    "陈放:说穿了,读书就是给自己建一座防空洞,外面狂轰滥炸,洞里还能点一盏小灯。  \n"
                    "林照:那灯最好自带电池,别指望谁给你发电,明天我打算读《倦怠社会》,继续给灯添点燃料。\n"
                ]
                ], 
                inputs=llm_input
            )
            
            # 本地部署代码展示区(保持不变)
            gr.Markdown("---")
            with gr.Accordion("💻 本地部署完整代码(点击展开查看)", open=False):
                gr.Markdown("""
                ### 📦 完整部署步骤
                以下代码可在本地完整运行,获得最佳性能和输出质量:
                """)
                
                deployment_code = '''import os
from huggingface_hub import snapshot_download

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

model_name = "XD-MU/ScriptAgent"
local_path = "./models/ScriptAgent"

# 下载整个仓库的所有文件
print("下载模型所有文件...")
snapshot_download(
    repo_id=model_name,
    local_dir=local_path,
    local_dir_use_symlinks=False,
    resume_download=True
)

print(f"模型已完整下载到: {local_path}")

# 使用 SWIFT 加载
from swift.llm import PtEngine, RequestConfig, InferRequest

engine = PtEngine(local_path, max_batch_size=1)
request_config = RequestConfig(max_tokens=8192, temperature=0.7)

infer_request = InferRequest(messages=[
    {"role": "user", "content": "你的对话上下文(Your Dialogue)"}
])
response = engine.infer([infer_request], request_config)[0]

print(response.choices[0].message.content)'''
                
                gr.Code(
                    value=deployment_code,
                    language="python",
                    label="deploy_scriptagent.py",
                    lines=35,
                    interactive=False
                )
                
                gr.Markdown("""
                ### 📌 环境要求
                ```bash
                # 安装依赖
                pip install ms-swift transformers torch huggingface_hub
                
                # GPU 推荐配置
                - CUDA 11.8+
                - 显存: 16GB+ (推荐 24GB)
                - 内存: 32GB+
                ```
                
                ### 本地部署优势
                - ✅ 完整精度模型,无量化损失
                - ✅ 更快的推理速度
                - ✅ 无网络限制,支持离线运行
                - ✅ 可自定义参数(temperature, max_tokens等)
                """)
        
        # --- TAB 2: 视频生成 ---
        with gr.Tab("🎥 第二步:视频生成"):
            # 🔥 新增:顶部提示区域
            gr.Markdown("""
            <div style="background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); 
                        padding: 15px; 
                        border-radius: 10px; 
                        color: white; 
                        text-align: center; 
                        margin-bottom: 20px;">
                💡 <b>提示</b>:在「第一步:剧本创作」中点击「⬇️ 复制到视频生成」后,剧本会自动填充到下方「分镜脚本」输入框
            </div>
            """)
            
            with gr.Row():
                # 左侧配置区
                with gr.Column(scale=1):
                    with gr.Accordion("⚙️ API 设置", open=True):
                        api_key_input = gr.Textbox(
                            label="API Key", 
                            type="password", 
                            value="Your API Key",
                            info="根据选择的模型输入 OpenAI 或 Google API Key"
                        )
                    
                    gr.Markdown("### 🎨 风格与模型配置")
                    style_radio = gr.Radio(
                        choices=STYLE_KEYS, 
                        value=STYLE_KEYS[0], 
                        label="画风"
                    )
                    model_sel = gr.Dropdown(
                        choices=["sora-2", "sora-2-pro", "veo-3.1"], 
                        value="sora-2", 
                        label="选择模型",
                        info="Sora 使用 OpenAI Key,Veo 使用 Google Key"
                    )
                    
                    with gr.Row():
                        size_sel = gr.Dropdown(
                            choices=MODEL_CONFIGS["sora-2"]["sizes"], 
                            value=MODEL_CONFIGS["sora-2"]["sizes"][0], 
                            label="分辨率"
                        )
                        sec_slider = gr.Slider(
                            minimum=4, 
                            maximum=12, 
                            step=4, 
                            value=4, 
                            label="单镜时长"
                        )

                    video_script_input = gr.TextArea(
                        label="分镜脚本", 
                        lines=8, 
                        placeholder="1. [0-8秒] ...",
                        elem_classes="script-input"  # 🔥 添加样式类
                    )
                    gen_btn = gr.Button("🚀 开始生成", variant="primary")
                    status_log = gr.Textbox(label="日志", interactive=False)

                # 右侧展示区
                with gr.Column(scale=2):
                    gr.Markdown("### 🎞️ 分镜预览")
                    gallery = gr.Gallery(
                        label="分镜序列", 
                        columns=3, 
                        height="auto"
                    )
                    
                    gr.Markdown("### 🎬 最终成片")
                    final_video = gr.Video(label="成片输出")

                    # Demo 展示区
                    gr.Markdown("---")
                    gr.Markdown("### 🌟 精选成片案例 (Demo Showcase)")
                    
                    # Lay out at most four demo entries as two rows of two cards.
                    for i in range(0, 4, 2): 
                        with gr.Row():
                            for j in range(2): 
                                idx = i + j
                                # Skip grid slots that have no corresponding DEMO_DATA entry.
                                if idx < len(DEMO_DATA):
                                    item = DEMO_DATA[idx]
                                    with gr.Column():
                                        with gr.Group():
                                            # get_demo_path() yields None when the demo file is missing on disk.
                                            gr.Video(value=get_demo_path(item["file"]), label=item["title"], interactive=False)
                                            # Collapsed, read-only view of the script behind this demo.
                                            with gr.Accordion(f"📄 查看剧本: {item['title']}", open=False):
                                                gr.Textbox(
                                                    value=item["script"], 
                                                    show_label=False, 
                                                    lines=6,       
                                                    max_lines=6,   
                                                    interactive=False
                                                )
    
    # 页面底部警告
    gr.HTML('<p style="color: red; font-weight: bold; text-align: center; margin-top: 20px; font-size: 16px;">⚠️ 注意:仅供简单测试,由于成本问题在线平台内存只有18G,我们量化了模型,性能效果并不能保证,如果需要最准确的输出请自行部署即可</p>')

    # On page load, run a JS snippet that appends a <style> element defining a
    # `highlight` keyframe animation for elements with class `script-input`.
    # NOTE(review): nothing in the visible code adds the `highlight` class to
    # `.script-input`; confirm that something toggles it, otherwise the
    # animation never plays.
    demo.load(
        None,
        None,
        None,
        js="""
        function() {
            const style = document.createElement('style');
            style.textContent = `
                @keyframes highlight {
                    0%, 100% { background-color: transparent; }
                    50% { background-color: rgba(102, 126, 234, 0.2); }
                }
                .script-input.highlight {
                    animation: highlight 1.5s ease-in-out 3;
                    border: 2px solid #667eea !important;
                }
            `;
            document.head.appendChild(style);
        }
        """
    )

    # --- 逻辑绑定 ---
    llm_btn.click(chat_with_scriptagent, llm_input, llm_output)
    
    # 🔥 修改后的按钮逻辑:复制文本 + 显示成功提示
    def copy_to_video(script_text):
        """Pass the script through to the video tab and report the outcome.

        Returns the unchanged ``script_text`` together with a ``gr.update``
        that reveals the status box with a success or "empty script" message.
        """
        has_content = bool(script_text) and bool(script_text.strip())
        if has_content:
            notice = "✅ 已复制到「第二步:视频生成」→「分镜脚本」,请切换标签页查看"
        else:
            notice = "⚠️ 剧本为空,无法复制"
        return script_text, gr.update(value=notice, visible=True)
    
    # Copy the generated script into the video tab's script box and show the
    # copy status message.
    to_video_btn.click(
        fn=copy_to_video,
        inputs=llm_output,
        outputs=[video_script_input, copy_status]
    )
    
    # Rebuild the resolution dropdown and duration slider whenever the model changes.
    model_sel.change(
        fn=update_model_params, 
        inputs=model_sel, 
        outputs=[size_sel, sec_slider]
    )
    
    # Start the generation pipeline. The handler is a generator, and each
    # yielded tuple maps onto (gallery, final_video, status_log).
    gen_btn.click(
        fn=run_video_generation_pipeline,
        inputs=[
            video_script_input, 
            api_key_input, 
            model_sel, 
            style_radio, 
            size_sel, 
            sec_slider
        ],
        outputs=[gallery, final_video, status_log]
    )

if __name__ == "__main__":
    # Turn on Gradio's queue before launching; the video handler is a
    # generator that reports progress through successive yields.
    demo.queue()
    # Listen on all network interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)