Spaces:

deeme
/

comic

Paused

App Files Files Community

deeme committed on Apr 4, 2025

Commit

83b4c2e

verified ·

1 Parent(s): eeff536

Upload 2 files

Browse files

Files changed (2) hide show

Dockerfile +2 -2
app.py +36 -19

Dockerfile CHANGED Viewed

@@ -2,9 +2,9 @@ FROM python:3.9-slim
 WORKDIR /app
-# 安装FFmpeg和Noto Sans CJK SC字体
 RUN apt-get update && \
-    apt-get install -y ffmpeg fonts-roboto && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

 WORKDIR /app
+# Install FFmpeg and the Noto CJK fonts
 RUN apt-get update && \
+    apt-get install -y ffmpeg fonts-noto-cjk && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

app.py CHANGED Viewed

@@ -156,13 +156,13 @@ def create_speech_subtitle_file(project_dir, speeches, panel_start_times, panel_
         logger.error(f"Error creating speech subtitle file: {e}")
         return None
-# 格式化时间为SRT格式
 def format_time(seconds):
     hours = int(seconds / 3600)
     minutes = int((seconds % 3600) / 60)
     secs = int(seconds % 60)
-    millisecs = int((seconds - int(seconds)) * 1000)
-    return f"{hours:02}:{minutes:02}:{secs:02},{millisecs:03}"
 # 创建音频文件
 async def create_audio_file(project_dir, captions, speeches):
@@ -181,7 +181,7 @@ async def create_audio_file(project_dir, captions, speeches):
             # 每个面板的旁白
             if caption:
                 caption_audio = os.path.join(project_dir, f"caption_{i}.mp3")
-                result = await generate_speech(caption, "alloy", caption_audio)
                 if result:
                     duration = get_audio_duration(caption_audio)
                     audio_durations[f"caption_{i}"] = duration
@@ -191,7 +191,7 @@ async def create_audio_file(project_dir, captions, speeches):
             # 每个面板的对话
             if speech:
                 speech_audio = os.path.join(project_dir, f"speech_{i}.mp3")
-                result = await generate_speech(speech, "nova", speech_audio)
                 if result:
                     duration = get_audio_duration(speech_audio)
                     audio_durations[f"speech_{i}"] = duration
@@ -268,14 +268,19 @@ def get_video_dimensions(video_path):
         return (1920, 1080)
 def process_sub_path(path):
-    # 统一处理所有特殊字符
-    return shlex.quote(
-        str(Path(path).resolve())
-            .replace(':', '\\:')
-            .replace(' ', '\\ ')
-            .replace('(', '\\(')
-            .replace(')', '\\)')
-    )
 # 创建视频
 def create_video(project_dir, image_paths, caption_subtitle_file, speech_subtitle_file,
@@ -311,14 +316,26 @@ def create_video(project_dir, image_paths, caption_subtitle_file, speech_subtitl
         # 构建滤镜链
         combined_filter = (
             f"subtitles={process_sub_path(caption_subtitle_file)}:"
-            "force_style='Fontsize={},Alignment=2,MarginV={},Outline=1'".format(
-                int(base_fontsize*0.9),
-                video_height//10
             ),
             f"subtitles={process_sub_path(speech_subtitle_file)}:"
-            "force_style='Fontsize={},Alignment=8,MarginV={},Outline=1'".format(
-                base_fontsize,
-                video_height//12
             )
         )
         filter_chain = ",".join(combined_filter)

         logger.error(f"Error creating speech subtitle file: {e}")
         return None
+# 格式化时间
 def format_time(seconds):
     """Format a duration in seconds as ``H:MM:SS.cc`` (centisecond precision).

     The value is rounded to the nearest centisecond and the carry is
     propagated, so ``59.999`` becomes ``0:01:00.00``. Negative input is
     clamped to zero.

     NOTE(review): the layout matches ASS-style subtitle timestamps;
     confirm against the subtitle writer that calls this.

     Args:
         seconds: Duration in seconds (int or float).

     Returns:
         The formatted timestamp, with hours unpadded and minutes, seconds
         and centiseconds zero-padded to two digits.
     """
     # Work in whole centiseconds: truncating the float fraction directly
     # turns values such as 0.29 or 2.3 into .28 / .29 because of binary
     # floating-point representation error.
     total_centisecs = max(0, int(round(seconds * 100)))
     hours, remainder = divmod(total_centisecs, 360000)
     minutes, remainder = divmod(remainder, 6000)
     secs, centisecs = divmod(remainder, 100)
     return f"{hours}:{minutes:02}:{secs:02}.{centisecs:02}"
 # 创建音频文件
 async def create_audio_file(project_dir, captions, speeches):
             # 每个面板的旁白
             if caption:
                 caption_audio = os.path.join(project_dir, f"caption_{i}.mp3")
+                result = await generate_speech(caption, "zh-CN-YunjianNeural", caption_audio)
                 if result:
                     duration = get_audio_duration(caption_audio)
                     audio_durations[f"caption_{i}"] = duration
             # 每个面板的对话
             if speech:
                 speech_audio = os.path.join(project_dir, f"speech_{i}.mp3")
+                result = await generate_speech(speech, "zh-CN-XiaoxiaoNeural", speech_audio)
                 if result:
                     duration = get_audio_duration(speech_audio)
                     audio_durations[f"speech_{i}"] = duration
         return (1920, 1080)
 def process_sub_path(path):
     """Escape a subtitle file path for use as an FFmpeg filter option value.

     FFmpeg parses a filtergraph string in two passes, so the path is
     escaped in two matching steps:

     1. Option level: backslash-escape ``\\``, ``:`` and ``'`` (the
        characters that are special inside an option value), plus spaces
        so leading/trailing blanks are not trimmed.
     2. Filtergraph level: wrap the result in single quotes so ``[ ] , ;``
        are taken literally. A quote cannot appear inside a quoted section,
        so each one is emitted as ``'\\''`` (close, escaped quote, reopen).

     No shell quoting is applied here.
     NOTE(review): this assumes the filter string reaches ffmpeg as an
     argument list rather than through a shell; confirm against the caller.

     Args:
         path: Path to the subtitle file (``str`` or ``os.PathLike``).

     Returns:
         The single-quoted, escaped path, ready to follow ``subtitles=``.
     """
     # Normalize to forward slashes (relevant for Windows-style paths).
     posix_path = Path(path).as_posix()
     # Level 1: characters that are special inside a filter option value.
     option_escaped = posix_path.translate(str.maketrans({
         '\\': r'\\',
         ':': r'\:',
         "'": r"\'",
         ' ': r'\ ',
     }))
     # Level 2: quote the whole value; a literal quote must leave the
     # quoted section, hence the '\'' sequence.
     return "'" + option_escaped.replace("'", "'\\''") + "'"
 # 创建视频
 def create_video(project_dir, image_paths, caption_subtitle_file, speech_subtitle_file,
         # 构建滤镜链
         combined_filter = (
             f"subtitles={process_sub_path(caption_subtitle_file)}:"
+            "force_style='"
+            "Fontsize={},"
+            "Alignment=2,"
+            "MarginV={},"
+            "WrapStyle=1,"
+            "BlurEdges=1"
+            "'".format(
+                int(base_fontsize*0.6),
+                video_height//100
             ),
             f"subtitles={process_sub_path(speech_subtitle_file)}:"
+            "force_style='"
+            "Fontsize={},"
+            "Alignment=8,"
+            "MarginV={},"
+            "WrapStyle=1,"
+            "BlurEdges=1"
+            "'".format(
+                int(base_fontsize*0.5),
+                video_height//10
             )
         )
         filter_chain = ",".join(combined_filter)