deeme committed on
Commit
bf9eeef
·
verified ·
1 Parent(s): d51d820

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +41 -75
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,13 +1,11 @@
1
- from fastapi import FastAPI, HTTPException, BackgroundTasks
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
- from typing import List, Dict
5
  import os
6
  import uuid
7
  import aiohttp
8
- import asyncio
9
  import logging
10
- import tempfile
11
  import openai
12
  from pathlib import Path
13
  import subprocess
@@ -16,9 +14,7 @@ import ssl
16
  import json
17
  from fastapi.staticfiles import StaticFiles
18
  from pydub import AudioSegment
19
- import shlex
20
- from ffmpeg import probe as ffmpeg_probe # 需要安装ffmpeg-python包
21
- import time
22
 
23
  # 配置日志
24
  logging.basicConfig(level=logging.INFO)
@@ -66,13 +62,15 @@ async def download_image(url, output_path):
66
  if response.status == 200:
67
  with open(output_path, 'wb') as f:
68
  f.write(await response.read())
69
- return output_path
 
 
70
  else:
71
  logger.error(f"Failed to download image: {response.status}")
72
- return None
73
  except Exception as e:
74
  logger.error(f"Error downloading image: {e}")
75
- return None
76
 
77
  # 生成语音
78
  async def generate_speech(text, voice="alloy", output_path=None):
@@ -112,40 +110,44 @@ PlayResX: 1920
112
  PlayResY: 1080
113
  [V4+ Styles]
114
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
115
- Style: Caption,Noto Sans CJK SC,54,&H00FFFFFF,&H000000FF,&H00333333,&H00000000,0,0,0,0,100,100,0,0,1,2,3,2,100,100,50,0
116
- Style: Speech,Noto Sans CJK SC,48,&H00FFFFFF,&H000000FF,&H00333333,&H00000000,0,0,0,0,100,100,0,0,1,2,3,8,100,100,50,0
117
  [Events]
118
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
119
  """
120
- def smart_wrap(text, video_width=1920):
121
- """智能换行算法"""
122
- max_chars = int(video_width // 38) # 基于典型字体尺寸计算
 
 
 
 
123
  lines = []
124
  current_line = []
125
- current_len = 0
126
 
127
  for char in text:
128
- char_len = 2 if ord(char) > 255 else 1 # 全角字符计为2单位
129
- if current_len + char_len > max_chars * 2:
130
  lines.append(''.join(current_line))
131
  current_line = [char]
132
- current_len = char_len
133
  else:
134
  current_line.append(char)
135
- current_len += char_len
136
  lines.append(''.join(current_line))
137
 
138
  return r'\N'.join(lines)
139
 
140
  # 创建caption字幕文件(底部显示)
141
- def create_caption_subtitle_file(project_dir, captions, panel_start_times, panel_durations):
142
  try:
143
  subtitle_file = os.path.join(project_dir, "captions.ass")
144
 
145
  with open(subtitle_file, "w", encoding="utf-8") as f:
146
  f.write(ASS_STYLE_HEADER)
147
- for i, (caption, start, duration) in enumerate(zip(captions, panel_start_times, panel_durations)):
148
- wrapped_text = smart_wrap(caption)
149
  f.write(
150
  f"Dialogue: 0,{format_time(start)},{format_time(start + duration)},"
151
  f"Caption,,0,0,0,,{wrapped_text}\n"
@@ -157,14 +159,14 @@ def create_caption_subtitle_file(project_dir, captions, panel_start_times, panel
157
  return None
158
 
159
  # 创建speech字幕文件(顶部显示)
160
- def create_speech_subtitle_file(project_dir, speeches, panel_start_times, panel_durations):
161
  try:
162
  subtitle_file = os.path.join(project_dir, "speeches.ass")
163
 
164
  with open(subtitle_file, "w", encoding="utf-8") as f:
165
  f.write(ASS_STYLE_HEADER)
166
- for i, (speech, start, duration) in enumerate(zip(speeches, panel_start_times, panel_durations)):
167
- wrapped_text = smart_wrap(speech)
168
  f.write(
169
  f"Dialogue: 0,{format_time(start)},{format_time(start + duration)},"
170
  f"Speech,,0,0,0,,{wrapped_text}\n"
@@ -271,21 +273,6 @@ async def create_audio_file(project_dir, captions, speeches):
271
  logger.error(traceback.format_exc())
272
  return None, {}, [], []
273
 
274
- def get_video_dimensions(video_path):
275
- try:
276
- result = subprocess.run(
277
- ["ffprobe", "-v", "error", "-select_streams", "v:0",
278
- "-show_entries", "stream=width,height", "-of", "json", video_path],
279
- capture_output=True,
280
- text=True
281
- )
282
- data = json.loads(result.stdout)
283
- return (int(data['streams'][0]['width']),
284
- int(data['streams'][0]['height']))
285
- except Exception as e:
286
- logger.warning(f"Video dimension detection failed: {e}")
287
- return (1920, 1080)
288
-
289
  def process_sub_path(path):
290
  """深度处理FFmpeg路径转义"""
291
  # 统一转换为POSIX路径
@@ -329,31 +316,10 @@ def create_video(project_dir, image_paths, caption_subtitle_file, speech_subtitl
329
  ]
330
  subprocess.run(cmd1, check=True)
331
 
332
- # 获取视频尺寸(使用改进后的方法)
333
- video_width, video_height = get_video_dimensions(temp_video)
334
- base_fontsize = max(24, video_width // 50)
335
  # 构建滤镜链
336
  combined_filter = (
337
- f"subtitles={process_sub_path(caption_subtitle_file)}:"
338
- "force_style='"
339
- #"FontName=Noto Sans CJK SC,"
340
- "Fontsize={},"
341
- "Alignment=2,"
342
- "MarginV={},"
343
- "'".format(
344
- int(base_fontsize*0.6),
345
- video_height//100
346
- ),
347
- f"subtitles={process_sub_path(speech_subtitle_file)}:"
348
- "force_style='"
349
- #"FontName=Noto Sans CJK SC,"
350
- "Fontsize={},"
351
- "Alignment=8,"
352
- "MarginV={},"
353
- "'".format(
354
- int(base_fontsize*0.5),
355
- video_height//10
356
- )
357
  )
358
  filter_chain = ",".join(combined_filter)
359
  # 优化ffmpeg命令
@@ -367,10 +333,7 @@ def create_video(project_dir, image_paths, caption_subtitle_file, speech_subtitl
367
  "-movflags", "+faststart",
368
  output_video
369
  ]
370
- # 添加执行计时
371
- start_time = time.time()
372
  subprocess.run(cmd_combined, check=True)
373
- logger.info(f"Video processed in {time.time()-start_time:.2f}s")
374
  # 清理临时文件
375
  os.remove(temp_video)
376
  return output_video
@@ -409,7 +372,7 @@ def upload_to_local_storage(local_path, relative_path):
409
  return None
410
 
411
  @app.post("/api/generate-video")
412
- async def generate_video(comic_data: ComicData, background_tasks: BackgroundTasks):
413
  # 创建唯一项目ID
414
  project_id = str(uuid.uuid4())
415
  # 使用绝对路径创建项目目录
@@ -421,11 +384,15 @@ async def generate_video(comic_data: ComicData, background_tasks: BackgroundTask
421
  try:
422
  # 下载图片
423
  image_paths = []
 
424
  for i, panel_url in enumerate(comic_data.panels):
425
  output_path = os.path.join(project_dir, f"panel_{i}.jpg")
426
- result = await download_image(panel_url, output_path)
427
- if result:
428
- image_paths.append(result)
 
 
 
429
 
430
  if not image_paths:
431
  raise HTTPException(status_code=500, detail="Failed to download images")
@@ -443,13 +410,13 @@ async def generate_video(comic_data: ComicData, background_tasks: BackgroundTask
443
 
444
  # 创建字幕文件 - 分别为caption和speech创建
445
  caption_subtitle_file = create_caption_subtitle_file(
446
- project_dir, comic_data.captions, panel_start_times, panel_durations
447
  )
448
  if not caption_subtitle_file:
449
  raise HTTPException(status_code=500, detail="Failed to create caption subtitle file")
450
 
451
  speech_subtitle_file = create_speech_subtitle_file(
452
- project_dir, comic_data.speeches, panel_start_times, panel_durations
453
  )
454
  if not speech_subtitle_file:
455
  raise HTTPException(status_code=500, detail="Failed to create speech subtitle file")
@@ -480,8 +447,7 @@ async def generate_video(comic_data: ComicData, background_tasks: BackgroundTask
480
  # if img_url:
481
  # image_urls.append(img_url)
482
 
483
- # 后台任务清理临时文件
484
- # background_tasks.add_task(lambda: shutil.rmtree(project_dir, ignore_errors=True))
485
  shutil.rmtree(project_dir, ignore_errors=True)
486
 
487
  return {
 
1
+ from fastapi import FastAPI, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
+ from typing import List
5
  import os
6
  import uuid
7
  import aiohttp
 
8
  import logging
 
9
  import openai
10
  from pathlib import Path
11
  import subprocess
 
14
  import json
15
  from fastapi.staticfiles import StaticFiles
16
  from pydub import AudioSegment
17
+ from PIL import Image
 
 
18
 
19
  # 配置日志
20
  logging.basicConfig(level=logging.INFO)
 
62
  if response.status == 200:
63
  with open(output_path, 'wb') as f:
64
  f.write(await response.read())
65
+ with Image.open(output_path) as img: # 新增尺寸获取
66
+ width, height = img.size
67
+ return output_path, width # 返回尺寸
68
  else:
69
  logger.error(f"Failed to download image: {response.status}")
70
+ return None, 0
71
  except Exception as e:
72
  logger.error(f"Error downloading image: {e}")
73
+ return None, 0
74
 
75
  # 生成语音
76
  async def generate_speech(text, voice="alloy", output_path=None):
 
110
  PlayResY: 1080
111
  [V4+ Styles]
112
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
113
+ Style: Caption,Noto Sans CJK SC,39,&H00FFFFFF,&H000000FF,&H00333333,&H00000000,0,0,0,0,100,100,0,0,1,2,3,2,10,10,39,0
114
+ Style: Speech,Noto Sans CJK SC,39,&H00FFFFFF,&H000000FF,&H00333333,&H00000000,0,0,0,0,100,100,0,0,1,2,3,8,10,10,39,0
115
  [Events]
116
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
117
  """
118
+ def smart_wrap(text, image_width, font_size=48):
119
+ """动态计算每行字符数"""
120
+ # 根据实际字体渲染参数计算
121
+ avg_char_width = font_size * 0.6 # 中文字符平均宽度(像素)
122
+ max_chars_per_line = max(1, int(image_width / avg_char_width) - 2) # 保留边距
123
+
124
+ # 实现更精确的断行逻辑
125
  lines = []
126
  current_line = []
127
+ current_width = 0
128
 
129
  for char in text:
130
+ char_width = font_size if ord(char) > 255 else font_size//2
131
+ if current_width + char_width > image_width - 100: # 保留100像素边距
132
  lines.append(''.join(current_line))
133
  current_line = [char]
134
+ current_width = char_width
135
  else:
136
  current_line.append(char)
137
+ current_width += char_width
138
  lines.append(''.join(current_line))
139
 
140
  return r'\N'.join(lines)
141
 
142
  # 创建caption字幕文件(底部显示)
143
+ def create_caption_subtitle_file(project_dir, captions, panel_start_times, panel_durations, image_widths):
144
  try:
145
  subtitle_file = os.path.join(project_dir, "captions.ass")
146
 
147
  with open(subtitle_file, "w", encoding="utf-8") as f:
148
  f.write(ASS_STYLE_HEADER)
149
+ for i, (caption, start, duration, width) in enumerate(zip(captions, panel_start_times, panel_durations, image_widths)):
150
+ wrapped_text = smart_wrap(caption, width)
151
  f.write(
152
  f"Dialogue: 0,{format_time(start)},{format_time(start + duration)},"
153
  f"Caption,,0,0,0,,{wrapped_text}\n"
 
159
  return None
160
 
161
  # 创建speech字幕文件(顶部显示)
162
+ def create_speech_subtitle_file(project_dir, speeches, panel_start_times, panel_durations, image_widths):
163
  try:
164
  subtitle_file = os.path.join(project_dir, "speeches.ass")
165
 
166
  with open(subtitle_file, "w", encoding="utf-8") as f:
167
  f.write(ASS_STYLE_HEADER)
168
+ for i, (speech, start, duration, width) in enumerate(zip(speeches, panel_start_times, panel_durations, image_widths)):
169
+ wrapped_text = smart_wrap(speech, width)
170
  f.write(
171
  f"Dialogue: 0,{format_time(start)},{format_time(start + duration)},"
172
  f"Speech,,0,0,0,,{wrapped_text}\n"
 
273
  logger.error(traceback.format_exc())
274
  return None, {}, [], []
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  def process_sub_path(path):
277
  """深度处理FFmpeg路径转义"""
278
  # 统一转换为POSIX路径
 
316
  ]
317
  subprocess.run(cmd1, check=True)
318
 
 
 
 
319
  # 构建滤镜链
320
  combined_filter = (
321
+ f"subtitles={process_sub_path(caption_subtitle_file)}",
322
+ f"subtitles={process_sub_path(speech_subtitle_file)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  )
324
  filter_chain = ",".join(combined_filter)
325
  # 优化ffmpeg命令
 
333
  "-movflags", "+faststart",
334
  output_video
335
  ]
 
 
336
  subprocess.run(cmd_combined, check=True)
 
337
  # 清理临时文件
338
  os.remove(temp_video)
339
  return output_video
 
372
  return None
373
 
374
  @app.post("/api/generate-video")
375
+ async def generate_video(comic_data: ComicData):
376
  # 创建唯一项目ID
377
  project_id = str(uuid.uuid4())
378
  # 使用绝对路径创建项目目录
 
384
  try:
385
  # 下载图片
386
  image_paths = []
387
+ image_widths = []
388
  for i, panel_url in enumerate(comic_data.panels):
389
  output_path = os.path.join(project_dir, f"panel_{i}.jpg")
390
+ path_result, img_width = await download_image(panel_url, output_path)
391
+ if path_result:
392
+ image_paths.append(path_result)
393
+ image_widths.append(img_width)
394
+ else:
395
+ image_widths.append(1920) # 失败时使用默认宽度
396
 
397
  if not image_paths:
398
  raise HTTPException(status_code=500, detail="Failed to download images")
 
410
 
411
  # 创建字幕文件 - 分别为caption和speech创建
412
  caption_subtitle_file = create_caption_subtitle_file(
413
+ project_dir, comic_data.captions, panel_start_times, panel_durations, image_widths
414
  )
415
  if not caption_subtitle_file:
416
  raise HTTPException(status_code=500, detail="Failed to create caption subtitle file")
417
 
418
  speech_subtitle_file = create_speech_subtitle_file(
419
+ project_dir, comic_data.speeches, panel_start_times, panel_durations, image_widths
420
  )
421
  if not speech_subtitle_file:
422
  raise HTTPException(status_code=500, detail="Failed to create speech subtitle file")
 
447
  # if img_url:
448
  # image_urls.append(img_url)
449
 
450
+ # 清理临时文件
 
451
  shutil.rmtree(project_dir, ignore_errors=True)
452
 
453
  return {
requirements.txt CHANGED
@@ -4,5 +4,4 @@ aiohttp>=3.8.4
4
  openai>=1.2.0
5
  python-multipart>=0.0.6
6
  pydantic>=1.10.7
7
- pydub
8
- ffmpeg-python
 
4
  openai>=1.2.0
5
  python-multipart>=0.0.6
6
  pydantic>=1.10.7
7
+ pydub