Spaces:
Paused
Paused
Upload app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ import gradio as gr
|
|
| 7 |
from fastapi import FastAPI, HTTPException
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
from pydantic import BaseModel, HttpUrl
|
| 10 |
-
from typing import Optional, Union
|
| 11 |
import requests
|
| 12 |
import tempfile
|
| 13 |
import os
|
|
@@ -16,7 +16,6 @@ import numpy as np
|
|
| 16 |
from datetime import datetime
|
| 17 |
import uuid
|
| 18 |
from pathlib import Path
|
| 19 |
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 20 |
|
| 21 |
# 画像・動画処理ライブラリ
|
| 22 |
from pdf2image import convert_from_path
|
|
@@ -31,11 +30,6 @@ from huggingface_hub import HfApi, login
|
|
| 31 |
logging.basicConfig(level=logging.INFO)
|
| 32 |
logger = logging.getLogger(__name__)
|
| 33 |
|
| 34 |
-
MAX_EDUCATION_TTS_WORKERS = max(
|
| 35 |
-
1,
|
| 36 |
-
int(os.getenv("EDUCATION_TTS_MAX_WORKERS", "3")),
|
| 37 |
-
)
|
| 38 |
-
|
| 39 |
# ==============================
|
| 40 |
# リクエスト/レスポンスモデル
|
| 41 |
# ==============================
|
|
@@ -80,29 +74,6 @@ class AudioVideoResponse(BaseModel):
|
|
| 80 |
total_slides: Optional[int] = None
|
| 81 |
video_duration: Optional[float] = None
|
| 82 |
|
| 83 |
-
# ==============================
|
| 84 |
-
# 賢杉賢太郎連携バージョン - 追加モデル
|
| 85 |
-
# ==============================
|
| 86 |
-
|
| 87 |
-
class EducationNotesItem(BaseModel):
|
| 88 |
-
"""賢杉賢太郎: notes配列要素"""
|
| 89 |
-
slide_index: Optional[int] = None
|
| 90 |
-
text: str
|
| 91 |
-
speaking_rate: Optional[float] = 1.25
|
| 92 |
-
padding_seconds: Optional[float] = None
|
| 93 |
-
|
| 94 |
-
class EducationPlaybackPolicy(BaseModel):
|
| 95 |
-
"""賢杉賢太郎: 再生ポリシー"""
|
| 96 |
-
match_audio_length: bool = True
|
| 97 |
-
fallback_seconds_per_slide: float = 6.0
|
| 98 |
-
padding_seconds: float = 0.6
|
| 99 |
-
|
| 100 |
-
class EducationVideoRequest(BaseModel):
|
| 101 |
-
"""賢杉賢太郎連携バージョン - notesをそのまま動画化"""
|
| 102 |
-
pdf_url: str
|
| 103 |
-
notes: Union[str, List[Union[str, Dict]]]
|
| 104 |
-
playback_policy: Optional[EducationPlaybackPolicy] = None
|
| 105 |
-
|
| 106 |
# ==============================
|
| 107 |
# URL前処理ユーティリティ
|
| 108 |
# ==============================
|
|
@@ -286,68 +257,6 @@ def extract_audio_text_v2(slide: dict, slide_index: int, history: list) -> str:
|
|
| 286 |
return ""
|
| 287 |
|
| 288 |
|
| 289 |
-
def normalize_notes_payload(notes_payload: Union[str, List[Union[str, Dict]]]) -> List[dict]:
|
| 290 |
-
"""
|
| 291 |
-
賢杉賢太郎用notesペイロードを正規化
|
| 292 |
-
|
| 293 |
-
Args:
|
| 294 |
-
notes_payload: list もしくは JSON文字列
|
| 295 |
-
|
| 296 |
-
Returns:
|
| 297 |
-
list[dict]: slide_index / text / speaking_rate / padding_seconds を含む辞書配列
|
| 298 |
-
"""
|
| 299 |
-
import json
|
| 300 |
-
|
| 301 |
-
if isinstance(notes_payload, str):
|
| 302 |
-
try:
|
| 303 |
-
raw_notes = json.loads(notes_payload)
|
| 304 |
-
except json.JSONDecodeError as exc:
|
| 305 |
-
raise ValueError(f"notesのJSON解析に失敗しました: {exc}")
|
| 306 |
-
else:
|
| 307 |
-
raw_notes = notes_payload or []
|
| 308 |
-
|
| 309 |
-
normalized: List[dict] = []
|
| 310 |
-
|
| 311 |
-
for idx, item in enumerate(raw_notes):
|
| 312 |
-
if isinstance(item, dict):
|
| 313 |
-
slide_index = item.get("slide_index", idx)
|
| 314 |
-
text = str(item.get("text", "")).strip()
|
| 315 |
-
speaking_rate = item.get("speaking_rate", 1.25)
|
| 316 |
-
padding = item.get("padding_seconds")
|
| 317 |
-
else:
|
| 318 |
-
slide_index = idx
|
| 319 |
-
text = str(item).strip()
|
| 320 |
-
speaking_rate = 1.25
|
| 321 |
-
padding = None
|
| 322 |
-
|
| 323 |
-
try:
|
| 324 |
-
slide_index = int(slide_index)
|
| 325 |
-
except (TypeError, ValueError):
|
| 326 |
-
slide_index = idx
|
| 327 |
-
|
| 328 |
-
try:
|
| 329 |
-
speaking_rate = float(speaking_rate) if speaking_rate is not None else 1.0
|
| 330 |
-
except (TypeError, ValueError):
|
| 331 |
-
speaking_rate = 1.0
|
| 332 |
-
if speaking_rate <= 0:
|
| 333 |
-
speaking_rate = 1.0
|
| 334 |
-
|
| 335 |
-
if padding is not None:
|
| 336 |
-
try:
|
| 337 |
-
padding = float(padding)
|
| 338 |
-
except (TypeError, ValueError):
|
| 339 |
-
padding = None
|
| 340 |
-
|
| 341 |
-
normalized.append({
|
| 342 |
-
"slide_index": slide_index,
|
| 343 |
-
"text": text,
|
| 344 |
-
"speaking_rate": speaking_rate,
|
| 345 |
-
"padding_seconds": padding
|
| 346 |
-
})
|
| 347 |
-
|
| 348 |
-
return normalized
|
| 349 |
-
|
| 350 |
-
|
| 351 |
def convert_pil_to_array(pil_image: Image.Image, target_size: tuple) -> np.ndarray:
|
| 352 |
"""
|
| 353 |
PIL ImageをNumPy配列に変換し、指定サイズにリサイズ
|
|
@@ -375,28 +284,20 @@ def convert_pil_to_array(pil_image: Image.Image, target_size: tuple) -> np.ndarr
|
|
| 375 |
# V2.0: Gemini TTS音声生成
|
| 376 |
# ==============================
|
| 377 |
|
| 378 |
-
def generate_audio_with_gemini(
|
| 379 |
-
audio_text: str,
|
| 380 |
-
gemini_token: str,
|
| 381 |
-
model: str = "gemini-2.5-pro-preview-tts",
|
| 382 |
-
) -> bytes:
|
| 383 |
"""
|
| 384 |
Gemini REST APIでテキストから音声を生成
|
| 385 |
|
| 386 |
Args:
|
| 387 |
audio_text: 読み上げるテキスト
|
| 388 |
gemini_token: GEMINI_TOKEN環境変数
|
| 389 |
-
model: 利用するGemini TTSモデルID
|
| 390 |
|
| 391 |
Returns:
|
| 392 |
WAVバイナリデータ(24kHz PCM16)
|
| 393 |
"""
|
| 394 |
import base64
|
| 395 |
|
| 396 |
-
url =
|
| 397 |
-
"https://generativelanguage.googleapis.com/v1beta/models/"
|
| 398 |
-
f"{model}:generateContent?key={gemini_token}"
|
| 399 |
-
)
|
| 400 |
|
| 401 |
headers = {
|
| 402 |
"Content-Type": "application/json"
|
|
@@ -428,10 +329,10 @@ def generate_audio_with_gemini(
|
|
| 428 |
}
|
| 429 |
}
|
| 430 |
|
| 431 |
-
logger.info(f"Gemini TTS API呼び出し: {len(audio_text)}文字
|
| 432 |
logger.info(f"Payload: {payload}")
|
| 433 |
|
| 434 |
-
response = requests.post(url, json=payload, headers=headers, timeout=
|
| 435 |
|
| 436 |
# エラーレスポンスの詳細をログ出力
|
| 437 |
if response.status_code != 200:
|
|
@@ -946,254 +847,6 @@ def create_video_with_audio_from_slides_v2(
|
|
| 946 |
except Exception as e:
|
| 947 |
logger.warning(f"動画ファイル削除エラー: {e}")
|
| 948 |
|
| 949 |
-
|
| 950 |
-
def create_video_with_notes(
|
| 951 |
-
pdf_url: str,
|
| 952 |
-
notes_payload: Union[str, List[Union[str, Dict]]],
|
| 953 |
-
gemini_token: str,
|
| 954 |
-
playback_policy: Optional[dict] = None,
|
| 955 |
-
progress_callback=None
|
| 956 |
-
) -> tuple:
|
| 957 |
-
"""
|
| 958 |
-
賢杉賢太郎連携バージョン:
|
| 959 |
-
notesフィールド(スピーカーノート)から音声付き動画を生成する。
|
| 960 |
-
|
| 961 |
-
Args:
|
| 962 |
-
pdf_url: GASが生成したPDFのURL
|
| 963 |
-
notes_payload: notes配列(list or JSON string)
|
| 964 |
-
gemini_token: Gemini TTS用トークン
|
| 965 |
-
playback_policy: 再生ポリシー辞書
|
| 966 |
-
progress_callback: Gradio用進捗更新
|
| 967 |
-
|
| 968 |
-
Returns:
|
| 969 |
-
tuple: (video_url, page2_image_url, audio_info_list, total_slides, total_duration)
|
| 970 |
-
"""
|
| 971 |
-
pdf_path = None
|
| 972 |
-
audio_files: List[str] = []
|
| 973 |
-
video_path = None
|
| 974 |
-
page2_image_path = None
|
| 975 |
-
clips = []
|
| 976 |
-
audio_info_list = []
|
| 977 |
-
total_duration = 0.0
|
| 978 |
-
|
| 979 |
-
policy = playback_policy or {}
|
| 980 |
-
match_audio = bool(policy.get("match_audio_length", True))
|
| 981 |
-
fallback_seconds = policy.get("fallback_seconds_per_slide", 6.0)
|
| 982 |
-
if fallback_seconds is None or fallback_seconds <= 0:
|
| 983 |
-
fallback_seconds = 6.0
|
| 984 |
-
padding_default = policy.get("padding_seconds", 0.6)
|
| 985 |
-
if padding_default is None or padding_default < 0:
|
| 986 |
-
padding_default = 0.6
|
| 987 |
-
|
| 988 |
-
try:
|
| 989 |
-
normalized_notes = normalize_notes_payload(notes_payload)
|
| 990 |
-
notes_map = {entry["slide_index"]: entry for entry in normalized_notes}
|
| 991 |
-
|
| 992 |
-
if progress_callback:
|
| 993 |
-
progress_callback(0.05, desc="PDFダウンロード中...")
|
| 994 |
-
|
| 995 |
-
pdf_path = download_pdf_from_url(sanitize_url(pdf_url))
|
| 996 |
-
|
| 997 |
-
if progress_callback:
|
| 998 |
-
progress_callback(0.1, desc="PDF→画像変換中...")
|
| 999 |
-
|
| 1000 |
-
images = convert_pdf_to_images(pdf_path, dpi=150)
|
| 1001 |
-
total_slides = len(images)
|
| 1002 |
-
|
| 1003 |
-
if total_slides == 0:
|
| 1004 |
-
raise Exception("PDFにページが含まれていません")
|
| 1005 |
-
|
| 1006 |
-
note_entries: List[Dict] = []
|
| 1007 |
-
text_map: Dict[int, str] = {}
|
| 1008 |
-
tts_results: Dict[int, Optional[bytes]] = {}
|
| 1009 |
-
|
| 1010 |
-
for idx in range(total_slides):
|
| 1011 |
-
note_entry = notes_map.get(idx, {
|
| 1012 |
-
"slide_index": idx,
|
| 1013 |
-
"text": "",
|
| 1014 |
-
"speaking_rate": 1.0,
|
| 1015 |
-
"padding_seconds": None
|
| 1016 |
-
})
|
| 1017 |
-
note_entries.append(note_entry)
|
| 1018 |
-
text = str(note_entry.get("text", "")).strip()
|
| 1019 |
-
text_map[idx] = text
|
| 1020 |
-
|
| 1021 |
-
total_audio_jobs = sum(1 for text in text_map.values() if text)
|
| 1022 |
-
|
| 1023 |
-
if progress_callback:
|
| 1024 |
-
progress_callback(0.1, desc="音声生成ジョブ準備中...")
|
| 1025 |
-
|
| 1026 |
-
if total_audio_jobs > 0:
|
| 1027 |
-
max_workers = min(MAX_EDUCATION_TTS_WORKERS, total_audio_jobs)
|
| 1028 |
-
futures = {}
|
| 1029 |
-
completed_jobs = 0
|
| 1030 |
-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 1031 |
-
for idx, text in text_map.items():
|
| 1032 |
-
if not text:
|
| 1033 |
-
tts_results[idx] = None
|
| 1034 |
-
continue
|
| 1035 |
-
futures[executor.submit(
|
| 1036 |
-
generate_audio_with_gemini,
|
| 1037 |
-
text,
|
| 1038 |
-
gemini_token,
|
| 1039 |
-
model="gemini-2.5-flash-preview-tts",
|
| 1040 |
-
)] = idx
|
| 1041 |
-
|
| 1042 |
-
for future in as_completed(futures):
|
| 1043 |
-
idx = futures[future]
|
| 1044 |
-
try:
|
| 1045 |
-
wav_bytes = future.result()
|
| 1046 |
-
except Exception as exc:
|
| 1047 |
-
logger.error(f"Gemini TTS生成失敗 (slide={idx}): {exc}")
|
| 1048 |
-
raise
|
| 1049 |
-
tts_results[idx] = wav_bytes
|
| 1050 |
-
completed_jobs += 1
|
| 1051 |
-
if progress_callback:
|
| 1052 |
-
progress = 0.1 + (completed_jobs / total_audio_jobs) * 0.4
|
| 1053 |
-
progress_callback(
|
| 1054 |
-
min(progress, 0.5),
|
| 1055 |
-
desc=f"音声生成中 ({completed_jobs}/{total_audio_jobs})"
|
| 1056 |
-
)
|
| 1057 |
-
else:
|
| 1058 |
-
if progress_callback:
|
| 1059 |
-
progress_callback(0.5, desc="音声生成スキップ(テキストなし)")
|
| 1060 |
-
|
| 1061 |
-
for idx, pil_image in enumerate(images):
|
| 1062 |
-
note_entry = note_entries[idx]
|
| 1063 |
-
text = text_map[idx]
|
| 1064 |
-
speaking_rate = note_entry.get("speaking_rate", 1.25) or 1.0
|
| 1065 |
-
if speaking_rate <= 0:
|
| 1066 |
-
speaking_rate = 1.0
|
| 1067 |
-
padding_seconds = note_entry.get("padding_seconds")
|
| 1068 |
-
if padding_seconds is None or padding_seconds < 0:
|
| 1069 |
-
padding_seconds = padding_default
|
| 1070 |
-
|
| 1071 |
-
audio_duration = 0.0
|
| 1072 |
-
slide_duration = fallback_seconds
|
| 1073 |
-
audio_url = None
|
| 1074 |
-
audio_path = None
|
| 1075 |
-
|
| 1076 |
-
if text:
|
| 1077 |
-
wav_bytes = tts_results.get(idx)
|
| 1078 |
-
if wav_bytes is None:
|
| 1079 |
-
raise RuntimeError(f"TTS音声が取得できませんでした (slide_index={idx})")
|
| 1080 |
-
|
| 1081 |
-
if speaking_rate and abs(speaking_rate - 1.0) > 0.01:
|
| 1082 |
-
wav_bytes = speed_up_audio(wav_bytes, speed_factor=speaking_rate)
|
| 1083 |
-
|
| 1084 |
-
audio_duration = get_audio_duration(wav_bytes)
|
| 1085 |
-
if match_audio:
|
| 1086 |
-
slide_duration = max(audio_duration + padding_seconds, fallback_seconds)
|
| 1087 |
-
else:
|
| 1088 |
-
slide_duration = fallback_seconds
|
| 1089 |
-
|
| 1090 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio:
|
| 1091 |
-
tmp_audio.write(wav_bytes)
|
| 1092 |
-
audio_path = tmp_audio.name
|
| 1093 |
-
audio_files.append(audio_path)
|
| 1094 |
-
|
| 1095 |
-
audio_url = save_audio_to_hf(wav_bytes, prefix=f"education_slide_{idx:02d}")
|
| 1096 |
-
|
| 1097 |
-
else:
|
| 1098 |
-
slide_duration = fallback_seconds
|
| 1099 |
-
|
| 1100 |
-
if progress_callback and total_slides:
|
| 1101 |
-
progress = 0.5 + ((idx + 1) / total_slides) * 0.2
|
| 1102 |
-
progress_callback(
|
| 1103 |
-
min(progress, 0.7),
|
| 1104 |
-
desc=f"動画クリップ生成中 ({idx + 1}/{total_slides})"
|
| 1105 |
-
)
|
| 1106 |
-
|
| 1107 |
-
img_array = convert_pil_to_array(pil_image, target_size=(1280, 720))
|
| 1108 |
-
img_clip = ImageClip(img_array, duration=slide_duration)
|
| 1109 |
-
|
| 1110 |
-
if audio_path:
|
| 1111 |
-
audio_clip = AudioFileClip(audio_path)
|
| 1112 |
-
img_clip = img_clip.set_audio(audio_clip)
|
| 1113 |
-
|
| 1114 |
-
clips.append(img_clip)
|
| 1115 |
-
|
| 1116 |
-
audio_info_list.append({
|
| 1117 |
-
"slide_index": idx,
|
| 1118 |
-
"slide_type": "notes",
|
| 1119 |
-
"audio_url": audio_url,
|
| 1120 |
-
"duration": audio_duration,
|
| 1121 |
-
"text": text,
|
| 1122 |
-
"speaking_rate": speaking_rate,
|
| 1123 |
-
"playback_duration": slide_duration
|
| 1124 |
-
})
|
| 1125 |
-
|
| 1126 |
-
total_duration += slide_duration
|
| 1127 |
-
|
| 1128 |
-
if not clips:
|
| 1129 |
-
raise Exception("動画クリップが生成されませんでした(notesに有効なテキストがありません)")
|
| 1130 |
-
|
| 1131 |
-
if progress_callback:
|
| 1132 |
-
progress_callback(0.7, desc="動画をレンダリング中...")
|
| 1133 |
-
|
| 1134 |
-
final_video = concatenate_videoclips(clips, method="compose")
|
| 1135 |
-
tmp_video = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
|
| 1136 |
-
video_path = tmp_video.name
|
| 1137 |
-
tmp_video.close()
|
| 1138 |
-
|
| 1139 |
-
final_video.write_videofile(
|
| 1140 |
-
video_path,
|
| 1141 |
-
fps=24,
|
| 1142 |
-
codec="libx264",
|
| 1143 |
-
audio_codec="aac",
|
| 1144 |
-
temp_audiofile=os.path.join(tempfile.gettempdir(), f"temp_audio_{uuid.uuid4().hex}.m4a"),
|
| 1145 |
-
remove_temp=True,
|
| 1146 |
-
verbose=False,
|
| 1147 |
-
logger=None
|
| 1148 |
-
)
|
| 1149 |
-
final_video.close()
|
| 1150 |
-
|
| 1151 |
-
for clip in clips:
|
| 1152 |
-
clip.close()
|
| 1153 |
-
|
| 1154 |
-
if progress_callback:
|
| 1155 |
-
progress_callback(0.85, desc="動画をアップロード中...")
|
| 1156 |
-
|
| 1157 |
-
video_url = video_uploader.upload_video(video_path, prefix="education_video")
|
| 1158 |
-
|
| 1159 |
-
page2_image_url = None
|
| 1160 |
-
if total_slides >= 2:
|
| 1161 |
-
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
|
| 1162 |
-
page2_image_path = tmp_img.name
|
| 1163 |
-
images[1].save(page2_image_path, format="JPEG", quality=90)
|
| 1164 |
-
page2_image_url = video_uploader.upload_image(page2_image_path, prefix="education_page2")
|
| 1165 |
-
|
| 1166 |
-
if progress_callback:
|
| 1167 |
-
progress_callback(1.0, desc="完了!")
|
| 1168 |
-
|
| 1169 |
-
return (video_url, page2_image_url, audio_info_list, total_slides, total_duration)
|
| 1170 |
-
|
| 1171 |
-
finally:
|
| 1172 |
-
for audio_file in audio_files:
|
| 1173 |
-
if os.path.exists(audio_file):
|
| 1174 |
-
try:
|
| 1175 |
-
os.remove(audio_file)
|
| 1176 |
-
except Exception as e:
|
| 1177 |
-
logger.warning(f"音声ファイル削除エラー: {e}")
|
| 1178 |
-
|
| 1179 |
-
if video_path and os.path.exists(video_path):
|
| 1180 |
-
try:
|
| 1181 |
-
os.remove(video_path)
|
| 1182 |
-
except Exception as e:
|
| 1183 |
-
logger.warning(f"動画ファイル削除エラー: {e}")
|
| 1184 |
-
|
| 1185 |
-
if page2_image_path and os.path.exists(page2_image_path):
|
| 1186 |
-
try:
|
| 1187 |
-
os.remove(page2_image_path)
|
| 1188 |
-
except Exception as e:
|
| 1189 |
-
logger.warning(f"画像ファイル削除エラー: {e}")
|
| 1190 |
-
|
| 1191 |
-
if pdf_path and os.path.exists(pdf_path):
|
| 1192 |
-
try:
|
| 1193 |
-
os.remove(pdf_path)
|
| 1194 |
-
except Exception as e:
|
| 1195 |
-
logger.warning(f"PDFファイル削除エラー: {e}")
|
| 1196 |
-
|
| 1197 |
# ==============================
|
| 1198 |
# コア機能実装
|
| 1199 |
# ==============================
|
|
@@ -1616,60 +1269,6 @@ async def slidedata_to_video(request: SlideDataToVideoRequest):
|
|
| 1616 |
detail=f"動画生成に失敗しました: {str(e)}"
|
| 1617 |
)
|
| 1618 |
|
| 1619 |
-
|
| 1620 |
-
@app.post(
|
| 1621 |
-
"/api/education/notes-to-video",
|
| 1622 |
-
response_model=AudioVideoResponse,
|
| 1623 |
-
tags=["Video Generation", "Education"],
|
| 1624 |
-
summary="賢杉賢太郎: notes配列から音声付き動画を生成",
|
| 1625 |
-
description="賢杉賢太郎連携バージョン。GASが返すPDF URLとnotes配列を渡すと、音声付き動画を生成してアップロードします。"
|
| 1626 |
-
)
|
| 1627 |
-
async def education_notes_to_video(request: EducationVideoRequest):
|
| 1628 |
-
"""賢杉賢太郎連携バージョン: notesフィールドを活用した動画生成エンドポイント"""
|
| 1629 |
-
gemini_token = os.environ.get("GEMINI_TOKEN")
|
| 1630 |
-
if not gemini_token:
|
| 1631 |
-
raise HTTPException(
|
| 1632 |
-
status_code=500,
|
| 1633 |
-
detail="GEMINI_TOKEN環境変数が設定されていません"
|
| 1634 |
-
)
|
| 1635 |
-
|
| 1636 |
-
try:
|
| 1637 |
-
logger.info("賢杉賢太郎向けAPIリクエスト受信")
|
| 1638 |
-
playback_policy = request.playback_policy.dict() if request.playback_policy else {}
|
| 1639 |
-
|
| 1640 |
-
(
|
| 1641 |
-
video_url,
|
| 1642 |
-
page2_image_url,
|
| 1643 |
-
audio_info_list,
|
| 1644 |
-
total_slides,
|
| 1645 |
-
total_duration
|
| 1646 |
-
) = create_video_with_notes(
|
| 1647 |
-
pdf_url=request.pdf_url,
|
| 1648 |
-
notes_payload=request.notes,
|
| 1649 |
-
gemini_token=gemini_token,
|
| 1650 |
-
playback_policy=playback_policy
|
| 1651 |
-
)
|
| 1652 |
-
|
| 1653 |
-
logger.info(f"賢杉賢太郎向け動画生成完了: {video_url}")
|
| 1654 |
-
|
| 1655 |
-
return AudioVideoResponse(
|
| 1656 |
-
status="success",
|
| 1657 |
-
video_url=video_url,
|
| 1658 |
-
page2_image_url=page2_image_url,
|
| 1659 |
-
audio_urls=audio_info_list,
|
| 1660 |
-
message="賢杉賢太郎用の音声付き動画の生成とアップロードに成功しました",
|
| 1661 |
-
total_slides=total_slides,
|
| 1662 |
-
video_duration=total_duration
|
| 1663 |
-
)
|
| 1664 |
-
|
| 1665 |
-
except HTTPException:
|
| 1666 |
-
raise
|
| 1667 |
-
except Exception as e:
|
| 1668 |
-
logger.error(f"賢杉賢太郎向け動画生成エラー: {e}", exc_info=True)
|
| 1669 |
-
raise HTTPException(
|
| 1670 |
-
status_code=500,
|
| 1671 |
-
detail=f"賢杉賢太郎向け動画生成に失敗しました: {str(e)}"
|
| 1672 |
-
)
|
| 1673 |
@app.get("/health")
|
| 1674 |
async def health_check():
|
| 1675 |
"""ヘルスチェックエンドポイント"""
|
|
|
|
| 7 |
from fastapi import FastAPI, HTTPException
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
from pydantic import BaseModel, HttpUrl
|
| 10 |
+
from typing import Optional, Union
|
| 11 |
import requests
|
| 12 |
import tempfile
|
| 13 |
import os
|
|
|
|
| 16 |
from datetime import datetime
|
| 17 |
import uuid
|
| 18 |
from pathlib import Path
|
|
|
|
| 19 |
|
| 20 |
# 画像・動画処理ライブラリ
|
| 21 |
from pdf2image import convert_from_path
|
|
|
|
| 30 |
logging.basicConfig(level=logging.INFO)
|
| 31 |
logger = logging.getLogger(__name__)
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
# ==============================
|
| 34 |
# リクエスト/レスポンスモデル
|
| 35 |
# ==============================
|
|
|
|
| 74 |
total_slides: Optional[int] = None
|
| 75 |
video_duration: Optional[float] = None
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# ==============================
|
| 78 |
# URL前処理ユーティリティ
|
| 79 |
# ==============================
|
|
|
|
| 257 |
return ""
|
| 258 |
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
def convert_pil_to_array(pil_image: Image.Image, target_size: tuple) -> np.ndarray:
|
| 261 |
"""
|
| 262 |
PIL ImageをNumPy配列に変換し、指定サイズにリサイズ
|
|
|
|
| 284 |
# V2.0: Gemini TTS音声生成
|
| 285 |
# ==============================
|
| 286 |
|
| 287 |
+
def generate_audio_with_gemini(audio_text: str, gemini_token: str) -> bytes:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
"""
|
| 289 |
Gemini REST APIでテキストから音声を生成
|
| 290 |
|
| 291 |
Args:
|
| 292 |
audio_text: 読み上げるテキスト
|
| 293 |
gemini_token: GEMINI_TOKEN環境変数
|
|
|
|
| 294 |
|
| 295 |
Returns:
|
| 296 |
WAVバイナリデータ(24kHz PCM16)
|
| 297 |
"""
|
| 298 |
import base64
|
| 299 |
|
| 300 |
+
url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro-preview-tts:generateContent?key={gemini_token}"
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
headers = {
|
| 303 |
"Content-Type": "application/json"
|
|
|
|
| 329 |
}
|
| 330 |
}
|
| 331 |
|
| 332 |
+
logger.info(f"Gemini TTS API呼び出し: {len(audio_text)}文字")
|
| 333 |
logger.info(f"Payload: {payload}")
|
| 334 |
|
| 335 |
+
response = requests.post(url, json=payload, headers=headers, timeout=60)
|
| 336 |
|
| 337 |
# エラーレスポンスの詳細をログ出力
|
| 338 |
if response.status_code != 200:
|
|
|
|
| 847 |
except Exception as e:
|
| 848 |
logger.warning(f"動画ファイル削除エラー: {e}")
|
| 849 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 850 |
# ==============================
|
| 851 |
# コア機能実装
|
| 852 |
# ==============================
|
|
|
|
| 1269 |
detail=f"動画生成に失敗しました: {str(e)}"
|
| 1270 |
)
|
| 1271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1272 |
@app.get("/health")
|
| 1273 |
async def health_check():
|
| 1274 |
"""ヘルスチェックエンドポイント"""
|