Spaces:
Paused
Paused
Upload app.py
Browse files
app.py
CHANGED
|
@@ -219,10 +219,16 @@ def extract_audio_text_v2(slide: dict, slide_index: int, history: list) -> str:
|
|
| 219 |
slide_type = determine_slide_type(slide)
|
| 220 |
|
| 221 |
if slide_type == "title":
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
elif slide_type == "closing":
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
elif slide_type == "imageText_image_only":
|
| 228 |
# slide_index 1, 3, 5, 7, 9, 11 → history[0, 1, 2, 3, 4, 5]
|
|
@@ -291,8 +297,8 @@ def generate_audio_with_gemini(audio_text: str, gemini_token: str) -> bytes:
|
|
| 291 |
"Content-Type": "application/json"
|
| 292 |
}
|
| 293 |
|
| 294 |
-
# プロンプトベース制御(
|
| 295 |
-
prompt_text = f"
|
| 296 |
|
| 297 |
payload = {
|
| 298 |
"contents": [
|
|
@@ -395,13 +401,13 @@ def get_audio_duration(wav_bytes: bytes) -> float:
|
|
| 395 |
return duration
|
| 396 |
|
| 397 |
|
| 398 |
-
def speed_up_audio(wav_bytes: bytes, speed_factor: float = 1.
|
| 399 |
"""
|
| 400 |
WAVファイルを指定倍速に変換
|
| 401 |
|
| 402 |
Args:
|
| 403 |
wav_bytes: 元のWAVバイナリデータ
|
| 404 |
-
speed_factor: 倍速係数(1.
|
| 405 |
|
| 406 |
Returns:
|
| 407 |
倍速変換後のWAVバイナリデータ
|
|
@@ -520,8 +526,8 @@ def create_video_with_audio_from_slides(
|
|
| 520 |
# 音声生成
|
| 521 |
wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
|
| 522 |
|
| 523 |
-
# 1.
|
| 524 |
-
wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.
|
| 525 |
|
| 526 |
# 音声長さ測定(倍速処理後)
|
| 527 |
audio_duration = get_audio_duration(wav_bytes)
|
|
@@ -687,8 +693,8 @@ def create_video_with_audio_from_slides_v2(
|
|
| 687 |
# 音声生成
|
| 688 |
wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
|
| 689 |
|
| 690 |
-
# 1.
|
| 691 |
-
wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.
|
| 692 |
|
| 693 |
# 音声長さ測定(倍速処理後)
|
| 694 |
audio_duration = get_audio_duration(wav_bytes)
|
|
|
|
| 219 |
slide_type = determine_slide_type(slide)
|
| 220 |
|
| 221 |
if slide_type == "title":
|
| 222 |
+
title_text = slide.get("title", "")
|
| 223 |
+
# 「語呂羽丸五郎」を読み仮名に変換(正しく読まれないため)
|
| 224 |
+
title_text = title_text.replace("語呂羽丸五郎", "ごろーまるごろう")
|
| 225 |
+
return title_text
|
| 226 |
|
| 227 |
elif slide_type == "closing":
|
| 228 |
+
closing_text = slide.get("notes", "本日の学習は以上です。復習を忘れずに。")
|
| 229 |
+
# 「語呂羽丸五郎」を読み仮名に変換(正しく読まれないため)
|
| 230 |
+
closing_text = closing_text.replace("語呂羽丸五郎", "ごろーまるごろう")
|
| 231 |
+
return closing_text
|
| 232 |
|
| 233 |
elif slide_type == "imageText_image_only":
|
| 234 |
# slide_index 1, 3, 5, 7, 9, 11 → history[0, 1, 2, 3, 4, 5]
|
|
|
|
| 297 |
"Content-Type": "application/json"
|
| 298 |
}
|
| 299 |
|
| 300 |
+
# プロンプトベース制御(ドラマチック表現と正確な読み上げを重視)
|
| 301 |
+
prompt_text = f"Read this Japanese historical text with dramatic emotional expression and accurate kanji pronunciation, bringing the story to life with vivid intonation: {audio_text}"
|
| 302 |
|
| 303 |
payload = {
|
| 304 |
"contents": [
|
|
|
|
| 401 |
return duration
|
| 402 |
|
| 403 |
|
| 404 |
+
def speed_up_audio(wav_bytes: bytes, speed_factor: float = 1.5) -> bytes:
|
| 405 |
"""
|
| 406 |
WAVファイルを指定倍速に変換
|
| 407 |
|
| 408 |
Args:
|
| 409 |
wav_bytes: 元のWAVバイナリデータ
|
| 410 |
+
speed_factor: 倍速係数(1.5 = 1.5倍速)
|
| 411 |
|
| 412 |
Returns:
|
| 413 |
倍速変換後のWAVバイナリデータ
|
|
|
|
| 526 |
# 音声生成
|
| 527 |
wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
|
| 528 |
|
| 529 |
+
# 1.5倍速処理(50%アップ)
|
| 530 |
+
wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.5)
|
| 531 |
|
| 532 |
# 音声長さ測定(倍速処理後)
|
| 533 |
audio_duration = get_audio_duration(wav_bytes)
|
|
|
|
| 693 |
# 音声生成
|
| 694 |
wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
|
| 695 |
|
| 696 |
+
# 1.5倍速処理(50%アップ)
|
| 697 |
+
wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.5)
|
| 698 |
|
| 699 |
# 音声長さ測定(倍速処理後)
|
| 700 |
audio_duration = get_audio_duration(wav_bytes)
|