Spaces:

tomo2chin2
/

PDF_SlideShow

Paused

App Files Community

tomo2chin2 committed on Oct 11, 2025

Commit

6b5e7f7

verified ·

1 Parent(s): 318578f

Upload app.py

Browse files

Files changed (1) hide show

app.py +16 -10

app.py CHANGED Viewed

@@ -219,10 +219,16 @@ def extract_audio_text_v2(slide: dict, slide_index: int, history: list) -> str:
     slide_type = determine_slide_type(slide)
     if slide_type == "title":
-        return slide.get("title", "")
     elif slide_type == "closing":
-        return slide.get("notes", "本日の学習は以上です。復習を忘れずに。")
     elif slide_type == "imageText_image_only":
         # slide_index 1, 3, 5, 7, 9, 11 → history[0, 1, 2, 3, 4, 5]
@@ -291,8 +297,8 @@ def generate_audio_with_gemini(audio_text: str, gemini_token: str) -> bytes:
         "Content-Type": "application/json"
     }
-    # プロンプトベース制御（歴史用語の正確な読み上げを重視）
-    prompt_text = f"Pronounce this Japanese text correctly, paying careful attention to historical vocabulary and proper kanji readings: {audio_text}"
     payload = {
         "contents": [
@@ -395,13 +401,13 @@ def get_audio_duration(wav_bytes: bytes) -> float:
     return duration
-def speed_up_audio(wav_bytes: bytes, speed_factor: float = 1.25) -> bytes:
     """
     WAVファイルを指定倍速に変換
     Args:
         wav_bytes: 元のWAVバイナリデータ
-        speed_factor: 倍速係数（1.25 = 1.25倍速）
     Returns:
         倍速変換後のWAVバイナリデータ
@@ -520,8 +526,8 @@ def create_video_with_audio_from_slides(
             # 音声生成
             wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
-            # 1.25倍速処理
-            wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.25)
             # 音声長さ測定（倍速処理後）
             audio_duration = get_audio_duration(wav_bytes)
@@ -687,8 +693,8 @@ def create_video_with_audio_from_slides_v2(
             # 音声生成
             wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
-            # 1.25倍速処理
-            wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.25)
             # 音声長さ測定（倍速処理後）
             audio_duration = get_audio_duration(wav_bytes)

     slide_type = determine_slide_type(slide)
     if slide_type == "title":
+        title_text = slide.get("title", "")
+        # 「語呂羽丸五郎」を読み仮名に変換（正しく読まれないため）
+        title_text = title_text.replace("語呂羽丸五郎", "ごろーまるごろう")
+        return title_text
     elif slide_type == "closing":
+        closing_text = slide.get("notes", "本日の学習は以上です。復習を忘れずに。")
+        # 「語呂羽丸五郎」を読み仮名に変換（正しく読まれないため）
+        closing_text = closing_text.replace("語呂羽丸五郎", "ごろーまるごろう")
+        return closing_text
     elif slide_type == "imageText_image_only":
         # slide_index 1, 3, 5, 7, 9, 11 → history[0, 1, 2, 3, 4, 5]
         "Content-Type": "application/json"
     }
+    # プロンプトベース制御（ドラマチック表現と正確な読み上げを重視）
+    prompt_text = f"Read this Japanese historical text with dramatic emotional expression and accurate kanji pronunciation, bringing the story to life with vivid intonation: {audio_text}"
     payload = {
         "contents": [
     return duration
+def speed_up_audio(wav_bytes: bytes, speed_factor: float = 1.5) -> bytes:
     """
     WAVファイルを指定倍速に変換
     Args:
         wav_bytes: 元のWAVバイナリデータ
+        speed_factor: 倍速係数（1.5 = 1.5倍速）
     Returns:
         倍速変換後のWAVバイナリデータ
             # 音声生成
             wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
+            # 1.5倍速処理（50%アップ）
+            wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.5)
             # 音声長さ測定（倍速処理後）
             audio_duration = get_audio_duration(wav_bytes)
             # 音声生成
             wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
+            # 1.5倍速処理（50%アップ）
+            wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.5)
             # 音声長さ測定（倍速処理後）
             audio_duration = get_audio_duration(wav_bytes)