tomo2chin2 committed on
Commit
6b5e7f7
·
verified ·
1 Parent(s): 318578f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -10
app.py CHANGED
@@ -219,10 +219,16 @@ def extract_audio_text_v2(slide: dict, slide_index: int, history: list) -> str:
219
  slide_type = determine_slide_type(slide)
220
 
221
  if slide_type == "title":
222
- return slide.get("title", "")
 
 
 
223
 
224
  elif slide_type == "closing":
225
- return slide.get("notes", "本日の学習は以上です。復習を忘れずに。")
 
 
 
226
 
227
  elif slide_type == "imageText_image_only":
228
  # slide_index 1, 3, 5, 7, 9, 11 → history[0, 1, 2, 3, 4, 5]
@@ -291,8 +297,8 @@ def generate_audio_with_gemini(audio_text: str, gemini_token: str) -> bytes:
291
  "Content-Type": "application/json"
292
  }
293
 
294
- # プロンプトベース制御(歴史用語の正確な読み上げを重視)
295
- prompt_text = f"Pronounce this Japanese text correctly, paying careful attention to historical vocabulary and proper kanji readings: {audio_text}"
296
 
297
  payload = {
298
  "contents": [
@@ -395,13 +401,13 @@ def get_audio_duration(wav_bytes: bytes) -> float:
395
  return duration
396
 
397
 
398
- def speed_up_audio(wav_bytes: bytes, speed_factor: float = 1.25) -> bytes:
399
  """
400
  WAVファイルを指定倍速に変換
401
 
402
  Args:
403
  wav_bytes: 元のWAVバイナリデータ
404
- speed_factor: 倍速係数(1.25 = 1.25倍速)
405
 
406
  Returns:
407
  倍速変換後のWAVバイナリデータ
@@ -520,8 +526,8 @@ def create_video_with_audio_from_slides(
520
  # 音声生成
521
  wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
522
 
523
- # 1.25倍速処理
524
- wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.25)
525
 
526
  # 音声長さ測定(倍速処理後)
527
  audio_duration = get_audio_duration(wav_bytes)
@@ -687,8 +693,8 @@ def create_video_with_audio_from_slides_v2(
687
  # 音声生成
688
  wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
689
 
690
- # 1.25倍速処理
691
- wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.25)
692
 
693
  # 音声長さ測定(倍速処理後)
694
  audio_duration = get_audio_duration(wav_bytes)
 
219
  slide_type = determine_slide_type(slide)
220
 
221
  if slide_type == "title":
222
+ title_text = slide.get("title", "")
223
+ # 「語呂羽丸五郎」を読み仮名に変換(正しく読まれないため)
224
+ title_text = title_text.replace("語呂羽丸五郎", "ごろーまるごろう")
225
+ return title_text
226
 
227
  elif slide_type == "closing":
228
+ closing_text = slide.get("notes", "本日の学習は以上です。復習を忘れずに。")
229
+ # 「語呂羽丸五郎」を読み仮名に変換(正しく読まれないため)
230
+ closing_text = closing_text.replace("語呂羽丸五郎", "ごろーまるごろう")
231
+ return closing_text
232
 
233
  elif slide_type == "imageText_image_only":
234
  # slide_index 1, 3, 5, 7, 9, 11 → history[0, 1, 2, 3, 4, 5]
 
297
  "Content-Type": "application/json"
298
  }
299
 
300
+ # プロンプトベース制御(ドラマチック表現と正確な読み上げを重視)
301
+ prompt_text = f"Read this Japanese historical text with dramatic emotional expression and accurate kanji pronunciation, bringing the story to life with vivid intonation: {audio_text}"
302
 
303
  payload = {
304
  "contents": [
 
401
  return duration
402
 
403
 
404
+ def speed_up_audio(wav_bytes: bytes, speed_factor: float = 1.5) -> bytes:
405
  """
406
  WAVファイルを指定倍速に変換
407
 
408
  Args:
409
  wav_bytes: 元のWAVバイナリデータ
410
+ speed_factor: 倍速係数(1.5 = 1.5倍速)
411
 
412
  Returns:
413
  倍速変換後のWAVバイナリデータ
 
526
  # 音声生成
527
  wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
528
 
529
+ # 1.5倍速処理(50%アップ)
530
+ wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.5)
531
 
532
  # 音声長さ測定(倍速処理後)
533
  audio_duration = get_audio_duration(wav_bytes)
 
693
  # 音声生成
694
  wav_bytes = generate_audio_with_gemini(audio_text, gemini_token)
695
 
696
+ # 1.5倍速処理(50%アップ)
697
+ wav_bytes = speed_up_audio(wav_bytes, speed_factor=1.5)
698
 
699
  # 音声長さ測定(倍速処理後)
700
  audio_duration = get_audio_duration(wav_bytes)