# Page header captured with this file: Corin1998 - "Update app.py" - 5f6c50f (verified)
import os
import re
import json
import uuid
import shutil
import pathlib
import gradio as gr
from PIL import Image
from generator import generate_marketing_assets
from tts_subtitles import synthesize_tts_openai, transcribe_to_srt_openai
from video import (
compose_video_with_subtitles,
make_background_image,
find_japanese_font_path,
rasterize_svg_to_png,
)
from doc_ingest import summarize_documents_and_urls
# Model identifiers. Each one can be overridden through the environment
# variable named in the corresponding os.getenv call.
DEFAULT_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o-mini")
DEFAULT_TTS_MODEL = os.getenv("OPENAI_TTS_MODEL", "tts-1")
DEFAULT_WHISPER_MODEL = os.getenv("OPENAI_WHISPER_MODEL", "whisper-1")
# Values offered by the narration-voice dropdown and passed on as `voice`.
# NOTE(review): "breeze", "bright" and "calm" do not look like voice names
# accepted by the OpenAI speech API - verify against the current voice list.
VOICE_CHOICES = ["alloy", "verse", "breeze", "bright", "calm"]
# Root directory under which each run creates its own sub-directory.
# It is created at import time if it does not exist yet.
OUTPUT_BASE = os.getenv("OUTPUT_DIR", "/tmp/outputs")
pathlib.Path(OUTPUT_BASE).mkdir(parents=True, exist_ok=True)
# Sample form values; the gr.Examples row in the UI is built from these.
EXAMPLE = {
    "product_name": "FLDapp HbA1c測定",
    "value_props": "5秒で測定/アプリ自動記録/医療機関連携",
    "target": "30-50代の健康意識が高いビジネスパーソン",
    "tone": "信頼性とスピード感",
    "lang": "ja",
    "seconds": 20,
    "voice": "alloy",
}
def _save_upload(x, out_dir, name_hint):
if not x:
return None
src = x if isinstance(x, str) else x.get("path")
if not src or not os.path.exists(src):
return None
ext = os.path.splitext(src)[1].lower()
dst = os.path.join(out_dir, f"{name_hint}{ext}")
if ext == ".svg":
png_dst = dst.replace(".svg", ".png")
rasterize_svg_to_png(src, png_dst, width=1024)
return png_dst
shutil.copy2(src, dst)
return dst
def _save_upload_multi(xs, out_dir, name_prefix):
paths = []
if not xs:
return paths
if isinstance(xs, list):
for i, item in enumerate(xs, 1):
p = _save_upload(item, out_dir, f"{name_prefix}_{i}")
if p:
paths.append(p)
else:
p = _save_upload(xs, out_dir, f"{name_prefix}_1")
if p:
paths.append(p)
return paths
def _parse_urls(url_text: str) -> list[str]:
if not url_text:
return []
tokens = re.split(r"[\s,;]+", url_text.strip())
urls = []
for u in tokens:
u = u.strip()
if u.startswith("http://") or u.startswith("https://"):
urls.append(u)
# 重複除去
return list(dict.fromkeys(urls))
def run_pipeline(
    product_name, value_props, target, tone, lang, seconds, voice,
    # Reference documents / URLs
    doc_files, url_texts, use_docs,
    # Compositing assets
    logo_file, bg_image_file, bg_video_file, bgm_file, font_file,
    logo_position, logo_width_px,
    # Toggles
    show_subtitles, show_title_overlay, title_position, title_fontsize, bg_text_draw
):
    """Generate copy, narration, subtitles, a vertical video and an OGP image.

    Steps, in order: save the uploads into a fresh per-run directory under
    ``OUTPUT_BASE``; optionally summarize the reference documents/URLs;
    generate the marketing assets; synthesize narration; transcribe it to
    SRT; pick or render a background; compose the video; build a 1200x630
    OGP image with the logo pasted on when one was uploaded.

    Summarizing the references and compositing the logo onto the OGP image
    are best-effort: a failure there is logged and the run continues.

    Returns:
        An 8-tuple ``(headline, video_script, thumbnail_text, ogp_json,
        audio_path, srt_path, video_path, ogp_image_path)`` where
        ``ogp_json`` is the ``ogp`` asset serialized as indented JSON.

    Raises:
        gr.Error: When ``OPENAI_API_KEY`` is not set, when the narration or
            video file is missing after its step, or when any other
            exception occurs in the generation steps (the original
            exception is chained as the cause).
    """
    # Imported here so the module-level import block stays untouched.
    import logging

    log = logging.getLogger(__name__)

    if not os.getenv("OPENAI_API_KEY"):
        raise gr.Error("OPENAI_API_KEY が設定されていません(SpacesのSecretsに追加)。")

    run_id = uuid.uuid4().hex[:8]
    out_dir = os.path.join(OUTPUT_BASE, run_id)
    os.makedirs(out_dir, exist_ok=True)

    # Persist the uploads inside the run directory.
    doc_paths = _save_upload_multi(doc_files, out_dir, "doc")
    logo_path = _save_upload(logo_file, out_dir, "logo")
    bg_img_user = _save_upload(bg_image_file, out_dir, "bg_user")
    bg_video_user = _save_upload(bg_video_file, out_dir, "bg_user_video")
    bgm_path = _save_upload(bgm_file, out_dir, "bgm")
    font_path = _save_upload(font_file, out_dir, "custom_font")

    # Normalize the URL text into a list.
    urls = _parse_urls(url_texts)

    # Font: an uploaded font wins, otherwise fall back to a discovered one.
    if font_path and os.path.exists(font_path):
        chosen_font = font_path
        font_dir = os.path.dirname(font_path)
    else:
        chosen_font = find_japanese_font_path()
        font_dir = os.path.dirname(chosen_font) if chosen_font else None

    # Summarize reference documents / URLs (optional, best-effort).
    extra_context = None
    if use_docs and (doc_paths or urls):
        try:
            extra_context = summarize_documents_and_urls(
                doc_paths, urls, model=DEFAULT_MODEL, lang=lang, target_char_len=2000, work_dir=out_dir
            )
        except Exception:
            # Keep going without the extra context, but leave a trace.
            log.warning("Reference summarization failed; continuing without it", exc_info=True)
            extra_context = None

    try:
        # 1) Generate copy / script with the LLM (reflecting the summary).
        assets = generate_marketing_assets(
            product_name=product_name,
            value_props=value_props,
            target=target,
            tone=tone,
            lang=lang,
            seconds=int(seconds),
            model=DEFAULT_MODEL,
            extra_context=extra_context,
        )
        headline = assets["headline"]
        video_script = assets["video_script"]
        thumbnail_text = assets["thumbnail_text"]
        ogp = assets["ogp"]

        # 2) TTS
        audio_path = os.path.join(out_dir, "narration.mp3")
        synthesize_tts_openai(
            text=video_script,
            out_path=audio_path,
            voice=voice,
            model=DEFAULT_TTS_MODEL,
            format="mp3",
        )
        if not os.path.exists(audio_path):
            raise gr.Error("音声生成に失敗しました。")

        # 3) Subtitles (SRT)
        srt_path = os.path.join(out_dir, "captions.srt")
        srt_text = transcribe_to_srt_openai(audio_path=audio_path, model=DEFAULT_WHISPER_MODEL)
        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(srt_text)

        # 4) Background: a user image wins, otherwise render one.
        if bg_img_user and os.path.exists(bg_img_user):
            bg_path = bg_img_user
        else:
            bg_path = os.path.join(out_dir, "bg.png")
            make_background_image(
                out_path=bg_path,
                title=headline,
                subtitle=thumbnail_text,
                lang=lang,
                font_path=chosen_font,
                draw_text=bool(bg_text_draw),
            )

        # 5) Compose the video (honours the subtitle / title toggles).
        video_out = os.path.join(out_dir, "short.mp4")
        compose_video_with_subtitles(
            # A background video, when given, replaces the still image.
            image_path=bg_path if not bg_video_user else None,
            audio_path=audio_path,
            srt_path=srt_path,
            out_path=video_out,
            width=1080,
            height=1920,
            fps=30,
            bg_video_path=bg_video_user,
            logo_path=logo_path,
            logo_pos=logo_position,
            logo_width=int(logo_width_px),
            bgm_path=bgm_path,
            fonts_dir=font_dir,
            include_subtitles=bool(show_subtitles),
            title_text=headline if bool(show_title_overlay) else None,
            title_pos=title_position,
            title_fontfile=chosen_font,
            title_fontsize=int(title_fontsize),
            title_box=True,
        )
        if not os.path.exists(video_out):
            raise gr.Error("動画合成に失敗しました。")

        # 6) OGP image
        ogp_img_path = os.path.join(out_dir, "ogp.png")
        # Context managers release the underlying file handles promptly.
        with Image.open(bg_path) as bg_src:
            base = bg_src.convert("RGBA").resize((1200, 630))
        if logo_path and os.path.exists(logo_path):
            try:
                with Image.open(logo_path) as logo_src:
                    logo = logo_src.convert("RGBA")
                # Scale the logo width from the 1080px video to the 1200px canvas.
                w = int(1200 * (logo_width_px / 1080))
                h = int(logo.height * (w / logo.width))
                logo = logo.resize((w, h), Image.LANCZOS)
                x = 1200 - w - 32 if "right" in logo_position else 32
                y = 32 if "top" in logo_position else 630 - h - 32
                base.alpha_composite(logo, (x, y))
            except Exception:
                # The logo is optional decoration: log and ship without it.
                log.warning("Could not composite the logo onto the OGP image", exc_info=True)
        base.convert("RGB").save(ogp_img_path)

        return (
            headline,
            video_script,
            thumbnail_text,
            json.dumps(ogp, ensure_ascii=False, indent=2),
            audio_path,
            srt_path,
            video_out,
            ogp_img_path,
        )
    except gr.Error:
        raise
    except Exception as e:
        # Chain the cause so the server-side traceback shows the real failure.
        raise gr.Error(f"処理中にエラー: {type(e).__name__}: {e}") from e
# Gradio UI: the first column holds the inputs and the run button, the second
# column holds the outputs; the button runs run_pipeline.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation - in particular, confirm that gr.Examples and
# run_btn.click belong at the Blocks level rather than inside a column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🎬 マルチモーダル要素→素材ジェネレーター
参照資料(PDF/TXT/DOCX/URL)を要約してコピーに反映し、
**字幕のON/OFF** と **動画内タイトル(ヘッドライン)テロップのON/OFF** を切替可能。
ロゴ/背景/BGM/フォントを使い、日本語フォントで字幕焼き込みの縦型ショート動画を生成します。
""")
    with gr.Row():
        with gr.Column():
            # Source data
            product_name = gr.Textbox(label="製品名")
            value_props = gr.Textbox(label="訴求点(カンマ区切り推奨)")
            target = gr.Textbox(label="ターゲット")
            tone = gr.Textbox(label="トーン", value="信頼性とスピード感")
            lang = gr.Dropdown(["ja", "en"], value="ja", label="言語")
            seconds = gr.Slider(8, 60, value=20, step=1, label="動画尺(秒・台本目安)")
            voice = gr.Dropdown(VOICE_CHOICES, value="alloy", label="ナレーション音声(OpenAI TTS)")
            # Reference documents / URLs
            doc_files = gr.File(label="参照資料(PDF/TXT/DOCX 複数可)", file_count="multiple", file_types=["file"])
            url_texts = gr.Textbox(label="参照URL(改行/スペース/カンマ区切りで複数可)", lines=3, placeholder="https://example.com/page1\nhttps://example.com/page2")
            use_docs = gr.Checkbox(value=True, label="資料/URLを要約してコピーに反映する(推奨)")
            # Compositing assets
            logo_file = gr.File(label="ロゴ画像(PNG/JPG/SVG対応)", file_types=["image"])
            bg_image_file = gr.File(label="背景画像(任意・PNG/JPG)", file_types=["image"])
            bg_video_file = gr.File(label="背景動画(任意・mp4/mov など)", file_types=["video"])
            bgm_file = gr.File(label="BGM(任意・mp3/wav)", file_types=["audio"])
            font_file = gr.File(label="カスタムフォント(任意・TTF/OTF)", file_types=["file"])
            logo_position = gr.Dropdown(["top-left", "top-right", "bottom-left", "bottom-right"], value="top-right", label="ロゴ位置")
            logo_width_px = gr.Slider(64, 800, value=240, step=1, label="ロゴ幅(px)")
            # New toggles
            show_subtitles = gr.Checkbox(value=True, label="字幕を焼き込む")
            show_title_overlay = gr.Checkbox(value=False, label="動画内にタイトル(ヘッドライン)テロップを表示")
            title_position = gr.Dropdown(["top", "center", "bottom"], value="top", label="タイトル位置")
            title_fontsize = gr.Slider(24, 120, value=64, step=1, label="タイトルフォントサイズ")
            bg_text_draw = gr.Checkbox(value=True, label="内製背景にテキストを描く(headline/sub)")
            run_btn = gr.Button("生成する", variant="primary")
        with gr.Column():
            headline = gr.Textbox(label="見出し/ヘッドライン")
            video_script = gr.Textbox(label="短尺動画 用 台本", lines=12)
            thumbnail_text = gr.Textbox(label="サムネ文言")
            ogp_json = gr.Code(label="OGP JSON(title/description/alt)", language="json")
            audio = gr.File(label="音声(mp3)")
            srt = gr.File(label="字幕(srt)")
            video = gr.File(label="動画(mp4)")
            ogp_img = gr.File(label="OGP画像(png)")
    # Examples cover the non-file inputs only; the value order in the example
    # row must match the order of the `inputs` list below it.
    gr.Examples(
        examples=[
            [
                EXAMPLE["product_name"], EXAMPLE["value_props"], EXAMPLE["target"], EXAMPLE["tone"], EXAMPLE["lang"],
                EXAMPLE["seconds"], EXAMPLE["voice"],
                "", True, # url_texts, use_docs
                "top-right", 240, # logo_position, logo_width_px
                True, False, "top", 64, # show_subtitles, show_title_overlay, title_position, title_fontsize
                True # bg_text_draw
            ]
        ],
        inputs=[
            product_name, value_props, target, tone, lang, seconds, voice,
            url_texts, use_docs,
            logo_position, logo_width_px,
            show_subtitles, show_title_overlay, title_position, title_fontsize, bg_text_draw
        ],
        label="クイックフィル(資料/URL/ファイル類は任意でアップロードしてください)",
        cache_examples=False,
    )
    # The `inputs` order must match run_pipeline's positional parameters and
    # the `outputs` order must match the tuple it returns.
    run_btn.click(
        fn=run_pipeline,
        inputs=[
            product_name, value_props, target, tone, lang, seconds, voice,
            doc_files, url_texts, use_docs,
            logo_file, bg_image_file, bg_video_file, bgm_file, font_file, logo_position, logo_width_px,
            show_subtitles, show_title_overlay, title_position, title_fontsize, bg_text_draw
        ],
        outputs=[headline, video_script, thumbnail_text, ogp_json, audio, srt, video, ogp_img],
        api_name="generate",
    )
# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()