Corin1998 commited on
Commit
86e8ae4
·
verified ·
1 Parent(s): 62a7fcf

Upload 9 files

Browse files
Files changed (9) hide show
  1. README.md +26 -11
  2. app.py +144 -0
  3. generator.py +67 -0
  4. packages.txt +1 -0
  5. prompts.py +33 -0
  6. requirements.txt +6 -0
  7. tts_subtitles.py +42 -0
  8. ui.py +114 -0
  9. video.py +111 -0
README.md CHANGED
@@ -1,12 +1,27 @@
1
- ---
2
- title: Multimodal Elements To Assets
3
- emoji: 🐢
4
- colorFrom: yellow
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.44.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # マルチモーダル “要素→素材” ジェネレーター(Hugging Face Space)
 
 
 
 
 
 
 
 
 
2
 
3
+ 製品名・訴求点・ターゲットを入力すると、
4
+ - 見出し
5
+ - 短尺動画用台本
6
+ - サムネ文言
7
+ - OGP文言(title/description/alt)
8
+ を **OpenAI LLM** で構造化生成します。さらに **OpenAI TTS → Whisper(SRT) → ffmpeg** で **縦型ショート動画** を自動合成します。
9
+
10
+ ## 🚀 セットアップ(Hugging Face Spaces)
11
+ 1. 新規 Space を作成(Python)。
12
+ 2. このリポジトリのファイルをアップロード。
13
+ 3. **Secrets** に以下を追加:
14
+ - `OPENAI_API_KEY`: OpenAIのAPIキー
15
+ - (任意)`OPENAI_LLM_MODEL` デフォルト `gpt-4o-mini`
16
+ - (任意)`OPENAI_TTS_MODEL` デフォルト `tts-1`
17
+ - (任意)`OPENAI_WHISPER_MODEL` デフォルト `whisper-1`
18
+ - (任意)`HUGGINGFACE_TOKEN`(pyannote利用時)
19
+ 4. `packages.txt` により ffmpeg が自動導入されます。
20
+ 5. Space を起動。UIから入力し「生成する」。
21
+
22
+ ## 🧩 ローカル実行
23
+ ```bash
24
+ python -m venv .venv && source .venv/bin/activate
25
+ pip install -r requirements.txt
26
+ cp .env.example .env # APIキーを設定
27
+ python app.py
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import tempfile
4
+ import gradio as gr
5
+ from generator import generate_marketing_assets
6
+ from tts_subtitles import synthesize_tts_openai, transcribe_to_srt_openai
7
+ from video import compose_video_with_subtitles, make_background_image
8
+ from PIL import Image
9
+
10
# Model names are overridable through the environment (Spaces Secrets);
# the defaults match the README.
DEFAULT_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o-mini")
DEFAULT_TTS_MODEL = os.getenv("OPENAI_TTS_MODEL", "tts-1")
DEFAULT_WHISPER_MODEL = os.getenv("OPENAI_WHISPER_MODEL", "whisper-1")

# Narration voice ids offered in the UI dropdown (OpenAI TTS voices).
VOICE_CHOICES = ["alloy", "verse", "breeze", "bright", "calm"]

# Pre-filled sample shown in the gr.Examples widget.
EXAMPLE = {
    "product_name": "FLDapp HbA1c測定",
    "value_props": "10秒で測定/非接触・非侵襲/酸素飽和度と脈拍の同時測定",
    "target": "30-50代の健康意識が高いビジネスパーソン",
    "tone": "信頼性とスピード感",
    "lang": "ja",
    "seconds": 30,
    "voice": "alloy",
}
25
+
26
def run_pipeline(product_name, value_props, target, tone, lang, seconds, voice):
    """Generate marketing assets and compose a vertical short video.

    Pipeline: LLM (structured JSON) -> TTS narration -> Whisper SRT captions
    -> background image -> ffmpeg composition -> OGP image.

    Returns a tuple matching the Gradio outputs:
    (headline, video_script, thumbnail_text, ogp_json_str,
     audio_path, srt_path, video_path, ogp_image_path)

    Raises:
        gr.Error: if OPENAI_API_KEY is not configured.
    """
    if not os.getenv("OPENAI_API_KEY"):
        raise gr.Error("OPENAI_API_KEY が設定されていません(SpacesのSecretsに追加)。")

    # 1) Generate the structured asset bundle as JSON via the LLM.
    assets = generate_marketing_assets(
        product_name=product_name,
        value_props=value_props,
        target=target,
        tone=tone,
        lang=lang,
        seconds=int(seconds),
        model=DEFAULT_MODEL,
    )

    headline = assets["headline"]
    video_script = assets["video_script"]
    thumbnail_text = assets["thumbnail_text"]
    ogp = assets["ogp"]

    # BUG FIX: the original wrapped everything in
    # `with tempfile.TemporaryDirectory()`, which deletes every generated file
    # the moment this function returns -- before Gradio can serve the returned
    # paths to the browser. Use a persistent per-run directory instead.
    td = tempfile.mkdtemp(prefix="assets_")

    # 2) TTS narration -> 3) Whisper subtitles (SRT).
    audio_path = os.path.join(td, "narration.mp3")
    synthesize_tts_openai(
        text=video_script,
        out_path=audio_path,
        voice=voice,
        model=DEFAULT_TTS_MODEL,
        format="mp3",
    )

    srt_path = os.path.join(td, "captions.srt")
    srt_text = transcribe_to_srt_openai(
        audio_path=audio_path,
        model=DEFAULT_WHISPER_MODEL,
    )
    with open(srt_path, "w", encoding="utf-8") as f:
        f.write(srt_text)

    # 4) Background image (also reused for the thumbnail / OGP image).
    bg_path = os.path.join(td, "bg.png")
    make_background_image(
        out_path=bg_path,
        title=headline,
        subtitle=thumbnail_text,
        lang=lang,
    )

    # 5) Compose the vertical (1080x1920) short video with ffmpeg.
    video_out = os.path.join(td, "short.mp4")
    compose_video_with_subtitles(
        image_path=bg_path,
        audio_path=audio_path,
        srt_path=srt_path,
        out_path=video_out,
        width=1080,
        height=1920,
    )

    # 6) OGP image (landscape 1200x630), derived from the background.
    ogp_img_path = os.path.join(td, "ogp.png")
    img = Image.open(bg_path).convert("RGB").resize((1200, 630))
    img.save(ogp_img_path)

    return (
        headline,
        video_script,
        thumbnail_text,
        json.dumps(ogp, ensure_ascii=False, indent=2),
        audio_path,
        srt_path,
        video_out,
        ogp_img_path,
    )
100
+
101
# ---------------------------------------------------------------------------
# Gradio UI wiring: inputs on the left, generated assets on the right.
# ---------------------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎬 マルチモーダル要素→素材ジェネレーター
    入力(製品名 / 訴求点 / ターゲット)から **見出し・短尺動画用台本・サムネ文言・OGP文言** を生成し、
    さらに **TTS音声 + Whisper字幕 + ffmpeg** で **縦型ショート動画** を自動合成します。
    """)

    with gr.Row():
        with gr.Column():
            # Input controls.
            product_name = gr.Textbox(label="製品名")
            value_props = gr.Textbox(label="訴求点(カンマ区切り推奨)")
            target = gr.Textbox(label="ターゲット")
            tone = gr.Textbox(label="トーン(例:信頼性/スピード感/ワクワク)", value="信頼性とスピード感")
            lang = gr.Dropdown(["ja", "en"], value="ja", label="言語")
            seconds = gr.Slider(8, 45, value=20, step=1, label="動画尺(秒)")
            voice = gr.Dropdown(VOICE_CHOICES, value="alloy", label="ナレーション音声(OpenAI TTS)")
            run_btn = gr.Button("生成する", variant="primary")

        with gr.Column():
            # Generated outputs.
            headline = gr.Textbox(label="見出し/ヘッドライン")
            video_script = gr.Textbox(label="短尺動画 用 台本", lines=10)
            thumbnail_text = gr.Textbox(label="サムネ文言")
            ogp_json = gr.Code(label="OGP JSON(title/description/alt)", language="json")
            audio = gr.File(label="音声(mp3)")
            srt = gr.File(label="字幕(srt)")
            video = gr.File(label="動画(mp4)")
            ogp_img = gr.File(label="OGP画像(png)")

    # Shared wiring reused by both the Examples widget and the run button.
    pipeline_inputs = [product_name, value_props, target, tone, lang, seconds, voice]
    pipeline_outputs = [headline, video_script, thumbnail_text, ogp_json, audio, srt, video, ogp_img]

    gr.Examples(
        examples=[
            [EXAMPLE[k] for k in ("product_name", "value_props", "target", "tone", "lang", "seconds", "voice")],
        ],
        inputs=pipeline_inputs,
    )

    run_btn.click(
        fn=run_pipeline,
        inputs=pipeline_inputs,
        outputs=pipeline_outputs,
        api_name="generate",
    )

if __name__ == "__main__":
    demo.launch()
generator.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import json
import re
import requests
# BUG FIX: the original imported the misspelled name "build_strctured_prompt",
# which prompts.py never defines, so this module failed at import time.
from prompts import build_structured_prompt

# Backward-compatible alias in case any caller still uses the old misspelling.
build_strctured_prompt = build_structured_prompt

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

# BUG FIX: the Authorization value was "Bearer{key}" -- the HTTP bearer scheme
# requires a space between "Bearer" and the token.
HEADERS = {
    "Authorization": f"Bearer {OPENAI_API_KEY}",
    "Content-Type": "application/json",
}

# System prompt pinning the model to strict-JSON output.
# (Typo fixes in the prompt text: "Japanses" -> "Japanese"; missing space.)
SYSTEM = (
    "You are a bilingual Japanese/English marketing copywriter. "
    "Return results ONLY as strict JSON with UTF-8, never prose."
)
19
+
20
def _post_chat(messages, model: str, response_format_json: bool = True):
    """POST to the OpenAI chat-completions endpoint and return the reply text.

    Args:
        messages: chat messages in OpenAI format ({"role", "content"} dicts).
        model: model identifier, e.g. "gpt-4o-mini".
        response_format_json: if True, request strict JSON output mode.

    Returns:
        The assistant message content (str).

    Raises:
        requests.HTTPError: on a non-2xx API response.
    """
    url = f"{OPENAI_BASE_URL}/chat/completions"
    payload = {
        "model": model,
        # BUG FIX: the key was misspelled "messsages", so the request carried
        # no messages at all and the API rejected it.
        "messages": messages,
        "temperature": 0.7,
    }
    if response_format_json:
        payload["response_format"] = {"type": "json_object"}
    r = requests.post(url, headers=HEADERS, json=payload, timeout=120)
    r.raise_for_status()
    data = r.json()
    return data["choices"][0]["message"]["content"]
33
+
34
+ def _safe_json_extract(text: str):
35
+ try:
36
+ return json.loads(text)
37
+ except Exception:
38
+ m = re.search(r"\{[\s\S]*\}",text)
39
+ if m:
40
+ return json.loads(m.group(0))
41
+ raise
42
+
43
def generate_marketing_assets(product_name: str, value_props: str, target: str, tone: str, lang: str, seconds: int, model: str):
    """Generate the structured marketing-asset bundle via the LLM.

    Returns a dict with keys "headline", "video_script", "thumbnail_text" and
    "ogp" (itself a dict whose "title"/"description"/"alt" are defaulted to ""
    when the model omits them).

    Raises:
        ValueError: if the LLM response is missing a required top-level key.
    """
    # Local import with the correct spelling; the original module-level import
    # used a misspelled name ("build_strctured_prompt") that prompts.py never
    # defined.
    from prompts import build_structured_prompt

    user_prompt = build_structured_prompt(
        product_name=product_name,
        value_props=value_props,
        target=target,
        tone=tone,
        lang=lang,
        seconds=seconds,
    )

    messages = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": user_prompt},
    ]

    raw = _post_chat(messages, model=model, response_format_json=True)
    data = _safe_json_extract(raw)

    required = ["headline", "video_script", "thumbnail_text", "ogp"]
    for k in required:
        if k not in data:
            raise ValueError(f"LLM JSON missing key:{k}")

    # BUG FIX: the defaults list said "little" instead of "title", so a
    # missing OGP title was never filled in.
    for k in ["title", "description", "alt"]:
        data["ogp"].setdefault(k, "")
    return data
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
prompts.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from textwrap import dedent

def build_structured_prompt(product_name: str, value_props: str, target: str, tone: str, lang: str, seconds: int) -> str:
    """Build the user prompt that constrains the LLM to a strict-JSON reply.

    Embeds the product inputs and the requested video length, and spells out
    the exact JSON schema the model must return.
    """
    template = f"""
    You are a top-tier performance marketer and short-form video copywriter.
    Language: {lang}

    Inputs:
    - Product: {product_name}
    - Value Props (comma-separated): {value_props}
    - Target: {target}
    - Tone: {tone}
    - Target video length: {seconds} seconds

    Task:
    1) Craft a high-converting headline.
    2) Write a short-form video script designed for {seconds}s. Use 3–5 concise scenes, strong hook in the first 2s. Keep narrator-friendly phrasing.
    3) Produce a punchy thumbnail text (<= 14 chars in ja / <= 6 words in en).
    4) Provide OGP fields.

    Return STRICT JSON with the following schema ONLY (no explanations):
    {{
      "headline": string,
      "video_script": string,  # multi-line; include scene markers like [HOOK], [SCENE2], [CTA]
      "thumbnail_text": string,
      "ogp": {{
        "title": string,
        "description": string,
        "alt": string
      }}
    }}
    """
    return dedent(template)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.36.0
2
+ requests>=2.31.0
3
+ Pillow>=10.3.0
4
+ python-dotenv>=1.0.1
5
+ # Optional (heavy):
6
+ # pyannote.audio>=3.1.0
tts_subtitles.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import requests

# OpenAI credentials / endpoint, read from the environment at import time.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

# Bearer-token header shared by both endpoints in this module.
HEADERS_AUTH = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
8
+
9
def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model: str = "tts-1", format: str = "mp3"):
    """Synthesize *text* to an audio file with the OpenAI TTS endpoint.

    Args:
        text: narration text to speak.
        out_path: destination path for the audio bytes.
        voice: OpenAI TTS voice id (e.g. "alloy").
        model: TTS model name.
        format: audio container ("mp3", "wav", ...). The parameter name is
            kept for backward compatibility even though it shadows the builtin.

    Returns:
        out_path, for convenient chaining.

    Raises:
        requests.HTTPError: on a non-2xx API response.
    """
    url = f"{OPENAI_BASE_URL}/audio/speech"
    payload = {
        "model": model,
        "voice": voice,
        "input": text,
        # BUG FIX: the OpenAI speech API parameter is "response_format",
        # not "format" -- the original key was not honored by the endpoint.
        "response_format": format,
    }
    headers = {**HEADERS_AUTH, "Content-Type": "application/json"}
    r = requests.post(url, headers=headers, json=payload, timeout=300)
    r.raise_for_status()
    with open(out_path, "wb") as f:
        f.write(r.content)
    return out_path
23
+
24
def transcribe_to_srt_openai(audio_path: str, model: str = "whisper-1") -> str:
    """Transcribe an audio file to SRT subtitle text via the OpenAI
    transcriptions (Whisper) endpoint.

    Returns the raw SRT document as a string.

    Raises:
        requests.HTTPError: on a non-2xx API response.
    """
    url = f"{OPENAI_BASE_URL}/audio/transcriptions"
    form_fields = {"model": model, "response_format": "srt"}
    with open(audio_path, "rb") as audio_file:
        response = requests.post(
            url,
            headers=HEADERS_AUTH,
            files={"file": audio_file},
            data=form_fields,
            timeout=600,
        )
    response.raise_for_status()
    return response.text
33
+
34
+ # --- Optional: pyannote による VAD/クリーンアップ(重いモデルのためデフォルト無効) ---
35
+ # from pyannote.audio import Pipeline
36
+ # def refine_srt_with_pyannote(audio_path: str, srt_text: str) -> str:
37
+ # token = os.getenv("HUGGINGFACE_TOKEN")
38
+ # if not token:
39
+ # return srt_text
40
+ # pipeline = Pipeline.from_pretrained("pyannote/segmentation", use_auth_token=token)
41
+ # # ここで音声区間検出→SRTのタイミングを補正する処理を実装(省略)
42
+ # return srt_text
ui.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import httpx
3
+ import json
4
+
5
def build_ui(fastapi_app):
    """Build the Gradio UI that drives the FastAPI endpoints of *fastapi_app*.

    BUG FIX: the original created ``httpx.AsyncClient()`` with no base_url and
    then requested relative paths like ``/api/...`` -- httpx rejects that with
    ``UnsupportedProtocol`` (a relative URL cannot be resolved without a
    base_url), and the ``fastapi_app`` argument was never used.  Requests are
    now routed straight to the in-process ASGI app via ``httpx.ASGITransport``,
    so no real socket is involved.
    """
    def _client() -> httpx.AsyncClient:
        # Fresh in-process client per handler call; the base_url host is a
        # dummy, required only so httpx can resolve the relative paths.
        return httpx.AsyncClient(
            transport=httpx.ASGITransport(app=fastapi_app),
            base_url="http://app.internal",
        )

    def _csv_to_keywords(keywords_csv: str) -> list:
        # "a, b ,c" -> ["a", "b", "c"]; empty items are dropped.
        return [k.strip() for k in keywords_csv.split(",") if k.strip()]

    async def do_summarize(platforms, keywords_csv, brand, limit, language):
        """Fetch recent items and an LLM summary for the given filters."""
        async with _client() as client:
            r = await client.post("/api/summarize_trends", json={
                "platforms": platforms, "keywords": _csv_to_keywords(keywords_csv),
                "brand": brand, "limit": int(limit), "language": language,
            })
        r.raise_for_status()
        data = r.json()
        return json.dumps(data["items"], ensure_ascii=False, indent=2), data["summary"]

    async def do_generate_plan(brand, language, platforms, keywords_csv, start_date, tone, cta, image_style_hint):
        """Generate a one-week post plan and persist it as drafts."""
        async with _client() as client:
            r = await client.post("/api/generate_week_plan", json={
                "brand": brand, "language": language, "platforms": platforms,
                "keywords": _csv_to_keywords(keywords_csv),
                "start_date": (start_date or None),
                "tone": tone, "cta": cta, "image_style_hint": image_style_hint,
            })
        r.raise_for_status()
        return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def do_list_calendar():
        """Return the scheduling calendar as pretty-printed JSON."""
        async with _client() as client:
            r = await client.get("/api/calendar")
        r.raise_for_status()
        return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def do_approve(post_id):
        """Approve the draft post with the given id."""
        async with _client() as client:
            r = await client.post(f"/api/approve_post/{int(post_id)}")
        r.raise_for_status()
        return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def do_schedule(post_id, iso):
        """Schedule an approved post for the given ISO timestamp."""
        async with _client() as client:
            r = await client.post(f"/api/schedule_post/{int(post_id)}", json={"scheduled_at": iso})
        r.raise_for_status()
        return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def save_keywords(keywords_csv):
        """Persist the watch-keyword list."""
        async with _client() as client:
            r = await client.post("/api/keywords", json={"keywords": _csv_to_keywords(keywords_csv)})
        r.raise_for_status()
        return "保存しました"

    async def load_keywords():
        """Load the persisted watch-keyword list as a comma-joined string."""
        async with _client() as client:
            r = await client.get("/api/keywords")
        r.raise_for_status()
        data = r.json()
        return ", ".join(data.get("keywords", []))

    with gr.Blocks(title="SNS運用AIライト") as demo:
        gr.Markdown("## SNS運用AIライト — 競合/トレンド要約 → 1週間案 → 承認 → 予約投稿")

        with gr.Tab("1) トレンド要約"):
            platforms = gr.CheckboxGroup(choices=["x","instagram"], value=["x","instagram"], label="対象プラットフォーム")
            keywords = gr.Textbox(label="監視キーワード(カンマ区切り)", placeholder="自社名, 競合名, 業界ワード")
            brand = gr.Textbox(label="ブランド名(任意)", placeholder="HitC Inc. など")
            limit = gr.Slider(5, 50, step=1, value=20, label="取得件数(疑似/本API対応)")
            language = gr.Dropdown(["ja","en"], value="ja", label="出力言語")
            btn = gr.Button("トレンド要約を実行")
            items_json = gr.Code(label="取得結果(JSON)", language="json")
            summary = gr.Textbox(label="要約", lines=12)
            btn.click(do_summarize, [platforms, keywords, brand, limit, language], [items_json, summary])

        with gr.Tab("2) 1週間の投稿案生成"):
            brand2 = gr.Textbox(label="ブランド名", placeholder="HitC Inc.")
            language2 = gr.Dropdown(["ja","en"], value="ja", label="言語")
            platforms2 = gr.CheckboxGroup(choices=["x","instagram"], value=["x","instagram"], label="プラットフォーム")
            keywords2 = gr.Textbox(label="キーワード(任意/カンマ区切り)")
            start_date = gr.Textbox(label="開始日(ISO, 任意)", placeholder="2025-09-01")
            tone = gr.Textbox(label="トーン", value="プロフェッショナルで親しみやすい")
            cta = gr.Textbox(label="CTA", value="詳細はこちら")
            imgstyle = gr.Textbox(label="画像ラフのスタイルヒント", value="ミニマル、端的なタイポ、ブランドカラー意識")
            btn2 = gr.Button("投稿案を生成 & DB保存")
            posts_json = gr.Code(label="投稿案(DBにdraft保存)", language="json")
            btn2.click(do_generate_plan, [brand2, language2, platforms2, keywords2, start_date, tone, cta, imgstyle], [posts_json])

        with gr.Tab("3) 承認・予約・カレンダー"):
            gr.Markdown("承認→予約→公開(APScheduler)が動きます。")
            post_id = gr.Number(label="Post ID")
            approve_btn = gr.Button("承認する")
            schedule_iso = gr.Textbox(label="予約日時(ISO, 例: 2025-09-01T09:00:00Z)")
            schedule_btn = gr.Button("予約に登録")
            out = gr.Code(label="レスポンス", language="json")
            approve_btn.click(do_approve, [post_id], [out])
            schedule_btn.click(do_schedule, [post_id, schedule_iso], [out])

            cal_btn = gr.Button("カレンダー取得")
            cal_json = gr.Code(label="カレンダー", language="json")
            cal_btn.click(do_list_calendar, [], [cal_json])

        with gr.Tab("設定"):
            kw_in = gr.Textbox(label="監視キーワード(カンマ区切り)")
            load_btn = gr.Button("読み込み")
            save_btn = gr.Button("保存")
            msg = gr.Textbox(label="メッセージ")
            load_btn.click(load_keywords, [], [kw_in])
            save_btn.click(save_keywords, [kw_in], [msg])

    return demo
video.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ from PIL import Image, ImageDraw, ImageFont
4
+
5
+ def _run(cmd: list):
6
+ p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
7
+ if p.returncode != 0:
8
+ raise RuntimeError(p.stderr[:2000])
9
+ return p.stdout
10
+
11
def _audio_duration_seconds(audio_path: str) -> float:
    """Return the duration of *audio_path* in seconds, as reported by ffprobe."""
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    return float(_run(probe_cmd).strip())
18
+
19
def _wrap_text(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont, max_width: int) -> str:
    """Greedy character-level wrap of *text* to at most *max_width* pixels
    per line, measured with ``draw.textlength``.

    Character-level (not word-level) breaking is deliberate: Japanese text
    has no spaces to break on.  Returns the wrapped text joined by newlines.
    """
    lines = []
    current = ""
    for char in text:
        candidate = current + char
        if draw.textlength(candidate, font=font) <= max_width:
            current = candidate
            continue
        # Candidate too wide: flush the accumulated line (if any) and
        # start a new one with this character.
        if current:
            lines.append(current)
        current = char
    if current:
        lines.append(current)
    return "\n".join(lines)
32
+
33
def make_background_image(out_path: str, title: str, subtitle: str, lang: str = "ja", width: int = 1080, height: int = 1920):
    """Render a simple dark background with centred title/subtitle text.

    Used as the video background and (resized by the caller) as the OGP
    image.  ``lang`` is currently unused; it is kept for interface
    compatibility.  Returns out_path.
    """
    img = Image.new("RGB", (width, height), (20, 24, 32))
    draw = ImageDraw.Draw(img)

    # DejaVu ships on most Linux images (incl. HF Spaces); fall back to the
    # tiny builtin bitmap font rather than crash when it is missing.
    try:
        font_title = ImageFont.truetype("DejaVuSans-Bold.ttf", size=72)
        font_sub = ImageFont.truetype("DejaVuSans.ttf", size=48)
    except Exception:
        font_title = ImageFont.load_default()
        font_sub = ImageFont.load_default()

    margin = 40
    max_w = width - margin * 2

    # Truncate pathological inputs, then wrap to the drawable width.
    title_wrapped = _wrap_text(draw, title[:100], font_title, max_w)
    sub_wrapped = _wrap_text(draw, subtitle[:80], font_sub, max_w)

    def _text_size(wrapped, font):
        # multiline_textbbox returns (left, top, right, bottom).
        left, top, right, bottom = draw.multiline_textbbox((0, 0), wrapped, font=font, align="center")
        return right - left, bottom - top

    title_w, title_h = _text_size(title_wrapped, font_title)
    sub_w, sub_h = _text_size(sub_wrapped, font_sub)

    # Vertically centre the title + 20px gap + subtitle stack.
    y_title = (height - (title_h + 20 + sub_h)) // 2
    y_sub = y_title + title_h + 20

    shadow = (0, 0, 0)
    color = (255, 255, 255)

    def _draw_with_shadow(x, y, wrapped, font):
        # Cheap drop shadow: two dark offset passes beneath the white text.
        for dx, dy in ((2, 2), (1, 1)):
            draw.multiline_text((x + dx, y + dy), wrapped, fill=shadow, font=font, align="center")
        draw.multiline_text((x, y), wrapped, fill=color, font=font, align="center")

    _draw_with_shadow((width - title_w) // 2, y_title, title_wrapped, font_title)
    _draw_with_shadow((width - sub_w) // 2, y_sub, sub_wrapped, font_sub)

    img.save(out_path)
    return out_path
75
+
76
def compose_video_with_subtitles(image_path: str, audio_path: str, srt_path: str, out_path: str, width: int = 1080, height: int = 1920, fps: int = 30):
    """Compose a still image + narration audio + SRT captions into an mp4.

    Two ffmpeg passes: (1) loop the image for the audio's duration and mux the
    narration into a temp video; (2) burn the subtitles in (requires an ffmpeg
    build with libass).  Returns out_path.

    Raises:
        RuntimeError: if ffprobe/ffmpeg fails (propagated from _run).
    """
    duration = _audio_duration_seconds(audio_path)

    # BUG FIX: the original built the temp path with
    # out_path.replace(".mp4", "_temp.mp4"), which rewrites EVERY ".mp4"
    # occurrence in the path and degenerates to temp == out_path when the
    # suffix is absent (ffmpeg would then read and write the same file).
    root, ext = os.path.splitext(out_path)
    temp_video = f"{root}_temp{ext or '.mp4'}"

    try:
        # Pass 1: image -> video, muxed with the narration audio.
        _run([
            "ffmpeg", "-y",
            "-loop", "1", "-i", image_path,
            "-i", audio_path,
            "-c:v", "libx264",
            "-t", f"{duration:.2f}",
            "-pix_fmt", "yuv420p",
            "-vf", f"scale={width}:{height},fps={fps}",
            "-c:a", "aac", "-shortest",
            temp_video,
        ])

        # Pass 2: burn in the subtitles.  Escape the path for the ffmpeg
        # filter syntax; Linux (Spaces) is assumed -- Windows would need
        # different escaping.
        srt_escaped = srt_path.replace("\\", "\\\\").replace(":", r"\:").replace("'", r"\\'")
        _run([
            "ffmpeg", "-y",
            "-i", temp_video,
            "-vf", f"subtitles='{srt_escaped}'",
            "-c:a", "copy",
            out_path,
        ])
    finally:
        # Best-effort cleanup of the intermediate file (now also on failure).
        try:
            os.remove(temp_video)
        except OSError:
            pass

    return out_path