Upload 9 files
Browse files- README.md +26 -11
- app.py +144 -0
- generator.py +67 -0
- packages.txt +1 -0
- prompts.py +33 -0
- requirements.txt +6 -0
- tts_subtitles.py +42 -0
- ui.py +114 -0
- video.py +111 -0
README.md
CHANGED
|
@@ -1,12 +1,27 @@
|
|
| 1 |
-
|
| 2 |
-
title: Multimodal Elements To Assets
|
| 3 |
-
emoji: 🐢
|
| 4 |
-
colorFrom: yellow
|
| 5 |
-
colorTo: yellow
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 5.44.1
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# マルチモーダル “要素→素材” ジェネレーター(Hugging Face Space)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
+
製品名・訴求点・ターゲットを入力すると、
|
| 4 |
+
- 見出し
|
| 5 |
+
- 短尺動画用台本
|
| 6 |
+
- サムネ文言
|
| 7 |
+
- OGP文言(title/description/alt)
|
| 8 |
+
を **OpenAI LLM** で構造化生成します。さらに **OpenAI TTS → Whisper(SRT) → ffmpeg** で **縦型ショート動画** を自動合成します。
|
| 9 |
+
|
| 10 |
+
## 🚀 セットアップ(Hugging Face Spaces)
|
| 11 |
+
1. 新規 Space を作成(Python)。
|
| 12 |
+
2. このリポジトリのファイルをアップロード。
|
| 13 |
+
3. **Secrets** に以下を追加:
|
| 14 |
+
- `OPENAI_API_KEY`: OpenAIのAPIキー
|
| 15 |
+
- (任意)`OPENAI_LLM_MODEL` デフォルト `gpt-4o-mini`
|
| 16 |
+
- (任意)`OPENAI_TTS_MODEL` デフォルト `tts-1`
|
| 17 |
+
- (任意)`OPENAI_WHISPER_MODEL` デフォルト `whisper-1`
|
| 18 |
+
- (任意)`HUGGINGFACE_TOKEN`(pyannote利用時)
|
| 19 |
+
4. `packages.txt` により ffmpeg が自動導入されます。
|
| 20 |
+
5. Space を起動。UIから入力し「生成する」。
|
| 21 |
+
|
| 22 |
+
## 🧩 ローカル実行
|
| 23 |
+
```bash
|
| 24 |
+
python -m venv .venv && source .venv/bin/activate
|
| 25 |
+
pip install -r requirements.txt
|
| 26 |
+
cp .env.example .env # APIキーを設定
|
| 27 |
+
python app.py
|
app.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tempfile
|
| 4 |
+
import gradio as gr
|
| 5 |
+
from generator import generate_marketing_assets
|
| 6 |
+
from tts_subtitles import synthesize_tts_openai, transcribe_to_srt_openai
|
| 7 |
+
from video import compose_video_with_subtitles, make_background_image
|
| 8 |
+
from PIL import Image
|
| 9 |
+
|
| 10 |
+
# Model names are overridable via environment variables (Spaces Secrets);
# sensible OpenAI defaults are used otherwise.
DEFAULT_MODEL = os.getenv("OPENAI_LLM_MODEL", "gpt-4o-mini")
DEFAULT_TTS_MODEL = os.getenv("OPENAI_TTS_MODEL", "tts-1")
DEFAULT_WHISPER_MODEL = os.getenv("OPENAI_WHISPER_MODEL", "whisper-1")

# Voice identifiers offered in the UI dropdown (forwarded to OpenAI TTS).
# NOTE(review): only some of these look like documented OpenAI TTS voices —
# verify against the current API voice list.
VOICE_CHOICES = ["alloy", "verse", "breeze", "bright", "calm"]

# Prefilled example shown in the gr.Examples row below.
EXAMPLE = {
    "product_name": "FLDapp HbA1c測定",
    "value_props": "10秒で測定/非接触・非侵襲/酸素飽和度と脈拍の同時測定",
    "target": "30-50代の健康意識が高いビジネスパーソン",
    "tone": "信頼性とスピード感",
    "lang": "ja",
    "seconds": 30,
    "voice": "alloy",
}
|
| 25 |
+
|
| 26 |
+
def run_pipeline(product_name, value_props, target, tone, lang, seconds, voice):
    """End-to-end pipeline: LLM assets -> TTS -> Whisper SRT -> ffmpeg video.

    Returns a tuple in the order of the Gradio outputs:
    (headline, video_script, thumbnail_text, ogp_json_str,
     audio_path, srt_path, video_path, ogp_image_path).

    Raises:
        gr.Error: when OPENAI_API_KEY is not configured.
    """
    if not os.getenv("OPENAI_API_KEY"):
        raise gr.Error("OPENAI_API_KEY が設定されていません(SpacesのSecretsに追加)。")

    # 1) Generate the structured asset set (JSON) with the LLM.
    assets = generate_marketing_assets(
        product_name=product_name,
        value_props=value_props,
        target=target,
        tone=tone,
        lang=lang,
        seconds=int(seconds),
        model=DEFAULT_MODEL,
    )

    headline = assets["headline"]
    video_script = assets["video_script"]
    thumbnail_text = assets["thumbnail_text"]
    ogp = assets["ogp"]

    # BUG FIX: the original wrapped everything below in
    # `with tempfile.TemporaryDirectory() as td:` and returned file paths from
    # inside the block — the directory (and every generated file) was deleted
    # the moment the function returned, so Gradio served dangling paths.
    # mkdtemp() keeps the files on disk for Gradio to read.
    td = tempfile.mkdtemp()

    # 2) TTS narration -> 3) Whisper subtitles (SRT).
    audio_path = os.path.join(td, "narration.mp3")
    synthesize_tts_openai(
        text=video_script,
        out_path=audio_path,
        voice=voice,
        model=DEFAULT_TTS_MODEL,
        format="mp3",
    )

    srt_path = os.path.join(td, "captions.srt")
    srt_text = transcribe_to_srt_openai(
        audio_path=audio_path,
        model=DEFAULT_WHISPER_MODEL,
    )
    with open(srt_path, "w", encoding="utf-8") as f:
        f.write(srt_text)

    # 4) Background image (also reused for the thumbnail / OGP image).
    bg_path = os.path.join(td, "bg.png")
    make_background_image(
        out_path=bg_path,
        title=headline,
        subtitle=thumbnail_text,
        lang=lang,
    )

    # 5) Compose the vertical short video (1080x1920) with ffmpeg.
    video_out = os.path.join(td, "short.mp4")
    compose_video_with_subtitles(
        image_path=bg_path,
        audio_path=audio_path,
        srt_path=srt_path,
        out_path=video_out,
        width=1080,
        height=1920,
    )

    # 6) OGP image (landscape 1200x630), resized from the background.
    ogp_img_path = os.path.join(td, "ogp.png")
    img = Image.open(bg_path).convert("RGB").resize((1200, 630))
    img.save(ogp_img_path)

    return (
        headline,
        video_script,
        thumbnail_text,
        json.dumps(ogp, ensure_ascii=False, indent=2),
        audio_path,
        srt_path,
        video_out,
        ogp_img_path,
    )
|
| 100 |
+
|
| 101 |
+
# Module-level Gradio app: Spaces auto-serves the `demo` object.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎬 マルチモーダル要素→素材ジェネレーター
    入力(製品名 / 訴求点 / ターゲット)から **見出し・短尺動画用台本・サムネ文言・OGP文言** を生成し、
    さらに **TTS音声 + Whisper字幕 + ffmpeg** で **縦型ショート動画** を自動合成します。
    """)

    with gr.Row():
        # Input column.
        with gr.Column():
            product_name = gr.Textbox(label="製品名")
            value_props = gr.Textbox(label="訴求点(カンマ区切り推奨)")
            target = gr.Textbox(label="ターゲット")
            tone = gr.Textbox(label="トーン(例:信頼性/スピード感/ワクワク)", value="信頼性とスピード感")
            lang = gr.Dropdown(["ja", "en"], value="ja", label="言語")
            seconds = gr.Slider(8, 45, value=20, step=1, label="動画尺(秒)")
            voice = gr.Dropdown(VOICE_CHOICES, value="alloy", label="ナレーション音声(OpenAI TTS)")
            run_btn = gr.Button("生成する", variant="primary")

        # Output column: text assets plus downloadable files.
        with gr.Column():
            headline = gr.Textbox(label="見出し/ヘッドライン")
            video_script = gr.Textbox(label="短尺動画 用 台本", lines=10)
            thumbnail_text = gr.Textbox(label="サムネ文言")
            ogp_json = gr.Code(label="OGP JSON(title/description/alt)", language="json")
            audio = gr.File(label="音声(mp3)")
            srt = gr.File(label="字幕(srt)")
            video = gr.File(label="動画(mp4)")
            ogp_img = gr.File(label="OGP画像(png)")

    # One-click example row (values come from the EXAMPLE constant).
    gr.Examples(
        examples=[
            [EXAMPLE["product_name"], EXAMPLE["value_props"], EXAMPLE["target"], EXAMPLE["tone"], EXAMPLE["lang"], EXAMPLE["seconds"], EXAMPLE["voice"]],
        ],
        inputs=[product_name, value_props, target, tone, lang, seconds, voice],
    )

    # Wire the button to the pipeline; api_name also exposes it as /generate.
    run_btn.click(
        fn=run_pipeline,
        inputs=[product_name, value_props, target, tone, lang, seconds, voice],
        outputs=[headline, video_script, thumbnail_text, ogp_json, audio, srt, video, ogp_img],
        api_name="generate",
    )

if __name__ == "__main__":
    demo.launch()
|
generator.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
import os
import re

import requests

# FIX: prompts.py defines build_structured_prompt; the original misspelled
# import ("build_strctured_prompt") raised ImportError at startup. The alias
# keeps the old (misspelled) name bound for any remaining callers.
from prompts import build_structured_prompt
from prompts import build_structured_prompt as build_strctured_prompt
|
| 6 |
+
|
| 7 |
+
# Read once at import time. OPENAI_BASE_URL allows pointing at a proxy or an
# OpenAI-compatible endpoint.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

# BUG FIX: the original built "Bearer<key>" without a space, which produces an
# invalid Authorization header and a 401 from the API.
HEADERS = {
    "Authorization": f"Bearer {OPENAI_API_KEY}",
    "Content-Type": "application/json",
}
|
| 14 |
+
|
| 15 |
+
# System prompt: pins the copywriter persona and forces strict-JSON output.
# FIX: corrected the "Japanses" typo and the missing space after the comma —
# these are sent to the model verbatim.
SYSTEM = (
    "You are a bilingual Japanese/English marketing copywriter. "
    "Return results ONLY as strict JSON with UTF-8, never prose."
)
|
| 19 |
+
|
| 20 |
+
def _post_chat(messages, model: str, response_format_json: bool = True):
    """POST to /chat/completions and return the assistant message content.

    Args:
        messages: OpenAI chat-format list of {"role": ..., "content": ...}.
        model: chat model name.
        response_format_json: when True, request a strict JSON object reply.

    Raises:
        requests.HTTPError: on a non-2xx API response.
    """
    url = f"{OPENAI_BASE_URL}/chat/completions"
    payload = {
        "model": model,
        # BUG FIX: the original sent "messsages" (typo); the API rejected every
        # request with a 400 for the missing required "messages" field.
        "messages": messages,
        "temperature": 0.7,
    }
    if response_format_json:
        payload["response_format"] = {"type": "json_object"}
    r = requests.post(url, headers=HEADERS, json=payload, timeout=120)
    r.raise_for_status()
    data = r.json()
    return data["choices"][0]["message"]["content"]
|
| 33 |
+
|
| 34 |
+
def _safe_json_extract(text: str):
|
| 35 |
+
try:
|
| 36 |
+
return json.loads(text)
|
| 37 |
+
except Exception:
|
| 38 |
+
m = re.search(r"\{[\s\S]*\}",text)
|
| 39 |
+
if m:
|
| 40 |
+
return json.loads(m.group(0))
|
| 41 |
+
raise
|
| 42 |
+
|
| 43 |
+
def generate_marketing_assets(product_name: str, value_props: str, target: str, tone: str, lang: str, seconds: int, model: str):
    """Generate headline / video script / thumbnail text / OGP via the LLM.

    Returns the parsed JSON dict with all required keys present and the OGP
    sub-dict backfilled with empty strings for missing fields.

    Raises:
        ValueError: if the LLM response lacks a required top-level key.
    """
    # BUG FIX: import the correctly spelled helper; the original referenced
    # `build_strctured_prompt`, which prompts.py does not define.
    from prompts import build_structured_prompt

    user_prompt = build_structured_prompt(
        product_name=product_name,
        value_props=value_props,
        target=target,
        tone=tone,
        lang=lang,
        seconds=seconds,
    )

    messages = [
        {"role": "system", "content": SYSTEM},
        {"role": "user", "content": user_prompt},
    ]

    raw = _post_chat(messages, model=model, response_format_json=True)
    data = _safe_json_extract(raw)

    # Validate the schema the prompt demands.
    required = ["headline", "video_script", "thumbnail_text", "ogp"]
    for k in required:
        if k not in data:
            raise ValueError(f"LLM JSON missing key:{k}")
    # BUG FIX: the original defaulted "little" (typo) instead of "title", so a
    # missing OGP title was never backfilled.
    for k in ["title", "description", "alt"]:
        data["ogp"].setdefault(k, "")
    return data
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
prompts.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from textwrap import dedent
|
| 2 |
+
|
| 3 |
+
# 構造化プロンプト(JSON出力に限定)
|
| 4 |
+
def build_structured_prompt(product_name: str, value_props: str, target: str, tone: str, lang: str, seconds: int) -> str:
    """Build the user prompt that constrains the LLM to JSON-only asset output."""
    prompt_body = f"""
    You are a top-tier performance marketer and short-form video copywriter.
    Language: {lang}

    Inputs:
    - Product: {product_name}
    - Value Props (comma-separated): {value_props}
    - Target: {target}
    - Tone: {tone}
    - Target video length: {seconds} seconds

    Task:
    1) Craft a high-converting headline.
    2) Write a short-form video script designed for {seconds}s. Use 3–5 concise scenes, strong hook in the first 2s. Keep narrator-friendly phrasing.
    3) Produce a punchy thumbnail text (<= 14 chars in ja / <= 6 words in en).
    4) Provide OGP fields.

    Return STRICT JSON with the following schema ONLY (no explanations):
    {{
      "headline": string,
      "video_script": string, # multi-line; include scene markers like [HOOK], [SCENE2], [CTA]
      "thumbnail_text": string,
      "ogp": {{
        "title": string,
        "description": string,
        "alt": string
      }}
    }}
    """
    return dedent(prompt_body)
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.36.0
|
| 2 |
+
requests>=2.31.0
|
| 3 |
+
Pillow>=10.3.0
|
| 4 |
+
python-dotenv>=1.0.1
|
| 5 |
+
# Optional (heavy):
|
| 6 |
+
# pyannote.audio>=3.1.0
|
tts_subtitles.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
|
| 4 |
+
# API key and endpoint are read once at import time. OPENAI_BASE_URL lets the
# app target a proxy or any OpenAI-compatible server.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")

# Shared Authorization header for both the TTS and transcription requests.
HEADERS_AUTH = {"Authorization": f"Bearer {OPENAI_API_KEY}"}
|
| 8 |
+
|
| 9 |
+
def synthesize_tts_openai(text: str, out_path: str, voice: str = "alloy", model: str = "tts-1", format: str = "mp3"):
    """Synthesize *text* with the OpenAI /audio/speech endpoint into out_path.

    Returns out_path. Raises requests.HTTPError on an API error.
    Note: the `format` parameter name shadows the builtin but is kept for
    backward compatibility with existing callers.
    """
    url = f"{OPENAI_BASE_URL}/audio/speech"
    payload = {
        "model": model,
        "voice": voice,
        "input": text,
        # BUG FIX: the API parameter is "response_format", not "format"; the
        # original key was ignored and the server default format was returned.
        "response_format": format,
    }
    headers = {**HEADERS_AUTH, "Content-Type": "application/json"}
    r = requests.post(url, headers=headers, json=payload, timeout=300)
    r.raise_for_status()
    # The endpoint streams raw audio bytes (no JSON envelope).
    with open(out_path, "wb") as f:
        f.write(r.content)
    return out_path
|
| 23 |
+
|
| 24 |
+
def transcribe_to_srt_openai(audio_path: str, model: str = "whisper-1") -> str:
    """Transcribe the audio file with Whisper and return SRT-formatted text."""
    endpoint = f"{OPENAI_BASE_URL}/audio/transcriptions"
    form_fields = {"model": model, "response_format": "srt"}
    with open(audio_path, "rb") as audio_file:
        resp = requests.post(
            endpoint,
            headers=HEADERS_AUTH,
            files={"file": audio_file},
            data=form_fields,
            timeout=600,
        )
    resp.raise_for_status()
    # With response_format=srt the body is the SRT text itself, not JSON.
    return resp.text
|
| 33 |
+
|
| 34 |
+
# --- Optional: pyannote による VAD/クリーンアップ(重いモデルのためデフォルト無効) ---
|
| 35 |
+
# from pyannote.audio import Pipeline
|
| 36 |
+
# def refine_srt_with_pyannote(audio_path: str, srt_text: str) -> str:
|
| 37 |
+
# token = os.getenv("HUGGINGFACE_TOKEN")
|
| 38 |
+
# if not token:
|
| 39 |
+
# return srt_text
|
| 40 |
+
# pipeline = Pipeline.from_pretrained("pyannote/segmentation", use_auth_token=token)
|
| 41 |
+
# # ここで音声区間検出→SRTのタイミングを補正する処理を実装(省略)
|
| 42 |
+
# return srt_text
|
ui.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import httpx
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
def build_ui(fastapi_app):
    """Build the Gradio Blocks UI that drives the FastAPI endpoints over HTTP.

    fastapi_app: the host FastAPI application (accepted but not used directly
    here; the handlers call its routes via httpx).
    NOTE(review): httpx normally requires absolute URLs — with `base` empty
    these relative requests may fail at runtime unless a base_url is
    configured elsewhere; verify.
    """
    base = ""  # Same process, so a relative path is expected to be OK.

    # --- Async handlers: each opens a short-lived httpx client, calls one
    # --- API route, and returns JSON pretty-printed for the gr.Code widgets.

    async def do_summarize(platforms, keywords_csv, brand, limit, language):
        # Split the comma-separated keyword string, dropping empties.
        keywords = [k.strip() for k in keywords_csv.split(",") if k.strip()]
        async with httpx.AsyncClient() as client:
            r = await client.post(f"{base}/api/summarize_trends", json={
                "platforms": platforms, "keywords": keywords, "brand": brand,
                "limit": int(limit), "language": language
            })
            r.raise_for_status()
            data = r.json()
            items = data["items"]
            summary = data["summary"]
            return json.dumps(items, ensure_ascii=False, indent=2), summary

    async def do_generate_plan(brand, language, platforms, keywords_csv, start_date, tone, cta, image_style_hint):
        keywords = [k.strip() for k in keywords_csv.split(",") if k.strip()]
        async with httpx.AsyncClient() as client:
            r = await client.post(f"{base}/api/generate_week_plan", json={
                "brand": brand, "language": language, "platforms": platforms,
                "keywords": keywords, "start_date": (start_date or None),
                "tone": tone, "cta": cta, "image_style_hint": image_style_hint
            })
            r.raise_for_status()
            posts = r.json()
            return json.dumps(posts, ensure_ascii=False, indent=2)

    async def do_list_calendar():
        async with httpx.AsyncClient() as client:
            r = await client.get(f"{base}/api/calendar")
            r.raise_for_status()
            return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def do_approve(post_id):
        async with httpx.AsyncClient() as client:
            r = await client.post(f"{base}/api/approve_post/{int(post_id)}")
            r.raise_for_status()
            return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def do_schedule(post_id, iso):
        async with httpx.AsyncClient() as client:
            r = await client.post(f"{base}/api/schedule_post/{int(post_id)}", json={"scheduled_at": iso})
            r.raise_for_status()
            return json.dumps(r.json(), ensure_ascii=False, indent=2)

    async def save_keywords(keywords_csv):
        keywords = [k.strip() for k in keywords_csv.split(",") if k.strip()]
        async with httpx.AsyncClient() as client:
            r = await client.post(f"{base}/api/keywords", json={"keywords": keywords})
            r.raise_for_status()
            return "保存しました"

    async def load_keywords():
        async with httpx.AsyncClient() as client:
            r = await client.get(f"{base}/api/keywords")
            r.raise_for_status()
            data = r.json()
            return ", ".join(data.get("keywords", []))

    # --- UI layout: one tab per workflow step plus a settings tab. ---
    with gr.Blocks(title="SNS運用AIライト") as demo:
        gr.Markdown("## SNS運用AIライト — 競合/トレンド要約 → 1週間案 → 承認 → 予約投稿")

        # Tab 1: collect trend items and summarize them.
        with gr.Tab("1) トレンド要約"):
            platforms = gr.CheckboxGroup(choices=["x","instagram"], value=["x","instagram"], label="対象プラットフォーム")
            keywords = gr.Textbox(label="監視キーワード(カンマ区切り)", placeholder="自社名, 競合名, 業界ワード")
            brand = gr.Textbox(label="ブランド名(任意)", placeholder="HitC Inc. など")
            limit = gr.Slider(5, 50, step=1, value=20, label="取得件数(疑似/本API対応)")
            language = gr.Dropdown(["ja","en"], value="ja", label="出力言語")
            btn = gr.Button("トレンド要約を実行")
            items_json = gr.Code(label="取得結果(JSON)", language="json")
            summary = gr.Textbox(label="要約", lines=12)
            btn.click(do_summarize, [platforms, keywords, brand, limit, language], [items_json, summary])

        # Tab 2: generate a one-week posting plan (saved as drafts server-side).
        with gr.Tab("2) 1週間の投稿案生成"):
            brand2 = gr.Textbox(label="ブランド名", placeholder="HitC Inc.")
            language2 = gr.Dropdown(["ja","en"], value="ja", label="言語")
            platforms2 = gr.CheckboxGroup(choices=["x","instagram"], value=["x","instagram"], label="プラットフォーム")
            keywords2 = gr.Textbox(label="キーワード(任意/カンマ区切り)")
            start_date = gr.Textbox(label="開始日(ISO, 任意)", placeholder="2025-09-01")
            tone = gr.Textbox(label="トーン", value="プロフェッショナルで親しみやすい")
            cta = gr.Textbox(label="CTA", value="詳細はこちら")
            imgstyle = gr.Textbox(label="画像ラフのスタイルヒント", value="ミニマル、端的なタイポ、ブランドカラー意識")
            btn2 = gr.Button("投稿案を生成 & DB保存")
            posts_json = gr.Code(label="投稿案(DBにdraft保存)", language="json")
            btn2.click(do_generate_plan, [brand2, language2, platforms2, keywords2, start_date, tone, cta, imgstyle], [posts_json])

        # Tab 3: approve / schedule posts and inspect the calendar.
        with gr.Tab("3) 承認・予約・カレンダー"):
            gr.Markdown("承認→予約→公開(APScheduler)が動きます。")
            post_id = gr.Number(label="Post ID")
            approve_btn = gr.Button("承認する")
            schedule_iso = gr.Textbox(label="予約日時(ISO, 例: 2025-09-01T09:00:00Z)")
            schedule_btn = gr.Button("予約に登録")
            out = gr.Code(label="レスポンス", language="json")
            approve_btn.click(do_approve, [post_id], [out])
            schedule_btn.click(do_schedule, [post_id, schedule_iso], [out])

            cal_btn = gr.Button("カレンダー取得")
            cal_json = gr.Code(label="カレンダー", language="json")
            cal_btn.click(do_list_calendar, [], [cal_json])

        # Settings tab: persist / restore the watched keyword list.
        with gr.Tab("設定"):
            kw_in = gr.Textbox(label="監視キーワード(カンマ区切り)")
            load_btn = gr.Button("読み込み")
            save_btn = gr.Button("保存")
            msg = gr.Textbox(label="メッセージ")
            load_btn.click(load_keywords, [], [kw_in])
            save_btn.click(save_keywords, [kw_in], [msg])

    return demo
|
video.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import subprocess
|
| 3 |
+
from PIL import Image, ImageDraw, ImageFont
|
| 4 |
+
|
| 5 |
+
def _run(cmd: list):
|
| 6 |
+
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
| 7 |
+
if p.returncode != 0:
|
| 8 |
+
raise RuntimeError(p.stderr[:2000])
|
| 9 |
+
return p.stdout
|
| 10 |
+
|
| 11 |
+
def _audio_duration_seconds(audio_path: str) -> float:
    """Return the media duration in seconds, as reported by ffprobe."""
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    # ffprobe prints just the duration value with these options.
    return float(_run(probe_cmd).strip())
|
| 18 |
+
|
| 19 |
+
def _wrap_text(draw: ImageDraw.ImageDraw, text: str, font: ImageFont.FreeTypeFont, max_width: int) -> str:
    """Greedy character-level wrap of *text* to *max_width* rendered pixels.

    Breaks whenever adding the next character would exceed max_width (per
    draw.textlength). Character-level wrapping suits Japanese text, which has
    no spaces to break on.
    """
    wrapped_lines = []
    current = ""
    for char in text:
        candidate = current + char
        if draw.textlength(candidate, font=font) <= max_width:
            current = candidate
            continue
        if current:
            wrapped_lines.append(current)
        current = char
    if current:
        wrapped_lines.append(current)
    return "\n".join(wrapped_lines)
|
| 32 |
+
|
| 33 |
+
def make_background_image(out_path: str, title: str, subtitle: str, lang: str = "ja", width: int = 1080, height: int = 1920):
    """Render a dark background with centered, shadowed title/subtitle text.

    Returns out_path. Note: `lang` is currently unused — wrapping is
    character-based regardless of language.
    """
    # Simple background + text (DejaVu fonts assumed available).
    img = Image.new("RGB", (width, height), (20, 24, 32))
    draw = ImageDraw.Draw(img)

    # Fall back to PIL's built-in bitmap font when DejaVu is not installed.
    try:
        font_title = ImageFont.truetype("DejaVuSans-Bold.ttf", size=72)
        font_sub = ImageFont.truetype("DejaVuSans.ttf", size=48)
    except Exception:
        font_title = ImageFont.load_default()
        font_sub = ImageFont.load_default()

    margin = 40
    max_w = width - margin * 2

    # Truncate overly long inputs, then wrap to the drawable width.
    title_wrapped = _wrap_text(draw, title[:100], font_title, max_w)
    sub_wrapped = _wrap_text(draw, subtitle[:80], font_sub, max_w)

    # bbox -> width/height of each rendered multi-line text block.
    t_bbox = draw.multiline_textbbox((0, 0), title_wrapped, font=font_title, align="center")
    s_bbox = draw.multiline_textbbox((0, 0), sub_wrapped, font=font_sub, align="center")
    title_w, title_h = (t_bbox[2] - t_bbox[0], t_bbox[3] - t_bbox[1])
    sub_w, sub_h = (s_bbox[2] - s_bbox[0], s_bbox[3] - s_bbox[1])

    # Vertically center: title + 20px gap + subtitle.
    total_h = title_h + 20 + sub_h
    y = (height - total_h) // 2

    # Cheap drop shadow: draw two dark offset passes beneath the white text.
    shadow = (0, 0, 0)
    color = (255, 255, 255)

    for dx, dy in [(2, 2), (1, 1)]:
        draw.multiline_text(((width - title_w) // 2 + dx, y + dy), title_wrapped, fill=shadow, font=font_title, align="center")
    draw.multiline_text(((width - title_w) // 2, y), title_wrapped, fill=color, font=font_title, align="center")

    y2 = y + title_h + 20
    for dx, dy in [(2, 2), (1, 1)]:
        draw.multiline_text(((width - sub_w) // 2 + dx, y2 + dy), sub_wrapped, fill=shadow, font=font_sub, align="center")
    draw.multiline_text(((width - sub_w) // 2, y2), sub_wrapped, fill=color, font=font_sub, align="center")

    img.save(out_path)
    return out_path
|
| 75 |
+
|
| 76 |
+
def compose_video_with_subtitles(image_path: str, audio_path: str, srt_path: str, out_path: str, width: int = 1080, height: int = 1920, fps: int = 30):
    """Build an MP4 from a still image plus narration audio, then burn in SRT.

    Two ffmpeg passes: (1) loop the image for the audio's duration and mux the
    audio; (2) burn the subtitles with the `subtitles` filter (needs libass).
    Returns out_path. Raises RuntimeError (via _run) if ffmpeg/ffprobe fail.
    """
    duration = _audio_duration_seconds(audio_path)

    # Pass 1: image -> video, combined with the audio track.
    # NOTE(review): if out_path does not end in ".mp4", temp_video == out_path
    # and pass 2 would read and write the same file — confirm callers.
    temp_video = out_path.replace(".mp4", "_temp.mp4")
    cmd = [
        "ffmpeg", "-y",
        "-loop", "1", "-i", image_path,
        "-i", audio_path,
        "-c:v", "libx264",
        "-t", f"{duration:.2f}",
        "-pix_fmt", "yuv420p",
        "-vf", f"scale={width}:{height},fps={fps}",
        "-c:a", "aac", "-shortest",
        temp_video,
    ]
    _run(cmd)

    # Pass 2: burn in the subtitles (libass required). The path is quoted and
    # escaped so spaces/colons/quotes survive the filter argument.
    # Windows would need different escaping; Spaces (Linux) is assumed.
    srt_escaped = srt_path.replace("\\", "\\\\").replace(":", r"\:").replace("'", r"\\'")
    cmd2 = [
        "ffmpeg", "-y",
        "-i", temp_video,
        "-vf", f"subtitles='{srt_escaped}'",
        "-c:a", "copy",
        out_path,
    ]
    _run(cmd2)

    # Best-effort cleanup of the intermediate file.
    try:
        os.remove(temp_video)
    except Exception:
        pass

    return out_path
|