# Video-Generator-Tools / src / streamlit_app.py
# userIdc2024's picture
# removed the video generation tab only (not functionality)
# 571198c verified
import hmac
import logging
import os
from functools import lru_cache

import streamlit as st
from dotenv import load_dotenv

from audio_gen import audio_generation
from caption_gen import caption_generation
from image_gen import image_generation_change_background
from video_gen import video_generation
from prompt_generator import generate_segments_payload, VeoInputs
# Load environment configuration via python-dotenv before anything reads it
# (ACCESS_TOKEN is looked up with os.getenv further down in this file).
load_dotenv()

# --- Logger ---
_LOG_FORMAT = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("video_generator_tools")

# Page-level Streamlit configuration: browser tab title and wide layout.
st.set_page_config(layout="wide", page_title="Video Generator Tools")
def main_app():
    """Render the main tools UI with one tab per generator.

    Each tab is drawn by a private helper so that its form, input validation
    and error handling stay self-contained.
    """
    st.title("Video Generator Tools")
    prompt_tab, audio_tab, image_tab, caption_tab = st.tabs([
        "JSON Prompt Generator",
        "Audio Generator",
        "Image Generation",
        "Generate Caption",
    ])
    with prompt_tab:
        _render_json_prompt_tab()
    with audio_tab:
        _render_audio_tab()
    with image_tab:
        _render_image_tab()
    with caption_tab:
        _render_caption_tab()


def _render_json_prompt_tab():
    """Draw the JSON Prompt Generator form and display the generated payload."""
    st.subheader("JSON Prompt Generator")
    with st.form("json_prompt_form", clear_on_submit=False):
        script = st.text_area("Script", height=200, placeholder="Paste the full script...")
        style = st.text_input("Style", value="clean, lifestyle UGC")
        json_format = st.selectbox("jsonFormat", ["standard", "compact", "verbose"], index=0)
        continuation = st.toggle("continuationMode", value=True)
        voice_type = st.text_input("voiceType", value="")
        energy_level = st.text_input("energyLevel", value="")
        # The default must be a plain string: passing the list ["single"] made
        # the field start out containing the literal text "['single']".
        setting_mode = st.text_input("settingMode", value="single")
        camera_style = st.text_input("cameraStyle", value="handheld steadicam")
        energy_arc = st.text_input("energyArc", value="")
        narrative_style = st.text_input("narrativeStyle", value="direct address")
        accent_region = st.text_input("accentRegion", value="")
        image_upload = st.file_uploader(
            "First frame image (optional)", type=["png", "jpg", "jpeg", "webp"]
        )
        submitted = st.form_submit_button("Generate Segments Payload")

    if not submitted:
        return

    try:
        image_bytes = image_upload.read() if image_upload else None
        logger.info(
            "JSON Prompt: script_len=%d style='%s' jsonFormat='%s' img=%s",
            len(script or ""), style, json_format, bool(image_bytes),
        )
        # Blank optional text fields are sent as None rather than "".
        inputs = VeoInputs(
            script=script or "",
            style=style or "",
            jsonFormat=json_format,
            continuationMode=continuation,
            voiceType=voice_type or None,
            energyLevel=energy_level or None,
            settingMode=setting_mode,
            cameraStyle=camera_style or None,
            energyArc=energy_arc or None,
            narrativeStyle=narrative_style or None,
            accentRegion=accent_region or None,
        )
        with st.spinner("Generating segments payload..."):
            # NOTE(review): raw bytes are passed through the `image_path`
            # keyword -- confirm generate_segments_payload accepts bytes.
            payload = generate_segments_payload(inputs, image_path=image_bytes, model="gpt-4o")
        st.success("Segments payload generated")
        st.json(payload)
        st.toast("Segments JSON ready", icon="✅")
    except Exception as e:
        # UI boundary: log the traceback and surface the message to the user.
        logger.exception("JSON Prompt Generator failed")
        st.error(f"Error: {e}")


def _render_audio_tab():
    """Draw the Audio Generator form and play back the generated audio URL."""
    st.subheader("Audio Generator")
    with st.form("audio_generation_form", clear_on_submit=False):
        scripts = st.text_area("Scripts", height=180, placeholder="Enter narration text…")
        voice_id = st.selectbox(
            "Voice ID",
            [
                "Wise_Woman", "Friendly_Person", "Inspirational_girl",
                "Deep_Voice_Man", "Calm_Woman", "Casual_Guy", "Lively_Girl",
                "Patient_Man", "Young_Knight", "Determined_Man", "Lovely_Girl",
                "Decent_Boy", "Imposing_Manner", "Elegant_Man", "Abbess",
                "Sweet_Girl_2", "Exuberant_Girl",
            ],
            index=0,
        )
        speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
        volume = st.slider("Volume", min_value=1.0, max_value=5.0, value=1.0, step=0.5)
        pitch = st.slider("Pitch (semitones)", min_value=-12, max_value=12, value=0, step=1)
        emotion = st.selectbox(
            "Emotion", ["neutral", "confident", "warm", "excited", "serious"], index=0
        )
        make_audio = st.form_submit_button("Generate Audio")

    if not make_audio:
        return
    if not scripts.strip():
        st.error("Please enter scripts text.")
        return

    try:
        logger.info(
            "Audio Gen: voice_id=%s speed=%.2f volume=%.2f pitch=%d emotion=%s text_len=%d",
            voice_id, speed, volume, pitch, emotion, len(scripts or ""),
        )
        with st.spinner("Generating audio..."):
            url = audio_generation(scripts, voice_id, speed, volume, pitch, emotion)
        if url:
            st.success("Audio generated")
            st.audio(url)
            st.write(url)
            st.toast("Audio ready", icon="🔊")
        else:
            st.error("No audio URL returned.")
            logger.warning("Audio Gen: empty URL")
    except Exception as e:
        logger.exception("Audio Generation failed")
        st.error(f"Error: {e}")


def _render_image_tab():
    """Draw the background-change form and show input and output images side by side."""
    st.subheader("Image Generation (for changing character's background)")
    with st.form("image_generation_form", clear_on_submit=False):
        img = st.file_uploader("Input image", type=["png", "jpg", "jpeg", "webp"])
        img_prompt = st.text_input(
            "Prompt", placeholder="New background description, style cues, lighting…"
        )
        ar = st.selectbox("Aspect ratio", ["9:16", "16:9"], index=0)
        gen_img = st.form_submit_button("Generate Image")

    if not gen_img:
        return
    if not img:
        st.error("Please upload an image.")
        return

    try:
        raw = img.read()
        logger.info(
            "Image Gen: bytes=%d ar=%s prompt_len=%d",
            len(raw or b""), ar, len(img_prompt or ""),
        )
        with st.spinner("Generating image..."):
            url = image_generation_change_background(raw, img_prompt, ar)
        if url:
            st.success("Image processed")
            input_col, output_col = st.columns(2)
            with input_col:
                st.caption("Input")
                st.image(img, use_container_width=True)
            with output_col:
                st.caption("Output")
                st.image(url, use_container_width=True)
            st.write(url)
            st.toast("Image ready", icon="🖼️")
        else:
            st.error("No image URL returned.")
            logger.warning("Image Gen: empty URL")
    except Exception as e:
        logger.exception("Image Generation failed")
        st.error(f"Error: {e}")


def _render_caption_tab():
    """Draw the caption form and display the captioned video URL."""
    st.subheader("Generate Caption")
    with st.form("caption_form", clear_on_submit=False):
        cap_video = st.file_uploader("Input video", type=["mp4", "mov", "mkv"])
        cap_size = st.slider(
            "Caption size (characters)", min_value=10, max_value=100, value=30, step=5
        )
        color = st.text_input("Highlight color (hex)", value="#FFD400")
        make_caps = st.form_submit_button("Generate Captions")

    if not make_caps:
        return
    if not cap_video:
        st.error("Please upload a video.")
        return

    try:
        vbytes = cap_video.read()
        logger.info(
            "Caption Gen: video_bytes=%d cap_size=%d color=%s",
            len(vbytes or b""), int(cap_size), color,
        )
        with st.spinner("Generating captions..."):
            url = caption_generation(vbytes, cap_size, color)
        if url:
            st.success("Captions generated")
            st.video(url)
            st.write(url)
            st.toast("Captioned video ready", icon="🎞️")
        else:
            st.error("No captioned video URL returned.")
            logger.warning("Caption Gen: empty URL")
    except Exception as e:
        logger.exception("Caption Generation failed")
        st.error(f"Error: {e}")
@lru_cache(maxsize=1)
def check_token_cached(user_token: str):
    """Validate a user-supplied access token against the ACCESS_TOKEN env var.

    Args:
        user_token: Token entered by the user.

    Returns:
        A ``(ok, error_message)`` tuple. ``ok`` is True only when the token
        matches; ``error_message`` is empty on success and otherwise says
        whether the server is misconfigured or the token is wrong.

    Note:
        The result for the most recently checked token is memoized by
        ``lru_cache``; call ``check_token_cached.cache_clear()`` if
        ACCESS_TOKEN changes while the process is running.
    """
    expected = os.getenv("ACCESS_TOKEN")
    if not expected:
        return False, "Server error: Access token not configured."
    if not isinstance(user_token, str):
        return False, "Invalid token."
    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Compare as bytes because compare_digest only accepts
    # str arguments when they are pure ASCII.
    if hmac.compare_digest(user_token.encode("utf-8"), expected.encode("utf-8")):
        return True, ""
    return False, "Invalid token."
def main():
    """Gate the tools UI behind an access token.

    Shows a password prompt until the entered token is accepted, records the
    result in ``st.session_state["authenticated"]`` and then renders the app.
    """
    session = st.session_state
    if "authenticated" not in session:
        session["authenticated"] = False

    # Already unlocked in this session: go straight to the tools.
    if session["authenticated"]:
        main_app()
        return

    st.markdown("## Access Required")
    entered_token = st.text_input("Enter Access Token", type="password")
    if not st.button("Unlock App"):
        return

    is_valid, failure_reason = check_token_cached(entered_token)
    if not is_valid:
        st.error(failure_reason)
        return

    # Persist the unlocked state, then rerun so the app renders immediately.
    session["authenticated"] = True
    st.rerun()
# Script entry point: run the token-gated app when this file is executed directly.
if __name__ == "__main__":
    main()