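"""Streamlit app: paste a transcript, have one LLM draft a clip plan, then use
a second LLM call to pull each planned clip's section out of the transcript.

Run with (assuming this file is saved as app.py):

    streamlit run app.py

Requires OPENAI_API_KEY and/or GROQ_API_KEY in the environment, depending on
the models selected in the sidebar.
"""
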
import streamlit as st
from openai import OpenAI
from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
import json
import os
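
# Placeholder fields each prompt template is expected to expose
# (inferred from the .format() calls further down):
#   SYSTEM_MESSAGE         -> {prompt_goal}
#   USER_MESSAGE           -> {source_content}
#   CLIPPER_USER_MESSAGE   -> {source_content}, {clip_plan}
#   CLIPPER_SYSTEM_MESSAGE -> used verbatim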

# Set Streamlit layout to wide mode
st.set_page_config(layout="wide")

st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")

# List of available models
OPENAI_MODELS = ["gpt-4o", "gpt-4o-mini", "o3-mini"]
GROQ_MODELS = ["llama-3.3-70b-specdec", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]

# API keys are read from environment variables (they are not entered in the sidebar)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEFAULT_MODEL = "llama-3.3-70b-specdec"
DEFAULT_GOAL = (
    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
    "sequence, optimizing for standalone engagement potential."
)

st.sidebar.subheader("πŸ“€ Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0
)

st.sidebar.subheader("πŸ“₯ Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0
)

# Stop early if the environment key required by either selected model is missing
if (clip_plan_model in OPENAI_MODELS or extraction_model in OPENAI_MODELS) and not OPENAI_API_KEY:
    st.warning("⚠️ Please set the OPENAI_API_KEY environment variable.")
    st.stop()
if (clip_plan_model in GROQ_MODELS or extraction_model in GROQ_MODELS) and not GROQ_API_KEY:
    st.warning("⚠️ Please set the GROQ_API_KEY environment variable.")
    st.stop()

# Point each client at the endpoint matching its selected model
if clip_plan_model in GROQ_MODELS:
    plan_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=GROQ_API_KEY)
else:
    plan_client = OpenAI(api_key=OPENAI_API_KEY)

if extraction_model in GROQ_MODELS:
    extraction_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=GROQ_API_KEY)
else:
    extraction_client = OpenAI(api_key=OPENAI_API_KEY)

st.sidebar.subheader("🎯 Customize Prompt")
GOAL = st.sidebar.text_area("Describe how clips should be extracted (optional):", value=DEFAULT_GOAL, height=100)
GOAL = GOAL.strip() or DEFAULT_GOAL

# Layout: Two columns - left for transcript, right for clip plans and extraction
col_transcript, col_output = st.columns([1, 1])

# Left Column: Transcript Input
with col_transcript:
    st.subheader("πŸ“ Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

    # Add reference link below the transcript text box
    st.markdown("---")
    st.markdown(
        """
        <div style="font-size:18px; font-weight:bold; margin-top:10px;">
            Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
            OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
        </div>
        """,
        unsafe_allow_html=True
    )

    st.markdown("---")
    st.subheader("πŸŽ₯ Video/Audio Upload & Playback")

    media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
    if media_file is not None:
        # Detect media type and play accordingly
        if media_file.type.startswith("video"):
            st.video(media_file)
        elif media_file.type.startswith("audio"):
            st.audio(media_file)


# Right Column: Clip Plan Generation and Extraction
with col_output:
    st.subheader("πŸ“‹ Generated Clip Plans")

    # Button to generate clip plans from the transcript
    if st.button("Generate Plan"):
        if not transcript.strip():
            st.error("❌ Please enter a transcript.")
        else:
            with st.spinner("⏳ Generating content plan... Please wait."):
                try:
                    # Prepare prompts for clip plan generation
                    system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
                    user_prompt = USER_MESSAGE.format(source_content=transcript)
                    messages = [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ]

                    openai_args = {
                        "model": clip_plan_model,
                        "messages": messages,
                        "response_format": {"type": "json_object"},
                    }
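                    # o3-mini is a reasoning model: it takes a reasoning_effort
                    # knob and rejects sampling parameters such as temperature,
                    # so max_tokens/temperature are sent only to the chat models.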
                    if clip_plan_model == "o3-mini":
                        openai_args["reasoning_effort"] = "low"
                    else:
                        openai_args["max_tokens"] = 5000
                        openai_args["temperature"] = 0.45

                    response = plan_client.chat.completions.create(**openai_args)
                    generated_response = response.choices[0].message.content.strip()
                    content_plan = json.loads(generated_response)

                    # The response JSON is expected to have a single top-level key
                    # holding the list of clip plans
                    plan_key = next(iter(content_plan), None)
                    clip_plans = content_plan.get(plan_key, []) if plan_key else []
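                    # A plausible plan shape (the top-level key name, e.g. "clips",
                    # is whatever prompts.py asks the model for):
                    #   {"clips": [{"Title": "...", "Focus Prompt": "...",
                    #               "Duration Target": 45}, ...]}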

                    # Save clip plans in session state so they persist
                    st.session_state.clip_plans = clip_plans
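                    # (Streamlit reruns this script on every interaction, so
                    # anything not kept in session_state would be lost.)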

                    # Clear extraction output left over from any previous plan,
                    # including entries beyond the length of the new plan
                    for key in [k for k in st.session_state if k.startswith("extracted_clip_")]:
                        st.session_state.pop(key, None)
                except json.JSONDecodeError:
                    st.error("⚠️ Failed to parse OpenAI response. Try again.")
                except Exception as e:
                    st.error(f"❌ Error: {str(e)}")

    # Display clip plans if they exist in session state
    if "clip_plans" in st.session_state:
        # We'll work with a reference to the clip plans list
        updated_clip_plans = st.session_state.clip_plans

        for i, clip in enumerate(updated_clip_plans):
            # Each clip is rendered in an expander with editable fields
            with st.expander(f"🎬 Clip {i + 1}", expanded=True):
                new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
                new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
                new_duration = st.number_input(
                    "Duration Target (seconds)",
                    value=float(clip.get("Duration Target", 0)),
                    key=f"duration_{i}",
                    step=1.0
                )

                # Update the clip plan with the edited values
                updated_clip_plans[i]["Title"] = new_title
                updated_clip_plans[i]["Focus Prompt"] = new_focus
                updated_clip_plans[i]["Duration Target"] = new_duration

                # Button to delete this clip plan
                if st.button("Delete Clip", key=f"delete_{i}"):
                    # Create a copy of the clip plans list
                    updated_clip_plans = st.session_state.clip_plans.copy()
                    # Remove the clip at index `i`
                    del updated_clip_plans[i]
                    # Update session state with the modified list
                    st.session_state.clip_plans = updated_clip_plans
                    # Rerun the app to reflect the changes
                    st.rerun()

                # Button for transcript extraction for this clip
                if st.button("Extract Transcript", key=f"extract_{i}"):
                    with st.spinner("⏳ Extracting transcript section... Please wait."):
                        try:
                            # Send only the specific (and possibly edited) clip plan to the extractor
                            single_clip_json = json.dumps(updated_clip_plans[i])
                            clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
                                source_content=transcript,
                                clip_plan=single_clip_json
                            )
                            clipper_messages = [
                                {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
                                {"role": "user", "content": clipper_user_prompt},
                            ]

                            extraction_args = {
                                "model": extraction_model,
                                "messages": clipper_messages,
                                "response_format": {"type": "json_object"},
                            }
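                            # Same model-specific handling as the plan call above:
                            # reasoning_effort for o3-mini, sampling params otherwise.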
                            if extraction_model == "o3-mini":
                                extraction_args["reasoning_effort"] = "low"
                            else:
                                extraction_args["max_tokens"] = 10000
                                extraction_args["temperature"] = 0.45

                            clipper_response = extraction_client.chat.completions.create(**extraction_args)
                            extraction_response = clipper_response.choices[0].message.content.strip()
                            extracted_clip = json.loads(extraction_response)
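                            # extracted_clip's schema is whatever JSON object the
                            # clipper prompt defines; it is rendered verbatim below.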

                            # Save the extraction result for this clip in session state
                            st.session_state[f"extracted_clip_{i}"] = extracted_clip
                        except Exception as e:
                            st.error(f"❌ Extraction error: {str(e)}")

                # Display extraction output if available
                if f"extracted_clip_{i}" in st.session_state:
                    st.markdown("#### πŸ“ Extracted Transcript Section:")
                    st.write(st.session_state[f"extracted_clip_{i}"])