import json
import os

import streamlit as st
from openai import OpenAI

from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE

# Set Streamlit layout to wide mode
st.set_page_config(layout="wide")

st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")

# Available models per provider
OPENAI_MODELS = ["gpt-4o", "gpt-4o-mini", "o3-mini"]
GROQ_MODELS = ["llama-3.3-70b-specdec", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
ALL_MODELS = GROQ_MODELS + OPENAI_MODELS

# API keys are read from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

DEFAULT_MODEL = "llama-3.3-70b-specdec"
DEFAULT_GOAL = (
    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
    "sequence, optimizing for standalone engagement potential."
)

st.sidebar.subheader("📤 Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    ALL_MODELS,
    index=ALL_MODELS.index(DEFAULT_MODEL),
)

st.sidebar.subheader("📥 Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    ALL_MODELS,
    index=ALL_MODELS.index(DEFAULT_MODEL),
)

# Route each selected model to the matching API endpoint
if clip_plan_model in GROQ_MODELS:
    plan_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=GROQ_API_KEY)
else:
    plan_client = OpenAI(api_key=OPENAI_API_KEY)

if extraction_model in GROQ_MODELS:
    extraction_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=GROQ_API_KEY)
else:
    extraction_client = OpenAI(api_key=OPENAI_API_KEY)

st.sidebar.subheader("🎯 Customize Prompt")
GOAL = st.sidebar.text_area("Custom goal for clip extraction (optional):", value=DEFAULT_GOAL, height=100)
GOAL = GOAL.strip() or DEFAULT_GOAL

# Stop early if a key required by the selected models is missing
if (clip_plan_model in OPENAI_MODELS or extraction_model in OPENAI_MODELS) and not OPENAI_API_KEY:
    st.warning("⚠️ Please set the OPENAI_API_KEY environment variable.")
    st.stop()
if (clip_plan_model in GROQ_MODELS or extraction_model in GROQ_MODELS) and not GROQ_API_KEY:
    st.warning("⚠️ Please set the GROQ_API_KEY environment variable.")
    st.stop()

# Layout: two columns - left for the transcript, right for clip plans and extraction
col_transcript, col_output = st.columns([1, 1])

# Left column: transcript input
with col_transcript:
    st.subheader("📝 Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

    # Reference link below the transcript text box
    st.markdown("---")
    st.markdown(
        """
Need a transcript? Use OpenAI Whisper on Hugging Face to generate one from your audio or video.
""", unsafe_allow_html=True ) st.markdown("---") st.subheader("🎥 Video/Audio Upload & Playback") media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"]) if media_file is not None: # Detect media type and play accordingly if media_file.type.startswith("video"): st.video(media_file) elif media_file.type.startswith("audio"): st.audio(media_file) # Right Column: Clip Plan Generation and Extraction with col_output: st.subheader("📋 Generated Clip Plans") # Button to generate clip plans from the transcript if st.button("Generate Plan"): if not transcript.strip(): st.error("❌ Please enter a transcript.") else: with st.spinner("⏳ Generating content plan... Please wait."): try: # Prepare prompts for clip plan generation system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL) user_prompt = USER_MESSAGE.format(source_content=transcript) messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] openai_args = { "model": clip_plan_model, "messages": messages, "response_format": {"type": "json_object"}, } if clip_plan_model == "o3-mini": openai_args["reasoning_effort"] = "low" else: openai_args["max_tokens"] = 5000 openai_args["temperature"] = 0.45 response = plan_client.chat.completions.create(**openai_args) generated_response = response.choices[0].message.content.strip() content_plan = json.loads(generated_response) # Assume the response JSON has a single key containing a list of clip plans plan_key = list(content_plan.keys())[0] clip_plans = content_plan.get(plan_key, []) # Save clip plans in session state so they persist st.session_state.clip_plans = clip_plans # Clear any previous extraction outputs for i in range(len(clip_plans)): st.session_state.pop(f"extracted_clip_{i}", None) except json.JSONDecodeError: st.error("⚠️ Failed to parse OpenAI response. Try again.") except Exception as e: st.error(f"❌ Error: {str(e)}") # Display clip plans if they exist in session state if "clip_plans" in st.session_state: # We'll work with a reference to the clip plans list updated_clip_plans = st.session_state.clip_plans for i, clip in enumerate(updated_clip_plans): # Each clip is rendered in an expander with editable fields with st.expander(f"🎬 Clip {i + 1}", expanded=True): new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}") new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}") new_duration = st.number_input( "Duration Target (seconds)", value=float(clip.get("Duration Target", 0)), key=f"duration_{i}", step=1.0 ) # Update the clip plan with the edited values updated_clip_plans[i]["Title"] = new_title updated_clip_plans[i]["Focus Prompt"] = new_focus updated_clip_plans[i]["Duration Target"] = new_duration # Button to delete this clip plan if st.button("Delete Clip", key=f"delete_{i}"): # Create a copy of the clip plans list updated_clip_plans = st.session_state.clip_plans.copy() # Remove the clip at index `i` del updated_clip_plans[i] # Update session state with the modified list st.session_state.clip_plans = updated_clip_plans # Rerun the app to reflect the changes st.rerun() # Button for transcript extraction for this clip if st.button("Extract Transcript", key=f"extract_{i}"): with st.spinner("⏳ Extracting transcript section... 
Please wait."): try: # Send only the specific (and possibly edited) clip plan to the extractor single_clip_json = json.dumps(updated_clip_plans[i]) clipper_user_prompt = CLIPPER_USER_MESSAGE.format( source_content=transcript, clip_plan=single_clip_json ) clipper_messages = [ {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE}, {"role": "user", "content": clipper_user_prompt}, ] extraction_args = { "model": extraction_model, "messages": clipper_messages, "response_format": {"type": "json_object"}, } if extraction_model == "o3-mini": extraction_args["reasoning_effort"] = "low" else: extraction_args["max_tokens"] = 10000 extraction_args["temperature"] = 0.45 clipper_response = extraction_client.chat.completions.create(**extraction_args) extraction_response = clipper_response.choices[0].message.content.strip() extracted_clip = json.loads(extraction_response) # Save the extraction result for this clip in session state st.session_state[f"extracted_clip_{i}"] = extracted_clip except Exception as e: st.error(f"❌ Extraction error: {str(e)}") # Display extraction output if available if f"extracted_clip_{i}" in st.session_state: st.markdown("#### 📝 Extracted Transcript Section:") st.write(st.session_state[f"extracted_clip_{i}"])
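
# ---------------------------------------------------------------------------
# Illustrative sketch of the prompt modules this app imports. This is an
# assumption for documentation purposes only: the real SYSTEM_MESSAGE,
# USER_MESSAGE, CLIPPER_SYSTEM_MESSAGE, and CLIPPER_USER_MESSAGE live in
# prompts.py and clipper_prompts.py, and their exact wording may differ.
# The only contract the code above relies on is the set of .format()
# placeholders and the JSON shapes noted below.
#
#   # prompts.py (hypothetical)
#   SYSTEM_MESSAGE = (
#       "You are a content planner. Goal: {prompt_goal}. "
#       "Respond with a JSON object whose single key maps to a list of clips, "
#       'each with "Title", "Focus Prompt", and "Duration Target" fields.'
#   )
#   USER_MESSAGE = "Transcript:\n{source_content}"
#
#   # clipper_prompts.py (hypothetical)
#   CLIPPER_SYSTEM_MESSAGE = "You extract verbatim transcript sections. Respond in JSON."
#   CLIPPER_USER_MESSAGE = "Transcript:\n{source_content}\n\nClip plan:\n{clip_plan}"
#
# Expected plan response shape, e.g.:
#   {"clips": [{"Title": "...", "Focus Prompt": "...", "Duration Target": 30}, ...]}
#
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py
# ---------------------------------------------------------------------------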