# Streamlit app: AI-powered content planner / clip creator.
# (Header reconstructed; the original lines were Hugging Face Spaces page residue.)
import streamlit as st
from openai import OpenAI
from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
import json
import os

# Use the full browser width so the two-column layout has room.
st.set_page_config(layout="wide")

# NOTE(review): emoji below restored from cp1253 mojibake ("π¬") — 🎬 fits the clip theme; confirm.
st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")

# Models routed to OpenAI's API vs. Groq's OpenAI-compatible endpoint.
OPENAI_MODELS = ["gpt-4o", "gpt-4o-mini", "o3-mini"]
GROQ_MODELS = ["llama-3.3-70b-specdec", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
# Sidebar: model selection and prompt configuration.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Default model name kept for reference (not read below — candidate for removal, verify no external use).
DEFAULT_MODEL = "llama-3.3-70b-specdec"
DEFAULT_GOAL = (
    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
    "sequence, optimizing for standalone engagement potential."
)

# NOTE(review): sidebar emoji restored from mojibake ("π€", "π₯") — confirm original glyphs.
st.sidebar.subheader("🤖 Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0,
)

st.sidebar.subheader("🎥 Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0,
)
def _client_for(model):
    """Build an OpenAI-SDK client for *model*.

    Groq-hosted models go through Groq's OpenAI-compatible endpoint with
    GROQ_API_KEY; everything else goes to OpenAI with OPENAI_API_KEY.
    """
    if model in GROQ_MODELS:
        return OpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.environ.get("GROQ_API_KEY"))
    return OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


plan_client = _client_for(clip_plan_model)
extraction_client = _client_for(extraction_model)

st.sidebar.subheader("🎯 Customize Prompt")
GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", value=DEFAULT_GOAL, height=100)
# Fall back to the default goal when the box is left blank.
GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL

# Bug fix: the original stopped whenever OPENAI_API_KEY was unset, even though
# the default (index 0) models are Groq-hosted and only need GROQ_API_KEY.
# Require only the key(s) the currently selected models actually use.
selected_models = {clip_plan_model, extraction_model}
if any(m in OPENAI_MODELS for m in selected_models) and not os.environ.get("OPENAI_API_KEY"):
    st.warning("⚠️ Please enter your OpenAI API key.")
    st.stop()
if any(m in GROQ_MODELS for m in selected_models) and not os.environ.get("GROQ_API_KEY"):
    st.warning("⚠️ Please set your GROQ_API_KEY.")
    st.stop()
# Layout: two equal columns — transcript input left, plans/extractions right.
col_transcript, col_output = st.columns([1, 1])

# Left column: transcript input plus optional media playback.
with col_transcript:
    # NOTE(review): emoji restored from mojibake ("π", "π₯") — confirm glyphs.
    st.subheader("📝 Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

    # Reference link for users who still need to generate a transcript.
    st.markdown("---")
    st.markdown(
        """
        <div style="font-size:18px; font-weight:bold; margin-top:10px;">
        Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
        OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
        </div>
        """,
        unsafe_allow_html=True,
    )
    st.markdown("---")

    st.subheader("🎥 Video/Audio Upload & Playback")
    media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
    if media_file is not None:
        # Pick the player widget from the uploaded file's MIME type.
        if media_file.type.startswith("video"):
            st.video(media_file)
        elif media_file.type.startswith("audio"):
            st.audio(media_file)
# Right column: generate the clip plan from the transcript.
with col_output:
    st.subheader("📋 Generated Clip Plans")
    if st.button("Generate Plan"):
        if not transcript.strip():
            st.error("❌ Please enter a transcript.")
        else:
            with st.spinner("⏳ Generating content plan... Please wait."):
                try:
                    # Prompts for clip plan generation.
                    system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
                    user_prompt = USER_MESSAGE.format(source_content=transcript)
                    messages = [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ]
                    openai_args = {
                        "model": clip_plan_model,
                        "messages": messages,
                        "response_format": {"type": "json_object"},
                    }
                    # o3-mini is a reasoning model: it takes reasoning_effort
                    # and rejects temperature/max_tokens.
                    if clip_plan_model == "o3-mini":
                        openai_args["reasoning_effort"] = "low"
                    else:
                        openai_args["max_tokens"] = 5000
                        openai_args["temperature"] = 0.45
                    response = plan_client.chat.completions.create(**openai_args)
                    generated_response = response.choices[0].message.content.strip()
                    content_plan = json.loads(generated_response)
                    # The response JSON is assumed to have a single top-level
                    # key holding the list of clip plans. Guard against an
                    # empty object — the original `list(keys())[0]` raised
                    # IndexError on `{}`.
                    plan_key = next(iter(content_plan), None)
                    clip_plans = content_plan.get(plan_key, []) if plan_key is not None else []
                    # Persist the plans across reruns.
                    st.session_state.clip_plans = clip_plans
                    # Clear ALL previous extraction outputs. The original only
                    # popped indices < len(clip_plans), leaving stale results
                    # for higher indices when the new plan is shorter.
                    stale_keys = [k for k in st.session_state.keys() if str(k).startswith("extracted_clip_")]
                    for key in stale_keys:
                        st.session_state.pop(key, None)
                except json.JSONDecodeError:
                    st.error("⚠️ Failed to parse OpenAI response. Try again.")
                except Exception as e:
                    st.error(f"❌ Error: {str(e)}")
# Right column (continued): render editable clip plans with per-clip
# delete / extract actions. Re-enters the column container so this block is
# self-contained (Streamlit containers may be entered multiple times).
with col_output:
    if "clip_plans" in st.session_state:
        # Work on a reference to the stored list so edits persist across reruns.
        updated_clip_plans = st.session_state.clip_plans
        for i, clip in enumerate(updated_clip_plans):
            # Each clip is rendered in an expander with editable fields.
            with st.expander(f"🎬 Clip {i + 1}", expanded=True):
                new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
                new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
                new_duration = st.number_input(
                    "Duration Target (seconds)",
                    value=float(clip.get("Duration Target", 0)),
                    key=f"duration_{i}",
                    step=1.0,
                )
                # Write the edited values back into the plan.
                updated_clip_plans[i]["Title"] = new_title
                updated_clip_plans[i]["Focus Prompt"] = new_focus
                updated_clip_plans[i]["Duration Target"] = new_duration

                # Delete this clip plan and rerun so the UI re-indexes.
                if st.button("Delete Clip", key=f"delete_{i}"):
                    remaining = st.session_state.clip_plans.copy()
                    del remaining[i]
                    st.session_state.clip_plans = remaining
                    st.rerun()

                # Extract the transcript section for this (possibly edited) clip.
                if st.button("Extract Transcript", key=f"extract_{i}"):
                    with st.spinner("⏳ Extracting transcript section... Please wait."):
                        try:
                            # Send only this specific clip plan to the extractor.
                            single_clip_json = json.dumps(updated_clip_plans[i])
                            clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
                                source_content=transcript,
                                clip_plan=single_clip_json,
                            )
                            clipper_messages = [
                                {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
                                {"role": "user", "content": clipper_user_prompt},
                            ]
                            extraction_args = {
                                "model": extraction_model,
                                "messages": clipper_messages,
                                "response_format": {"type": "json_object"},
                            }
                            # o3-mini is a reasoning model: no temperature/max_tokens.
                            if extraction_model == "o3-mini":
                                extraction_args["reasoning_effort"] = "low"
                            else:
                                extraction_args["max_tokens"] = 10000
                                extraction_args["temperature"] = 0.45
                            clipper_response = extraction_client.chat.completions.create(**extraction_args)
                            extraction_response = clipper_response.choices[0].message.content.strip()
                            extracted_clip = json.loads(extraction_response)
                            # Cache the extraction result for this clip.
                            st.session_state[f"extracted_clip_{i}"] = extracted_clip
                        except Exception as e:
                            st.error(f"❌ Extraction error: {str(e)}")

                # Show the stored extraction for this clip, if any.
                # NOTE(review): emoji restored from mojibake ("π") — confirm glyph.
                if f"extracted_clip_{i}" in st.session_state:
                    st.markdown("#### 📜 Extracted Transcript Section:")
                    st.write(st.session_state[f"extracted_clip_{i}"])