Spaces:

rm8630
/

ai-transcript-clipper

Sleeping

App Files Files Community

Raj Jayendrakumar Muchhala committed on Feb 5, 2025

Commit

96430b4

1 Parent(s): bf2bf9c

update to individual clip plan

Browse files

Files changed (2) hide show

app.py +130 -123
clipper_prompts.py +25 -17

app.py CHANGED Viewed

@@ -8,31 +8,32 @@ import os
 # Set Streamlit layout to wide mode
 st.set_page_config(layout="wide")
-st.title("🎬 AI-Powered Content Planner -  Clip Creator")
-st.markdown("Paste a transcript on the left and view the generated content plan on the right.")
-# Sidebar for OpenAI API Key
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 DEFAULT_MODEL = "gpt-4o-2024-08-06"
-DEFAULT_GOAL = "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original sequence, optimizing for standalone engagement potential."
-# Sidebar: Model Selection for Stage 1 (Clip Plan Generation)
 st.sidebar.subheader("📤 Model for Clip Plan Generation")
 clip_plan_model = st.sidebar.selectbox(
     "Choose model for clip plan:",
     [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
-    index=0  # Default selection
 )
-# Sidebar: Model Selection for Stage 2 (Transcript Extraction)
 st.sidebar.subheader("📥 Model for Transcript Clipper")
 extraction_model = st.sidebar.selectbox(
     "Choose model for transcript clipper:",
     [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
-    index=0  # Default selection
 )
-# Sidebar: Goal Customization
 st.sidebar.subheader("🎯 Customize Prompt")
 GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
 GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL
@@ -43,121 +44,127 @@ if not OPENAI_API_KEY:
 client = OpenAI(api_key=OPENAI_API_KEY)
-# Layout: Left (Input) | Right (Output)
-col1, col2 = st.columns([1, 1])
-with col1:
     st.subheader("📝 Paste Your Transcript")
     transcript = st.text_area("Enter the transcript here:", height=400)
-with col2:
-    st.subheader("📋 Generated Content Plan")
-    generated_plan_container = st.container()
-generate_button = st.button("Generate Plan")
-if generate_button:
-    if not transcript.strip():
-        st.error("❌ Please enter a transcript.")
-    else:
-        with st.spinner("⏳ Generating content plan... Please wait."):
-            try:
-                # Define prompts
-                system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
-                user_prompt = USER_MESSAGE.format(source_content=transcript)
-                messages = [
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt},
-                ]
-                # Create placeholder for dynamic streaming
-                generated_plan_container.empty()
-                openai_args = {
-                    "model": clip_plan_model,
-                    "messages": messages,
-                    "response_format": {"type": "json_object"},
-                }
-                if clip_plan_model == 'o3-mini':
-                    openai_args['reasoning_effort'] = "low"
-                else:
-                    openai_args["max_tokens"] = 5000
-                    openai_args["temperature"] = 0.45
-                # Stream OpenAI API Response
-                response = client.chat.completions.create(**openai_args)
-                # Parse the response
-                generated_response = response.choices[0].message.content.strip()
-                content_plan = json.loads(generated_response)
-                # Save the content plan in the session state for Stage 2 : Transcript clipper
-                st.session_state.clip_plan_json = json.dumps(content_plan)
-                # Extract key (assuming there is only one key in the JSON response)
-                plan_key = list(content_plan.keys())[0]
-                clip_plans = content_plan.get(plan_key, [])
-                # Display final output
-                if clip_plans:
-                    with generated_plan_container.container():
-                        for i, clip in enumerate(clip_plans):
-                            st.markdown(f"### 🎬 Clip {i + 1}")
-                            st.write(f"**Title:** {clip.get('Title', 'N/A')}")
-                            st.write(f"**Focus Prompt:** {clip.get('Focus Prompt', 'N/A')}")
-                            st.write(f"**Duration:** {clip.get('Duration Target', 'N/A')} seconds")
-                            st.markdown("---")
-                else:
-                    st.error("⚠️ No clips were generated. Try again.")
-            except json.JSONDecodeError:
-                st.error("⚠️ Failed to parse OpenAI response. Try again.")
-            except Exception as e:
-                st.error(f"❌ Error: {str(e)}")
-# Stage 2: Extract Transcript Sections for Each Clip
-# Only show this if a clip plan was successfully generated
-if transcript.strip() and "clip_plan_json" in st.session_state:
-    st.subheader("✂️ AI Powered Transcript Clipper")
-    extract_button = st.button("Extract Transcript Sections")
-    if extract_button:
-        with st.spinner("⏳ Extracting transcript sections... Please wait."):
-            try:
-                clipper_system_prompt = CLIPPER_SYSTEM_MESSAGE
-                clipper_user_prompt = CLIPPER_USER_MESSAGE.format(source_content=transcript, clip_plan=st.session_state.clip_plan_json)
-                clipper_messages = [
-                    {"role": "system", "content": clipper_system_prompt},
-                    {"role": "user", "content": clipper_user_prompt},
-                ]
-                openai_args = {
-                    "model": extraction_model,
-                    "messages": clipper_messages,
-                    "response_format": {"type": "json_object"},
-                }
-                if extraction_model == 'o3-mini':
-                    openai_args['reasoning_effort'] = "low"
-                else:
-                    openai_args["max_tokens"] = 10000
-                    openai_args["temperature"] = 0.45
-                # Stream OpenAI API Response
-                clipper_response = client.chat.completions.create(**openai_args)
-                # Parse the extraction response
-                extraction_generated_response = clipper_response.choices[0].message.content.strip()
-                transcript_extraction = json.loads(extraction_generated_response)
-                # Display the extracted transcript sections
-                st.markdown("### 📝 Extracted Transcript Sections")
-                for clip_title, section in transcript_extraction.items():
-                    st.markdown(f"#### 🎬 {clip_title}")
-                    st.write(section)
-                    st.markdown("---")
-            except json.JSONDecodeError:
-                st.error("⚠️ Failed to parse transcript extraction response. Try again.")
-            except Exception as e:
-                st.error(f"❌ Error: {str(e)}")

 # Set Streamlit layout to wide mode
 st.set_page_config(layout="wide")
+st.title("🎬 AI-Powered Content Planner - Clip Creator")
+st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")
+# Sidebar for OpenAI API Key and configuration
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 DEFAULT_MODEL = "gpt-4o-2024-08-06"
+DEFAULT_GOAL = (
+    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
+    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
+    "sequence, optimizing for standalone engagement potential."
+)
 st.sidebar.subheader("📤 Model for Clip Plan Generation")
 clip_plan_model = st.sidebar.selectbox(
     "Choose model for clip plan:",
     [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
+    index=0
 )
 st.sidebar.subheader("📥 Model for Transcript Clipper")
 extraction_model = st.sidebar.selectbox(
     "Choose model for transcript clipper:",
     [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
+    index=0
 )
 st.sidebar.subheader("🎯 Customize Prompt")
 GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
 GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL
 client = OpenAI(api_key=OPENAI_API_KEY)
+# Layout: Two columns - left for transcript, right for clip plans and extraction
+col_transcript, col_output = st.columns([1, 1])
+# Left Column: Transcript Input
+with col_transcript:
     st.subheader("📝 Paste Your Transcript")
     transcript = st.text_area("Enter the transcript here:", height=400)
+# Right Column: Clip Plan Generation and Extraction
+with col_output:
+    st.subheader("📋 Generated Clip Plans")
+    # Button to generate clip plans from the transcript
+    if st.button("Generate Plan"):
+        if not transcript.strip():
+            st.error("❌ Please enter a transcript.")
+        else:
+            with st.spinner("⏳ Generating content plan... Please wait."):
+                try:
+                    # Prepare prompts for clip plan generation
+                    system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
+                    user_prompt = USER_MESSAGE.format(source_content=transcript)
+                    messages = [
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_prompt},
+                    ]
+                    openai_args = {
+                        "model": clip_plan_model,
+                        "messages": messages,
+                        "response_format": {"type": "json_object"},
+                    }
+                    if clip_plan_model == "o3-mini":
+                        openai_args["reasoning_effort"] = "low"
+                    else:
+                        openai_args["max_tokens"] = 5000
+                        openai_args["temperature"] = 0.45
+                    response = client.chat.completions.create(**openai_args)
+                    generated_response = response.choices[0].message.content.strip()
+                    content_plan = json.loads(generated_response)
+                    # Assume the response JSON has a single key containing a list of clip plans
+                    plan_key = list(content_plan.keys())[0]
+                    clip_plans = content_plan.get(plan_key, [])
+                    # Save clip plans in session state so they persist
+                    st.session_state.clip_plans = clip_plans
+                    # Clear any previous extraction outputs
+                    for i in range(len(clip_plans)):
+                        st.session_state.pop(f"extracted_clip_{i}", None)
+                except json.JSONDecodeError:
+                    st.error("⚠️ Failed to parse OpenAI response. Try again.")
+                except Exception as e:
+                    st.error(f"❌ Error: {str(e)}")
+    # Display clip plans if they exist in session state
+    if "clip_plans" in st.session_state:
+        # We'll work with a reference to the clip plans list
+        updated_clip_plans = st.session_state.clip_plans
+        for i, clip in enumerate(updated_clip_plans):
+            # Each clip is rendered in an expander with editable fields
+            with st.expander(f"🎬 Clip {i + 1}", expanded=True):
+                new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
+                new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
+                new_duration = st.number_input(
+                    "Duration Target (seconds)",
+                    value=float(clip.get("Duration Target", 0)),
+                    key=f"duration_{i}",
+                    step=1.0
+                )
+                # Update the clip plan with the edited values
+                updated_clip_plans[i]["Title"] = new_title
+                updated_clip_plans[i]["Focus Prompt"] = new_focus
+                updated_clip_plans[i]["Duration Target"] = new_duration
+                # Button to delete this clip plan
+                if st.button("Delete Clip", key=f"delete_{i}"):
+                    updated_clip_plans.pop(i)
+                    st.session_state.clip_plans = updated_clip_plans
+                    st.experimental_rerun()
+                # Button for transcript extraction for this clip
+                if st.button("Extract Transcript", key=f"extract_{i}"):
+                    with st.spinner("⏳ Extracting transcript section... Please wait."):
+                        try:
+                            # Send only the specific (and possibly edited) clip plan to the extractor
+                            single_clip_json = json.dumps(updated_clip_plans[i])
+                            clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
+                                source_content=transcript,
+                                clip_plan=single_clip_json
+                            )
+                            clipper_messages = [
+                                {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
+                                {"role": "user", "content": clipper_user_prompt},
+                            ]
+                            extraction_args = {
+                                "model": extraction_model,
+                                "messages": clipper_messages,
+                                "response_format": {"type": "json_object"},
+                            }
+                            if extraction_model == "o3-mini":
+                                extraction_args["reasoning_effort"] = "low"
+                            else:
+                                extraction_args["max_tokens"] = 10000
+                                extraction_args["temperature"] = 0.45
+                            clipper_response = client.chat.completions.create(**extraction_args)
+                            extraction_response = clipper_response.choices[0].message.content.strip()
+                            extracted_clip = json.loads(extraction_response)
+                            # Save the extraction result for this clip in session state
+                            st.session_state[f"extracted_clip_{i}"] = extracted_clip
+                        except Exception as e:
+                            st.error(f"❌ Extraction error: {str(e)}")
+                # Display extraction output if available
+                if f"extracted_clip_{i}" in st.session_state:
+                    st.markdown("#### 📝 Extracted Transcript Section:")
+                    st.write(st.session_state[f"extracted_clip_{i}"])

clipper_prompts.py CHANGED Viewed

@@ -1,33 +1,41 @@
 CLIPPER_SYSTEM_MESSAGE = '''
-You are a TranscriptExtractor, a specialist in processing long-form transcripts. Your role is to extract verbatim segments from a full transcript that directly correspond to each clip concept specified in a high-level clip plan.
-Each clip concept includes:
 - **Title:** A concise label for the clip.
-- **Focus Prompt:** A description of the key idea or moment to focus on.
-- **Duration Target:** The estimated length of the clip in seconds (for context only).
-When extracting transcript sections, follow these guidelines:
-1. **Verbatim Extraction:** Return the text exactly as it appears in the transcript.
-2. **Comprehensive Relevance:** Identify and extract all segments of the transcript that are clearly related to the clip’s Focus Prompt and Title. If multiple passages are relevant, combine them into one cohesive section. Do not omit any useful content—even if it means the extracted text is longer than the Duration Target.
-3. **Order:** Present the extracted content in the order it appears in the transcript.
-4. **Output Format:** Structure your answer as a JSON object. Each key should be the clip Title, and its value should be an object with a single key `"transcript"` containing the full extracted text for that clip.
-Focus solely on capturing every piece of relevant content that supports the clip concept.
-Do not add any extra commentary or paraphrasing.
 '''
 CLIPPER_USER_MESSAGE = '''
-Here is the reference transcript for extracting verbatim segments for each clip concept:
 ```
 {source_content}
 ```
-The high-level clip plan generated from this transcript includes the following clip concepts (in JSON format)::
 ```
 {clip_plan}
 ```
-Using the Focus Prompt and Title from each clip in the clip plan, extract all relevant portions of the transcript that directly support that clip concept.
-Ensure that you capture every useful piece of content from the transcript related to each clip—even
-if the total content exceeds the Duration Target.
 '''

 CLIPPER_SYSTEM_MESSAGE = '''
+You are a TranscriptExtractor, a specialized expert in finding and extracting verbatim segments from a full transcript to create engaging video clips.
+Your task is to extract verbatim segments from the transcript for a given clip plan based on:
 - **Title:** A concise label for the clip.
+- **Focus Prompt:** This is the main focus of the clip on which the verbatim segments should be based.
+- **Duration Target:** The desired duration for the clip.
+Instructions to follow when extracting verbatim segments from the transcript are as follows:
+1. Return the transcript text exactly as it appears.
+2. Identify and extract every segment of the transcript that relates to the Focus Prompt.
+3. If there are multiple relevant passages, combine them in the order they appear in the transcript.
+4. Duration Rules:
+    - Use the Duration Target as a guideline to select the most relevant content.
+    - Aim to extract transcript segments that roughly match the target duration.
+    - If the available relevant content is naturally shorter than the duration target, extract only what is relevant; do not force inclusion of irrelevant content solely to reach the target duration.
+### **OUTPUT:**
+- The response should be in the form of a JSON object with the following structure:
+```
+{
+    "Title": "Clip Title",
+    "Focus Prompt": "Clip Focus Prompt",
+    "Transcript": "Verbatim Segments",
+    "Duration": "Clip Duration"
+}
+```
 '''
 CLIPPER_USER_MESSAGE = '''
+Here is the reference transcript for extracting verbatim segments for a given clip plan:
 ```
 {source_content}
 ```
+The high-level clip plan is as follows:
 ```
 {clip_plan}
 ```
 '''