Spaces:
Sleeping
Sleeping
Raj Jayendrakumar Muchhala
committed on
Commit
·
bf2bf9c
1
Parent(s):
9c850c5
Update files for transcript clipper
Browse files- app.py +163 -0
- clipper_prompts.py +33 -0
- prompts.py +28 -0
- requirements.txt +1 -0
app.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit app: AI-Powered Content Planner - Clip Creator.

Stage 1 turns a pasted transcript into a high-level clip plan (JSON) via the
OpenAI chat completions API. Stage 2 extracts verbatim transcript sections for
each planned clip. The clip plan is kept in ``st.session_state`` so Stage 2
survives Streamlit reruns.
"""
import json
import os

import streamlit as st
from openai import OpenAI

from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE

# Use the full browser width so the side-by-side input/output columns have room.
st.set_page_config(layout="wide")

st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan on the right.")

# The API key is read from the environment only; there is no key input widget.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
DEFAULT_MODEL = "gpt-4o-2024-08-06"
DEFAULT_GOAL = "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original sequence, optimizing for standalone engagement potential."

# Same model choices for both stages.
MODEL_OPTIONS = [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"]


def _build_openai_args(model, messages, max_tokens):
    """Build the kwargs dict for ``client.chat.completions.create``.

    o3-mini is a reasoning model that rejects ``temperature``/``max_tokens``,
    so it gets ``reasoning_effort`` instead; every other model gets the
    sampling parameters. Shared by both stages to avoid the duplicated
    branching the stages previously carried.
    """
    args = {
        "model": model,
        "messages": messages,
        "response_format": {"type": "json_object"},
    }
    if model == "o3-mini":
        args["reasoning_effort"] = "low"
    else:
        args["max_tokens"] = max_tokens
        args["temperature"] = 0.45
    return args


# Sidebar: Model Selection for Stage 1 (Clip Plan Generation)
st.sidebar.subheader("📤 Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    MODEL_OPTIONS,
    index=0,  # default to DEFAULT_MODEL
)

# Sidebar: Model Selection for Stage 2 (Transcript Extraction)
st.sidebar.subheader("📥 Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    MODEL_OPTIONS,
    index=0,  # default to DEFAULT_MODEL
)

# Sidebar: Goal Customization — fall back to DEFAULT_GOAL on blank input.
st.sidebar.subheader("🎯 Customize Prompt")
custom_goal = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
GOAL = custom_goal.strip() or DEFAULT_GOAL

if not OPENAI_API_KEY:
    # The key comes from the environment, so point the user there instead of
    # implying there is somewhere in the UI to "enter" it.
    st.warning("⚠️ OPENAI_API_KEY environment variable is not set. Please set it and restart the app.")
    st.stop()

client = OpenAI(api_key=OPENAI_API_KEY)

# Layout: Left (Input) | Right (Output)
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("📝 Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

with col2:
    st.subheader("📋 Generated Content Plan")
    generated_plan_container = st.container()

generate_button = st.button("Generate Plan")

if generate_button:
    if not transcript.strip():
        st.error("❌ Please enter a transcript.")
    else:
        with st.spinner("⏳ Generating content plan... Please wait."):
            try:
                messages = [
                    {"role": "system", "content": SYSTEM_MESSAGE.format(prompt_goal=GOAL)},
                    {"role": "user", "content": USER_MESSAGE.format(source_content=transcript)},
                ]

                openai_args = _build_openai_args(clip_plan_model, messages, max_tokens=5000)
                response = client.chat.completions.create(**openai_args)

                # Parse the JSON-mode response.
                generated_response = response.choices[0].message.content.strip()
                content_plan = json.loads(generated_response)

                # Persist the plan for Stage 2 (transcript clipper) across reruns.
                st.session_state.clip_plan_json = json.dumps(content_plan)

                # The model wraps the clip list under a single top-level key;
                # guard against an empty object instead of indexing blindly.
                plan_key = next(iter(content_plan), None)
                clip_plans = content_plan.get(plan_key, []) if plan_key else []

                if clip_plans:
                    with generated_plan_container.container():
                        for i, clip in enumerate(clip_plans):
                            st.markdown(f"### 🎬 Clip {i + 1}")
                            st.write(f"**Title:** {clip.get('Title', 'N/A')}")
                            st.write(f"**Focus Prompt:** {clip.get('Focus Prompt', 'N/A')}")
                            st.write(f"**Duration:** {clip.get('Duration Target', 'N/A')} seconds")
                            st.markdown("---")
                else:
                    st.error("⚠️ No clips were generated. Try again.")

            except json.JSONDecodeError:
                st.error("⚠️ Failed to parse OpenAI response. Try again.")
            except Exception as e:
                st.error(f"❌ Error: {str(e)}")

# Stage 2: Extract Transcript Sections for Each Clip.
# Only shown once a clip plan was successfully generated.
if transcript.strip() and "clip_plan_json" in st.session_state:
    st.subheader("✂️ AI Powered Transcript Clipper")
    extract_button = st.button("Extract Transcript Sections")

    if extract_button:
        with st.spinner("⏳ Extracting transcript sections... Please wait."):
            try:
                clipper_messages = [
                    {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
                    {
                        "role": "user",
                        "content": CLIPPER_USER_MESSAGE.format(
                            source_content=transcript,
                            clip_plan=st.session_state.clip_plan_json,
                        ),
                    },
                ]

                openai_args = _build_openai_args(extraction_model, clipper_messages, max_tokens=10000)
                clipper_response = client.chat.completions.create(**openai_args)

                # Parse the extraction response.
                extraction_generated_response = clipper_response.choices[0].message.content.strip()
                transcript_extraction = json.loads(extraction_generated_response)

                # Display the extracted transcript sections, one per clip title.
                st.markdown("### 📝 Extracted Transcript Sections")
                for clip_title, section in transcript_extraction.items():
                    st.markdown(f"#### 🎬 {clip_title}")
                    st.write(section)
                    st.markdown("---")

            except json.JSONDecodeError:
                st.error("⚠️ Failed to parse transcript extraction response. Try again.")
            except Exception as e:
                st.error(f"❌ Error: {str(e)}")
|
clipper_prompts.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Prompt templates for Stage 2: extracting verbatim transcript sections for
# each clip concept in a previously generated clip plan.
# CLIPPER_USER_MESSAGE placeholders: {source_content}, {clip_plan}.

CLIPPER_SYSTEM_MESSAGE = '''
You are a TranscriptExtractor, a specialist in processing long-form transcripts. Your role is to extract verbatim segments from a full transcript that directly correspond to each clip concept specified in a high-level clip plan.

Each clip concept includes:
- **Title:** A concise label for the clip.
- **Focus Prompt:** A description of the key idea or moment to focus on.
- **Duration Target:** The estimated length of the clip in seconds (for context only).

When extracting transcript sections, follow these guidelines:
1. **Verbatim Extraction:** Return the text exactly as it appears in the transcript.
2. **Comprehensive Relevance:** Identify and extract all segments of the transcript that are clearly related to the clip’s Focus Prompt and Title. If multiple passages are relevant, combine them into one cohesive section. Do not omit any useful content—even if it means the extracted text is longer than the Duration Target.
3. **Order:** Present the extracted content in the order it appears in the transcript.
4. **Output Format:** Structure your answer as a JSON object. Each key should be the clip Title, and its value should be an object with a single key `"transcript"` containing the full extracted text for that clip.

Focus solely on capturing every piece of relevant content that supports the clip concept.
Do not add any extra commentary or paraphrasing.
'''

CLIPPER_USER_MESSAGE = '''
Here is the reference transcript for extracting verbatim segments for each clip concept:
```
{source_content}
```
The high-level clip plan generated from this transcript includes the following clip concepts (in JSON format):
```
{clip_plan}
```

Using the Focus Prompt and Title from each clip in the clip plan, extract all relevant portions of the transcript that directly support that clip concept.
Ensure that you capture every useful piece of content from the transcript related to each clip—even
if the total content exceeds the Duration Target.
'''
|
prompts.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Prompt templates for Stage 1: generating a high-level clip plan
# (Title / Focus Prompt / Duration Target per clip) from a source transcript.
# SYSTEM_MESSAGE placeholder: {prompt_goal}. USER_MESSAGE placeholder: {source_content}.

SYSTEM_MESSAGE = '''
You are a ClipCreator, a specialized expert that helps users create multiple video clips from a single source
transcript to maximize engagement and audience retention helping marketers looking to repurpose long form interviews,
webinars and event footage by generating a **high-level plan** for these clips.

### **GOAL**
Your main goal is to **{prompt_goal}**

### **IMPORTANT: Topic-Specific Output**
If the main goal specifies that only clips related to a particular topic or idea should be extracted, then you must strictly adhere to that subject:
- **Do not** generate clip concepts for topics that are not directly related to the specified idea.
- **Do not** create multiple variations of clip titles for the same idea unless the transcript clearly warrants additional distinct clips.
- Avoid splitting a single idea into multiple clips based solely on isolated sentences. Instead, if the content related to the idea is continuous or does not offer clear, separate subtopics, consolidate it into one cohesive clip.
- Only include clip plans that are directly supported by the content of the transcript and aligned with the given goal.

### **OUTPUT : Generate Clip Concepts**
Format the response as a JSON object with Title, Focus Prompt, and Duration Target for each clip concept:
- **Title:** A concise and descriptive title for the clip that encapsulate the main idea or highlight.
- **Focus Prompt:** A short description starting with "Focus on..." that explains the central theme or takeaway of the clip.
- **Duration Target:** Estimated duration (in seconds) based on the transcript length and complexity.
'''

USER_MESSAGE = '''
Here is the reference transcript for generating a high level plan for multiple video clips:
```
{source_content}
```
'''
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
openai
streamlit
|