Spaces:
Sleeping
Sleeping
Raj Jayendrakumar Muchhala
committed on
Commit
·
96430b4
1
Parent(s):
bf2bf9c
update to individual clip plan
Browse files- app.py +130 -123
- clipper_prompts.py +25 -17
app.py
CHANGED
|
@@ -8,31 +8,32 @@ import os
|
|
| 8 |
# Set Streamlit layout to wide mode
|
| 9 |
st.set_page_config(layout="wide")
|
| 10 |
|
| 11 |
-
st.title("🎬 AI-Powered Content Planner -
|
| 12 |
-
st.markdown("Paste a transcript on the left and view the generated content plan on the right.")
|
| 13 |
|
| 14 |
-
# Sidebar for OpenAI API Key
|
| 15 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 16 |
DEFAULT_MODEL = "gpt-4o-2024-08-06"
|
| 17 |
-
DEFAULT_GOAL =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
-
# Sidebar: Model Selection for Stage 1 (Clip Plan Generation)
|
| 20 |
st.sidebar.subheader("📤 Model for Clip Plan Generation")
|
| 21 |
clip_plan_model = st.sidebar.selectbox(
|
| 22 |
"Choose model for clip plan:",
|
| 23 |
[DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
|
| 24 |
-
index=0
|
| 25 |
)
|
| 26 |
|
| 27 |
-
# Sidebar: Model Selection for Stage 2 (Transcript Extraction)
|
| 28 |
st.sidebar.subheader("📥 Model for Transcript Clipper")
|
| 29 |
extraction_model = st.sidebar.selectbox(
|
| 30 |
"Choose model for transcript clipper:",
|
| 31 |
[DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
|
| 32 |
-
index=0
|
| 33 |
)
|
| 34 |
|
| 35 |
-
# Sidebar: Goal Customization
|
| 36 |
st.sidebar.subheader("🎯 Customize Prompt")
|
| 37 |
GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
|
| 38 |
GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL
|
|
@@ -43,121 +44,127 @@ if not OPENAI_API_KEY:
|
|
| 43 |
|
| 44 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
| 45 |
|
| 46 |
-
# Layout:
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
|
|
|
| 50 |
st.subheader("📝 Paste Your Transcript")
|
| 51 |
transcript = st.text_area("Enter the transcript here:", height=400)
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
if
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
#
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
# Set Streamlit layout to wide mode
|
| 9 |
st.set_page_config(layout="wide")
|
| 10 |
|
| 11 |
+
st.title("🎬 AI-Powered Content Planner - Clip Creator")
|
| 12 |
+
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")
|
| 13 |
|
| 14 |
+
# Sidebar for OpenAI API Key and configuration
|
| 15 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 16 |
DEFAULT_MODEL = "gpt-4o-2024-08-06"
|
| 17 |
+
DEFAULT_GOAL = (
|
| 18 |
+
"Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
|
| 19 |
+
"and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
|
| 20 |
+
"sequence, optimizing for standalone engagement potential."
|
| 21 |
+
)
|
| 22 |
|
|
|
|
| 23 |
st.sidebar.subheader("📤 Model for Clip Plan Generation")
|
| 24 |
clip_plan_model = st.sidebar.selectbox(
|
| 25 |
"Choose model for clip plan:",
|
| 26 |
[DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
|
| 27 |
+
index=0
|
| 28 |
)
|
| 29 |
|
|
|
|
| 30 |
st.sidebar.subheader("📥 Model for Transcript Clipper")
|
| 31 |
extraction_model = st.sidebar.selectbox(
|
| 32 |
"Choose model for transcript clipper:",
|
| 33 |
[DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
|
| 34 |
+
index=0
|
| 35 |
)
|
| 36 |
|
|
|
|
| 37 |
st.sidebar.subheader("🎯 Customize Prompt")
|
| 38 |
GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
|
| 39 |
GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL
|
|
|
|
| 44 |
|
| 45 |
client = OpenAI(api_key=OPENAI_API_KEY)
|
| 46 |
|
| 47 |
+
# Layout: Two columns - left for transcript, right for clip plans and extraction
|
| 48 |
+
col_transcript, col_output = st.columns([1, 1])
|
| 49 |
|
| 50 |
+
# Left Column: Transcript Input
|
| 51 |
+
with col_transcript:
|
| 52 |
st.subheader("📝 Paste Your Transcript")
|
| 53 |
transcript = st.text_area("Enter the transcript here:", height=400)
|
| 54 |
|
| 55 |
+
# Right Column: Clip Plan Generation and Extraction
|
| 56 |
+
with col_output:
|
| 57 |
+
st.subheader("📋 Generated Clip Plans")
|
| 58 |
+
|
| 59 |
+
# Button to generate clip plans from the transcript
|
| 60 |
+
if st.button("Generate Plan"):
|
| 61 |
+
if not transcript.strip():
|
| 62 |
+
st.error("❌ Please enter a transcript.")
|
| 63 |
+
else:
|
| 64 |
+
with st.spinner("⏳ Generating content plan... Please wait."):
|
| 65 |
+
try:
|
| 66 |
+
# Prepare prompts for clip plan generation
|
| 67 |
+
system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
|
| 68 |
+
user_prompt = USER_MESSAGE.format(source_content=transcript)
|
| 69 |
+
messages = [
|
| 70 |
+
{"role": "system", "content": system_prompt},
|
| 71 |
+
{"role": "user", "content": user_prompt},
|
| 72 |
+
]
|
| 73 |
+
|
| 74 |
+
openai_args = {
|
| 75 |
+
"model": clip_plan_model,
|
| 76 |
+
"messages": messages,
|
| 77 |
+
"response_format": {"type": "json_object"},
|
| 78 |
+
}
|
| 79 |
+
if clip_plan_model == "o3-mini":
|
| 80 |
+
openai_args["reasoning_effort"] = "low"
|
| 81 |
+
else:
|
| 82 |
+
openai_args["max_tokens"] = 5000
|
| 83 |
+
openai_args["temperature"] = 0.45
|
| 84 |
+
|
| 85 |
+
response = client.chat.completions.create(**openai_args)
|
| 86 |
+
generated_response = response.choices[0].message.content.strip()
|
| 87 |
+
content_plan = json.loads(generated_response)
|
| 88 |
+
|
| 89 |
+
# Assume the response JSON has a single key containing a list of clip plans
|
| 90 |
+
plan_key = list(content_plan.keys())[0]
|
| 91 |
+
clip_plans = content_plan.get(plan_key, [])
|
| 92 |
+
|
| 93 |
+
# Save clip plans in session state so they persist
|
| 94 |
+
st.session_state.clip_plans = clip_plans
|
| 95 |
+
|
| 96 |
+
# Clear any previous extraction outputs
|
| 97 |
+
for i in range(len(clip_plans)):
|
| 98 |
+
st.session_state.pop(f"extracted_clip_{i}", None)
|
| 99 |
+
except json.JSONDecodeError:
|
| 100 |
+
st.error("⚠️ Failed to parse OpenAI response. Try again.")
|
| 101 |
+
except Exception as e:
|
| 102 |
+
st.error(f"❌ Error: {str(e)}")
|
| 103 |
+
|
| 104 |
+
# Display clip plans if they exist in session state
|
| 105 |
+
if "clip_plans" in st.session_state:
|
| 106 |
+
# We'll work with a reference to the clip plans list
|
| 107 |
+
updated_clip_plans = st.session_state.clip_plans
|
| 108 |
+
|
| 109 |
+
for i, clip in enumerate(updated_clip_plans):
|
| 110 |
+
# Each clip is rendered in an expander with editable fields
|
| 111 |
+
with st.expander(f"🎬 Clip {i + 1}", expanded=True):
|
| 112 |
+
new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
|
| 113 |
+
new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
|
| 114 |
+
new_duration = st.number_input(
|
| 115 |
+
"Duration Target (seconds)",
|
| 116 |
+
value=float(clip.get("Duration Target", 0)),
|
| 117 |
+
key=f"duration_{i}",
|
| 118 |
+
step=1.0
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
# Update the clip plan with the edited values
|
| 122 |
+
updated_clip_plans[i]["Title"] = new_title
|
| 123 |
+
updated_clip_plans[i]["Focus Prompt"] = new_focus
|
| 124 |
+
updated_clip_plans[i]["Duration Target"] = new_duration
|
| 125 |
+
|
| 126 |
+
# Button to delete this clip plan
|
| 127 |
+
if st.button("Delete Clip", key=f"delete_{i}"):
|
| 128 |
+
updated_clip_plans.pop(i)
|
| 129 |
+
st.session_state.clip_plans = updated_clip_plans
|
| 130 |
+
st.rerun()  # st.experimental_rerun was deprecated in favor of st.rerun and is gone from recent Streamlit releases
|
| 131 |
+
|
| 132 |
+
# Button for transcript extraction for this clip
|
| 133 |
+
if st.button("Extract Transcript", key=f"extract_{i}"):
|
| 134 |
+
with st.spinner("⏳ Extracting transcript section... Please wait."):
|
| 135 |
+
try:
|
| 136 |
+
# Send only the specific (and possibly edited) clip plan to the extractor
|
| 137 |
+
single_clip_json = json.dumps(updated_clip_plans[i])
|
| 138 |
+
clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
|
| 139 |
+
source_content=transcript,
|
| 140 |
+
clip_plan=single_clip_json
|
| 141 |
+
)
|
| 142 |
+
clipper_messages = [
|
| 143 |
+
{"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
|
| 144 |
+
{"role": "user", "content": clipper_user_prompt},
|
| 145 |
+
]
|
| 146 |
+
|
| 147 |
+
extraction_args = {
|
| 148 |
+
"model": extraction_model,
|
| 149 |
+
"messages": clipper_messages,
|
| 150 |
+
"response_format": {"type": "json_object"},
|
| 151 |
+
}
|
| 152 |
+
if extraction_model == "o3-mini":
|
| 153 |
+
extraction_args["reasoning_effort"] = "low"
|
| 154 |
+
else:
|
| 155 |
+
extraction_args["max_tokens"] = 10000
|
| 156 |
+
extraction_args["temperature"] = 0.45
|
| 157 |
+
|
| 158 |
+
clipper_response = client.chat.completions.create(**extraction_args)
|
| 159 |
+
extraction_response = clipper_response.choices[0].message.content.strip()
|
| 160 |
+
extracted_clip = json.loads(extraction_response)
|
| 161 |
+
|
| 162 |
+
# Save the extraction result for this clip in session state
|
| 163 |
+
st.session_state[f"extracted_clip_{i}"] = extracted_clip
|
| 164 |
+
except Exception as e:
|
| 165 |
+
st.error(f"❌ Extraction error: {str(e)}")
|
| 166 |
+
|
| 167 |
+
# Display extraction output if available
|
| 168 |
+
if f"extracted_clip_{i}" in st.session_state:
|
| 169 |
+
st.markdown("#### 📝 Extracted Transcript Section:")
|
| 170 |
+
st.write(st.session_state[f"extracted_clip_{i}"])
|
clipper_prompts.py
CHANGED
|
@@ -1,33 +1,41 @@
|
|
| 1 |
CLIPPER_SYSTEM_MESSAGE = '''
|
| 2 |
-
You are a TranscriptExtractor, a
|
| 3 |
|
| 4 |
-
|
| 5 |
- **Title:** A concise label for the clip.
|
| 6 |
-
- **Focus Prompt:**
|
| 7 |
-
- **Duration Target:** The
|
| 8 |
|
| 9 |
-
|
| 10 |
-
1.
|
| 11 |
-
2.
|
| 12 |
-
3.
|
| 13 |
-
4.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
Focus solely on capturing every piece of relevant content that supports the clip concept.
|
| 16 |
-
Do not add any extra commentary or paraphrasing.
|
| 17 |
'''
|
| 18 |
|
| 19 |
CLIPPER_USER_MESSAGE = '''
|
| 20 |
-
Here is the reference transcript for extracting verbatim segments for
|
| 21 |
```
|
| 22 |
{source_content}
|
| 23 |
```
|
| 24 |
-
The high-level clip plan
|
| 25 |
```
|
| 26 |
{clip_plan}
|
| 27 |
```
|
| 28 |
-
|
| 29 |
-
Using the Focus Prompt and Title from each clip in the clip plan, extract all relevant portions of the transcript that directly support that clip concept.
|
| 30 |
-
Ensure that you capture every useful piece of content from the transcript related to each clip—even
|
| 31 |
-
if the total content exceeds the Duration Target.
|
| 32 |
'''
|
| 33 |
|
|
|
|
| 1 |
CLIPPER_SYSTEM_MESSAGE = '''
|
| 2 |
+
You are a TranscriptExtractor, a specialized expert in finding and extracting verbatim segments from a full transcript to create engaging video clips.
|
| 3 |
|
| 4 |
+
Your task is to extract verbatim segments from the transcript for a given clip plan based on:
|
| 5 |
- **Title:** A concise label for the clip.
|
| 6 |
+
- **Focus Prompt:** This is the main focus of the clip on which the verbatim segments should be based.
|
| 7 |
+
- **Duration Target:** The desired duration for the clip.
|
| 8 |
|
| 9 |
+
Instructions to follow when extracting verbatim segments from the transcript are as follows:
|
| 10 |
+
1. Return the transcript text exactly as it appears.
|
| 11 |
+
2. Identify and extract every segment of the transcript that relates to the Focus Prompt.
|
| 12 |
+
3. If there are multiple relevant passages, combine them in the order they appear in the transcript.
|
| 13 |
+
4. Duration Rules:
|
| 14 |
+
- Use the Duration Target as a guideline to select the most relevant content.
|
| 15 |
+
- Aim to extract transcript segments that roughly match the target duration.
|
| 16 |
+
- If the available relevant content is naturally shorter than the duration target, extract only what is relevant, do not force inclusion of irrelevant content solely to reach the target duration.
|
| 17 |
+
|
| 18 |
+
### **OUTPUT:**
|
| 19 |
+
- The response should be in the form of a JSON object with the following structure:
|
| 20 |
+
```
|
| 21 |
+
{
|
| 22 |
+
"Title": "Clip Title",
|
| 23 |
+
"Focus Prompt": "Clip Focus Prompt",
|
| 24 |
+
"Transcript": "Verbatim Segments",
|
| 25 |
+
"Duration": "Clip Duration",
|
| 26 |
+
}
|
| 27 |
+
```
|
| 28 |
|
|
|
|
|
|
|
| 29 |
'''
|
| 30 |
|
| 31 |
CLIPPER_USER_MESSAGE = '''
|
| 32 |
+
Here is the reference transcript for extracting verbatim segments for a given clip plan:
|
| 33 |
```
|
| 34 |
{source_content}
|
| 35 |
```
|
| 36 |
+
The high-level clip plan is as follows:
|
| 37 |
```
|
| 38 |
{clip_plan}
|
| 39 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
'''
|
| 41 |
|