Raj Jayendrakumar Muchhala committed on
Commit
96430b4
·
1 Parent(s): bf2bf9c

update to individual clip plan

Browse files
Files changed (2) hide show
  1. app.py +130 -123
  2. clipper_prompts.py +25 -17
app.py CHANGED
@@ -8,31 +8,32 @@ import os
8
  # Set Streamlit layout to wide mode
9
  st.set_page_config(layout="wide")
10
 
11
- st.title("🎬 AI-Powered Content Planner - Clip Creator")
12
- st.markdown("Paste a transcript on the left and view the generated content plan on the right.")
13
 
14
- # Sidebar for OpenAI API Key
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
  DEFAULT_MODEL = "gpt-4o-2024-08-06"
17
- DEFAULT_GOAL = "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original sequence, optimizing for standalone engagement potential."
 
 
 
 
18
 
19
- # Sidebar: Model Selection for Stage 1 (Clip Plan Generation)
20
  st.sidebar.subheader("📤 Model for Clip Plan Generation")
21
  clip_plan_model = st.sidebar.selectbox(
22
  "Choose model for clip plan:",
23
  [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
24
- index=0 # Default selection
25
  )
26
 
27
- # Sidebar: Model Selection for Stage 2 (Transcript Extraction)
28
  st.sidebar.subheader("📥 Model for Transcript Clipper")
29
  extraction_model = st.sidebar.selectbox(
30
  "Choose model for transcript clipper:",
31
  [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
32
- index=0 # Default selection
33
  )
34
 
35
- # Sidebar: Goal Customization
36
  st.sidebar.subheader("🎯 Customize Prompt")
37
  GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
38
  GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL
@@ -43,121 +44,127 @@ if not OPENAI_API_KEY:
43
 
44
  client = OpenAI(api_key=OPENAI_API_KEY)
45
 
46
- # Layout: Left (Input) | Right (Output)
47
- col1, col2 = st.columns([1, 1])
48
 
49
- with col1:
 
50
  st.subheader("📝 Paste Your Transcript")
51
  transcript = st.text_area("Enter the transcript here:", height=400)
52
 
53
- with col2:
54
- st.subheader("📋 Generated Content Plan")
55
- generated_plan_container = st.container()
56
-
57
- generate_button = st.button("Generate Plan")
58
-
59
- if generate_button:
60
- if not transcript.strip():
61
- st.error("❌ Please enter a transcript.")
62
- else:
63
- with st.spinner("⏳ Generating content plan... Please wait."):
64
- try:
65
- # Define prompts
66
- system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
67
- user_prompt = USER_MESSAGE.format(source_content=transcript)
68
-
69
- messages = [
70
- {"role": "system", "content": system_prompt},
71
- {"role": "user", "content": user_prompt},
72
- ]
73
-
74
- # Create placeholder for dynamic streaming
75
- generated_plan_container.empty()
76
-
77
- openai_args = {
78
- "model": clip_plan_model,
79
- "messages": messages,
80
- "response_format": {"type": "json_object"},
81
- }
82
- if clip_plan_model == 'o3-mini':
83
- openai_args['reasoning_effort'] = "low"
84
- else:
85
- openai_args["max_tokens"] = 5000
86
- openai_args["temperature"] = 0.45
87
-
88
- # Stream OpenAI API Response
89
- response = client.chat.completions.create(**openai_args)
90
-
91
- # Parse the response
92
- generated_response = response.choices[0].message.content.strip()
93
- content_plan = json.loads(generated_response)
94
-
95
- # Save the content plan in the session state for Stage 2 : Transcript clipper
96
- st.session_state.clip_plan_json = json.dumps(content_plan)
97
-
98
- # Extract key (assuming there is only one key in the JSON response)
99
- plan_key = list(content_plan.keys())[0]
100
- clip_plans = content_plan.get(plan_key, [])
101
-
102
- # Display final output
103
- if clip_plans:
104
- with generated_plan_container.container():
105
- for i, clip in enumerate(clip_plans):
106
- st.markdown(f"### 🎬 Clip {i + 1}")
107
- st.write(f"**Title:** {clip.get('Title', 'N/A')}")
108
- st.write(f"**Focus Prompt:** {clip.get('Focus Prompt', 'N/A')}")
109
- st.write(f"**Duration:** {clip.get('Duration Target', 'N/A')} seconds")
110
- st.markdown("---")
111
- else:
112
- st.error("⚠️ No clips were generated. Try again.")
113
-
114
- except json.JSONDecodeError:
115
- st.error("⚠️ Failed to parse OpenAI response. Try again.")
116
- except Exception as e:
117
- st.error(f"❌ Error: {str(e)}")
118
-
119
- # Stage 2: Extract Transcript Sections for Each Clip
120
- # Only show this if a clip plan was successfully generated
121
- if transcript.strip() and "clip_plan_json" in st.session_state:
122
- st.subheader("✂️ AI Powered Transcript Clipper")
123
- extract_button = st.button("Extract Transcript Sections")
124
-
125
- if extract_button:
126
- with st.spinner("⏳ Extracting transcript sections... Please wait."):
127
- try:
128
- clipper_system_prompt = CLIPPER_SYSTEM_MESSAGE
129
- clipper_user_prompt = CLIPPER_USER_MESSAGE.format(source_content=transcript, clip_plan=st.session_state.clip_plan_json)
130
- clipper_messages = [
131
- {"role": "system", "content": clipper_system_prompt},
132
- {"role": "user", "content": clipper_user_prompt},
133
- ]
134
-
135
- openai_args = {
136
- "model": extraction_model,
137
- "messages": clipper_messages,
138
- "response_format": {"type": "json_object"},
139
- }
140
- if extraction_model == 'o3-mini':
141
- openai_args['reasoning_effort'] = "low"
142
- else:
143
- openai_args["max_tokens"] = 10000
144
- openai_args["temperature"] = 0.45
145
-
146
- # Stream OpenAI API Response
147
- clipper_response = client.chat.completions.create(**openai_args)
148
-
149
- # Parse the extraction response
150
- extraction_generated_response = clipper_response.choices[0].message.content.strip()
151
- transcript_extraction = json.loads(extraction_generated_response)
152
-
153
- # Display the extracted transcript sections
154
- st.markdown("### 📝 Extracted Transcript Sections")
155
- for clip_title, section in transcript_extraction.items():
156
- st.markdown(f"#### 🎬 {clip_title}")
157
- st.write(section)
158
- st.markdown("---")
159
-
160
- except json.JSONDecodeError:
161
- st.error("⚠️ Failed to parse transcript extraction response. Try again.")
162
- except Exception as e:
163
- st.error(f"❌ Error: {str(e)}")
 
 
 
 
 
 
8
  # Set Streamlit layout to wide mode
9
  st.set_page_config(layout="wide")
10
 
11
+ st.title("🎬 AI-Powered Content Planner - Clip Creator")
12
+ st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")
13
 
14
+ # Sidebar for OpenAI API Key and configuration
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
  DEFAULT_MODEL = "gpt-4o-2024-08-06"
17
+ DEFAULT_GOAL = (
18
+ "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
19
+ "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
20
+ "sequence, optimizing for standalone engagement potential."
21
+ )
22
 
 
23
  st.sidebar.subheader("📤 Model for Clip Plan Generation")
24
  clip_plan_model = st.sidebar.selectbox(
25
  "Choose model for clip plan:",
26
  [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
27
+ index=0
28
  )
29
 
 
30
  st.sidebar.subheader("📥 Model for Transcript Clipper")
31
  extraction_model = st.sidebar.selectbox(
32
  "Choose model for transcript clipper:",
33
  [DEFAULT_MODEL, "gpt-4o-mini", "o3-mini"],
34
+ index=0
35
  )
36
 
 
37
  st.sidebar.subheader("🎯 Customize Prompt")
38
  GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", height=100)
39
  GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL
 
44
 
45
  client = OpenAI(api_key=OPENAI_API_KEY)
46
 
47
+ # Layout: Two columns - left for transcript, right for clip plans and extraction
48
+ col_transcript, col_output = st.columns([1, 1])
49
 
50
+ # Left Column: Transcript Input
51
+ with col_transcript:
52
  st.subheader("📝 Paste Your Transcript")
53
  transcript = st.text_area("Enter the transcript here:", height=400)
54
 
55
+ # Right Column: Clip Plan Generation and Extraction
56
+ with col_output:
57
+ st.subheader("📋 Generated Clip Plans")
58
+
59
+ # Button to generate clip plans from the transcript
60
+ if st.button("Generate Plan"):
61
+ if not transcript.strip():
62
+ st.error("❌ Please enter a transcript.")
63
+ else:
64
+ with st.spinner("⏳ Generating content plan... Please wait."):
65
+ try:
66
+ # Prepare prompts for clip plan generation
67
+ system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
68
+ user_prompt = USER_MESSAGE.format(source_content=transcript)
69
+ messages = [
70
+ {"role": "system", "content": system_prompt},
71
+ {"role": "user", "content": user_prompt},
72
+ ]
73
+
74
+ openai_args = {
75
+ "model": clip_plan_model,
76
+ "messages": messages,
77
+ "response_format": {"type": "json_object"},
78
+ }
79
+ if clip_plan_model == "o3-mini":
80
+ openai_args["reasoning_effort"] = "low"
81
+ else:
82
+ openai_args["max_tokens"] = 5000
83
+ openai_args["temperature"] = 0.45
84
+
85
+ response = client.chat.completions.create(**openai_args)
86
+ generated_response = response.choices[0].message.content.strip()
87
+ content_plan = json.loads(generated_response)
88
+
89
+ # Assume the response JSON has a single key containing a list of clip plans
90
+ plan_key = list(content_plan.keys())[0]
91
+ clip_plans = content_plan.get(plan_key, [])
92
+
93
+ # Save clip plans in session state so they persist
94
+ st.session_state.clip_plans = clip_plans
95
+
96
+ # Clear any previous extraction outputs
97
+ for i in range(len(clip_plans)):
98
+ st.session_state.pop(f"extracted_clip_{i}", None)
99
+ except json.JSONDecodeError:
100
+ st.error("⚠️ Failed to parse OpenAI response. Try again.")
101
+ except Exception as e:
102
+ st.error(f"❌ Error: {str(e)}")
103
+
104
+ # Display clip plans if they exist in session state
105
+ if "clip_plans" in st.session_state:
106
+ # We'll work with a reference to the clip plans list
107
+ updated_clip_plans = st.session_state.clip_plans
108
+
109
+ for i, clip in enumerate(updated_clip_plans):
110
+ # Each clip is rendered in an expander with editable fields
111
+ with st.expander(f"🎬 Clip {i + 1}", expanded=True):
112
+ new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
113
+ new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
114
+ new_duration = st.number_input(
115
+ "Duration Target (seconds)",
116
+ value=float(clip.get("Duration Target", 0)),
117
+ key=f"duration_{i}",
118
+ step=1.0
119
+ )
120
+
121
+ # Update the clip plan with the edited values
122
+ updated_clip_plans[i]["Title"] = new_title
123
+ updated_clip_plans[i]["Focus Prompt"] = new_focus
124
+ updated_clip_plans[i]["Duration Target"] = new_duration
125
+
126
+ # Button to delete this clip plan
127
+ if st.button("Delete Clip", key=f"delete_{i}"):
128
+ updated_clip_plans.pop(i)
129
+ st.session_state.clip_plans = updated_clip_plans
130
+ st.experimental_rerun()
131
+
132
+ # Button for transcript extraction for this clip
133
+ if st.button("Extract Transcript", key=f"extract_{i}"):
134
+ with st.spinner(" Extracting transcript section... Please wait."):
135
+ try:
136
+ # Send only the specific (and possibly edited) clip plan to the extractor
137
+ single_clip_json = json.dumps(updated_clip_plans[i])
138
+ clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
139
+ source_content=transcript,
140
+ clip_plan=single_clip_json
141
+ )
142
+ clipper_messages = [
143
+ {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
144
+ {"role": "user", "content": clipper_user_prompt},
145
+ ]
146
+
147
+ extraction_args = {
148
+ "model": extraction_model,
149
+ "messages": clipper_messages,
150
+ "response_format": {"type": "json_object"},
151
+ }
152
+ if extraction_model == "o3-mini":
153
+ extraction_args["reasoning_effort"] = "low"
154
+ else:
155
+ extraction_args["max_tokens"] = 10000
156
+ extraction_args["temperature"] = 0.45
157
+
158
+ clipper_response = client.chat.completions.create(**extraction_args)
159
+ extraction_response = clipper_response.choices[0].message.content.strip()
160
+ extracted_clip = json.loads(extraction_response)
161
+
162
+ # Save the extraction result for this clip in session state
163
+ st.session_state[f"extracted_clip_{i}"] = extracted_clip
164
+ except Exception as e:
165
+ st.error(f"❌ Extraction error: {str(e)}")
166
+
167
+ # Display extraction output if available
168
+ if f"extracted_clip_{i}" in st.session_state:
169
+ st.markdown("#### 📝 Extracted Transcript Section:")
170
+ st.write(st.session_state[f"extracted_clip_{i}"])
clipper_prompts.py CHANGED
@@ -1,33 +1,41 @@
1
  CLIPPER_SYSTEM_MESSAGE = '''
2
- You are a TranscriptExtractor, a specialist in processing long-form transcripts. Your role is to extract verbatim segments from a full transcript that directly correspond to each clip concept specified in a high-level clip plan.
3
 
4
- Each clip concept includes:
5
  - **Title:** A concise label for the clip.
6
- - **Focus Prompt:** A description of the key idea or moment to focus on.
7
- - **Duration Target:** The estimated length of the clip in seconds (for context only).
8
 
9
- When extracting transcript sections, follow these guidelines:
10
- 1. **Verbatim Extraction:** Return the text exactly as it appears in the transcript.
11
- 2. **Comprehensive Relevance:** Identify and extract all segments of the transcript that are clearly related to the clip’s Focus Prompt and Title. If multiple passages are relevant, combine them into one cohesive section. Do not omit any useful content—even if it means the extracted text is longer than the Duration Target.
12
- 3. **Order:** Present the extracted content in the order it appears in the transcript.
13
- 4. **Output Format:** Structure your answer as a JSON object. Each key should be the clip Title, and its value should be an object with a single key `"transcript"` containing the full extracted text for that clip.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- Focus solely on capturing every piece of relevant content that supports the clip concept.
16
- Do not add any extra commentary or paraphrasing.
17
  '''
18
 
19
  CLIPPER_USER_MESSAGE = '''
20
- Here is the reference transcript for extracting verbatim segments for each clip concept:
21
  ```
22
  {source_content}
23
  ```
24
- The high-level clip plan generated from this transcript includes the following clip concepts (in JSON format)::
25
  ```
26
  {clip_plan}
27
  ```
28
-
29
- Using the Focus Prompt and Title from each clip in the clip plan, extract all relevant portions of the transcript that directly support that clip concept.
30
- Ensure that you capture every useful piece of content from the transcript related to each clip—even
31
- if the total content exceeds the Duration Target.
32
  '''
33
 
 
1
  CLIPPER_SYSTEM_MESSAGE = '''
2
+ You are a TranscriptExtractor, a specialized expert in finding and extracting verbatim segments from a full transcript to create engaging video clips.
3
 
4
+ Your task is to extract verbatim segments from the transcript for a given clip plan based on:
5
  - **Title:** A concise label for the clip.
6
+ - **Focus Prompt:** This is the main focus of the clip on which the verbatim segments should be based.
7
+ - **Duration Target:** The desired duration for the clip.
8
 
9
+ Instructions to follow when extracting verbatim segments from the transcript are as follows:
10
+ 1. Return the transcript text exactly as it appears.
11
+ 2. Identify and extract every segment of the transcript that relates to the Focus Prompt.
12
+ 3. If there are multiple relevant passages, combine them in the order they appear in the transcript.
13
+ 4. Duration Rules:
14
+ - Use the Duration Target as a guideline to select the most relevant content.
15
+ - Aim to extract transcript segments that roughly match the target duration.
16
+ - If the available relevant content is naturally shorter than the duration target, extract only what is relevant, do not force inclusion of irrelevant content solely to reach the target duration.
17
+
18
+ ### **OUTPUT:**
19
+ - The response should be in the form of a JSON object with the following structure:
20
+ ```
21
+ {
22
+ "Title": "Clip Title",
23
+ "Focus Prompt": "Clip Focus Prompt",
24
+ "Transcript": "Verbatim Segments",
25
+ "Duration": "Clip Duration",
26
+ }
27
+ ```
28
 
 
 
29
  '''
30
 
31
  CLIPPER_USER_MESSAGE = '''
32
+ Here is the reference transcript for extracting verbatim segments for a given clip plan:
33
  ```
34
  {source_content}
35
  ```
36
+ The high-level clip plan is as follows:
37
  ```
38
  {clip_plan}
39
  ```
 
 
 
 
40
  '''
41