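"""Streamlit app: paste a transcript, have one LLM draft a clip plan, then use
a second LLM call to pull each planned clip's section out of the transcript.

Run with (assuming this file is saved as app.py):

    streamlit run app.py

Requires OPENAI_API_KEY and/or GROQ_API_KEY in the environment, depending on
the models selected in the sidebar.
"""
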
import streamlit as st
from openai import OpenAI
from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
import json
import os
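
# Placeholder fields each prompt template is expected to expose
# (inferred from the .format() calls further down):
#   SYSTEM_MESSAGE         -> {prompt_goal}
#   USER_MESSAGE           -> {source_content}
#   CLIPPER_USER_MESSAGE   -> {source_content}, {clip_plan}
#   CLIPPER_SYSTEM_MESSAGE -> used verbatim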

# Set Streamlit layout to wide mode
st.set_page_config(layout="wide")

st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")

# List of available models
OPENAI_MODELS = ["gpt-4o", "gpt-4o-mini", "o3-mini"]
GROQ_MODELS = ["llama-3.3-70b-specdec", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]

# API keys are read from environment variables (they are not entered in the sidebar)
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
DEFAULT_MODEL = "llama-3.3-70b-specdec"
DEFAULT_GOAL = (
    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
    "sequence, optimizing for standalone engagement potential."
)

st.sidebar.subheader("πŸ“€ Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0
)

st.sidebar.subheader("πŸ“₯ Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0
)

# Stop early if the environment key required by either selected model is missing
if (clip_plan_model in OPENAI_MODELS or extraction_model in OPENAI_MODELS) and not OPENAI_API_KEY:
    st.warning("⚠️ Please set the OPENAI_API_KEY environment variable.")
    st.stop()
if (clip_plan_model in GROQ_MODELS or extraction_model in GROQ_MODELS) and not GROQ_API_KEY:
    st.warning("⚠️ Please set the GROQ_API_KEY environment variable.")
    st.stop()

# Point each client at the endpoint matching its selected model
if clip_plan_model in GROQ_MODELS:
    plan_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=GROQ_API_KEY)
else:
    plan_client = OpenAI(api_key=OPENAI_API_KEY)

if extraction_model in GROQ_MODELS:
    extraction_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=GROQ_API_KEY)
else:
    extraction_client = OpenAI(api_key=OPENAI_API_KEY)

st.sidebar.subheader("🎯 Customize Prompt")
GOAL = st.sidebar.text_area("Describe how clips should be extracted (optional):", value=DEFAULT_GOAL, height=100)
GOAL = GOAL.strip() or DEFAULT_GOAL

# Layout: Two columns - left for transcript, right for clip plans and extraction
col_transcript, col_output = st.columns([1, 1])

# Left Column: Transcript Input
with col_transcript:
    st.subheader("πŸ“ Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

    # Add reference link below the transcript text box
    st.markdown("---")
    st.markdown(
        """
        <div style="font-size:18px; font-weight:bold; margin-top:10px;">
            Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
            OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
        </div>
        """,
        unsafe_allow_html=True
    )

    st.markdown("---")
    st.subheader("πŸŽ₯ Video/Audio Upload & Playback")

    media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
    if media_file is not None:
        # Detect media type and play accordingly
        if media_file.type.startswith("video"):
            st.video(media_file)
        elif media_file.type.startswith("audio"):
            st.audio(media_file)


# Right Column: Clip Plan Generation and Extraction
with col_output:
    st.subheader("πŸ“‹ Generated Clip Plans")

    # Button to generate clip plans from the transcript
    if st.button("Generate Plan"):
        if not transcript.strip():
            st.error("❌ Please enter a transcript.")
        else:
            with st.spinner("⏳ Generating content plan... Please wait."):
                try:
                    # Prepare prompts for clip plan generation
                    system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
                    user_prompt = USER_MESSAGE.format(source_content=transcript)
                    messages = [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ]

                    openai_args = {
                        "model": clip_plan_model,
                        "messages": messages,
                        "response_format": {"type": "json_object"},
                    }
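                    # o3-mini is a reasoning model: it takes a reasoning_effort
                    # knob and rejects sampling parameters such as temperature,
                    # so max_tokens/temperature are sent only to the chat models.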
                    if clip_plan_model == "o3-mini":
                        openai_args["reasoning_effort"] = "low"
                    else:
                        openai_args["max_tokens"] = 5000
                        openai_args["temperature"] = 0.45

                    response = plan_client.chat.completions.create(**openai_args)
                    generated_response = response.choices[0].message.content.strip()
                    content_plan = json.loads(generated_response)

                    # The response JSON is expected to have a single top-level key
                    # holding the list of clip plans
                    plan_key = next(iter(content_plan), None)
                    clip_plans = content_plan.get(plan_key, []) if plan_key else []
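                    # A plausible plan shape (the top-level key name, e.g. "clips",
                    # is whatever prompts.py asks the model for):
                    #   {"clips": [{"Title": "...", "Focus Prompt": "...",
                    #               "Duration Target": 45}, ...]}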

                    # Save clip plans in session state so they persist
                    st.session_state.clip_plans = clip_plans
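                    # (Streamlit reruns this script on every interaction, so
                    # anything not kept in session_state would be lost.)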

                    # Clear extraction output left over from any previous plan,
                    # including entries beyond the length of the new plan
                    for key in [k for k in st.session_state if k.startswith("extracted_clip_")]:
                        st.session_state.pop(key, None)
                except json.JSONDecodeError:
                    st.error("⚠️ Failed to parse OpenAI response. Try again.")
                except Exception as e:
                    st.error(f"❌ Error: {str(e)}")

    # Display clip plans if they exist in session state
    if "clip_plans" in st.session_state:
        # We'll work with a reference to the clip plans list
        updated_clip_plans = st.session_state.clip_plans

        for i, clip in enumerate(updated_clip_plans):
            # Each clip is rendered in an expander with editable fields
            with st.expander(f"🎬 Clip {i + 1}", expanded=True):
                new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
                new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
                new_duration = st.number_input(
                    "Duration Target (seconds)",
                    value=float(clip.get("Duration Target", 0)),
                    key=f"duration_{i}",
                    step=1.0
                )

                # Update the clip plan with the edited values
                updated_clip_plans[i]["Title"] = new_title
                updated_clip_plans[i]["Focus Prompt"] = new_focus
                updated_clip_plans[i]["Duration Target"] = new_duration

                # Button to delete this clip plan
                if st.button("Delete Clip", key=f"delete_{i}"):
                    # Create a copy of the clip plans list
                    updated_clip_plans = st.session_state.clip_plans.copy()
                    # Remove the clip at index `i`
                    del updated_clip_plans[i]
                    # Update session state with the modified list
                    st.session_state.clip_plans = updated_clip_plans
                    # Rerun the app to reflect the changes
                    st.rerun()

                # Button for transcript extraction for this clip
                if st.button("Extract Transcript", key=f"extract_{i}"):
                    with st.spinner("⏳ Extracting transcript section... Please wait."):
                        try:
                            # Send only the specific (and possibly edited) clip plan to the extractor
                            single_clip_json = json.dumps(updated_clip_plans[i])
                            clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
                                source_content=transcript,
                                clip_plan=single_clip_json
                            )
                            clipper_messages = [
                                {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
                                {"role": "user", "content": clipper_user_prompt},
                            ]

                            extraction_args = {
                                "model": extraction_model,
                                "messages": clipper_messages,
                                "response_format": {"type": "json_object"},
                            }
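                            # Same model-specific handling as the plan call above:
                            # reasoning_effort for o3-mini, sampling params otherwise.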
                            if extraction_model == "o3-mini":
                                extraction_args["reasoning_effort"] = "low"
                            else:
                                extraction_args["max_tokens"] = 10000
                                extraction_args["temperature"] = 0.45

                            clipper_response = extraction_client.chat.completions.create(**extraction_args)
                            extraction_response = clipper_response.choices[0].message.content.strip()
                            extracted_clip = json.loads(extraction_response)
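                            # extracted_clip's schema is whatever JSON object the
                            # clipper prompt defines; it is rendered verbatim below.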

                            # Save the extraction result for this clip in session state
                            st.session_state[f"extracted_clip_{i}"] = extracted_clip
                        except Exception as e:
                            st.error(f"❌ Extraction error: {str(e)}")

                # Display extraction output if available
                if f"extracted_clip_{i}" in st.session_state:
                    st.markdown("#### πŸ“ Extracted Transcript Section:")
                    st.write(st.session_state[f"extracted_clip_{i}"])