import streamlit as st
from openai import OpenAI
from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
import json
import os
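
# Streamlit app: paste a transcript, have an LLM draft a clip plan, then extract the
# transcript section for each planned clip with a second model call.
# API keys are read from the environment (there is no key input in the UI):
#   OPENAI_API_KEY for OpenAI models, GROQ_API_KEY for Groq-hosted models.
# To run locally (file name is an assumption; adjust to your script): streamlit run app.py
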
# Set Streamlit layout to wide mode
st.set_page_config(layout="wide")
st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")
# List of available models
OPENAI_MODELS = ["gpt-4o", "gpt-4o-mini", "o3-mini"]
GROQ_MODELS = ["llama-3.3-70b-specdec", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
# API keys are read from the environment; the sidebar holds model and prompt configuration
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
DEFAULT_MODEL = "llama-3.3-70b-specdec"
DEFAULT_GOAL = (
    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
    "sequence, optimizing for standalone engagement potential."
)
st.sidebar.subheader("🤖 Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0
)
st.sidebar.subheader("🎥 Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0
)
# Point each task at the right API client (Groq's OpenAI-compatible endpoint or OpenAI) based on the selected model
if clip_plan_model in GROQ_MODELS:
    plan_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.environ.get("GROQ_API_KEY"))
else:
    plan_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

if extraction_model in GROQ_MODELS:
    extraction_client = OpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.environ.get("GROQ_API_KEY"))
else:
    extraction_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
st.sidebar.subheader("🎯 Customize Prompt")
GOAL = st.sidebar.text_area("Specify a custom prompt to extract clips (optional):", value=DEFAULT_GOAL, height=100)
GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL

if not OPENAI_API_KEY:
    st.warning("⚠️ Please set the OPENAI_API_KEY environment variable.")
    st.stop()
# Layout: Two columns - left for transcript, right for clip plans and extraction
col_transcript, col_output = st.columns([1, 1])
# Left Column: Transcript Input
with col_transcript:
    st.subheader("📋 Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

    # Add reference link below the transcript text box
    st.markdown("---")
    st.markdown(
        """
        <div style="font-size:18px; font-weight:bold; margin-top:10px;">
            Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
            OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
        </div>
        """,
        unsafe_allow_html=True
    )
    st.markdown("---")

    st.subheader("🎥 Video/Audio Upload & Playback")
    media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
    if media_file is not None:
        # Detect media type and play accordingly
        if media_file.type.startswith("video"):
            st.video(media_file)
        elif media_file.type.startswith("audio"):
            st.audio(media_file)
# Right Column: Clip Plan Generation and Extraction
with col_output:
    st.subheader("📋 Generated Clip Plans")

    # Button to generate clip plans from the transcript
    if st.button("Generate Plan"):
        if not transcript.strip():
            st.error("❌ Please enter a transcript.")
        else:
            with st.spinner("⏳ Generating content plan... Please wait."):
                try:
                    # Prepare prompts for clip plan generation
                    system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
                    user_prompt = USER_MESSAGE.format(source_content=transcript)
                    messages = [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ]
                    openai_args = {
                        "model": clip_plan_model,
                        "messages": messages,
                        "response_format": {"type": "json_object"},
                    }
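                    # o3-mini is a reasoning model: it takes a reasoning_effort setting and
                    # rejects the temperature/max_tokens parameters used for the other models,
                    # hence the branch below.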
if clip_plan_model == "o3-mini":
openai_args["reasoning_effort"] = "low"
else:
openai_args["max_tokens"] = 5000
openai_args["temperature"] = 0.45
response = plan_client.chat.completions.create(**openai_args)
generated_response = response.choices[0].message.content.strip()
content_plan = json.loads(generated_response)
# Assume the response JSON has a single key containing a list of clip plans
plan_key = list(content_plan.keys())[0]
clip_plans = content_plan.get(plan_key, [])
# Save clip plans in session state so they persist
st.session_state.clip_plans = clip_plans
# Clear any previous extraction outputs
for i in range(len(clip_plans)):
st.session_state.pop(f"extracted_clip_{i}", None)
                except json.JSONDecodeError:
                    st.error("⚠️ Failed to parse the model response as JSON. Try again.")
                except Exception as e:
                    st.error(f"❌ Error: {str(e)}")
    # Display clip plans if they exist in session state
    if "clip_plans" in st.session_state:
        # We'll work with a reference to the clip plans list
        updated_clip_plans = st.session_state.clip_plans
        for i, clip in enumerate(updated_clip_plans):
            # Each clip is rendered in an expander with editable fields
            with st.expander(f"🎬 Clip {i + 1}", expanded=True):
                new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
                new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
                new_duration = st.number_input(
                    "Duration Target (seconds)",
                    value=float(clip.get("Duration Target", 0)),
                    key=f"duration_{i}",
                    step=1.0
                )
                # Update the clip plan with the edited values
                updated_clip_plans[i]["Title"] = new_title
                updated_clip_plans[i]["Focus Prompt"] = new_focus
                updated_clip_plans[i]["Duration Target"] = new_duration

                # Button to delete this clip plan
                if st.button("Delete Clip", key=f"delete_{i}"):
                    # Create a copy of the clip plans list
                    updated_clip_plans = st.session_state.clip_plans.copy()
                    # Remove the clip at index `i`
                    del updated_clip_plans[i]
                    # Update session state with the modified list
                    st.session_state.clip_plans = updated_clip_plans
                    # Rerun the app to reflect the changes
                    st.rerun()

                # Button for transcript extraction for this clip
                if st.button("Extract Transcript", key=f"extract_{i}"):
                    with st.spinner("⏳ Extracting transcript section... Please wait."):
                        try:
                            # Send only the specific (and possibly edited) clip plan to the extractor
                            single_clip_json = json.dumps(updated_clip_plans[i])
                            clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
                                source_content=transcript,
                                clip_plan=single_clip_json
                            )
                            clipper_messages = [
                                {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
                                {"role": "user", "content": clipper_user_prompt},
                            ]
                            extraction_args = {
                                "model": extraction_model,
                                "messages": clipper_messages,
                                "response_format": {"type": "json_object"},
                            }
                            if extraction_model == "o3-mini":
                                extraction_args["reasoning_effort"] = "low"
                            else:
                                extraction_args["max_tokens"] = 10000
                                extraction_args["temperature"] = 0.45
                            clipper_response = extraction_client.chat.completions.create(**extraction_args)
                            extraction_response = clipper_response.choices[0].message.content.strip()
                            extracted_clip = json.loads(extraction_response)
                            # Save the extraction result for this clip in session state
                            st.session_state[f"extracted_clip_{i}"] = extracted_clip
                        except Exception as e:
                            st.error(f"❌ Extraction error: {str(e)}")

                # Display extraction output if available
                if f"extracted_clip_{i}" in st.session_state:
                    st.markdown("#### 📝 Extracted Transcript Section:")
                    st.write(st.session_state[f"extracted_clip_{i}"])