# Streamlit app: AI-powered content planner / clip creator.
# (Header reconstructed; the original lines were Hugging Face Spaces page residue.)
import streamlit as st
from openai import OpenAI
from clipper_prompts import CLIPPER_SYSTEM_MESSAGE, CLIPPER_USER_MESSAGE
from prompts import SYSTEM_MESSAGE, USER_MESSAGE
import json
import os

# Use the full browser width so the two-column layout has room.
st.set_page_config(layout="wide")

# NOTE(review): emoji below restored from cp1253 mojibake ("π¬") — 🎬 fits the clip theme; confirm.
st.title("🎬 AI-Powered Content Planner - Clip Creator")
st.markdown("Paste a transcript on the left and view the generated content plan and extractions on the right.")

# Models routed to OpenAI's API vs. Groq's OpenAI-compatible endpoint.
OPENAI_MODELS = ["gpt-4o", "gpt-4o-mini", "o3-mini"]
GROQ_MODELS = ["llama-3.3-70b-specdec", "llama-3.3-70b-versatile", "mixtral-8x7b-32768"]
# Sidebar: model selection and prompt configuration.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Default model name kept for reference (not read below — candidate for removal, verify no external use).
DEFAULT_MODEL = "llama-3.3-70b-specdec"
DEFAULT_GOAL = (
    "Extract multiple self-contained clips by identifying natural narrative peaks, emotional highlights, "
    "and shareable moments (relatable struggles, surprising insights, or friendly debates) in their original "
    "sequence, optimizing for standalone engagement potential."
)

# NOTE(review): sidebar emoji restored from mojibake ("π€", "π₯") — confirm original glyphs.
st.sidebar.subheader("🤖 Model for Clip Plan Generation")
clip_plan_model = st.sidebar.selectbox(
    "Choose model for clip plan:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0,
)

st.sidebar.subheader("🎥 Model for Transcript Clipper")
extraction_model = st.sidebar.selectbox(
    "Choose model for transcript clipper:",
    GROQ_MODELS + OPENAI_MODELS,
    index=0,
)
def _client_for(model):
    """Build an OpenAI-SDK client for *model*.

    Groq-hosted models go through Groq's OpenAI-compatible endpoint with
    GROQ_API_KEY; everything else goes to OpenAI with OPENAI_API_KEY.
    """
    if model in GROQ_MODELS:
        return OpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.environ.get("GROQ_API_KEY"))
    return OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


plan_client = _client_for(clip_plan_model)
extraction_client = _client_for(extraction_model)

st.sidebar.subheader("🎯 Customize Prompt")
GOAL = st.sidebar.text_area("Specify specific prompt to extract clips (optional):", value=DEFAULT_GOAL, height=100)
# Fall back to the default goal when the box is left blank.
GOAL = GOAL.strip() if GOAL.strip() else DEFAULT_GOAL

# Bug fix: the original stopped whenever OPENAI_API_KEY was unset, even though
# the default (index 0) models are Groq-hosted and only need GROQ_API_KEY.
# Require only the key(s) the currently selected models actually use.
selected_models = {clip_plan_model, extraction_model}
if any(m in OPENAI_MODELS for m in selected_models) and not os.environ.get("OPENAI_API_KEY"):
    st.warning("⚠️ Please enter your OpenAI API key.")
    st.stop()
if any(m in GROQ_MODELS for m in selected_models) and not os.environ.get("GROQ_API_KEY"):
    st.warning("⚠️ Please set your GROQ_API_KEY.")
    st.stop()
# Layout: two equal columns — transcript input left, plans/extractions right.
col_transcript, col_output = st.columns([1, 1])

# Left column: transcript input plus optional media playback.
with col_transcript:
    # NOTE(review): emoji restored from mojibake ("π", "π₯") — confirm glyphs.
    st.subheader("📝 Paste Your Transcript")
    transcript = st.text_area("Enter the transcript here:", height=400)

    # Reference link for users who still need to generate a transcript.
    st.markdown("---")
    st.markdown(
        """
        <div style="font-size:18px; font-weight:bold; margin-top:10px;">
        Need a transcript? Use <a href="https://huggingface.co/spaces/sanchit-gandhi/whisper-jax-spaces" target="_blank" style="color:#007bff; text-decoration:none;">
        OpenAI Whisper on Hugging Face</a> to generate one from your audio or video.
        </div>
        """,
        unsafe_allow_html=True,
    )
    st.markdown("---")

    st.subheader("🎥 Video/Audio Upload & Playback")
    media_file = st.file_uploader("Upload a video or audio file", type=["mp4", "mov", "avi", "mp3", "wav", "ogg"])
    if media_file is not None:
        # Pick the player widget from the uploaded file's MIME type.
        if media_file.type.startswith("video"):
            st.video(media_file)
        elif media_file.type.startswith("audio"):
            st.audio(media_file)
# Right column: generate the clip plan from the transcript.
with col_output:
    st.subheader("📋 Generated Clip Plans")
    if st.button("Generate Plan"):
        if not transcript.strip():
            st.error("❌ Please enter a transcript.")
        else:
            with st.spinner("⏳ Generating content plan... Please wait."):
                try:
                    # Prompts for clip plan generation.
                    system_prompt = SYSTEM_MESSAGE.format(prompt_goal=GOAL)
                    user_prompt = USER_MESSAGE.format(source_content=transcript)
                    messages = [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt},
                    ]
                    openai_args = {
                        "model": clip_plan_model,
                        "messages": messages,
                        "response_format": {"type": "json_object"},
                    }
                    # o3-mini is a reasoning model: it takes reasoning_effort
                    # and rejects temperature/max_tokens.
                    if clip_plan_model == "o3-mini":
                        openai_args["reasoning_effort"] = "low"
                    else:
                        openai_args["max_tokens"] = 5000
                        openai_args["temperature"] = 0.45
                    response = plan_client.chat.completions.create(**openai_args)
                    generated_response = response.choices[0].message.content.strip()
                    content_plan = json.loads(generated_response)
                    # The response JSON is assumed to have a single top-level
                    # key holding the list of clip plans. Guard against an
                    # empty object — the original `list(keys())[0]` raised
                    # IndexError on `{}`.
                    plan_key = next(iter(content_plan), None)
                    clip_plans = content_plan.get(plan_key, []) if plan_key is not None else []
                    # Persist the plans across reruns.
                    st.session_state.clip_plans = clip_plans
                    # Clear ALL previous extraction outputs. The original only
                    # popped indices < len(clip_plans), leaving stale results
                    # for higher indices when the new plan is shorter.
                    stale_keys = [k for k in st.session_state.keys() if str(k).startswith("extracted_clip_")]
                    for key in stale_keys:
                        st.session_state.pop(key, None)
                except json.JSONDecodeError:
                    st.error("⚠️ Failed to parse OpenAI response. Try again.")
                except Exception as e:
                    st.error(f"❌ Error: {str(e)}")
# Right column (continued): render editable clip plans with per-clip
# delete / extract actions. Re-enters the column container so this block is
# self-contained (Streamlit containers may be entered multiple times).
with col_output:
    if "clip_plans" in st.session_state:
        # Work on a reference to the stored list so edits persist across reruns.
        updated_clip_plans = st.session_state.clip_plans
        for i, clip in enumerate(updated_clip_plans):
            # Each clip is rendered in an expander with editable fields.
            with st.expander(f"🎬 Clip {i + 1}", expanded=True):
                new_title = st.text_input("Title", value=clip.get("Title", "N/A"), key=f"title_{i}")
                new_focus = st.text_area("Focus Prompt", value=clip.get("Focus Prompt", "N/A"), key=f"focus_{i}")
                new_duration = st.number_input(
                    "Duration Target (seconds)",
                    value=float(clip.get("Duration Target", 0)),
                    key=f"duration_{i}",
                    step=1.0,
                )
                # Write the edited values back into the plan.
                updated_clip_plans[i]["Title"] = new_title
                updated_clip_plans[i]["Focus Prompt"] = new_focus
                updated_clip_plans[i]["Duration Target"] = new_duration

                # Delete this clip plan and rerun so the UI re-indexes.
                if st.button("Delete Clip", key=f"delete_{i}"):
                    remaining = st.session_state.clip_plans.copy()
                    del remaining[i]
                    st.session_state.clip_plans = remaining
                    st.rerun()

                # Extract the transcript section for this (possibly edited) clip.
                if st.button("Extract Transcript", key=f"extract_{i}"):
                    with st.spinner("⏳ Extracting transcript section... Please wait."):
                        try:
                            # Send only this specific clip plan to the extractor.
                            single_clip_json = json.dumps(updated_clip_plans[i])
                            clipper_user_prompt = CLIPPER_USER_MESSAGE.format(
                                source_content=transcript,
                                clip_plan=single_clip_json,
                            )
                            clipper_messages = [
                                {"role": "system", "content": CLIPPER_SYSTEM_MESSAGE},
                                {"role": "user", "content": clipper_user_prompt},
                            ]
                            extraction_args = {
                                "model": extraction_model,
                                "messages": clipper_messages,
                                "response_format": {"type": "json_object"},
                            }
                            # o3-mini is a reasoning model: no temperature/max_tokens.
                            if extraction_model == "o3-mini":
                                extraction_args["reasoning_effort"] = "low"
                            else:
                                extraction_args["max_tokens"] = 10000
                                extraction_args["temperature"] = 0.45
                            clipper_response = extraction_client.chat.completions.create(**extraction_args)
                            extraction_response = clipper_response.choices[0].message.content.strip()
                            extracted_clip = json.loads(extraction_response)
                            # Cache the extraction result for this clip.
                            st.session_state[f"extracted_clip_{i}"] = extracted_clip
                        except Exception as e:
                            st.error(f"❌ Extraction error: {str(e)}")

                # Show the stored extraction for this clip, if any.
                # NOTE(review): emoji restored from mojibake ("π") — confirm glyph.
                if f"extracted_clip_{i}" in st.session_state:
                    st.markdown("#### 📜 Extracted Transcript Section:")
                    st.write(st.session_state[f"extracted_clip_{i}"])