# gpt4v / app.py
# Hugging Face Space by ombhojane — commit 300bcc0 ("trying")
from dotenv import load_dotenv
from IPython.display import display, Image, Audio
from moviepy.editor import VideoFileClip, AudioFileClip
from moviepy.audio.io.AudioFileClip import AudioFileClip
import cv2 # We're using OpenCV to read video
import base64
import time
import io
import openai
import os
import requests
import streamlit as st
import tempfile
import numpy as np
# Load environment variables (e.g. OPENAI_API_KEY used below) from a local .env file.
load_dotenv()
def video_to_frames(video_file):
    """Extract every frame of an uploaded video as a base64-encoded JPEG.

    Args:
        video_file: File-like object (e.g. a Streamlit UploadedFile)
            holding the video bytes.

    Returns:
        tuple: ``(base64Frames, video_filename, video_duration)`` —
        a list of base64 JPEG strings, the path of the temporary .mp4
        written to disk (the caller is responsible for deleting it),
        and the clip duration in seconds.
    """
    # Persist the uploaded bytes so OpenCV/moviepy can open them by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
        tmpfile.write(video_file.read())
        video_filename = tmpfile.name

    # Use moviepy only for the duration; close the clip so its ffmpeg
    # reader is released (the original leaked it).
    with VideoFileClip(video_filename) as clip:
        video_duration = clip.duration

    video = cv2.VideoCapture(video_filename)
    base64Frames = []
    try:
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            _, buffer = cv2.imencode(".jpg", frame)
            base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture even if decoding raises mid-loop.
        video.release()

    print(len(base64Frames), "frames read.")
    return base64Frames, video_filename, video_duration
def frames_to_story(frames, prompt):
    """Generate a voiceover script for the given video frames via OpenAI chat.

    Args:
        frames: List of base64-encoded JPEG frame strings.
        prompt: Instruction text describing the desired voiceover.

    Returns:
        tuple: ``(story, audio_bytes)`` on success — the generated script
        and an in-memory audio stream synthesized from it; ``(None, None)``
        if any step fails.
    """
    try:
        # Format prompt with Markdown.
        prompt = f"**Summary:** {prompt}"
        messages = [{
            "role": "user",
            "content": [prompt, *frames],
        }]
        params = {
            "model": "gpt-3.5-turbo",
            "max_tokens": 100,
            "messages": messages,
        }
        # `openai.Chat.create` does not exist in any SDK version; the
        # pre-1.0 entry point is openai.ChatCompletion.create.
        response = openai.ChatCompletion.create(**params)
        story = response.choices[0].message.content

        # text_to_audio returns (temp filename, BytesIO); the original
        # bound the whole tuple to `audio_bytes`. Keep only the stream.
        _, audio_bytes = text_to_audio(story)

        # Display word count.
        word_count = len(story.split())
        st.write(f"**Words:** {word_count}")
        return story, audio_bytes
    except Exception as e:
        # st.error takes a single message body (its second positional
        # parameter is `icon`), so fold the exception into the string.
        st.error(f"Error generating story: {e}")
        return None, None
def text_to_audio(text):
    """Synthesize speech for *text* with the OpenAI TTS endpoint.

    Args:
        text: The script to voice (model "tts-1", voice "onyx").

    Returns:
        tuple: ``(audio_filename, audio_bytes_io)`` — path of a temporary
        audio file on disk (caller deletes it) and an in-memory copy of
        the same bytes, seeked to position 0.

    Raises:
        Exception: If the HTTP request does not return status 200.
    """
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": "onyx",
        },
    )
    # Check if the request was successful before touching the body.
    if response.status_code != 200:
        raise Exception(
            f"Request failed with status code {response.status_code}")

    # Buffer the audio in memory. A requests body can only be iterated
    # once; the original iterated it a second time to write the temp
    # file, which therefore came out empty. Reuse the buffer instead.
    audio_bytes_io = io.BytesIO()
    for chunk in response.iter_content(chunk_size=1024 * 1024):
        audio_bytes_io.write(chunk)
    # Seek to the start so callers can read the stream from the top.
    audio_bytes_io.seek(0)

    # NOTE(review): tts-1 returns MP3 by default; the .wav suffix is kept
    # for compatibility with existing callers — ffmpeg sniffs the real
    # format, but confirm downstream tooling does too.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmpfile:
        tmpfile.write(audio_bytes_io.getvalue())
        audio_filename = tmpfile.name

    return audio_filename, audio_bytes_io
def merge_audio_video(video_filename, audio_filename, output_filename):
    """Attach the audio track from *audio_filename* to the video in
    *video_filename* and write the combined clip to *output_filename*.

    Returns:
        The path of the written file (identical to *output_filename*).
    """
    print("Merging audio and video...")
    print("Video filename:", video_filename)
    print("Audio filename:", audio_filename)

    # Open both source clips.
    clip = VideoFileClip(video_filename)
    voiceover = AudioFileClip(audio_filename)

    # Swap the video's audio track for the generated voiceover and
    # encode the result (H.264 video, AAC audio).
    combined = clip.set_audio(voiceover)
    combined.write_videofile(
        output_filename, codec='libx264', audio_codec='aac')

    # Release ffmpeg readers held by the clips.
    voiceover.close()
    clip.close()

    return output_filename
def main():
    """Streamlit entry point: upload a video, generate a voiceover script
    with the OpenAI API, synthesize audio for it, and merge the audio back
    into the video for playback."""
    st.set_page_config(page_title="Video voice over", page_icon=":bird:")
    st.header("Video voice over :bird:")
    uploaded_file = st.file_uploader("Choose a file")
    if uploaded_file is not None:
        st.video(uploaded_file)
        prompt = st.text_area(
            "Prompt", value="These are frames of a quick product demo walkthrough. Create a short voiceover script that outline the key actions to take, that can be used along this product demo.")
    if st.button('Generate', type="primary") and uploaded_file is not None:
        with st.spinner('Processing...'):
            base64Frames, video_filename, video_duration = video_to_frames(
                uploaded_file)
            # Budget roughly two spoken words per second of video.
            est_word_count = video_duration * 2
            final_prompt = prompt + f"(This video is ONLY {video_duration} seconds long, so make sure the voice over MUST be able to be explained in less than {est_word_count} words)"
            # frames_to_story returns a (story, audio) tuple; the original
            # bound the whole tuple to `text` and then passed the tuple to
            # st.write and text_to_audio. Unpack the script explicitly.
            story, _ = frames_to_story(base64Frames, final_prompt)
            st.write(story)
            # Generate audio from the script text.
            audio_filename, audio_bytes_io = text_to_audio(story)
            # Merge audio and video.
            output_video_filename = os.path.splitext(video_filename)[
                0] + '_output.mp4'
            final_video_filename = merge_audio_video(
                video_filename, audio_filename, output_video_filename)
            # Display the result.
            st.video(final_video_filename)
            # Clean up the temporary files created above.
            os.unlink(video_filename)
            os.unlink(audio_filename)
            os.unlink(final_video_filename)
# Run the Streamlit app when this file is executed directly.
if __name__ == '__main__':
    main()