# Spaces:
# Runtime error
# Runtime error
import cv2
import streamlit as st
import tempfile
import base64
import os
from dotenv import load_dotenv
from openai import OpenAI
import assemblyai as aai
from moviepy.editor import *

# Load API keys from a local .env file so secrets stay out of the source tree.
load_dotenv()
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
# FIX: `OpenAI.api_key = ...` is a no-op class-attribute assignment in the
# openai>=1.0 SDK — the key is read per client instance. Pass it to the
# constructor instead (the constructor also falls back to the OPENAI_API_KEY
# environment variable on its own, so behavior for env-configured users is
# unchanged).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
def main():
    """Streamlit entry point: upload a video, preview sampled frames, and run
    GPT-4V / AssemblyAI analyses on demand, caching results in session state.

    Streamlit reruns this whole function on every widget interaction, so each
    expensive operation is guarded: analyses run only when their button is
    clicked AND no cached result exists yet.
    """
    st.title('Insightly Video Content Moderation')

    # Video upload section
    uploaded_video = st.file_uploader('Upload a video', type=["mp4", "avi", "mov"])
    if uploaded_video is None:
        return

    # Persist the upload to disk so cv2/AssemblyAI can read it by path.
    # NOTE(review): delete=False means temp files accumulate across reruns;
    # acceptable for a demo, but consider cleaning up on session end.
    with tempfile.NamedTemporaryFile(delete=False) as tfile:
        tfile.write(uploaded_video.read())
        video_file_path = tfile.name

    # Extract frames and display every 30th one in a 3-column grid.
    base64_frames = video_to_base64_frames(video_file_path)
    display_frame_grid(base64_frames[::30])

    st.write("Actions:")  # Header for the actions/buttons section
    # Four columns to align the action buttons side by side.
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        if st.button("Description") and 'description' not in st.session_state:
            st.session_state['description'] = generate_description(base64_frames)
    with col2:
        if st.button("Frame Description") and 'frame_description' not in st.session_state:
            st.session_state['frame_description'] = generate_frame_description(base64_frames)
    with col3:
        if st.button("Generate Transcript") and 'transcript' not in st.session_state:
            # FIX: transcription previously ran unconditionally on every
            # rerun (i.e. on every button click), paying for an AssemblyAI
            # job even when the transcript was never requested. It is now
            # started only when the user asks for it, then cached.
            transcript = aai.Transcriber().transcribe(video_file_path)
            st.session_state['transcript'] = transcript.text
    with col4:
        if st.button("Category of Video") and 'category' not in st.session_state:
            st.session_state['category'] = generate_category(base64_frames)

    # Display whichever cached results exist.
    for state_key, header in (
        ('description', "Video Description"),
        ('frame_description', "Frame Description"),
        ('transcript', "Video Transcript"),
        ('category', "Video Category"),
    ):
        if st.session_state.get(state_key):
            st.subheader(header)
            st.write(st.session_state[state_key])
def video_to_base64_frames(video_file_path):
    """Decode every frame of the video at *video_file_path* and return them
    as a list of base64-encoded JPEG strings."""
    capture = cv2.VideoCapture(video_file_path)
    encoded_frames = []
    while capture.isOpened():
        ok, image = capture.read()
        if not ok:
            # No more frames (or a read error) — stop decoding.
            break
        _, jpeg_buffer = cv2.imencode('.jpg', image)
        encoded_frames.append(base64.b64encode(jpeg_buffer).decode('utf-8'))
    capture.release()
    return encoded_frames
| ######################################### | |
| #Generate Video description | |
def generate_description(base64_frames):
    """Ask GPT-4V for a ~90-word description of the sampled video frames.

    The prompt also requests a list of visible objects and a flag for any
    restricted/sensitive content (with the offending frame, if any).

    Args:
        base64_frames: full list of base64-encoded JPEG frames; every 30th
            frame is sent to keep token usage bounded.

    Returns:
        The model's text response.
    """
    # FIX: the original prompt used a backslash line-continuation *inside*
    # the string literal, which silently embedded the source file's
    # indentation whitespace into the prompt text. Adjacent-literal
    # concatenation keeps the prompt clean and explicit.
    prompt = (
        "1. Generate a description for this sequence of video frames in about 90 words. "
        "Return the following : 1. List of objects in the video "
        "2. Any restrictive content or sensitive content and if so which frame."
    )
    prompt_messages = [
        {
            "role": "user",
            "content": [
                prompt,
                # Downscale each sampled frame to reduce vision-token cost.
                *({"image": frame, "resize": 428} for frame in base64_frames[0::30]),
            ],
        },
    ]
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=prompt_messages,
        max_tokens=3000,
    )
    return response.choices[0].message.content
| #Generate frame description | |
def generate_frame_description(base64_frames):
    """Ask GPT-4V to describe what happens in each sampled frame and return
    the model's text response."""
    # Every 30th frame, downscaled, to bound vision-token usage.
    sampled = base64_frames[0::30]
    content = ["Describe what is happening in each frame."]
    content.extend({"image": frame, "resize": 428} for frame in sampled)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[{"role": "user", "content": content}],
        max_tokens=3000,
    )
    return response.choices[0].message.content
| #Generate Category of Video | |
def generate_category(base64_frames):
    """Ask GPT-4V which category the video belongs to and return the model's
    text response."""
    # Every 30th frame, downscaled, to bound vision-token usage.
    sampled = base64_frames[0::30]
    content = ["What category can this video be tagged to?"]
    content.extend({"image": frame, "resize": 428} for frame in sampled)
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[{"role": "user", "content": content}],
        max_tokens=3000,
    )
    return response.choices[0].message.content
| ######################## | |
def display_frame_grid(base64_frames, frame_step=30):
    """Render base64-encoded JPEG frames in a 3-column Streamlit grid.

    Args:
        base64_frames: frames to show (already subsampled by the caller).
        frame_step: stride the caller used when subsampling, so captions can
            report each frame's position in the original video. Defaults to
            30, matching the previously hard-coded factor, so existing
            callers are unaffected.
    """
    cols_per_row = 3
    n_frames = len(base64_frames)
    for row_start in range(0, n_frames, cols_per_row):
        columns = st.columns(cols_per_row)
        for offset, column in enumerate(columns):
            frame_idx = row_start + offset
            if frame_idx >= n_frames:
                # Last row may be partial — leave remaining columns empty.
                break
            with column:
                st.image(
                    base64.b64decode(base64_frames[frame_idx]),
                    # +1 converts the 0-based index back to a 1-based frame
                    # number in the source video.
                    caption=f'Frame {frame_idx * frame_step + 1}',
                    width=200,
                )
# Script entry point: launch the Streamlit app when executed directly.
if __name__ == '__main__':
    main()