Spaces:

tpha4308
/

video-qa

Sleeping

App Files Community

Thao Pham committed on Mar 31, 2025

Commit

5de0912

1 Parent(s): 527422b

Change input to upload video

Browse files

Files changed (2) hide show

app.py +31 -33
video_utils.py +1 -1

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import video_utils
 import utils
 import embed
 import rag
 import os
 import uuid
 import numpy as np
@@ -18,7 +19,7 @@ from dotenv import load_dotenv
 load_dotenv()  # Load from .env
 UPLOAD_FOLDER = 'uploads'
-video_name = None
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
@@ -38,7 +39,11 @@ pc = Pinecone(
 # Connect to an index
 index_name = "multimodal-minilm"
 if index_name not in pc.list_indexes().names():
-    pinecone.create_index(index_name, dimension=384, metric="cosine")
 INDEX = pc.Index(index_name)
 MODEL_STACK = [TEXT_MODEL, VISION_MODEL, VISION_MODEL_PROCESSOR, VLM, VLM_PROCESSOR]
@@ -81,41 +86,36 @@ def check_exist_before_upsert(index, video_path):
 def chat(message, history):
     image_input_path = None
     if len(message['files']) > 0:
         assert len(message['files']) == 1
-        image_input_path = message['files'][0]
     message = message['text']
     if history is None:
         history = []
-    if message.startswith("https://"):
-        # Check valid URL
-        history.append((message, f"Checking if your provided URL at {message} is valid..."))
-        yield history
-        valid = is_valid_youtube_url(message)
-        if not valid:
-            history.append((None, "❌ Invalid YouTube URL. Please try again."))
-            yield history
-            return
         # Check metadata
-        history.append((None, "✅ URL is valid! Fetching video metadata..."))
         yield history
-        video_metadata = video_utils.get_video_metdata(message)
-        history.append((None, f"The video you want to process is: \nTitle: {video_metadata['title']} published by {video_metadata['author']} on {video_metadata['publish_date']}."))
-        yield history
-        history.append((None, "⏳ Downloading video..."))
-        yield history
-        output_folder_path = os.path.join(UPLOAD_FOLDER, video_metadata['title'])
-        path_to_video = os.path.join(output_folder_path, f"video.mp4")
         if not os.path.exists(path_to_video):
-            path_to_video = utils.download_video(message, path=output_folder_path)
         history.append((None, "⏳ Transcribing video..."))
         yield history
@@ -155,17 +155,17 @@ def chat(message, history):
         with open(os.path.join(output_folder_path, "summary.txt"), "w") as f:
             f.write(video_summary)
-        history.append((None, f"Video processing complete! You can now ask me questions about the video {video_metadata['title']}!"))
         yield history
-        global video_name
-        video_name = video_metadata['title']
     else:
         history.append((message, None))
         yield history
-        if video_name is None:
-            history.append((None, "You need to insert video URL before asking questions."))
             yield history
             return
@@ -186,18 +186,16 @@ def chat(message, history):
         yield history
 def clear_chat(history):
-    # return []
     history = []
-    history.append((None, "Please input a Youtube URL to get started!"))
-    # yield history
     return history
 def main():
-    initial_messages = [(None, "Please input a Youtube URL to get started!")]
     with gr.Blocks() as demo:
         chatbot = gr.Chatbot(value=initial_messages)
-        msg = gr.MultimodalTextbox(file_types=['image'], sources=['upload'])
         with gr.Row():
             with gr.Column():

 import utils
 import embed
 import rag
+import shutil
 import os
 import uuid
 import numpy as np
 load_dotenv()  # Load from .env
 UPLOAD_FOLDER = 'uploads'
+global_video_name = None
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
 # Connect to an index
 index_name = "multimodal-minilm"
 if index_name not in pc.list_indexes().names():
+    pc.create_index(index_name, dimension=384, metric="cosine",
+                    spec=ServerlessSpec(
+                            cloud="aws",
+                            region="us-east-1"
+                        ))
 INDEX = pc.Index(index_name)
 MODEL_STACK = [TEXT_MODEL, VISION_MODEL, VISION_MODEL_PROCESSOR, VLM, VLM_PROCESSOR]
 def chat(message, history):
     image_input_path = None
+    # print(message['files'])
+    video_name, video_input_path = None, None
     if len(message['files']) > 0:
         assert len(message['files']) == 1
+        if message['files'][0].endswith('.jpg'):
+            image_input_path = message['files'][0]
+        elif message['files'][0].endswith('.mp4'):
+            video_input_path = message['files'][0]
+            video_name = os.path.basename(video_input_path).split('.mp4')[0]
     message = message['text']
     if history is None:
         history = []
+    if video_name is not None:
         # Check metadata
+        history.append((None, f"✅ Video uploaded succesfully! Your video's title is {video_name}..."))
         yield history
+        output_folder_path = os.path.join(UPLOAD_FOLDER, video_name)
+        os.makedirs(output_folder_path, exist_ok=True)
+        path_to_video = os.path.join(output_folder_path, "video.mp4")
         if not os.path.exists(path_to_video):
+            shutil.move(video_input_path, path_to_video)
         history.append((None, "⏳ Transcribing video..."))
         yield history
         with open(os.path.join(output_folder_path, "summary.txt"), "w") as f:
             f.write(video_summary)
+        history.append((None, f"Video processing complete! You can now ask me questions about the video {video_name}!"))
         yield history
+        global global_video_name
+        global_video_name = video_name
     else:
         history.append((message, None))
         yield history
+        if global_video_name is None:
+            history.append((None, "You need to upload a video before asking questions."))
             yield history
             return
         yield history
 def clear_chat(history):
     history = []
+    history.append((None, "Please upload a video to get started!"))
     return history
 def main():
+    initial_messages = [(None, "Please upload a video to get started!")]
     with gr.Blocks() as demo:
         chatbot = gr.Chatbot(value=initial_messages)
+        msg = gr.MultimodalTextbox(file_types=['image', '.mp4'], sources=['upload'])
         with gr.Row():
             with gr.Column():

video_utils.py CHANGED Viewed

@@ -22,7 +22,7 @@ def extract_audio(path_to_video:str, output_folder:str):
     video_name = os.path.basename(path_to_video).replace('.mp4', '')
     # declare where to save .mp3 audio
-    path_to_extracted_audio_file = os.path.join(output_folder, f'{video_name}.mp3')
     # extract mp3 audio file from mp4 video video file
     clip = VideoFileClip(path_to_video)

     video_name = os.path.basename(path_to_video).replace('.mp4', '')
     # declare where to save .mp3 audio
+    path_to_extracted_audio_file = os.path.join(output_folder, 'audio.mp3')
     # extract mp3 audio file from mp4 video video file
     clip = VideoFileClip(path_to_video)