No application file
Update user.py
user.py CHANGED
@@ -1,31 +1,30 @@
-
+import chromadb
+from PIL import Image as PILImage
 import streamlit as st
+import os
+from utils.qa import chain
 from langchain.memory import ConversationBufferWindowMemory
 from langchain_community.chat_message_histories import StreamlitChatMessageHistory
+import base64
+import io
 
+# Initialize Chromadb client
 path = "mm_vdb2"
 client = chromadb.PersistentClient(path=path)
 image_collection = client.get_collection(name="image")
 video_collection = client.get_collection(name='video_collection')
 
-
+# Set up memory storage for the chat
 memory_storage = StreamlitChatMessageHistory(key="chat_messages")
 memory = ConversationBufferWindowMemory(memory_key="chat_history", human_prefix="User", chat_memory=memory_storage, k=3)
 
+# Function to get an answer from the chain
 def get_answer(query):
     response = chain.invoke(query)
-
-    return response
+    return response.get("result", "No result found.")
 
+# Function to display images in the UI
 def display_images(image_collection, query_text, max_distance=None, debug=False):
-    """
-    Display images in a Streamlit app based on a query.
-    Args:
-        image_collection: The image collection object for querying.
-        query_text (str): The text query for images.
-        max_distance (float, optional): Maximum allowable distance for filtering.
-        debug (bool, optional): Whether to print debug information.
-    """
     results = image_collection.query(
         query_texts=[query_text],
         n_results=10,
@@ -35,160 +34,79 @@ def display_images(image_collection, query_text, max_distance=None, debug=False)
     uris = results['uris'][0]
     distances = results['distances'][0]
 
-    # Combine uris and distances, then sort by URI in ascending order
     sorted_results = sorted(zip(uris, distances), key=lambda x: x[0])
 
-
-    cols = st.columns(3)  # Create 3 columns for the layout
+    cols = st.columns(3)
 
     for i, (uri, distance) in enumerate(sorted_results):
         if max_distance is None or distance <= max_distance:
             try:
                 img = PILImage.open(uri)
-                with cols[i % 3]:
-                    st.image(img, use_container_width
+                with cols[i % 3]:
+                    st.image(img, use_container_width=True)
             except Exception as e:
                 st.error(f"Error loading image: {e}")
 
+# Function to display videos in the UI
 def display_videos_streamlit(video_collection, query_text, max_distance=None, max_results=5, debug=False):
-    """
-    Display videos in a Streamlit app based on a query.
-    Args:
-        video_collection: The video collection object for querying.
-        query_text (str): The text query for videos.
-        max_distance (float, optional): Maximum allowable distance for filtering.
-        max_results (int, optional): Maximum number of results to display.
-        debug (bool, optional): Whether to print debug information.
-    """
-    # Deduplication set
    displayed_videos = set()
 
-    # Query the video collection with the specified text
     results = video_collection.query(
         query_texts=[query_text],
-        n_results=max_results,
+        n_results=max_results,
         include=['uris', 'distances', 'metadatas']
     )
 
-    # Extract URIs, distances, and metadatas from the result
     uris = results['uris'][0]
     distances = results['distances'][0]
     metadatas = results['metadatas'][0]
 
-    # Display the videos that meet the distance criteria
     for uri, distance, metadata in zip(uris, distances, metadatas):
         video_uri = metadata['video_uri']
 
-        # Check if a max_distance filter is applied and the distance is within the allowed range
         if (max_distance is None or distance <= max_distance) and video_uri not in displayed_videos:
             if debug:
                 st.write(f"URI: {uri} - Video URI: {video_uri} - Distance: {distance}")
-            st.video(video_uri)
-            displayed_videos.add(video_uri)
+            st.video(video_uri)
+            displayed_videos.add(video_uri)
         else:
             if debug:
                 st.write(f"URI: {uri} - Video URI: {video_uri} - Distance: {distance} (Filtered out)")
 
-
-def image_uris(image_collection,query_text, max_distance=None, max_results=5):
-    results = image_collection.query(
-        query_texts=[query_text],
-        n_results=max_results,
-        include=['uris', 'distances']
-    )
-
-    filtered_uris = []
-    for uri, distance in zip(results['uris'][0], results['distances'][0]):
-        if max_distance is None or distance <= max_distance:
-            filtered_uris.append(uri)
-
-    return filtered_uris
-
-def text_uris(text_collection,query_text, max_distance=None, max_results=5):
-    results = text_collection.query(
-        query_texts=[query_text],
-        n_results=max_results,
-        include=['documents', 'distances']
-    )
-
-    filtered_texts = []
-    for doc, distance in zip(results['documents'][0], results['distances'][0]):
-        if max_distance is None or distance <= max_distance:
-            filtered_texts.append(doc)
-
-    return filtered_texts
-
-def frame_uris(video_collection,query_text, max_distance=None, max_results=5):
-    results = video_collection.query(
-        query_texts=[query_text],
-        n_results=max_results,
-        include=['uris', 'distances']
-    )
-
-    filtered_uris = []
-    seen_folders = set()
-
-    for uri, distance in zip(results['uris'][0], results['distances'][0]):
-        if max_distance is None or distance <= max_distance:
-            folder = os.path.dirname(uri)
-            if folder not in seen_folders:
-                filtered_uris.append(uri)
-                seen_folders.add(folder)
-
-            if len(filtered_uris) == max_results:
-                break
-
-    return filtered_uris
-
-def image_uris2(image_collection2,query_text, max_distance=None, max_results=5):
-    results = image_collection2.query(
-        query_texts=[query_text],
-        n_results=max_results,
-        include=['uris', 'distances']
-    )
-
-    filtered_uris = []
-    for uri, distance in zip(results['uris'][0], results['distances'][0]):
-        if max_distance is None or distance <= max_distance:
-            filtered_uris.append(uri)
-
-    return filtered_uris
-
+# Function to format the inputs for image and video processing
 def format_prompt_inputs(image_collection, video_collection, user_query):
-    # Get frame candidates from the video collection
     frame_candidates = frame_uris(video_collection, user_query, max_distance=1.55)
-
-    # Get image candidates from the image collection
     image_candidates = image_uris(image_collection, user_query, max_distance=1.5)
 
-    # Initialize the inputs dictionary with just the query
     inputs = {"query": user_query}
 
-    # Add the frame if found
     frame = frame_candidates[0] if frame_candidates else ""
     inputs["frame"] = frame
 
-    # If image candidates exist, process the first image
     if image_candidates:
         image = image_candidates[0]
         with PILImage.open(image) as img:
-            img = img.resize((img.width // 6, img.height // 6))
-            img = img.convert("L")
+            img = img.resize((img.width // 6, img.height // 6))
+            img = img.convert("L")
             with io.BytesIO() as output:
-                img.save(output, format="JPEG", quality=60)
+                img.save(output, format="JPEG", quality=60)
                 compressed_image_data = output.getvalue()
 
-        # Encode the compressed image as base64
         inputs["image_data_1"] = base64.b64encode(compressed_image_data).decode('utf-8')
     else:
         inputs["image_data_1"] = ""
 
     return inputs
 
-
+# Main function to initialize and run the UI
 def home():
-
-
+    # Set up the page layout
+    st.set_page_config(layout='wide', page_title="Virtual Tutor")
+
+    # Header
+    st.header("Welcome to Virtual Tutor - CHAT")
+
+    # SVG Banner for UI branding
     st.markdown("""
         <svg width="600" height="100">
             <text x="50%" y="50%" font-family="San serif" font-size="42px" fill="Black" text-anchor="middle" stroke="white"
@@ -197,11 +115,11 @@ def home():
         </svg>
     """, unsafe_allow_html=True)
 
+    # Initialize the chat session if not already initialized
     if "messages" not in st.session_state:
-        st.session_state.messages = [
-            {"role": "assistant", "content": "Hi! How may I assist you today?"}
-        ]
+        st.session_state.messages = [{"role": "assistant", "content": "Hi! How may I assist you today?"}]
 
+    # Styling for the chat input container
     st.markdown("""
        <style>
             .stChatInputContainer > div {
@@ -210,38 +128,51 @@ def home():
         </style>
     """, unsafe_allow_html=True)
 
-    for message in st.session_state.messages:
+    # Display previous chat messages
+    for message in st.session_state.messages:
         with st.chat_message(message["role"]):
             st.write(message["content"])
 
+    # Display chat messages from memory
     for i, msg in enumerate(memory_storage.messages):
         name = "user" if i % 2 == 0 else "assistant"
         st.chat_message(name).markdown(msg.content)
 
-    if user_input := st.chat_input("Enter your question here..."):
+    # Handle user input and generate response
+    if user_input := st.chat_input("Enter your question here..."):
         with st.chat_message("user"):
             st.markdown(user_input)
 
         with st.spinner("Generating Response..."):
             with st.chat_message("assistant"):
                 response = get_answer(user_input)
-                answer = response
+                answer = response
                 st.markdown(answer)
-
+
+                # Save user and assistant messages to session state
                 message = {"role": "assistant", "content": answer}
                 message_u = {"role": "user", "content": user_input}
                 st.session_state.messages.append(message_u)
                 st.session_state.messages.append(message)
-                inputs = format_prompt_inputs(image_collection, video_collection, user_input)
+
+                # Process inputs for image/video
+                inputs = format_prompt_inputs(image_collection, video_collection, user_input)
+
+                # Display images
                 st.markdown("### Images")
-                display_images(image_collection,
+                display_images(image_collection, user_input, max_distance=1.55, debug=False)
+
+                # Display videos based on frames
                 st.markdown("### Videos")
                 frame = inputs["frame"]
                 if frame:
-                    directory_name = frame.split('/')[1]
+                    directory_name = frame.split('/')[1]
                     video_path = f"videos_flattened/{directory_name}.mp4"
                     if os.path.exists(video_path):
                         st.video(video_path)
                     else:
-                        st.
-
+                        st.error("Video file not found.")
+
+# Call the home function to run the app
+if __name__ == "__main__":
+    home()
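Note: this commit deletes the retrieval helpers image_uris, text_uris, frame_uris, and image_uris2, but the updated format_prompt_inputs still calls frame_uris and image_uris, and nothing re-imports them, so user.py would raise a NameError on the first query unless those helpers are defined elsewhere in the Space. Below is a minimal sketch restoring just the two helpers that are still referenced, taken from the pre-commit version of this file; the placement of the early-exit length check in frame_uris is an assumption, since the original indentation is not recoverable from the diff view.

import os

# Return image URIs whose query distance is within max_distance (pre-commit helper).
def image_uris(image_collection, query_text, max_distance=None, max_results=5):
    results = image_collection.query(
        query_texts=[query_text],
        n_results=max_results,
        include=['uris', 'distances']
    )

    filtered_uris = []
    for uri, distance in zip(results['uris'][0], results['distances'][0]):
        if max_distance is None or distance <= max_distance:
            filtered_uris.append(uri)

    return filtered_uris

# Return at most one matching frame URI per video folder (pre-commit helper).
def frame_uris(video_collection, query_text, max_distance=None, max_results=5):
    results = video_collection.query(
        query_texts=[query_text],
        n_results=max_results,
        include=['uris', 'distances']
    )

    filtered_uris = []
    seen_folders = set()

    for uri, distance in zip(results['uris'][0], results['distances'][0]):
        if max_distance is None or distance <= max_distance:
            folder = os.path.dirname(uri)
            if folder not in seen_folders:
                filtered_uris.append(uri)
                seen_folders.add(folder)
            # Assumed placement: stop once enough unique folders are collected.
            if len(filtered_uris) == max_results:
                break

    return filtered_uris

Separately, the "No application file" banner at the top indicates the Space is not picking up an entry point. Streamlit Spaces look for app.py by default, so renaming user.py to app.py, or setting app_file: user.py in the README metadata, would be the usual fix; this is an observation about the Space's configuration, not part of this commit.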