Spaces:
Sleeping
Sleeping
Commit ·
fe5df96
1
Parent(s): 8ed49ee
feat: add audio data input
Browse files
data_search/data_search_page.py
CHANGED
|
@@ -6,13 +6,13 @@ import streamlit as st
|
|
| 6 |
import sys
|
| 7 |
import torch
|
| 8 |
from vectordb import search_image_index, search_text_index, search_image_index_with_image, search_text_index_with_image
|
| 9 |
-
from utils import load_image_index, load_text_index, get_local_files
|
| 10 |
from data_search import adapter_utils
|
| 11 |
|
| 12 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 13 |
|
| 14 |
|
| 15 |
-
def data_search(clip_model, preprocess, text_embedding_model, device):
|
| 16 |
|
| 17 |
@st.cache_resource
|
| 18 |
def load_finetuned_model(file_name):
|
|
@@ -68,6 +68,8 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
|
|
| 68 |
image_index, image_data = load_image_index()
|
| 69 |
if os.path.exists("./vectorstore/text_index.index"):
|
| 70 |
text_index, text_data = load_text_index()
|
|
|
|
|
|
|
| 71 |
with torch.no_grad():
|
| 72 |
if not os.path.exists("./vectorstore/image_data.csv"):
|
| 73 |
st.warning("No Image Index Found. So not searching for images.")
|
|
@@ -75,6 +77,8 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
|
|
| 75 |
if not os.path.exists("./vectorstore/text_data.csv"):
|
| 76 |
st.warning("No Text Index Found. So not searching for text.")
|
| 77 |
text_index = None
|
|
|
|
|
|
|
| 78 |
if image_input:
|
| 79 |
image = Image.open(image_input)
|
| 80 |
image = preprocess(image).unsqueeze(0).to(device)
|
|
@@ -85,12 +89,16 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
|
|
| 85 |
image_indices = search_image_index_with_image(image_features, image_index, clip_model, k=3)
|
| 86 |
if text_index is not None:
|
| 87 |
text_indices = search_text_index_with_image(adapted_text_embeddings, text_index, text_embedding_model, k=3)
|
|
|
|
|
|
|
| 88 |
else:
|
| 89 |
if image_index is not None:
|
| 90 |
image_indices = search_image_index(text_input, image_index, clip_model, k=3)
|
| 91 |
if text_index is not None:
|
| 92 |
text_indices = search_text_index(text_input, text_index, text_embedding_model, k=3)
|
| 93 |
-
|
|
|
|
|
|
|
| 94 |
st.error("No Data Found! Please add data to the database.")
|
| 95 |
st.subheader("Top 3 Results")
|
| 96 |
cols = st.columns(3)
|
|
@@ -111,4 +119,10 @@ def data_search(clip_model, preprocess, text_embedding_model, device):
|
|
| 111 |
with cols[i]:
|
| 112 |
if text_index:
|
| 113 |
text_content = text_data['content'].iloc[text_indices[0][i]]
|
| 114 |
-
st.write(text_content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import sys
|
| 7 |
import torch
|
| 8 |
from vectordb import search_image_index, search_text_index, search_image_index_with_image, search_text_index_with_image
|
| 9 |
+
from utils import load_image_index, load_text_index, load_audio_index, get_local_files
|
| 10 |
from data_search import adapter_utils
|
| 11 |
|
| 12 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 13 |
|
| 14 |
|
| 15 |
+
def data_search(clip_model, preprocess, text_embedding_model, whisper_model, device):
|
| 16 |
|
| 17 |
@st.cache_resource
|
| 18 |
def load_finetuned_model(file_name):
|
|
|
|
| 68 |
image_index, image_data = load_image_index()
|
| 69 |
if os.path.exists("./vectorstore/text_index.index"):
|
| 70 |
text_index, text_data = load_text_index()
|
| 71 |
+
if os.path.exists("./vectorstore/audio_index.index"):
|
| 72 |
+
audio_index, audio_data = load_audio_index()
|
| 73 |
with torch.no_grad():
|
| 74 |
if not os.path.exists("./vectorstore/image_data.csv"):
|
| 75 |
st.warning("No Image Index Found. So not searching for images.")
|
|
|
|
| 77 |
if not os.path.exists("./vectorstore/text_data.csv"):
|
| 78 |
st.warning("No Text Index Found. So not searching for text.")
|
| 79 |
text_index = None
|
| 80 |
+
if not os.path.exists("./vectorstore/audio_data.csv"):
|
| 81 |
+
st.warning("No Audio Index Found. So not searching for audio.")
|
| 82 |
if image_input:
|
| 83 |
image = Image.open(image_input)
|
| 84 |
image = preprocess(image).unsqueeze(0).to(device)
|
|
|
|
| 89 |
image_indices = search_image_index_with_image(image_features, image_index, clip_model, k=3)
|
| 90 |
if text_index is not None:
|
| 91 |
text_indices = search_text_index_with_image(adapted_text_embeddings, text_index, text_embedding_model, k=3)
|
| 92 |
+
if audio_index is not None:
|
| 93 |
+
audio_indices = search_text_index_with_image(adapted_text_embeddings, audio_index, text_embedding_model, k=3)
|
| 94 |
else:
|
| 95 |
if image_index is not None:
|
| 96 |
image_indices = search_image_index(text_input, image_index, clip_model, k=3)
|
| 97 |
if text_index is not None:
|
| 98 |
text_indices = search_text_index(text_input, text_index, text_embedding_model, k=3)
|
| 99 |
+
if audio_index is not None:
|
| 100 |
+
audio_indices = search_text_index(text_input, audio_index, text_embedding_model, k=3)
|
| 101 |
+
if not image_index and not text_index and not audio_index:
|
| 102 |
st.error("No Data Found! Please add data to the database.")
|
| 103 |
st.subheader("Top 3 Results")
|
| 104 |
cols = st.columns(3)
|
|
|
|
| 119 |
with cols[i]:
|
| 120 |
if text_index:
|
| 121 |
text_content = text_data['content'].iloc[text_indices[0][i]]
|
| 122 |
+
st.write(text_content)
|
| 123 |
+
cols = st.columns(3)
|
| 124 |
+
for i in range(3):
|
| 125 |
+
with cols[i]:
|
| 126 |
+
if audio_index:
|
| 127 |
+
audio_path = audio_data['path'].iloc[audio_indices[0][i]]
|
| 128 |
+
st.audio(audio_path)
|