Adds audio to text converter and fixes tfidf
- app.py +4 -0
- core/audio.py +25 -0
- core/pipelines.py +1 -3
- interface/components.py +14 -1
- interface/utils.py +12 -1
- requirements.txt +2 -1
app.py
CHANGED

@@ -11,12 +11,16 @@ st.set_page_config(
 from streamlit_option_menu import option_menu
 from interface.config import session_state_variables, pages
 from interface.components import component_select_pipeline
+from interface.utils import load_audio_model
 
 # Initialization of session state
 for key, value in session_state_variables.items():
     if key not in st.session_state:
         st.session_state[key] = value
 
+# Init audio model
+st.session_state["audio_model"] = load_audio_model()
+
 
 def run_demo():
 
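Note: Streamlit re-executes the whole script on every widget interaction, so loading Whisper inline here would repeat the expensive load on each rerun. The commit therefore routes the load through load_audio_model, which is cached (see interface/utils.py below). A minimal sketch of the pattern, not the app's exact code:

import streamlit as st
import whisper

@st.experimental_singleton  # one cached instance per server process
def load_audio_model():
    return whisper.load_model("medium")

# Cheap after the first run: subsequent reruns get the cached model.
st.session_state["audio_model"] = load_audio_model()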
core/audio.py
ADDED

@@ -0,0 +1,25 @@
+import whisper
+import pydub
+import os
+
+whisper_model = "medium"
+
+
+def load_model():
+    print("Loading audio model...")
+    return whisper.load_model(whisper_model)
+
+
+def audio_to_text(model, audio_file):
+    audio = pydub.AudioSegment.from_file(audio_file)
+    # Export for loading later
+    audio.export("audio_tmp")
+    try:
+        audio = whisper.load_audio("audio_tmp")
+        audio = whisper.pad_or_trim(audio)
+        mel = whisper.log_mel_spectrogram(audio).to(model.device)
+        options = whisper.DecodingOptions()
+        result = whisper.decode(model, mel, options)
+    finally:
+        os.remove("audio_tmp")
+    return result.text
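Note: whisper.pad_or_trim pads or trims the waveform to 30 seconds, so audio_to_text transcribes at most the first 30 seconds of an upload, and pydub needs ffmpeg on the system to decode compressed formats. A usage sketch; the file name is hypothetical:

from core.audio import load_model, audio_to_text

model = load_model()                        # loads the "medium" Whisper checkpoint
print(audio_to_text(model, "example.mp3"))  # prints the decoded transcript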
core/pipelines.py
CHANGED

@@ -85,9 +85,7 @@ def dense_passage_retrieval(
     - One BERT base model to encode queries
     - Ranking of documents done by dot product similarity between query and document embeddings
     """
-
-    if index != document_store.index:
-        document_store = InMemoryDocumentStore(index=index)
+    document_store = InMemoryDocumentStore(index=index)
     dpr_retriever = DensePassageRetriever(
         document_store=document_store,
         query_embedding_model=query_embedding_model,
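Note: the removed guard only replaced the store when the index name differed, so rebuilding a pipeline with the same index reused a store that already held documents. A sketch of the difference, assuming Haystack v1's InMemoryDocumentStore:

from haystack.document_stores import InMemoryDocumentStore

store = InMemoryDocumentStore(index="documents")
store.write_documents([{"content": "old doc"}])

# Old code: `if index != store.index:` is False for a matching name, so the
# populated store leaked into the rebuilt pipeline. Constructing a new store
# unconditionally guarantees a fresh, empty index:
store = InMemoryDocumentStore(index="documents")
assert store.get_document_count() == 0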
interface/components.py
CHANGED

@@ -51,6 +51,19 @@ def component_select_pipeline(container):
                 "doc": pipeline_funcs[index_pipe].__doc__,
             }
             reset_vars_data()
+        # TODO: Use elasticsearch and remove this workaround for TFIDF
+        # Reload if Keyword Search is selected
+        elif st.session_state["pipeline"]["name"] == "Keyword Search":
+            st.session_state["pipeline_func_parameters"] = pipeline_func_parameters
+            (search_pipeline, index_pipeline,) = pipeline_funcs[
+                index_pipe
+            ](**pipeline_func_parameters[index_pipe])
+            st.session_state["pipeline"] = {
+                "name": selected_pipeline,
+                "search_pipeline": search_pipeline,
+                "index_pipeline": index_pipeline,
+                "doc": pipeline_funcs[index_pipe].__doc__,
+            }
 
 
 def component_show_pipeline(pipeline, pipeline_name):

@@ -126,7 +139,7 @@ def component_file_input(container, doc_id):
     with st.expander("Enter Files"):
         while True:
             file = st.file_uploader(
-                "Upload a .txt, .pdf, .csv, image file", key=doc_id
+                "Upload a .txt, .pdf, .csv, image file, audio file", key=doc_id
             )
             if file != None:
                 extracted_text = extract_text_from_file(file)
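Note: the new elif branch is the "fixes tfidf" part of the commit. The TF-IDF retriever builds its term-document matrix from whatever is in the store when it is fitted and does not refresh it afterwards, so re-selecting Keyword Search rebuilds the whole pipeline; the TODO marks Elasticsearch as the proper fix. A sketch of the underlying limitation, assuming Haystack v1's TfidfRetriever:

from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import TfidfRetriever

store = InMemoryDocumentStore(index="documents")
retriever = TfidfRetriever(document_store=store)

store.write_documents([{"content": "added after fitting"}])
# The matrix is not updated automatically; the new document stays invisible
# until the retriever is re-fitted or, as this commit does, the pipeline is
# rebuilt from scratch.
retriever.fit()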
interface/utils.py
CHANGED

@@ -3,6 +3,7 @@ import os
 import shutil
 import core.pipelines as pipelines_functions
 from core.pipelines import data_path
+from core.audio import audio_to_text, load_model
 from inspect import getmembers, isfunction, signature
 from newspaper import Article
 from PyPDF2 import PdfFileReader

@@ -96,9 +97,19 @@ def extract_text_from_file(file):
         return file_text
 
     # read image file (OCR)
-    elif file.type
+    elif file.type in ["image/jpeg", "image/png"]:
         return pytesseract.image_to_string(Image.open(file))
 
+    # read audio file (AudioToText)
+    elif file.type in ["audio/mpeg", "audio/wav", "audio/aac", "audio/x-m4a"]:
+        text = audio_to_text(st.session_state["audio_model"], file)
+        return text
+
     else:
         st.warning(f"File type {file.type} not supported")
         return None
+
+
+@st.experimental_singleton
+def load_audio_model():
+    return load_model()
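Note: st.experimental_singleton memoizes the decorated function's return value across reruns and sessions (newer Streamlit releases replace it with st.cache_resource), so the Whisper checkpoint is loaded once per server process. A sketch of the resulting behaviour:

model_a = load_audio_model()
model_b = load_audio_model()
assert model_a is model_b  # both calls return the one cached model instance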
requirements.txt
CHANGED

@@ -9,4 +9,5 @@ pytesseract==0.3.10
 soundfile==0.10.3.post1
 espnet
 pydub==0.25.1
-espnet_model_zoo==0.1.7
+espnet_model_zoo==0.1.7
+git+https://github.com/openai/whisper.git