Spaces:

notabaka
/

ASRtest

Runtime error

App Files Files Community

notabaka committed on Feb 23, 2024

Commit

011c6b2

1 Parent(s): dd5c5fa

tst

Browse files

Files changed (2) hide show

app.py +47 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+#document q&a app to run on hugging face space (not for automatic speech recognition)
+import streamlit as st
+import torch
+from transformers import AutoModelForCTC
+from transformers import AutoProcessor
+import faiss
+import numpy as np
+# Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
+embeddings_model = AutoModelForCTC.from_pretrained("Salesforce/SFR-Embedding-Mistral")
+processor = AutoProcessor.from_pretrained("Salesforce/SFR-Embedding-Mistral")
+# Use streamlit to select one or more files (documents like pdf, word or excel)
+uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
+# Create an index for storing the embeddings
+index = faiss.IndexFlatL2(768)  # Assuming the embeddings have a dimension of 768
+# Implement code to embed text from selected files in vector database using the text embeddings model
+success = True  # Assume success by default
+for file in uploaded_files:
+    # Read the content of the file
+    text = file.read().decode("utf-8")
+    # Tokenize the text
+    inputs = processor(text, return_tensors="pt", padding="max_length", truncation=True)
+    # Get the embeddings
+    with torch.no_grad():
+        embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
+    # Add the embeddings to the index
+    try:
+        index.add(embeddings.numpy())
+    except Exception as e:
+        success = False  # Set success to False if an exception occurs
+        st.write(f"Failed to add embeddings to the index: {e}")
+        break
+if success:
+    st.write("Embeddings added to the index successfully")
+else:
+    st.write("Operation failed")

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit
+torch
+transformers
+librosa
+numpy
+soundfile
+faiss-cpu