ttss
Browse files
- app.py +6 -5
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import streamlit as st
|
|
| 4 |
import torch
|
| 5 |
from transformers import AutoModelForCTC
|
| 6 |
from transformers import AutoProcessor
|
| 7 |
-
import
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
# Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
|
|
@@ -15,7 +15,7 @@ processor = AutoProcessor.from_pretrained("Salesforce/SFR-Embedding-Mistral")
|
|
| 15 |
uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
|
| 16 |
|
| 17 |
# Create an index for storing the embeddings
|
| 18 |
-
index =
|
| 19 |
|
| 20 |
# Implement code to embed text from selected files in vector database using the text embeddings model
|
| 21 |
success = True # Assume success by default
|
|
@@ -32,11 +32,12 @@ for file in uploaded_files:
|
|
| 32 |
embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
|
| 33 |
# Add the embeddings to the index
|
| 34 |
try:
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
except Exception as e:
|
| 37 |
-
success = False
|
| 38 |
st.write(f"Failed to add embeddings to the index: {e}")
|
| 39 |
-
break
|
| 40 |
|
| 41 |
if success:
|
| 42 |
st.write("Embeddings added to the index successfully")
|
|
|
|
| 4 |
import torch
|
| 5 |
from transformers import AutoModelForCTC
|
| 6 |
from transformers import AutoProcessor
|
| 7 |
+
import annoy
|
| 8 |
import numpy as np
|
| 9 |
|
| 10 |
# Load text embeddings model (https://huggingface.co/Salesforce/SFR-Embedding-Mistral) using HF API key from environment variable "HF_KEY"
|
|
|
|
| 15 |
uploaded_files = st.file_uploader("Choose a file", accept_multiple_files=True)
|
| 16 |
|
| 17 |
# Create an index for storing the embeddings
|
| 18 |
+
index = annoy.AnnoyIndex(768, 'angular') # Assuming the embeddings have a dimension of 768
|
| 19 |
|
| 20 |
# Implement code to embed text from selected files in vector database using the text embeddings model
|
| 21 |
success = True # Assume success by default
|
|
|
|
| 32 |
embeddings = embeddings_model(**inputs).last_hidden_state.mean(dim=1)
|
| 33 |
# Add the embeddings to the index
|
| 34 |
try:
|
| 35 |
+
for i, emb in enumerate(embeddings.numpy()):
|
| 36 |
+
index.add_item(i, emb)
|
| 37 |
+
index.build(10) # 10 trees for building the index
|
| 38 |
except Exception as e:
|
| 39 |
+
success = False
|
| 40 |
st.write(f"Failed to add embeddings to the index: {e}")
|
|
|
|
| 41 |
|
| 42 |
if success:
|
| 43 |
st.write("Embeddings added to the index successfully")
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ torch
|
|
| 3 |
transformers
|
| 4 |
librosa
|
| 5 |
numpy
|
| 6 |
-
soundfile
|
|
|
|
|
|
| 3 |
transformers
|
| 4 |
librosa
|
| 5 |
numpy
|
| 6 |
+
soundfile
|
| 7 |
+
annoy
|