Spaces:
Running
Running
back to the huggingface API embeddings, remove non-usable embeddings
Browse files
- document_qa/document_qa_engine.py +1 -1
- streamlit_app.py +7 -7
document_qa/document_qa_engine.py
CHANGED
|
@@ -423,7 +423,7 @@ class DocumentQAEngine:
|
|
| 423 |
if doc_id:
|
| 424 |
hash = doc_id
|
| 425 |
else:
|
| 426 |
-
hash = metadata[0]['hash']
|
| 427 |
|
| 428 |
self.data_storage.embed_document(hash, texts, metadata)
|
| 429 |
|
|
|
|
| 423 |
if doc_id:
|
| 424 |
hash = doc_id
|
| 425 |
else:
|
| 426 |
+
hash = metadata[0]['hash'] if len(metadata) > 0 and 'hash' in metadata[0] else ""
|
| 427 |
|
| 428 |
self.data_storage.embed_document(hash, texts, metadata)
|
| 429 |
|
streamlit_app.py
CHANGED
|
@@ -6,7 +6,7 @@ from tempfile import NamedTemporaryFile
|
|
| 6 |
import dotenv
|
| 7 |
from grobid_quantities.quantities import QuantitiesAPI
|
| 8 |
from langchain.memory import ConversationBufferMemory
|
| 9 |
-
from langchain_huggingface import HuggingFaceEmbeddings
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
from streamlit_pdf_viewer import pdf_viewer
|
| 12 |
|
|
@@ -23,9 +23,7 @@ API_MODELS = {
|
|
| 23 |
}
|
| 24 |
|
| 25 |
API_EMBEDDINGS = {
|
| 26 |
-
'intfloat/e5-large-v2': 'intfloat/e5-large-v2',  <!-- NOTE(review): deleted line was truncated in extraction; reconstructed entry — confirm against original commit -->
|
| 27 |
-
'intfloat/multilingual-e5-large-instruct': 'intfloat/multilingual-e5-large-instruct:',
|
| 28 |
-
'Salesforce/SFR-Embedding-2_R': 'Salesforce/SFR-Embedding-2_R'
|
| 29 |
}
|
| 30 |
|
| 31 |
if 'rqa' not in st.session_state:
|
|
@@ -135,8 +133,9 @@ def init_qa(model_name, embeddings_name):
|
|
| 135 |
api_key=os.environ.get('API_KEY')
|
| 136 |
)
|
| 137 |
|
| 138 |
-
embeddings = HuggingFaceEmbeddings(model_name=embeddings_name)  <!-- NOTE(review): deleted line was truncated in extraction; reconstructed from the surviving `embeddings =` prefix and the import of HuggingFaceEmbeddings — confirm against original commit -->
|
| 139 |
-
|
|
|
|
| 140 |
|
| 141 |
storage = DataStorage(embeddings)
|
| 142 |
return DocumentQAEngine(chat, storage, grobid_url=os.environ['GROBID_URL'], memory=st.session_state['memory'])
|
|
@@ -320,7 +319,8 @@ if uploaded_file and not st.session_state.loaded_embeddings:
|
|
| 320 |
st.session_state['doc_id'] = hash = st.session_state['rqa'][model].create_memory_embeddings(
|
| 321 |
tmp_file.name,
|
| 322 |
chunk_size=chunk_size,
|
| 323 |
-
perc_overlap=0.1
|
|
|
|
| 324 |
st.session_state['loaded_embeddings'] = True
|
| 325 |
st.session_state.messages = []
|
| 326 |
|
|
|
|
| 6 |
import dotenv
|
| 7 |
from grobid_quantities.quantities import QuantitiesAPI
|
| 8 |
from langchain.memory import ConversationBufferMemory
|
| 9 |
+
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpointEmbeddings
|
| 10 |
from langchain_openai import ChatOpenAI
|
| 11 |
from streamlit_pdf_viewer import pdf_viewer
|
| 12 |
|
|
|
|
| 23 |
}
|
| 24 |
|
| 25 |
API_EMBEDDINGS = {
|
| 26 |
+
'intfloat/multilingual-e5-large-instruct': 'intfloat/multilingual-e5-large-instruct'
|
|
|
|
|
|
|
| 27 |
}
|
| 28 |
|
| 29 |
if 'rqa' not in st.session_state:
|
|
|
|
| 133 |
api_key=os.environ.get('API_KEY')
|
| 134 |
)
|
| 135 |
|
| 136 |
+
embeddings = HuggingFaceEndpointEmbeddings(
|
| 137 |
+
repo_id=API_EMBEDDINGS[embeddings_name]
|
| 138 |
+
)
|
| 139 |
|
| 140 |
storage = DataStorage(embeddings)
|
| 141 |
return DocumentQAEngine(chat, storage, grobid_url=os.environ['GROBID_URL'], memory=st.session_state['memory'])
|
|
|
|
| 319 |
st.session_state['doc_id'] = hash = st.session_state['rqa'][model].create_memory_embeddings(
|
| 320 |
tmp_file.name,
|
| 321 |
chunk_size=chunk_size,
|
| 322 |
+
perc_overlap=0.1
|
| 323 |
+
)
|
| 324 |
st.session_state['loaded_embeddings'] = True
|
| 325 |
st.session_state.messages = []
|
| 326 |
|