Spaces:
Runtime error
Runtime error
Commit
·
653f0aa
1
Parent(s):
d619758
Added exception handling
Browse files
- pages/Load_Documents.py +36 -27
pages/Load_Documents.py
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
|
|
| 1 |
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
|
| 2 |
import streamlit as st
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
|
| 5 |
def main():
|
| 6 |
load_dotenv()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")
|
| 9 |
st.title("Please upload your files...π ")
|
|
@@ -12,34 +18,37 @@ def main():
|
|
| 12 |
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
|
| 13 |
|
| 14 |
if uploaded_files:
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Process each PDF file
|
| 19 |
-
for pdf in uploaded_files:
|
| 20 |
-
st.write(f"Processing: {pdf.name}")
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
if __name__ == '__main__':
|
| 45 |
main()
|
|
|
|
| 1 |
+
import os
|
| 2 |
from utils._admin_util import create_embeddings, create_vector_store, read_pdf_data, split_data
|
| 3 |
import streamlit as st
|
| 4 |
from dotenv import load_dotenv
|
| 5 |
|
| 6 |
def main():
|
| 7 |
load_dotenv()
|
| 8 |
+
|
| 9 |
+
# Add API key verification at the start
|
| 10 |
+
if not os.getenv("OPENAI_API_KEY"):
|
| 11 |
+
st.error("OpenAI API key not found! Please ensure it's set in the environment variables.")
|
| 12 |
+
st.stop()
|
| 13 |
|
| 14 |
st.set_page_config(page_title="Dump PDFs to QDrant - Vector Store")
|
| 15 |
st.title("Please upload your files...π ")
|
|
|
|
| 18 |
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)
|
| 19 |
|
| 20 |
if uploaded_files:
|
| 21 |
+
try:
|
| 22 |
+
with st.spinner('Processing PDF files...'):
|
| 23 |
+
all_chunks = []
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
# Process each PDF file
|
| 26 |
+
for pdf in uploaded_files:
|
| 27 |
+
st.write(f"Processing: {pdf.name}")
|
| 28 |
+
|
| 29 |
+
# Extract text from PDF
|
| 30 |
+
text = read_pdf_data(pdf)
|
| 31 |
+
st.write(f"π Reading {pdf.name} done")
|
| 32 |
+
|
| 33 |
+
# Create chunks for this PDF
|
| 34 |
+
chunks = split_data(text)
|
| 35 |
+
all_chunks.extend(chunks)
|
| 36 |
+
st.write(f"π Splitting {pdf.name} into chunks done")
|
| 37 |
+
|
| 38 |
+
# Create embeddings once for all chunks
|
| 39 |
+
st.write("Creating embeddings...")
|
| 40 |
+
embeddings = create_embeddings()
|
| 41 |
+
st.write("π Creating embeddings instance done")
|
| 42 |
+
|
| 43 |
+
# Create vector store with all chunks
|
| 44 |
+
vector_store = create_vector_store(embeddings, all_chunks)
|
| 45 |
+
st.session_state.vector_store = vector_store
|
| 46 |
+
|
| 47 |
+
st.success(f"Successfully processed {len(uploaded_files)} files and pushed embeddings to Qdrant")
|
| 48 |
+
st.write(f"Total chunks created: {len(all_chunks)}")
|
| 49 |
+
except Exception as e:
|
| 50 |
+
st.error(f"An error occurred: {str(e)}")
|
| 51 |
+
st.error("Please check your API key and permissions.")
|
| 52 |
|
| 53 |
if __name__ == '__main__':
|
| 54 |
main()
|