Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import faiss | |
| import os | |
| from PyPDF2 import PdfFileReader | |
| from sentence_transformers import SentenceTransformer | |
| import pickle | |
# Page header plus the two inputs that drive the rest of the script:
# the target dataset name and the document to ingest.
DATASET_CHOICES = ["Sales", "Marketing", "HR"]
ACCEPTED_EXTENSIONS = ["txt", "pdf", "docx"]

st.title("File Upload and Vector Database Creation")
dataset = st.selectbox("Select Dataset", options=DATASET_CHOICES)
uploaded_file = st.file_uploader(
    "Upload your file",
    type=ACCEPTED_EXTENSIONS,
)
# Function to extract text from PDF
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source; in this
            script it is the object returned by ``st.file_uploader``.

    Returns:
        A single string with the extracted text of all pages. A page for
        which no text is extracted contributes an empty string.
    """
    # Imported here so the function does not rely on the module-level
    # import of the legacy reader class. ``PdfFileReader``, ``getNumPages``
    # and ``getPage`` are deprecated names that PyPDF2 3.x no longer
    # supports; ``PdfReader`` and ``.pages`` are their replacements.
    from PyPDF2 import PdfReader

    reader = PdfReader(file)
    # ``or ""`` guards against a page yielding None, which would otherwise
    # make the concatenation raise TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Main flow: turn the uploaded document into one embedding, append it to the
# per-dataset FAISS index and metadata list on disk, then offer both files
# for download.
if uploaded_file is not None:
    # --- 1. Extract text according to the upload's MIME type -------------
    if uploaded_file.type == "application/pdf":
        text = extract_text_from_pdf(uploaded_file)
    elif uploaded_file.type == "text/plain":
        try:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            text = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.error("The text file is not valid UTF-8 and could not be read.")
            st.stop()
    else:
        # The uploader also accepts "docx", but nothing here can parse it.
        # Stop cleanly instead of failing later with NameError on `text`.
        st.error(f"Unsupported file type: {uploaded_file.type or 'unknown'}")
        st.stop()

    st.write("File uploaded successfully!")

    if not text.strip():
        st.warning("No text could be extracted from the uploaded file.")
        st.stop()

    index_file = f'vector_db_{dataset}.index'
    metadata_file = f'metadata_{dataset}.pkl'

    # --- 2. Skip re-ingestion on reruns -----------------------------------
    # Streamlit re-executes the script on every widget interaction
    # (including a click on a download button), so without this guard the
    # same document would be appended again on each rerun. An upload is
    # identified by dataset + file name + size for this session.
    if "ingested_uploads" not in st.session_state:
        st.session_state["ingested_uploads"] = set()
    upload_key = (dataset, uploaded_file.name, uploaded_file.size)
    already_ingested = (
        upload_key in st.session_state["ingested_uploads"]
        and os.path.exists(index_file)
        and os.path.exists(metadata_file)
    )

    if not already_ingested:
        # Load pre-trained model for embeddings
        # NOTE(review): the whole document is encoded as a single vector;
        # long inputs are presumably truncated by the model — consider
        # chunking the text if full coverage matters.
        model = SentenceTransformer('all-MiniLM-L6-v2')
        embeddings = model.encode([text])
        # Take the dimension from the embedding itself instead of
        # hard-coding the value for one particular model.
        dimension = embeddings.shape[1]

        # Create or load existing FAISS index
        if os.path.exists(index_file):
            index = faiss.read_index(index_file)
            if index.d != dimension:
                st.error(
                    f"Existing index {index_file} has dimension {index.d}, "
                    f"but the embedding has dimension {dimension}."
                )
                st.stop()
        else:
            index = faiss.IndexFlatL2(dimension)

        # Load existing metadata before writing anything, so a failure here
        # cannot leave the index and the metadata out of sync.
        if os.path.exists(metadata_file):
            # SECURITY: pickle.load executes arbitrary code from the file.
            # This is only safe while the file is written exclusively by
            # this app; do not load pickles obtained from elsewhere.
            with open(metadata_file, 'rb') as f:
                metadata = pickle.load(f)
        else:
            metadata = []

        # Add embeddings to the index; metadata[i] is the text of vector i.
        index.add(embeddings)
        metadata.append(text)

        # Save the index and the metadata
        faiss.write_index(index, index_file)
        with open(metadata_file, 'wb') as f:
            pickle.dump(metadata, f)

        st.session_state["ingested_uploads"].add(upload_key)

    st.write("Vector database updated and saved successfully!")

    # --- 3. Downloads -----------------------------------------------------
    # Read the bytes up front so the buttons do not depend on an open
    # file handle.
    # Option to download the vector database file
    with open(index_file, 'rb') as f:
        index_bytes = f.read()
    st.download_button(
        label=f"Download {index_file}",
        data=index_bytes,
        file_name=index_file,
    )

    # Option to download the metadata file
    with open(metadata_file, 'rb') as f:
        metadata_bytes = f.read()
    st.download_button(
        label=f"Download {metadata_file}",
        data=metadata_bytes,
        file_name=metadata_file,
    )