Spaces:
Build error
Build error
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import TextLoader | |
| import pickle | |
| import streamlit as st | |
| import os | |
# Directory holding the pickled vectorstore. The path is relative, so it is
# resolved against the process's current working directory.
CACHE_DIR = "./cache"
# Full path of the pickle file the vectorstore is cached in.
CACHE_FILE = os.path.join(CACHE_DIR, "vectorstore_cache.pkl")
def load_or_create_vectorstore():
    """Load the FAISS vectorstore from the pickle cache, or build and cache it.

    If ``CACHE_FILE`` exists it is unpickled and returned. If it is missing or
    fails to load, every ``.txt`` file in ``./rag_documents/`` is loaded,
    split into chunks, embedded with the
    ``hiiamsid/sentence_similarity_spanish_es`` model and indexed with FAISS.
    The new vectorstore is then pickled to ``CACHE_FILE`` on a best-effort
    basis: a caching failure only produces a warning.

    Progress and failures are reported through Streamlit status messages.

    Returns:
        The cached or newly built vectorstore.

    Raises:
        OSError: If ``./rag_documents/`` cannot be listed.
        ValueError: If no document could be loaded, so there is nothing to
            index.
    """
    # Try the cache first; the embedding model is only needed for a rebuild.
    if os.path.exists(CACHE_FILE):
        try:
            # SECURITY: pickle.load can execute arbitrary code contained in
            # the file. This is only acceptable while the cache directory is
            # writable by trusted parties only.
            # NOTE(review): consider the vectorstore's own save/load API
            # instead of pickling the whole object -- confirm against the
            # installed LangChain version.
            with open(CACHE_FILE, 'rb') as f:
                vectorstore = pickle.load(f)
            st.success("Successfully loaded vectorstore from cache")
            return vectorstore
        except Exception as e:
            # Best-effort: any unreadable cache falls through to a rebuild.
            st.warning(f"Failed to load cache: {str(e)}. Creating new vectorstore...")

    # Cache is missing or unusable: build the vectorstore from the text files.
    txt_dir = "./rag_documents/"
    # Sorted so the chunk order in the index does not depend on the arbitrary
    # order os.listdir returns.
    txt_files = sorted(f for f in os.listdir(txt_dir) if f.endswith('.txt'))

    all_documents = []
    loaded_files = 0
    for txt_file in txt_files:
        file_path = os.path.join(txt_dir, txt_file)
        try:
            all_documents.extend(TextLoader(file_path).load())
        except Exception as e:
            # One unreadable file must not abort the whole build.
            st.error(f"Error loading {txt_file}: {str(e)}")
        else:
            loaded_files += 1

    # Fail with a clear message instead of handing FAISS an empty corpus.
    if not all_documents:
        message = f"No documents could be loaded from {txt_dir}"
        st.error(message)
        raise ValueError(message)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=10)
    texts = text_splitter.split_documents(all_documents)

    embedder_model = "hiiamsid/sentence_similarity_spanish_es"
    embeddings = HuggingFaceEmbeddings(model_name=embedder_model)
    vectorstore = FAISS.from_documents(texts, embeddings)

    # Caching is best-effort: directory creation lives inside the try so a
    # read-only filesystem only produces a warning. The pickle is written to
    # a temporary file and renamed, so a failed dump never leaves a truncated
    # CACHE_FILE behind.
    tmp_file = CACHE_FILE + ".tmp"
    try:
        os.makedirs(CACHE_DIR, exist_ok=True)
        with open(tmp_file, 'wb') as f:
            pickle.dump(vectorstore, f)
        os.replace(tmp_file, CACHE_FILE)
        st.success(f"Created new vectorstore with {loaded_files} TXT files and {len(texts)} text chunks. Cached for future use.")
    except Exception as e:
        # Remove the partial temporary file if one was created.
        try:
            os.remove(tmp_file)
        except OSError:
            pass
        st.warning(f"Failed to cache vectorstore: {str(e)}")

    return vectorstore