Spaces:
Sleeping
Sleeping
| import os | |
| import pickle | |
| from langchain_community.document_loaders.sitemap import SitemapLoader | |
def save_documents_to_disk(docs, file_path):
    """Serialize ``docs`` with pickle and write the result to ``file_path``.

    The target is opened in binary write mode, so any existing file at
    that path is overwritten. Nothing is returned.
    """
    with open(file_path, 'wb') as sink:
        # DEFAULT_PROTOCOL is what pickle.dump uses when no protocol is given.
        pickle.dump(docs, sink, protocol=pickle.DEFAULT_PROTOCOL)
def load_documents_from_disk(file_path):
    """Load previously pickled documents from ``file_path``.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` when nothing exists at
        ``file_path``.

    Raises:
        OSError: If the path exists but cannot be opened for reading
            (for example it is a directory or permission is denied).
        pickle.UnpicklingError, EOFError: If the file content is not a
            complete, valid pickle stream.
    """
    # Open directly instead of testing os.path.exists() first: a separate
    # existence check can go stale if the file is removed before open().
    try:
        file = open(file_path, 'rb')
    except (FileNotFoundError, NotADirectoryError):
        # Both mean "there is no such file"; report that as no cached data.
        return None
    with file:
        # SECURITY: unpickling can execute arbitrary code. Only point this
        # at files written by a trusted process, never at untrusted input.
        return pickle.load(file)
def load_documents_from_sitemap(sitemap_url):
    """Fetch documents for ``sitemap_url`` through ``SitemapLoader``.

    Builds a ``SitemapLoader`` with ``web_path=sitemap_url`` and returns
    whatever its ``load()`` call produces.
    """
    return SitemapLoader(web_path=sitemap_url).load()