# AISVIZ-BOT / services / scraper.py
# vaishnav
# make mistral default model
# a856301
from langchain_core.documents import Document
import configs.config as config
from processing.documents import load_documents, split_documents
from processing.texts import clean_text
class Service:
    """Scrape web pages and hand their cleaned, chunked text to a store.

    The only thing this class asks of ``store`` is a
    ``store_embeddings(documents)`` method.
    """

    def __init__(self, store):
        """Keep a reference to the store that will receive the documents."""
        self.store = store

    def scrape_and_get_store_vector_retriever(self, urls: list[str]):
        """Load, clean, chunk and store the content behind each URL.

        Every URL is passed to ``load_documents``. Each resulting document
        has its ``page_content`` run through ``clean_text`` and its
        ``metadata["source"]`` set to the URL it came from. Once all URLs
        have been processed, the collected documents are split with
        ``split_documents`` (using ``config.CHUNK_SIZE`` and
        ``config.CHUNK_OVERLAP``) and passed to ``self.store.store_embeddings``.

        NOTE(review): despite the method name, the code visible here does
        not return a retriever -- confirm how callers obtain one.

        Args:
            urls: Addresses to load, processed in the given order.

        Raises:
            Exception: If loading or cleaning fails for a URL. The message
                names the offending URL and the original error is chained
                as ``__cause__``. Processing stops at the first failure,
                before anything is sent to the store.
        """
        documents: list[Document] = []
        for url in urls:
            try:
                website_documents = load_documents(url)
                for doc in website_documents:
                    doc.page_content = clean_text(doc.page_content)
                    # Record where the text came from; overwrites any
                    # "source" value the loader may have set.
                    doc.metadata["source"] = url
                    documents.append(doc)
            except Exception as e:
                # Chain explicitly so the root cause stays attached to the
                # re-raised error instead of appearing as incidental context.
                raise Exception(f"Error processing {url}: {e}") from e

        self.store.store_embeddings(
            split_documents(
                documents,
                chunk_size=config.CHUNK_SIZE,
                chunk_overlap=config.CHUNK_OVERLAP,
            )
        )