Spaces:

Rulga
/

status-law-gbot

Running

Translate comments and error messages to English for consistency and clarity

0bb77b3 9 months ago

863 Bytes

	import requests
	from bs4 import BeautifulSoup
	from langchain_community.document_loaders import WebBaseLoader
	from langchain_core.documents import Document
	from config.constants import URLS

	def load_documents():
	    """Load every page listed in ``URLS`` and return the combined results.

	    Each URL is fetched with its own ``WebBaseLoader`` so that a failure on
	    one page does not prevent the remaining pages from being loaded. Progress
	    and errors are reported on standard output.

	    Returns:
	        A list containing everything the loaders returned, in ``URLS`` order
	        (presumably langchain ``Document`` objects -- confirm against the
	        loader's API). The list is empty when nothing could be loaded.
	    """
	    # Browser-like User-Agent handed to each loader as its header template.
	    request_headers = {
	        "User-Agent": (
	            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	            "AppleWebKit/537.36 (KHTML, like Gecko) "
	            "Chrome/91.0.4472.124 Safari/537.36"
	        ),
	    }

	    collected = []
	    for url in URLS:
	        try:
	            fetched = WebBaseLoader(
	                web_paths=[url],
	                header_template=request_headers,
	            ).load()
	            if not fetched:
	                # Nothing came back for this URL; move on without reporting.
	                continue
	            collected += fetched
	            print(f"Loaded {url}: {len(fetched)} documents")
	        except Exception as exc:
	            # Best effort: report the failure and continue with the next URL.
	            print(f"Error loading {url}: {exc!s}")

	    return collected