Spaces:

zhangyi617
/

webui

Runtime error

App Files Files Community

webui / langchain /document_loaders /base.py

zhangyi617

Upload folder using huggingface_hub

129cd69 about 2 years ago

raw

history blame contribute delete

3.05 kB

	"""Abstract interface for document loader implementations."""
	from abc import ABC, abstractmethod
	from typing import Iterator, List, Optional

	from langchain_core.documents import Document

	from langchain.document_loaders.blob_loaders import Blob
	from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter


	class BaseLoader(ABC):
	    """Abstract base class for document loaders.

	    Subclasses are encouraged to provide generator-based loading through
	    ``lazy_load`` so that Documents do not all have to be held in memory
	    at the same time.

	    ``load`` is kept for backwards compatibility; its implementation should
	    simply be ``list(self.lazy_load())``.
	    """

	    # Abstract on purpose: every subclass supplies it, ideally as
	    # ``return list(self.lazy_load())``.
	    # The List it returns is fully materialized in memory.
	    @abstractmethod
	    def load(self) -> List[Document]:
	        """Load data into Document objects."""

	    def load_and_split(
	        self, text_splitter: Optional[TextSplitter] = None
	    ) -> List[Document]:
	        """Load Documents and split them into chunks.

	        Args:
	            text_splitter: TextSplitter instance used to split the loaded
	                documents. When it is None, a RecursiveCharacterTextSplitter
	                built with no arguments is used.

	        Returns:
	            Whatever ``split_documents`` of the chosen splitter returns for
	            the result of ``self.load()`` (annotated as a list of Documents).
	        """
	        # Resolve the splitter first, then load: same order as before.
	        splitter: TextSplitter = (
	            RecursiveCharacterTextSplitter()
	            if text_splitter is None
	            else text_splitter
	        )
	        return splitter.split_documents(self.load())

	    # Attention: this method is planned to become an abstractmethod once
	    # every existing subclass implements it.
	    def lazy_load(self) -> Iterator[Document]:
	        """A lazy loader for Documents.

	        Raises:
	            NotImplementedError: always, in this base implementation; the
	                message names the concrete class.
	        """
	        message = f"{self.__class__.__name__} does not implement lazy_load()"
	        raise NotImplementedError(message)


	class BaseBlobParser(ABC):
	    """Abstract interface for blob parsers.

	    A blob parser turns the raw data stored in a blob into one or more
	    documents.

	    Because parsing is separate from loading, a parser can be combined with
	    blob loaders and reused regardless of how the blob was originally
	    loaded.
	    """

	    @abstractmethod
	    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
	        """Lazily parse a blob into documents.

	        Every subclass must implement this method.

	        Args:
	            blob: Blob instance

	        Returns:
	            Generator of documents
	        """

	    def parse(self, blob: Blob) -> List[Document]:
	        """Eagerly parse the blob into a list of documents.

	        Convenience wrapper intended for interactive development
	        environments; production applications should favor ``lazy_parse``.

	        Subclasses should generally not override this method.

	        Args:
	            blob: Blob instance

	        Returns:
	            List of documents
	        """
	        # Drain the lazy iterator completely into an in-memory list.
	        lazy_documents = self.lazy_parse(blob)
	        return list(lazy_documents)