Spaces:
Runtime error
Runtime error
"""**Document Loaders** are classes to load Documents.

**Document Loaders** are usually used to load a lot of Documents in a single run.

**Class hierarchy:**

.. code-block::

    BaseLoader --> <name>Loader  # Examples: TextLoader, UnstructuredFileLoader

**Main helpers:**

.. code-block::

    Document, <name>TextSplitter
"""
| from langchain.document_loaders.acreom import AcreomLoader | |
| from langchain.document_loaders.airbyte import ( | |
| AirbyteCDKLoader, | |
| AirbyteGongLoader, | |
| AirbyteHubspotLoader, | |
| AirbyteSalesforceLoader, | |
| AirbyteShopifyLoader, | |
| AirbyteStripeLoader, | |
| AirbyteTypeformLoader, | |
| AirbyteZendeskSupportLoader, | |
| ) | |
| from langchain.document_loaders.airbyte_json import AirbyteJSONLoader | |
| from langchain.document_loaders.airtable import AirtableLoader | |
| from langchain.document_loaders.apify_dataset import ApifyDatasetLoader | |
| from langchain.document_loaders.arcgis_loader import ArcGISLoader | |
| from langchain.document_loaders.arxiv import ArxivLoader | |
| from langchain.document_loaders.assemblyai import AssemblyAIAudioTranscriptLoader | |
| from langchain.document_loaders.async_html import AsyncHtmlLoader | |
| from langchain.document_loaders.azlyrics import AZLyricsLoader | |
| from langchain.document_loaders.azure_blob_storage_container import ( | |
| AzureBlobStorageContainerLoader, | |
| ) | |
| from langchain.document_loaders.azure_blob_storage_file import ( | |
| AzureBlobStorageFileLoader, | |
| ) | |
| from langchain.document_loaders.bibtex import BibtexLoader | |
| from langchain.document_loaders.bigquery import BigQueryLoader | |
| from langchain.document_loaders.bilibili import BiliBiliLoader | |
| from langchain.document_loaders.blackboard import BlackboardLoader | |
| from langchain.document_loaders.blob_loaders import ( | |
| Blob, | |
| BlobLoader, | |
| FileSystemBlobLoader, | |
| YoutubeAudioLoader, | |
| ) | |
| from langchain.document_loaders.blockchain import BlockchainDocumentLoader | |
| from langchain.document_loaders.brave_search import BraveSearchLoader | |
| from langchain.document_loaders.browserless import BrowserlessLoader | |
| from langchain.document_loaders.chatgpt import ChatGPTLoader | |
| from langchain.document_loaders.chromium import AsyncChromiumLoader | |
| from langchain.document_loaders.college_confidential import CollegeConfidentialLoader | |
| from langchain.document_loaders.concurrent import ConcurrentLoader | |
| from langchain.document_loaders.confluence import ConfluenceLoader | |
| from langchain.document_loaders.conllu import CoNLLULoader | |
| from langchain.document_loaders.csv_loader import CSVLoader, UnstructuredCSVLoader | |
| from langchain.document_loaders.cube_semantic import CubeSemanticLoader | |
| from langchain.document_loaders.datadog_logs import DatadogLogsLoader | |
| from langchain.document_loaders.dataframe import DataFrameLoader | |
| from langchain.document_loaders.diffbot import DiffbotLoader | |
| from langchain.document_loaders.directory import DirectoryLoader | |
| from langchain.document_loaders.discord import DiscordChatLoader | |
| from langchain.document_loaders.docugami import DocugamiLoader | |
| from langchain.document_loaders.docusaurus import DocusaurusLoader | |
| from langchain.document_loaders.dropbox import DropboxLoader | |
| from langchain.document_loaders.duckdb_loader import DuckDBLoader | |
| from langchain.document_loaders.email import ( | |
| OutlookMessageLoader, | |
| UnstructuredEmailLoader, | |
| ) | |
| from langchain.document_loaders.embaas import EmbaasBlobLoader, EmbaasLoader | |
| from langchain.document_loaders.epub import UnstructuredEPubLoader | |
| from langchain.document_loaders.etherscan import EtherscanLoader | |
| from langchain.document_loaders.evernote import EverNoteLoader | |
| from langchain.document_loaders.excel import UnstructuredExcelLoader | |
| from langchain.document_loaders.facebook_chat import FacebookChatLoader | |
| from langchain.document_loaders.fauna import FaunaLoader | |
| from langchain.document_loaders.figma import FigmaFileLoader | |
| from langchain.document_loaders.gcs_directory import GCSDirectoryLoader | |
| from langchain.document_loaders.gcs_file import GCSFileLoader | |
| from langchain.document_loaders.geodataframe import GeoDataFrameLoader | |
| from langchain.document_loaders.git import GitLoader | |
| from langchain.document_loaders.gitbook import GitbookLoader | |
| from langchain.document_loaders.github import GitHubIssuesLoader | |
| from langchain.document_loaders.google_speech_to_text import GoogleSpeechToTextLoader | |
| from langchain.document_loaders.googledrive import GoogleDriveLoader | |
| from langchain.document_loaders.gutenberg import GutenbergLoader | |
| from langchain.document_loaders.hn import HNLoader | |
| from langchain.document_loaders.html import UnstructuredHTMLLoader | |
| from langchain.document_loaders.html_bs import BSHTMLLoader | |
| from langchain.document_loaders.hugging_face_dataset import HuggingFaceDatasetLoader | |
| from langchain.document_loaders.ifixit import IFixitLoader | |
| from langchain.document_loaders.image import UnstructuredImageLoader | |
| from langchain.document_loaders.image_captions import ImageCaptionLoader | |
| from langchain.document_loaders.imsdb import IMSDbLoader | |
| from langchain.document_loaders.iugu import IuguLoader | |
| from langchain.document_loaders.joplin import JoplinLoader | |
| from langchain.document_loaders.json_loader import JSONLoader | |
| from langchain.document_loaders.lakefs import LakeFSLoader | |
| from langchain.document_loaders.larksuite import LarkSuiteDocLoader | |
| from langchain.document_loaders.markdown import UnstructuredMarkdownLoader | |
| from langchain.document_loaders.mastodon import MastodonTootsLoader | |
| from langchain.document_loaders.max_compute import MaxComputeLoader | |
| from langchain.document_loaders.mediawikidump import MWDumpLoader | |
| from langchain.document_loaders.merge import MergedDataLoader | |
| from langchain.document_loaders.mhtml import MHTMLLoader | |
| from langchain.document_loaders.modern_treasury import ModernTreasuryLoader | |
| from langchain.document_loaders.mongodb import MongodbLoader | |
| from langchain.document_loaders.news import NewsURLLoader | |
| from langchain.document_loaders.notebook import NotebookLoader | |
| from langchain.document_loaders.notion import NotionDirectoryLoader | |
| from langchain.document_loaders.notiondb import NotionDBLoader | |
| from langchain.document_loaders.obs_directory import OBSDirectoryLoader | |
| from langchain.document_loaders.obs_file import OBSFileLoader | |
| from langchain.document_loaders.obsidian import ObsidianLoader | |
| from langchain.document_loaders.odt import UnstructuredODTLoader | |
| from langchain.document_loaders.onedrive import OneDriveLoader | |
| from langchain.document_loaders.onedrive_file import OneDriveFileLoader | |
| from langchain.document_loaders.open_city_data import OpenCityDataLoader | |
| from langchain.document_loaders.org_mode import UnstructuredOrgModeLoader | |
| from langchain.document_loaders.pdf import ( | |
| AmazonTextractPDFLoader, | |
| MathpixPDFLoader, | |
| OnlinePDFLoader, | |
| PDFMinerLoader, | |
| PDFMinerPDFasHTMLLoader, | |
| PDFPlumberLoader, | |
| PyMuPDFLoader, | |
| PyPDFDirectoryLoader, | |
| PyPDFium2Loader, | |
| PyPDFLoader, | |
| UnstructuredPDFLoader, | |
| ) | |
| from langchain.document_loaders.polars_dataframe import PolarsDataFrameLoader | |
| from langchain.document_loaders.powerpoint import UnstructuredPowerPointLoader | |
| from langchain.document_loaders.psychic import PsychicLoader | |
| from langchain.document_loaders.pubmed import PubMedLoader | |
| from langchain.document_loaders.pyspark_dataframe import PySparkDataFrameLoader | |
| from langchain.document_loaders.python import PythonLoader | |
| from langchain.document_loaders.readthedocs import ReadTheDocsLoader | |
| from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader | |
| from langchain.document_loaders.reddit import RedditPostsLoader | |
| from langchain.document_loaders.roam import RoamLoader | |
| from langchain.document_loaders.rocksetdb import RocksetLoader | |
| from langchain.document_loaders.rss import RSSFeedLoader | |
| from langchain.document_loaders.rst import UnstructuredRSTLoader | |
| from langchain.document_loaders.rtf import UnstructuredRTFLoader | |
| from langchain.document_loaders.s3_directory import S3DirectoryLoader | |
| from langchain.document_loaders.s3_file import S3FileLoader | |
| from langchain.document_loaders.sharepoint import SharePointLoader | |
| from langchain.document_loaders.sitemap import SitemapLoader | |
| from langchain.document_loaders.slack_directory import SlackDirectoryLoader | |
| from langchain.document_loaders.snowflake_loader import SnowflakeLoader | |
| from langchain.document_loaders.spreedly import SpreedlyLoader | |
| from langchain.document_loaders.srt import SRTLoader | |
| from langchain.document_loaders.stripe import StripeLoader | |
| from langchain.document_loaders.telegram import ( | |
| TelegramChatApiLoader, | |
| TelegramChatFileLoader, | |
| ) | |
| from langchain.document_loaders.tencent_cos_directory import TencentCOSDirectoryLoader | |
| from langchain.document_loaders.tencent_cos_file import TencentCOSFileLoader | |
| from langchain.document_loaders.tensorflow_datasets import TensorflowDatasetLoader | |
| from langchain.document_loaders.text import TextLoader | |
| from langchain.document_loaders.tomarkdown import ToMarkdownLoader | |
| from langchain.document_loaders.toml import TomlLoader | |
| from langchain.document_loaders.trello import TrelloLoader | |
| from langchain.document_loaders.tsv import UnstructuredTSVLoader | |
| from langchain.document_loaders.twitter import TwitterTweetLoader | |
| from langchain.document_loaders.unstructured import ( | |
| UnstructuredAPIFileIOLoader, | |
| UnstructuredAPIFileLoader, | |
| UnstructuredFileIOLoader, | |
| UnstructuredFileLoader, | |
| ) | |
| from langchain.document_loaders.url import UnstructuredURLLoader | |
| from langchain.document_loaders.url_playwright import PlaywrightURLLoader | |
| from langchain.document_loaders.url_selenium import SeleniumURLLoader | |
| from langchain.document_loaders.weather import WeatherDataLoader | |
| from langchain.document_loaders.web_base import WebBaseLoader | |
| from langchain.document_loaders.whatsapp_chat import WhatsAppChatLoader | |
| from langchain.document_loaders.wikipedia import WikipediaLoader | |
| from langchain.document_loaders.word_document import ( | |
| Docx2txtLoader, | |
| UnstructuredWordDocumentLoader, | |
| ) | |
| from langchain.document_loaders.xml import UnstructuredXMLLoader | |
| from langchain.document_loaders.xorbits import XorbitsLoader | |
| from langchain.document_loaders.youtube import ( | |
| GoogleApiClient, | |
| GoogleApiYoutubeLoader, | |
| YoutubeLoader, | |
| ) | |
# Backward-compatibility aliases. Each one simply rebinds an existing loader
# class under its older public name so that legacy imports keep working.

# Legacy name; new code should use PyPDFLoader directly.
PagedPDFSplitter = PyPDFLoader

# Legacy name; new code should use TelegramChatFileLoader directly.
TelegramChatLoader = TelegramChatFileLoader

# Public API of this package: every loader imported above plus the two
# backward-compatibility aliases. Entries are unique and kept in ASCII
# (case-sensitive) order so that additions are easy to place and accidental
# duplicates are easy to spot.
__all__ = [
    "AZLyricsLoader",
    "AcreomLoader",
    "AirbyteCDKLoader",
    "AirbyteGongLoader",
    "AirbyteHubspotLoader",
    "AirbyteJSONLoader",
    "AirbyteSalesforceLoader",
    "AirbyteShopifyLoader",
    "AirbyteStripeLoader",
    "AirbyteTypeformLoader",
    "AirbyteZendeskSupportLoader",
    "AirtableLoader",
    "AmazonTextractPDFLoader",
    "ApifyDatasetLoader",
    "ArcGISLoader",
    "ArxivLoader",
    "AssemblyAIAudioTranscriptLoader",
    "AsyncChromiumLoader",
    "AsyncHtmlLoader",
    "AzureBlobStorageContainerLoader",
    "AzureBlobStorageFileLoader",
    "BSHTMLLoader",
    "BibtexLoader",
    "BigQueryLoader",
    "BiliBiliLoader",
    "BlackboardLoader",
    "Blob",
    "BlobLoader",
    "BlockchainDocumentLoader",
    "BraveSearchLoader",
    "BrowserlessLoader",
    "CSVLoader",
    "ChatGPTLoader",
    "CoNLLULoader",
    "CollegeConfidentialLoader",
    "ConcurrentLoader",
    "ConfluenceLoader",
    "CubeSemanticLoader",
    "DataFrameLoader",
    "DatadogLogsLoader",
    "DiffbotLoader",
    "DirectoryLoader",
    "DiscordChatLoader",
    "DocugamiLoader",
    "DocusaurusLoader",
    "Docx2txtLoader",
    "DropboxLoader",
    "DuckDBLoader",
    "EmbaasBlobLoader",
    "EmbaasLoader",
    "EtherscanLoader",
    "EverNoteLoader",
    "FacebookChatLoader",
    "FaunaLoader",
    "FigmaFileLoader",
    "FileSystemBlobLoader",
    "GCSDirectoryLoader",
    "GCSFileLoader",
    "GeoDataFrameLoader",
    "GitHubIssuesLoader",
    "GitLoader",
    "GitbookLoader",
    "GoogleApiClient",
    "GoogleApiYoutubeLoader",
    "GoogleDriveLoader",
    "GoogleSpeechToTextLoader",
    "GutenbergLoader",
    "HNLoader",
    "HuggingFaceDatasetLoader",
    "IFixitLoader",
    "IMSDbLoader",
    "ImageCaptionLoader",
    "IuguLoader",
    "JSONLoader",
    "JoplinLoader",
    "LakeFSLoader",
    "LarkSuiteDocLoader",
    "MHTMLLoader",
    "MWDumpLoader",
    "MastodonTootsLoader",
    "MathpixPDFLoader",
    "MaxComputeLoader",
    "MergedDataLoader",
    "ModernTreasuryLoader",
    "MongodbLoader",
    "NewsURLLoader",
    "NotebookLoader",
    "NotionDBLoader",
    "NotionDirectoryLoader",
    "OBSDirectoryLoader",
    "OBSFileLoader",
    "ObsidianLoader",
    "OneDriveFileLoader",
    "OneDriveLoader",
    "OnlinePDFLoader",
    "OpenCityDataLoader",
    "OutlookMessageLoader",
    "PDFMinerLoader",
    "PDFMinerPDFasHTMLLoader",
    "PDFPlumberLoader",
    "PagedPDFSplitter",
    "PlaywrightURLLoader",
    "PolarsDataFrameLoader",
    "PsychicLoader",
    "PubMedLoader",
    "PyMuPDFLoader",
    "PyPDFDirectoryLoader",
    "PyPDFLoader",
    "PyPDFium2Loader",
    "PySparkDataFrameLoader",
    "PythonLoader",
    "RSSFeedLoader",
    "ReadTheDocsLoader",
    "RecursiveUrlLoader",
    "RedditPostsLoader",
    "RoamLoader",
    "RocksetLoader",
    "S3DirectoryLoader",
    "S3FileLoader",
    "SRTLoader",
    "SeleniumURLLoader",
    "SharePointLoader",
    "SitemapLoader",
    "SlackDirectoryLoader",
    "SnowflakeLoader",
    "SpreedlyLoader",
    "StripeLoader",
    "TelegramChatApiLoader",
    "TelegramChatFileLoader",
    "TelegramChatLoader",
    "TencentCOSDirectoryLoader",
    "TencentCOSFileLoader",
    "TensorflowDatasetLoader",
    "TextLoader",
    "ToMarkdownLoader",
    "TomlLoader",
    "TrelloLoader",
    "TwitterTweetLoader",
    "UnstructuredAPIFileIOLoader",
    "UnstructuredAPIFileLoader",
    "UnstructuredCSVLoader",
    "UnstructuredEPubLoader",
    "UnstructuredEmailLoader",
    "UnstructuredExcelLoader",
    "UnstructuredFileIOLoader",
    "UnstructuredFileLoader",
    "UnstructuredHTMLLoader",
    "UnstructuredImageLoader",
    "UnstructuredMarkdownLoader",
    "UnstructuredODTLoader",
    "UnstructuredOrgModeLoader",
    "UnstructuredPDFLoader",
    "UnstructuredPowerPointLoader",
    "UnstructuredRSTLoader",
    "UnstructuredRTFLoader",
    "UnstructuredTSVLoader",
    "UnstructuredURLLoader",
    "UnstructuredWordDocumentLoader",
    "UnstructuredXMLLoader",
    "WeatherDataLoader",
    "WebBaseLoader",
    "WhatsAppChatLoader",
    "WikipediaLoader",
    "XorbitsLoader",
    "YoutubeAudioLoader",
    "YoutubeLoader",
]