File size: 3,101 Bytes
a80f6e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import sys
from pathlib import Path

from langchain_community import document_loaders
from langchain_core.document_loaders.base import BaseLoader

# Template for the generated document-loader index page (written to
# ``index.mdx`` by the ``__main__`` block below). It starts with a front-matter
# section delimited by ``---`` lines, followed by the page body.
#
# The template is rendered with ``str.format(table=...)``, so ``{table}`` is the
# only placeholder; any literal ``{`` or ``}`` added to this text must be doubled
# (``{{`` / ``}}``) or ``format`` will fail.
DOCUMENT_LOADER_TEMPLATE = """\
---
sidebar_position: 0
sidebar_class_name: hidden
keywords: [compatibility]
custom_edit_url:
hide_table_of_contents: true
---

# Document loaders

:::info

If you'd like to write your own document loader, see [this how-to](/docs/how_to/document_loader_custom/).
If you'd like to contribute an integration, see [Contributing integrations](/docs/contributing/how_to/integrations/).

:::


## Features

The following table shows the feature support for all document loaders.

{table}

"""

# Loader class names that ``get_document_loader_table`` leaves out of the
# generated table even though they are exported by
# ``langchain_community.document_loaders``.
# NOTE(review): presumably these Airbyte loaders are deprecated upstream, as the
# constant's name suggests -- confirm before adding or removing entries.
DEPRECATED = [
    "AirbyteCDKLoader",
    "AirbyteGongLoader",
    "AirbyteHubspotLoader",
    "AirbyteJSONLoader",
    "AirbyteSalesforceLoader",
    "AirbyteShopifyLoader",
    "AirbyteStripeLoader",
    "AirbyteTypeformLoader",
    "AirbyteZendeskSupportLoader",
]


def get_document_loader_table() -> str:
    """Build the Markdown feature table for the community document loaders.

    Every name listed in ``langchain_community.document_loaders.__all__`` that
    resolves to a ``BaseLoader`` subclass gets one row, unless the name appears
    in ``DEPRECATED``. A feature is reported as supported when the class
    overrides the corresponding ``BaseLoader`` method:

    * "Lazy loading": ``lazy_load`` is overridden.
    * "Native async support": ``aload`` or ``alazy_load`` is overridden.

    The description column is the first non-blank line of the class docstring
    (empty when the class has no docstring), with ``|`` escaped so it cannot
    split the table cell.

    Returns:
        The table as ``|``-joined rows separated by newlines: a title row, an
        alignment row, then one row per loader sorted by class name.
    """

    def overrides(cls: type, method: str) -> bool:
        """Return True when ``cls`` does not use ``BaseLoader``'s ``method``."""
        return getattr(cls, method) != getattr(BaseLoader, method)

    doc_loaders_feat_table = {}
    for cm in document_loaders.__all__:
        cls = getattr(document_loaders, cm)
        # issubclass() raises TypeError for non-class exports (functions,
        # constants), so check that we really have a class first.
        if not (isinstance(cls, type) and issubclass(cls, BaseLoader)):
            continue

        # Strip before splitting so docstrings that open with a newline still
        # yield their summary line instead of an empty description.
        summary = (cls.__doc__ or "").strip().partition("\n")[0].strip()
        # Escape pipes (normalising any that are already escaped) because a
        # bare "|" would be read as a column separator.
        summary = summary.replace("\\|", "|").replace("|", "\\|")

        doc_loaders_feat_table[cm] = {
            "description": summary,
            "lazy_loading": overrides(cls, "lazy_load"),
            "native_async": overrides(cls, "aload") or overrides(cls, "alazy_load"),
        }

    header = ["loader", "description", "lazy_loading", "native_async"]
    title = ["Document Loader", "Description", "Lazy loading", "Native async support"]
    # Left-align the two text columns, centre the check-mark columns.
    rows = [title, [":-"] * 2 + [":-:"] * (len(title) - 2)]
    for loader, feats in sorted(doc_loaders_feat_table.items()):
        if loader in DEPRECATED:
            continue
        marks = ["✅" if feats[h] else "❌" for h in header[2:]]
        rows.append([loader, feats["description"], *marks])
    return "\n".join("|".join(row) for row in rows)


if __name__ == "__main__":
    # Script entry point: render the document-loader index page and write it to
    # <output_dir>/integrations/document_loaders/index.mdx, where <output_dir>
    # is the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} OUTPUT_DIR")

    output_dir = Path(sys.argv[1])
    output_integrations_dir = output_dir / "integrations"
    output_integrations_dir_doc_loaders = output_integrations_dir / "document_loaders"
    output_integrations_dir_doc_loaders.mkdir(parents=True, exist_ok=True)

    document_loader_page = DOCUMENT_LOADER_TEMPLATE.format(
        table=get_document_loader_table()
    )
    # The table contains non-ASCII check/cross marks, so the encoding must be
    # explicit: the platform default (e.g. cp1252) may be unable to encode them.
    with open(
        output_integrations_dir_doc_loaders / "index.mdx", "w", encoding="utf-8"
    ) as f:
        f.write(document_loader_page)