Vik Paruchuri
committed on
Commit
·
de660ff
1
Parent(s):
8cfeabd
Fix bug, improve quality
Browse files
- README.md +14 -0
- marker/layout/layout.py +7 -1
- poetry.lock +187 -186
- pyproject.toml +4 -4
README.md
CHANGED
|
@@ -147,6 +147,20 @@ METADATA_FILE=../pdf_meta.json NUM_DEVICES=4 NUM_WORKERS=15 marker_chunk_convert
|
|
| 147 |
|
| 148 |
Note that the env variables above are specific to this script, and cannot be set in `local.env`.
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
# Output format
|
| 151 |
|
| 152 |
The output will be a markdown file, but there will also be a metadata json file that gives information about the conversion process. It has these fields:
|
|
|
|
| 147 |
|
| 148 |
Note that the env variables above are specific to this script, and cannot be set in `local.env`.
|
| 149 |
|
| 150 |
+
|
| 151 |
+
## Use from python
|
| 152 |
+
|
| 153 |
+
See the `convert_single_pdf` function for additional arguments that can be passed.
|
| 154 |
+
|
| 155 |
+
```python
|
| 156 |
+
from marker.convert import convert_single_pdf
|
| 157 |
+
from marker.models import load_all_models
|
| 158 |
+
|
| 159 |
+
fpath = "FILEPATH"
|
| 160 |
+
model_lst = load_all_models()
|
| 161 |
+
full_text, images, out_meta = convert_single_pdf(fpath, model_lst)
|
| 162 |
+
```
|
| 163 |
+
|
| 164 |
# Output format
|
| 165 |
|
| 166 |
The output will be a markdown file, but there will also be a metadata json file that gives information about the conversion process. It has these fields:
|
marker/layout/layout.py
CHANGED
|
@@ -63,12 +63,18 @@ def annotate_block_types(pages: List[Page]):
|
|
| 63 |
if min_dist_idx is None or dist < min_dist:
|
| 64 |
min_dist = dist
|
| 65 |
min_dist_idx = j
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
if min_dist_idx is not None:
|
| 67 |
block.block_type = page.blocks[min_dist_idx].block_type
|
| 68 |
|
| 69 |
for i, block in enumerate(page.blocks):
|
| 70 |
if block.block_type is None:
|
| 71 |
-
block.block_type = "
|
| 72 |
|
| 73 |
# Merge blocks together, preserving pdf order
|
| 74 |
curr_layout_idx = None
|
|
|
|
| 63 |
if min_dist_idx is None or dist < min_dist:
|
| 64 |
min_dist = dist
|
| 65 |
min_dist_idx = j
|
| 66 |
+
for line in block2.lines:
|
| 67 |
+
dist = block2.distance(line.bbox)
|
| 68 |
+
if min_dist_idx is None or dist < min_dist:
|
| 69 |
+
min_dist = dist
|
| 70 |
+
min_dist_idx = j
|
| 71 |
+
|
| 72 |
if min_dist_idx is not None:
|
| 73 |
block.block_type = page.blocks[min_dist_idx].block_type
|
| 74 |
|
| 75 |
for i, block in enumerate(page.blocks):
|
| 76 |
if block.block_type is None:
|
| 77 |
+
block.block_type = "Text"
|
| 78 |
|
| 79 |
# Merge blocks together, preserving pdf order
|
| 80 |
curr_layout_idx = None
|
poetry.lock
CHANGED
|
@@ -1120,13 +1120,13 @@ zstd = ["zstandard (>=0.18.0)"]
|
|
| 1120 |
|
| 1121 |
[[package]]
|
| 1122 |
name = "huggingface-hub"
|
| 1123 |
-
version = "0.
|
| 1124 |
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
|
| 1125 |
optional = false
|
| 1126 |
python-versions = ">=3.8.0"
|
| 1127 |
files = [
|
| 1128 |
-
{file = "huggingface_hub-0.
|
| 1129 |
-
{file = "huggingface_hub-0.
|
| 1130 |
]
|
| 1131 |
|
| 1132 |
[package.dependencies]
|
|
@@ -1139,16 +1139,16 @@ tqdm = ">=4.42.1"
|
|
| 1139 |
typing-extensions = ">=3.7.4.3"
|
| 1140 |
|
| 1141 |
[package.extras]
|
| 1142 |
-
all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "
|
| 1143 |
cli = ["InquirerPy (==0.3.4)"]
|
| 1144 |
-
dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio", "jedi", "
|
| 1145 |
fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
|
| 1146 |
hf-transfer = ["hf-transfer (>=0.1.4)"]
|
| 1147 |
-
inference = ["aiohttp"
|
| 1148 |
-
quality = ["mypy (==1.5.1)", "ruff (>=0.5.0)"]
|
| 1149 |
tensorflow = ["graphviz", "pydot", "tensorflow"]
|
| 1150 |
tensorflow-testing = ["keras (<3.0)", "tensorflow"]
|
| 1151 |
-
testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio
|
| 1152 |
torch = ["safetensors[torch]", "torch"]
|
| 1153 |
typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
|
| 1154 |
|
|
@@ -1684,72 +1684,72 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
|
|
| 1684 |
|
| 1685 |
[[package]]
|
| 1686 |
name = "markupsafe"
|
| 1687 |
-
version = "3.0.
|
| 1688 |
description = "Safely add untrusted strings to HTML/XML markup."
|
| 1689 |
optional = false
|
| 1690 |
python-versions = ">=3.9"
|
| 1691 |
files = [
|
| 1692 |
-
{file = "MarkupSafe-3.0.
|
| 1693 |
-
{file = "MarkupSafe-3.0.
|
| 1694 |
-
{file = "MarkupSafe-3.0.
|
| 1695 |
-
{file = "MarkupSafe-3.0.
|
| 1696 |
-
{file = "MarkupSafe-3.0.
|
| 1697 |
-
{file = "MarkupSafe-3.0.
|
| 1698 |
-
{file = "MarkupSafe-3.0.
|
| 1699 |
-
{file = "MarkupSafe-3.0.
|
| 1700 |
-
{file = "MarkupSafe-3.0.
|
| 1701 |
-
{file = "MarkupSafe-3.0.
|
| 1702 |
-
{file = "MarkupSafe-3.0.
|
| 1703 |
-
{file = "MarkupSafe-3.0.
|
| 1704 |
-
{file = "MarkupSafe-3.0.
|
| 1705 |
-
{file = "MarkupSafe-3.0.
|
| 1706 |
-
{file = "MarkupSafe-3.0.
|
| 1707 |
-
{file = "MarkupSafe-3.0.
|
| 1708 |
-
{file = "MarkupSafe-3.0.
|
| 1709 |
-
{file = "MarkupSafe-3.0.
|
| 1710 |
-
{file = "MarkupSafe-3.0.
|
| 1711 |
-
{file = "MarkupSafe-3.0.
|
| 1712 |
-
{file = "MarkupSafe-3.0.
|
| 1713 |
-
{file = "MarkupSafe-3.0.
|
| 1714 |
-
{file = "MarkupSafe-3.0.
|
| 1715 |
-
{file = "MarkupSafe-3.0.
|
| 1716 |
-
{file = "MarkupSafe-3.0.
|
| 1717 |
-
{file = "MarkupSafe-3.0.
|
| 1718 |
-
{file = "MarkupSafe-3.0.
|
| 1719 |
-
{file = "MarkupSafe-3.0.
|
| 1720 |
-
{file = "MarkupSafe-3.0.
|
| 1721 |
-
{file = "MarkupSafe-3.0.
|
| 1722 |
-
{file = "MarkupSafe-3.0.
|
| 1723 |
-
{file = "MarkupSafe-3.0.
|
| 1724 |
-
{file = "MarkupSafe-3.0.
|
| 1725 |
-
{file = "MarkupSafe-3.0.
|
| 1726 |
-
{file = "MarkupSafe-3.0.
|
| 1727 |
-
{file = "MarkupSafe-3.0.
|
| 1728 |
-
{file = "MarkupSafe-3.0.
|
| 1729 |
-
{file = "MarkupSafe-3.0.
|
| 1730 |
-
{file = "MarkupSafe-3.0.
|
| 1731 |
-
{file = "MarkupSafe-3.0.
|
| 1732 |
-
{file = "MarkupSafe-3.0.
|
| 1733 |
-
{file = "MarkupSafe-3.0.
|
| 1734 |
-
{file = "MarkupSafe-3.0.
|
| 1735 |
-
{file = "MarkupSafe-3.0.
|
| 1736 |
-
{file = "MarkupSafe-3.0.
|
| 1737 |
-
{file = "MarkupSafe-3.0.
|
| 1738 |
-
{file = "MarkupSafe-3.0.
|
| 1739 |
-
{file = "MarkupSafe-3.0.
|
| 1740 |
-
{file = "MarkupSafe-3.0.
|
| 1741 |
-
{file = "MarkupSafe-3.0.
|
| 1742 |
-
{file = "MarkupSafe-3.0.
|
| 1743 |
-
{file = "MarkupSafe-3.0.
|
| 1744 |
-
{file = "MarkupSafe-3.0.
|
| 1745 |
-
{file = "MarkupSafe-3.0.
|
| 1746 |
-
{file = "MarkupSafe-3.0.
|
| 1747 |
-
{file = "MarkupSafe-3.0.
|
| 1748 |
-
{file = "MarkupSafe-3.0.
|
| 1749 |
-
{file = "MarkupSafe-3.0.
|
| 1750 |
-
{file = "MarkupSafe-3.0.
|
| 1751 |
-
{file = "MarkupSafe-3.0.
|
| 1752 |
-
{file = "markupsafe-3.0.
|
| 1753 |
]
|
| 1754 |
|
| 1755 |
[[package]]
|
|
@@ -2510,13 +2510,13 @@ testing = ["docopt", "pytest"]
|
|
| 2510 |
|
| 2511 |
[[package]]
|
| 2512 |
name = "pdftext"
|
| 2513 |
-
version = "0.3.
|
| 2514 |
description = "Extract structured text from pdfs quickly"
|
| 2515 |
optional = false
|
| 2516 |
python-versions = "<4.0,>=3.10"
|
| 2517 |
files = [
|
| 2518 |
-
{file = "pdftext-0.3.
|
| 2519 |
-
{file = "pdftext-0.3.
|
| 2520 |
]
|
| 2521 |
|
| 2522 |
[package.dependencies]
|
|
@@ -2809,32 +2809,33 @@ files = [
|
|
| 2809 |
|
| 2810 |
[[package]]
|
| 2811 |
name = "psutil"
|
| 2812 |
-
version = "6.
|
| 2813 |
description = "Cross-platform lib for process and system monitoring in Python."
|
| 2814 |
optional = false
|
| 2815 |
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
|
| 2816 |
files = [
|
| 2817 |
-
{file = "psutil-6.
|
| 2818 |
-
{file = "psutil-6.
|
| 2819 |
-
{file = "psutil-6.
|
| 2820 |
-
{file = "psutil-6.
|
| 2821 |
-
{file = "psutil-6.
|
| 2822 |
-
{file = "psutil-6.
|
| 2823 |
-
{file = "psutil-6.
|
| 2824 |
-
{file = "psutil-6.
|
| 2825 |
-
{file = "psutil-6.
|
| 2826 |
-
{file = "psutil-6.
|
| 2827 |
-
{file = "psutil-6.
|
| 2828 |
-
{file = "psutil-6.
|
| 2829 |
-
{file = "psutil-6.
|
| 2830 |
-
{file = "psutil-6.
|
| 2831 |
-
{file = "psutil-6.
|
| 2832 |
-
{file = "psutil-6.
|
| 2833 |
-
{file = "psutil-6.
|
| 2834 |
]
|
| 2835 |
|
| 2836 |
[package.extras]
|
| 2837 |
-
|
|
|
|
| 2838 |
|
| 2839 |
[[package]]
|
| 2840 |
name = "ptyprocess"
|
|
@@ -4174,27 +4175,27 @@ snowflake = ["snowflake-connector-python (>=2.8.0)", "snowflake-snowpark-python[
|
|
| 4174 |
|
| 4175 |
[[package]]
|
| 4176 |
name = "surya-ocr"
|
| 4177 |
-
version = "0.6.
|
| 4178 |
description = "OCR, layout, reading order, and table recognition in 90+ languages"
|
| 4179 |
optional = false
|
| 4180 |
-
python-versions = "
|
| 4181 |
files = [
|
| 4182 |
-
{file = "surya_ocr-0.6.
|
| 4183 |
-
{file = "surya_ocr-0.6.
|
| 4184 |
]
|
| 4185 |
|
| 4186 |
[package.dependencies]
|
| 4187 |
filetype = ">=1.2.0,<2.0.0"
|
| 4188 |
ftfy = ">=6.1.3,<7.0.0"
|
| 4189 |
opencv-python = ">=4.9.0.80,<5.0.0.0"
|
| 4190 |
-
pdftext = ">=0.3.
|
| 4191 |
pillow = ">=10.2.0,<11.0.0"
|
| 4192 |
pydantic = ">=2.5.3,<3.0.0"
|
| 4193 |
pydantic-settings = ">=2.1.0,<3.0.0"
|
| 4194 |
pypdfium2 = ">=4.25.0,<5.0.0"
|
| 4195 |
python-dotenv = ">=1.0.0,<2.0.0"
|
| 4196 |
tabulate = ">=0.9.0,<0.10.0"
|
| 4197 |
-
torch = ">=2.
|
| 4198 |
transformers = ">=4.41.0,<5.0.0"
|
| 4199 |
|
| 4200 |
[[package]]
|
|
@@ -4216,13 +4217,13 @@ dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"]
|
|
| 4216 |
|
| 4217 |
[[package]]
|
| 4218 |
name = "tabled-pdf"
|
| 4219 |
-
version = "0.1.
|
| 4220 |
description = "Detect and recognize tables in PDFs and images."
|
| 4221 |
optional = false
|
| 4222 |
python-versions = "<4.0,>=3.10"
|
| 4223 |
files = [
|
| 4224 |
-
{file = "tabled_pdf-0.1.
|
| 4225 |
-
{file = "tabled_pdf-0.1.
|
| 4226 |
]
|
| 4227 |
|
| 4228 |
[package.dependencies]
|
|
@@ -4232,7 +4233,7 @@ pydantic-settings = ">=2.5.2,<3.0.0"
|
|
| 4232 |
pypdfium2 = ">=4.30.0,<5.0.0"
|
| 4233 |
python-dotenv = ">=1.0.1,<2.0.0"
|
| 4234 |
scikit-learn = ">=1.5.2,<2.0.0"
|
| 4235 |
-
surya-ocr = ">=0.6.
|
| 4236 |
tabulate = ">=0.9.0,<0.10.0"
|
| 4237 |
|
| 4238 |
[[package]]
|
|
@@ -4978,93 +4979,93 @@ files = [
|
|
| 4978 |
|
| 4979 |
[[package]]
|
| 4980 |
name = "yarl"
|
| 4981 |
-
version = "1.15.
|
| 4982 |
description = "Yet another URL library"
|
| 4983 |
optional = false
|
| 4984 |
python-versions = ">=3.9"
|
| 4985 |
files = [
|
| 4986 |
-
{file = "yarl-1.15.
|
| 4987 |
-
{file = "yarl-1.15.
|
| 4988 |
-
{file = "yarl-1.15.
|
| 4989 |
-
{file = "yarl-1.15.
|
| 4990 |
-
{file = "yarl-1.15.
|
| 4991 |
-
{file = "yarl-1.15.
|
| 4992 |
-
{file = "yarl-1.15.
|
| 4993 |
-
{file = "yarl-1.15.
|
| 4994 |
-
{file = "yarl-1.15.
|
| 4995 |
-
{file = "yarl-1.15.
|
| 4996 |
-
{file = "yarl-1.15.
|
| 4997 |
-
{file = "yarl-1.15.
|
| 4998 |
-
{file = "yarl-1.15.
|
| 4999 |
-
{file = "yarl-1.15.
|
| 5000 |
-
{file = "yarl-1.15.
|
| 5001 |
-
{file = "yarl-1.15.
|
| 5002 |
-
{file = "yarl-1.15.
|
| 5003 |
-
{file = "yarl-1.15.
|
| 5004 |
-
{file = "yarl-1.15.
|
| 5005 |
-
{file = "yarl-1.15.
|
| 5006 |
-
{file = "yarl-1.15.
|
| 5007 |
-
{file = "yarl-1.15.
|
| 5008 |
-
{file = "yarl-1.15.
|
| 5009 |
-
{file = "yarl-1.15.
|
| 5010 |
-
{file = "yarl-1.15.
|
| 5011 |
-
{file = "yarl-1.15.
|
| 5012 |
-
{file = "yarl-1.15.
|
| 5013 |
-
{file = "yarl-1.15.
|
| 5014 |
-
{file = "yarl-1.15.
|
| 5015 |
-
{file = "yarl-1.15.
|
| 5016 |
-
{file = "yarl-1.15.
|
| 5017 |
-
{file = "yarl-1.15.
|
| 5018 |
-
{file = "yarl-1.15.
|
| 5019 |
-
{file = "yarl-1.15.
|
| 5020 |
-
{file = "yarl-1.15.
|
| 5021 |
-
{file = "yarl-1.15.
|
| 5022 |
-
{file = "yarl-1.15.
|
| 5023 |
-
{file = "yarl-1.15.
|
| 5024 |
-
{file = "yarl-1.15.
|
| 5025 |
-
{file = "yarl-1.15.
|
| 5026 |
-
{file = "yarl-1.15.
|
| 5027 |
-
{file = "yarl-1.15.
|
| 5028 |
-
{file = "yarl-1.15.
|
| 5029 |
-
{file = "yarl-1.15.
|
| 5030 |
-
{file = "yarl-1.15.
|
| 5031 |
-
{file = "yarl-1.15.
|
| 5032 |
-
{file = "yarl-1.15.
|
| 5033 |
-
{file = "yarl-1.15.
|
| 5034 |
-
{file = "yarl-1.15.
|
| 5035 |
-
{file = "yarl-1.15.
|
| 5036 |
-
{file = "yarl-1.15.
|
| 5037 |
-
{file = "yarl-1.15.
|
| 5038 |
-
{file = "yarl-1.15.
|
| 5039 |
-
{file = "yarl-1.15.
|
| 5040 |
-
{file = "yarl-1.15.
|
| 5041 |
-
{file = "yarl-1.15.
|
| 5042 |
-
{file = "yarl-1.15.
|
| 5043 |
-
{file = "yarl-1.15.
|
| 5044 |
-
{file = "yarl-1.15.
|
| 5045 |
-
{file = "yarl-1.15.
|
| 5046 |
-
{file = "yarl-1.15.
|
| 5047 |
-
{file = "yarl-1.15.
|
| 5048 |
-
{file = "yarl-1.15.
|
| 5049 |
-
{file = "yarl-1.15.
|
| 5050 |
-
{file = "yarl-1.15.
|
| 5051 |
-
{file = "yarl-1.15.
|
| 5052 |
-
{file = "yarl-1.15.
|
| 5053 |
-
{file = "yarl-1.15.
|
| 5054 |
-
{file = "yarl-1.15.
|
| 5055 |
-
{file = "yarl-1.15.
|
| 5056 |
-
{file = "yarl-1.15.
|
| 5057 |
-
{file = "yarl-1.15.
|
| 5058 |
-
{file = "yarl-1.15.
|
| 5059 |
-
{file = "yarl-1.15.
|
| 5060 |
-
{file = "yarl-1.15.
|
| 5061 |
-
{file = "yarl-1.15.
|
| 5062 |
-
{file = "yarl-1.15.
|
| 5063 |
-
{file = "yarl-1.15.
|
| 5064 |
-
{file = "yarl-1.15.
|
| 5065 |
-
{file = "yarl-1.15.
|
| 5066 |
-
{file = "yarl-1.15.
|
| 5067 |
-
{file = "yarl-1.15.
|
| 5068 |
]
|
| 5069 |
|
| 5070 |
[package.dependencies]
|
|
@@ -5075,4 +5076,4 @@ propcache = ">=0.2.0"
|
|
| 5075 |
[metadata]
|
| 5076 |
lock-version = "2.0"
|
| 5077 |
python-versions = "^3.10"
|
| 5078 |
-
content-hash = "
|
|
|
|
| 1120 |
|
| 1121 |
[[package]]
|
| 1122 |
name = "huggingface-hub"
|
| 1123 |
+
version = "0.26.0"
|
| 1124 |
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
|
| 1125 |
optional = false
|
| 1126 |
python-versions = ">=3.8.0"
|
| 1127 |
files = [
|
| 1128 |
+
{file = "huggingface_hub-0.26.0-py3-none-any.whl", hash = "sha256:e43b8f36042b2103b48dea822535e08f5f089c4aa7013a067fca7b4ebf7f85a3"},
|
| 1129 |
+
{file = "huggingface_hub-0.26.0.tar.gz", hash = "sha256:524fe9281b015b76aa73ff1a83bf1cbe8cab851c9ac5ae5fcd2a25d5173ce629"},
|
| 1130 |
]
|
| 1131 |
|
| 1132 |
[package.dependencies]
|
|
|
|
| 1139 |
typing-extensions = ">=3.7.4.3"
|
| 1140 |
|
| 1141 |
[package.extras]
|
| 1142 |
+
all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
|
| 1143 |
cli = ["InquirerPy (==0.3.4)"]
|
| 1144 |
+
dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
|
| 1145 |
fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
|
| 1146 |
hf-transfer = ["hf-transfer (>=0.1.4)"]
|
| 1147 |
+
inference = ["aiohttp"]
|
| 1148 |
+
quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.5.0)"]
|
| 1149 |
tensorflow = ["graphviz", "pydot", "tensorflow"]
|
| 1150 |
tensorflow-testing = ["keras (<3.0)", "tensorflow"]
|
| 1151 |
+
testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
|
| 1152 |
torch = ["safetensors[torch]", "torch"]
|
| 1153 |
typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
|
| 1154 |
|
|
|
|
| 1684 |
|
| 1685 |
[[package]]
|
| 1686 |
name = "markupsafe"
|
| 1687 |
+
version = "3.0.2"
|
| 1688 |
description = "Safely add untrusted strings to HTML/XML markup."
|
| 1689 |
optional = false
|
| 1690 |
python-versions = ">=3.9"
|
| 1691 |
files = [
|
| 1692 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"},
|
| 1693 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"},
|
| 1694 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"},
|
| 1695 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"},
|
| 1696 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"},
|
| 1697 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"},
|
| 1698 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"},
|
| 1699 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"},
|
| 1700 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"},
|
| 1701 |
+
{file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"},
|
| 1702 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"},
|
| 1703 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"},
|
| 1704 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"},
|
| 1705 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"},
|
| 1706 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"},
|
| 1707 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"},
|
| 1708 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"},
|
| 1709 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"},
|
| 1710 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"},
|
| 1711 |
+
{file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"},
|
| 1712 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"},
|
| 1713 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"},
|
| 1714 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"},
|
| 1715 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"},
|
| 1716 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"},
|
| 1717 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"},
|
| 1718 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"},
|
| 1719 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"},
|
| 1720 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"},
|
| 1721 |
+
{file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"},
|
| 1722 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"},
|
| 1723 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"},
|
| 1724 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"},
|
| 1725 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"},
|
| 1726 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"},
|
| 1727 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"},
|
| 1728 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"},
|
| 1729 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"},
|
| 1730 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"},
|
| 1731 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"},
|
| 1732 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"},
|
| 1733 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"},
|
| 1734 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"},
|
| 1735 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"},
|
| 1736 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"},
|
| 1737 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"},
|
| 1738 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"},
|
| 1739 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"},
|
| 1740 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"},
|
| 1741 |
+
{file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"},
|
| 1742 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"},
|
| 1743 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"},
|
| 1744 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"},
|
| 1745 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"},
|
| 1746 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"},
|
| 1747 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"},
|
| 1748 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"},
|
| 1749 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"},
|
| 1750 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"},
|
| 1751 |
+
{file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"},
|
| 1752 |
+
{file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"},
|
| 1753 |
]
|
| 1754 |
|
| 1755 |
[[package]]
|
|
|
|
| 2510 |
|
| 2511 |
[[package]]
|
| 2512 |
name = "pdftext"
|
| 2513 |
+
version = "0.3.17"
|
| 2514 |
description = "Extract structured text from pdfs quickly"
|
| 2515 |
optional = false
|
| 2516 |
python-versions = "<4.0,>=3.10"
|
| 2517 |
files = [
|
| 2518 |
+
{file = "pdftext-0.3.17-py3-none-any.whl", hash = "sha256:fa43626d589aba4b5d04d7007cd5b3742731b69770b215fb515967a879a801da"},
|
| 2519 |
+
{file = "pdftext-0.3.17.tar.gz", hash = "sha256:199bf03337ccb19b6af8cc9c60ad490cc4d72f49a4f3db6de53eb561121a7f10"},
|
| 2520 |
]
|
| 2521 |
|
| 2522 |
[package.dependencies]
|
|
|
|
| 2809 |
|
| 2810 |
[[package]]
|
| 2811 |
name = "psutil"
|
| 2812 |
+
version = "6.1.0"
|
| 2813 |
description = "Cross-platform lib for process and system monitoring in Python."
|
| 2814 |
optional = false
|
| 2815 |
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7"
|
| 2816 |
files = [
|
| 2817 |
+
{file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"},
|
| 2818 |
+
{file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"},
|
| 2819 |
+
{file = "psutil-6.1.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:000d1d1ebd634b4efb383f4034437384e44a6d455260aaee2eca1e9c1b55f047"},
|
| 2820 |
+
{file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5cd2bcdc75b452ba2e10f0e8ecc0b57b827dd5d7aaffbc6821b2a9a242823a76"},
|
| 2821 |
+
{file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:045f00a43c737f960d273a83973b2511430d61f283a44c96bf13a6e829ba8fdc"},
|
| 2822 |
+
{file = "psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e"},
|
| 2823 |
+
{file = "psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85"},
|
| 2824 |
+
{file = "psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688"},
|
| 2825 |
+
{file = "psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e"},
|
| 2826 |
+
{file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38"},
|
| 2827 |
+
{file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b"},
|
| 2828 |
+
{file = "psutil-6.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a"},
|
| 2829 |
+
{file = "psutil-6.1.0-cp36-cp36m-win32.whl", hash = "sha256:6d3fbbc8d23fcdcb500d2c9f94e07b1342df8ed71b948a2649b5cb060a7c94ca"},
|
| 2830 |
+
{file = "psutil-6.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1209036fbd0421afde505a4879dee3b2fd7b1e14fee81c0069807adcbbcca747"},
|
| 2831 |
+
{file = "psutil-6.1.0-cp37-abi3-win32.whl", hash = "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e"},
|
| 2832 |
+
{file = "psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be"},
|
| 2833 |
+
{file = "psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a"},
|
| 2834 |
]
|
| 2835 |
|
| 2836 |
[package.extras]
|
| 2837 |
+
dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "wheel"]
|
| 2838 |
+
test = ["pytest", "pytest-xdist", "setuptools"]
|
| 2839 |
|
| 2840 |
[[package]]
|
| 2841 |
name = "ptyprocess"
|
|
|
|
| 4175 |
|
| 4176 |
[[package]]
|
| 4177 |
name = "surya-ocr"
|
| 4178 |
+
version = "0.6.6"
|
| 4179 |
description = "OCR, layout, reading order, and table recognition in 90+ languages"
|
| 4180 |
optional = false
|
| 4181 |
+
python-versions = "<4.0,>=3.10"
|
| 4182 |
files = [
|
| 4183 |
+
{file = "surya_ocr-0.6.6-py3-none-any.whl", hash = "sha256:78ae448e5a753ac1b5df1ab57e708c25c5076fbbaaa6cfe06404daa4ee05dbd2"},
|
| 4184 |
+
{file = "surya_ocr-0.6.6.tar.gz", hash = "sha256:df0c394f7ee268ec1527dff6fa025b294660e6e95eb573226eb793fb11e214f3"},
|
| 4185 |
]
|
| 4186 |
|
| 4187 |
[package.dependencies]
|
| 4188 |
filetype = ">=1.2.0,<2.0.0"
|
| 4189 |
ftfy = ">=6.1.3,<7.0.0"
|
| 4190 |
opencv-python = ">=4.9.0.80,<5.0.0.0"
|
| 4191 |
+
pdftext = ">=0.3.17,<0.4.0"
|
| 4192 |
pillow = ">=10.2.0,<11.0.0"
|
| 4193 |
pydantic = ">=2.5.3,<3.0.0"
|
| 4194 |
pydantic-settings = ">=2.1.0,<3.0.0"
|
| 4195 |
pypdfium2 = ">=4.25.0,<5.0.0"
|
| 4196 |
python-dotenv = ">=1.0.0,<2.0.0"
|
| 4197 |
tabulate = ">=0.9.0,<0.10.0"
|
| 4198 |
+
torch = ">=2.4.1,<3.0.0"
|
| 4199 |
transformers = ">=4.41.0,<5.0.0"
|
| 4200 |
|
| 4201 |
[[package]]
|
|
|
|
| 4217 |
|
| 4218 |
[[package]]
|
| 4219 |
name = "tabled-pdf"
|
| 4220 |
+
version = "0.1.1"
|
| 4221 |
description = "Detect and recognize tables in PDFs and images."
|
| 4222 |
optional = false
|
| 4223 |
python-versions = "<4.0,>=3.10"
|
| 4224 |
files = [
|
| 4225 |
+
{file = "tabled_pdf-0.1.1-py3-none-any.whl", hash = "sha256:ec40cef5d5348127ccfbbf519ec83a8f54511986944cbf391521af316904789b"},
|
| 4226 |
+
{file = "tabled_pdf-0.1.1.tar.gz", hash = "sha256:1db0518881473fe33f402c59555d3d460ceed86a92b5fe0ecb07841f82760c5c"},
|
| 4227 |
]
|
| 4228 |
|
| 4229 |
[package.dependencies]
|
|
|
|
| 4233 |
pypdfium2 = ">=4.30.0,<5.0.0"
|
| 4234 |
python-dotenv = ">=1.0.1,<2.0.0"
|
| 4235 |
scikit-learn = ">=1.5.2,<2.0.0"
|
| 4236 |
+
surya-ocr = ">=0.6.6,<0.7.0"
|
| 4237 |
tabulate = ">=0.9.0,<0.10.0"
|
| 4238 |
|
| 4239 |
[[package]]
|
|
|
|
| 4979 |
|
| 4980 |
[[package]]
|
| 4981 |
name = "yarl"
|
| 4982 |
+
version = "1.15.5"
|
| 4983 |
description = "Yet another URL library"
|
| 4984 |
optional = false
|
| 4985 |
python-versions = ">=3.9"
|
| 4986 |
files = [
|
| 4987 |
+
{file = "yarl-1.15.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b6c57972a406ea0f61e3f28f2b3a780fb71fbe1d82d267afe5a2f889a83ee7e7"},
|
| 4988 |
+
{file = "yarl-1.15.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5c3ac5bdcc1375c8ee52784adf94edbce37c471dd2100a117cfef56fe8dbc2b4"},
|
| 4989 |
+
{file = "yarl-1.15.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:68d21d0563d82aaf46163eac529adac301b20be3181b8a2811f7bd5615466055"},
|
| 4990 |
+
{file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7d317fb80bc17ed4b34a9aad8b80cef34bea0993654f3e8566daf323def7ef9"},
|
| 4991 |
+
{file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed9c72d5361cfd5af5ccadffa8f8077f4929640e1f938aa0f4b92c5a24996ac5"},
|
| 4992 |
+
{file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bb707859218e8335447b210f41a755e7b1367c33e87add884128bba144694a7f"},
|
| 4993 |
+
{file = "yarl-1.15.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6563394492c96cb57f4dff0c69c63d2b28b5469c59c66f35a1e6451583cd0ab4"},
|
| 4994 |
+
{file = "yarl-1.15.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c2d1109c8d92059314cc34dd8f0a31f74b720dc140744923ed7ca228bf9b491"},
|
| 4995 |
+
{file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8fc727f0fb388debc771eaa7091c092bd2e8b6b4741b73354b8efadcf96d6031"},
|
| 4996 |
+
{file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:94189746c5ad62e1014a16298130e696fe593d031d442ef135fb7787b7a1f820"},
|
| 4997 |
+
{file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b06d8b05d0fafef204d635a4711283ddbf19c7c0facdc61b4b775f6e47e2d4be"},
|
| 4998 |
+
{file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:de6917946dc6bc237d4b354e38aa13a232e0c7948fdbdb160edee3862e9d735f"},
|
| 4999 |
+
{file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:34816f1d833433a16c4832562a050b0a60eac53dcb71b2032e6ebff82d74b6a7"},
|
| 5000 |
+
{file = "yarl-1.15.5-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:19e2a4b2935f95fad0949f420514c5d862f5f18058fbbfd8854f496a97d9fd87"},
|
| 5001 |
+
{file = "yarl-1.15.5-cp310-cp310-win32.whl", hash = "sha256:30ca64521f1a96b72886dd9e8652f16eab11891b4572dcfcfc1ad6d6ccb27abd"},
|
| 5002 |
+
{file = "yarl-1.15.5-cp310-cp310-win_amd64.whl", hash = "sha256:86648c53b10c53db8b967a75fb41e0c89dbec7398f6525e34af2b6c456bb0ac0"},
|
| 5003 |
+
{file = "yarl-1.15.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e652aa9f8dfa808bc5b2da4d1f4e286cf1d640570fdfa72ffc0c1d16ba114651"},
|
| 5004 |
+
{file = "yarl-1.15.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:21050b6cd569980fe20ceeab4baeb900d3f7247270475e42bafe117416a5496c"},
|
| 5005 |
+
{file = "yarl-1.15.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:18940191ec9a83bbfe63eea61c3e9d12474bb910d5613bce8fa46e84a80b75b2"},
|
| 5006 |
+
{file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a082dc948045606f62dca0228ab24f13737180b253378d6443f5b2b9ef8beefe"},
|
| 5007 |
+
{file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a843e692f9d5402b3455653f4607dc521de2385f01c5cad7ba4a87c46e2ea8d"},
|
| 5008 |
+
{file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5093a453176a4fad4f9c3006f507cf300546190bb3e27944275a37cfd6323a65"},
|
| 5009 |
+
{file = "yarl-1.15.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2597a589859b94d0a5e2f5d30fee95081867926e57cb751f8b44a7dd92da4e79"},
|
| 5010 |
+
{file = "yarl-1.15.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f5a1ca6eaabfe62718b87eac06d9a47b30cf92ffa065fee9196d3ecd24a3cf1"},
|
| 5011 |
+
{file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4ac83b307cc4b8907345b52994055c6c3c2601ceb6fcb94c5ed6a93c6b4e8257"},
|
| 5012 |
+
{file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:325e2beb2cd8654b276e7686a3cd203628dd3fe32d5c616e632bc35a2901fb16"},
|
| 5013 |
+
{file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:75d04ba8ed335042328086e643e01165e0c24598216f72da709b375930ae3bdb"},
|
| 5014 |
+
{file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7abd7d15aedb3961a967cc65f8144dbbca42e3626a21c5f4f29919cf43eeafb9"},
|
| 5015 |
+
{file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:294c742a273f44511f14b03a9e06b66094dcdf4bbb75a5e23fead548fd5310ae"},
|
| 5016 |
+
{file = "yarl-1.15.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:63d46606b20f80a6476f1044bab78e1a69c2e0747f174583e2f12fc70bad2170"},
|
| 5017 |
+
{file = "yarl-1.15.5-cp311-cp311-win32.whl", hash = "sha256:b1217102a455e3ac9ac293081093f21f0183e978c7692171ff669fee5296fa28"},
|
| 5018 |
+
{file = "yarl-1.15.5-cp311-cp311-win_amd64.whl", hash = "sha256:5848500b6a01497560969e8c3a7eb1b2570853c74a0ca6f67ebaf6064106c49b"},
|
| 5019 |
+
{file = "yarl-1.15.5-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d3309ee667f2d9c7ac9ecf44620d6b274bfdd8065b8c5019ff6795dd887b8fed"},
|
| 5020 |
+
{file = "yarl-1.15.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:96ce879799fee124d241ea3b84448378f638e290c49493d00b706f3fd57ec22b"},
|
| 5021 |
+
{file = "yarl-1.15.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c884dfa56b050f718ea3cbbfd972e29a6f07f63a7449b10d9a20d64f7eec92e2"},
|
| 5022 |
+
{file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0327081978fe186c3390dd4f73f95f825d0bb9c74967e22c2a1a87735974d8f5"},
|
| 5023 |
+
{file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:524b3bb7dff320e305bc979c65eddc0342548c56ea9241502f907853fe53c408"},
|
| 5024 |
+
{file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd56de8b645421ff09c993fdb0ee9c5a3b50d290a8f55793b500d99b34d0c1ce"},
|
| 5025 |
+
{file = "yarl-1.15.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c166ad987265bb343be58cdf4fbc4478cc1d81f2246d2be9a15f94393b269faa"},
|
| 5026 |
+
{file = "yarl-1.15.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d56980374a10c74255fcea6ebcfb0aeca7166d212ee9fd7e823ddef35fb62ad0"},
|
| 5027 |
+
{file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cbf36099a9b407e1456dbf55844743a98603fcba32d2a46fb3a698d926facf1b"},
|
| 5028 |
+
{file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d7fa4b033e2f267e37aabcc36949fa89f9f1716a723395912147f9cf3fb437c7"},
|
| 5029 |
+
{file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:bb129f77ddaea2d8e6e00417b8d907448de3407af4eddacca0a515574ad71493"},
|
| 5030 |
+
{file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:68e837b3edfcd037f9706157e7cb8efda832de6248c7d9e893e2638356dfae5d"},
|
| 5031 |
+
{file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5b8af4165e097ff84d9bbb97bb4f4d7f71b9c1c9565a2d0e27d93e5f92dae220"},
|
| 5032 |
+
{file = "yarl-1.15.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:70d074d5a96e0954fe6db81ff356f4361397da1cda3f7c127fc0902f671a087e"},
|
| 5033 |
+
{file = "yarl-1.15.5-cp312-cp312-win32.whl", hash = "sha256:362da97ad4360e4ef1dd24ccdd3bceb18332da7f40026a42f49b7edd686e31c3"},
|
| 5034 |
+
{file = "yarl-1.15.5-cp312-cp312-win_amd64.whl", hash = "sha256:9aa054d97033beac9cb9b19b7c0b8784b85b12cd17879087ca6bffba57884e02"},
|
| 5035 |
+
{file = "yarl-1.15.5-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5fadcf532fd9f6cbad71485ef8c2462dd9a91d3efc72ca01eb0970792c92552a"},
|
| 5036 |
+
{file = "yarl-1.15.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8b7dd6983c81523f9de0ae6334c3b7a3cb33283936e0525f80c4f713f54a9bb6"},
|
| 5037 |
+
{file = "yarl-1.15.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fcfd663dc88465ebe41c7c938bdc91c4b01cda96a0d64bf38fd66c1877323771"},
|
| 5038 |
+
{file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd529e637cd23204bd82072f6637cff7af2516ad2c132e8f3342cbc84871f7d1"},
|
| 5039 |
+
{file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b30f13fac56598474071a4f1ecd66c78fdaf2f8619042d7ca135f72dbb348cf"},
|
| 5040 |
+
{file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:44088ec0be82fba118ed29b6b429f80bf295297727adae4c257ac297e01e8bcd"},
|
| 5041 |
+
{file = "yarl-1.15.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:607683991bab8607e5158cd290dd8fdaa613442aeab802fe1c237d3a3eee7358"},
|
| 5042 |
+
{file = "yarl-1.15.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:da48cdff56b01ea4282a6d04b83b07a2088351a4a3ff7aacc1e7e9b6b04b90b9"},
|
| 5043 |
+
{file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9162ea117ce8bad8ebc95b7376b4135988acd888d2cf4702f8281e3c11f8b81f"},
|
| 5044 |
+
{file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:e8aa19c39cb20bfb16f0266df175a6004943122cf20707fbf0cacc21f6468a25"},
|
| 5045 |
+
{file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5d6be369488d503c8edc14e2f63d71ab2a607041ad216a8ad444fa18e8dea792"},
|
| 5046 |
+
{file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6e2c674cfe4c03ad7a4d536b1f808221f0d11a360486b4b032d2557c0bd633ad"},
|
| 5047 |
+
{file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:041bafaa82b77fd4ec2826d42a55461ec86d999adf7ed9644eef7e8a9febb366"},
|
| 5048 |
+
{file = "yarl-1.15.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2eeb9ba53c055740cd282ae9d34eb7970d65e73a46f15adec4b0c1b0f2e55cc2"},
|
| 5049 |
+
{file = "yarl-1.15.5-cp313-cp313-win32.whl", hash = "sha256:73143dd279e641543da52c55652ad7b4c7c5f79e797f124f58f04cc060f14271"},
|
| 5050 |
+
{file = "yarl-1.15.5-cp313-cp313-win_amd64.whl", hash = "sha256:94ab1185900f43760d5487c8e49f5f1a66f864e36092f282f1813597479b9dfa"},
|
| 5051 |
+
{file = "yarl-1.15.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6b3d2767bd64c62909ea33525b954ba05c8f9726bfdf2141d175da4e344f19ae"},
|
| 5052 |
+
{file = "yarl-1.15.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:44359c52af9c383e5107f3b6301446fc8269599721fa42fafb2afb5f31a42dcb"},
|
| 5053 |
+
{file = "yarl-1.15.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6493da9ba5c551978c679ab04856c2cf8f79c316e8ec8c503460a135705edc3b"},
|
| 5054 |
+
{file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6b6e95bc621c11cf9ff21012173337e789f2461ebc3b4e5bf65c74ef69adb8"},
|
| 5055 |
+
{file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7983290ede3aaa2c9620879530849532529b4dcbf5b12a0b6a91163a773eadb9"},
|
| 5056 |
+
{file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:07a4b53abe85813c538b9cdbb02909ebe3734e3af466a587df516e960d500cc8"},
|
| 5057 |
+
{file = "yarl-1.15.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5882faa2a6e684f65ee44f18c701768749a950cbd5e72db452fc07805f6bdec0"},
|
| 5058 |
+
{file = "yarl-1.15.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e27861251d9c094f641d39a8a78dd2371fb9a252ea2f689d1ad353a31d46a0bc"},
|
| 5059 |
+
{file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8669a110f655c9eb22f16fb68a7d4942020aeaa09f1def584a80183e3e89953c"},
|
| 5060 |
+
{file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:10bfe0bef4cf5ea0383886beda004071faadedf2647048b9f876664284c5b60d"},
|
| 5061 |
+
{file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f7de0d4b6b4d8a77e422eb54d765255c0ec6883ee03b8fd537101633948619d7"},
|
| 5062 |
+
{file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:00bb3a559d7bd006a5302ecd7e409916939106a8cdbe31f4eb5e5b9ffcca57ea"},
|
| 5063 |
+
{file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:06ec070a2d71415f90dbe9d70af3158e7da97a128519dba2d1581156ee27fb92"},
|
| 5064 |
+
{file = "yarl-1.15.5-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b997a806846c00d1f41d6a251803732837771b2091bead7566f68820e317bfe7"},
|
| 5065 |
+
{file = "yarl-1.15.5-cp39-cp39-win32.whl", hash = "sha256:7825506fbee4055265528ec3532a8197ff26fc53d4978917a4c8ddbb4c1667d7"},
|
| 5066 |
+
{file = "yarl-1.15.5-cp39-cp39-win_amd64.whl", hash = "sha256:71730658be0b5de7c570a9795d7404c577b2313c1db370407092c66f70e04ccb"},
|
| 5067 |
+
{file = "yarl-1.15.5-py3-none-any.whl", hash = "sha256:625f31d6650829fba4030b4e7bdb2d69e41510dddfa29a1da27076c199521757"},
|
| 5068 |
+
{file = "yarl-1.15.5.tar.gz", hash = "sha256:8249147ee81c1cf4d1dc6f26ba28a1b9d92751529f83c308ad02164bb93abd0d"},
|
| 5069 |
]
|
| 5070 |
|
| 5071 |
[package.dependencies]
|
|
|
|
| 5076 |
[metadata]
|
| 5077 |
lock-version = "2.0"
|
| 5078 |
python-versions = "^3.10"
|
| 5079 |
+
content-hash = "b5016a59dd59840e90f8e59b8d019868648d447905122c690c37dbda8369d726"
|
pyproject.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
[tool.poetry]
|
| 2 |
name = "marker-pdf"
|
| 3 |
-
version = "0.3.
|
| 4 |
description = "Convert PDF to markdown with high speed and accuracy."
|
| 5 |
authors = ["Vik Paruchuri <github@vikas.sh>"]
|
| 6 |
readme = "README.md"
|
|
@@ -32,11 +32,11 @@ tabulate = "^0.9.0"
|
|
| 32 |
ftfy = "^6.1.1"
|
| 33 |
texify = "^0.2.0"
|
| 34 |
rapidfuzz = "^3.8.1"
|
| 35 |
-
surya-ocr = "^0.6.
|
| 36 |
filetype = "^1.2.0"
|
| 37 |
regex = "^2024.4.28"
|
| 38 |
-
pdftext = "^0.3.
|
| 39 |
-
tabled-pdf = "^0.1.
|
| 40 |
|
| 41 |
[tool.poetry.group.dev.dependencies]
|
| 42 |
jupyter = "^1.0.0"
|
|
|
|
| 1 |
[tool.poetry]
|
| 2 |
name = "marker-pdf"
|
| 3 |
+
version = "0.3.1"
|
| 4 |
description = "Convert PDF to markdown with high speed and accuracy."
|
| 5 |
authors = ["Vik Paruchuri <github@vikas.sh>"]
|
| 6 |
readme = "README.md"
|
|
|
|
| 32 |
ftfy = "^6.1.1"
|
| 33 |
texify = "^0.2.0"
|
| 34 |
rapidfuzz = "^3.8.1"
|
| 35 |
+
surya-ocr = "^0.6.6"
|
| 36 |
filetype = "^1.2.0"
|
| 37 |
regex = "^2024.4.28"
|
| 38 |
+
pdftext = "^0.3.17"
|
| 39 |
+
tabled-pdf = "^0.1.1"
|
| 40 |
|
| 41 |
[tool.poetry.group.dev.dependencies]
|
| 42 |
jupyter = "^1.0.0"
|