Vik Paruchuri
committed on
Commit
·
71a77dd
1
Parent(s):
0fd5e0c
Bump to newer google client lib
Browse files- benchmarks/overall/scorers/llm.py +18 -19
- benchmarks/table/gemini.py +17 -18
- marker/builders/llm_layout.py +15 -16
- marker/converters/pdf.py +0 -1
- marker/processors/llm/llm_complex.py +6 -14
- marker/processors/llm/llm_equation.py +6 -13
- marker/processors/llm/llm_form.py +7 -14
- marker/processors/llm/llm_handwriting.py +6 -14
- marker/processors/llm/llm_image_description.py +6 -13
- marker/processors/llm/llm_table.py +5 -13
- marker/processors/llm/llm_table_merge.py +11 -28
- marker/processors/llm/llm_text.py +0 -153
- marker/processors/llm/utils.py +37 -20
- poetry.lock +216 -380
- pyproject.toml +1 -1
- tests/processors/test_llm_processors.py +0 -20
benchmarks/overall/scorers/llm.py
CHANGED
|
@@ -4,8 +4,8 @@ import time
|
|
| 4 |
from typing import List
|
| 5 |
|
| 6 |
from PIL import Image
|
| 7 |
-
from google.
|
| 8 |
-
from google
|
| 9 |
import pypdfium2 as pdfium
|
| 10 |
|
| 11 |
from benchmarks.overall.scorers import BaseScorer, BlockScores
|
|
@@ -106,15 +106,14 @@ class LLMScorer(BaseScorer):
|
|
| 106 |
req_keys = text_keys + score_keys
|
| 107 |
properties = {}
|
| 108 |
for key in req_keys:
|
| 109 |
-
content_type =
|
| 110 |
-
properties[key] =
|
| 111 |
-
|
| 112 |
-
response_schema = content.Schema(
|
| 113 |
-
type=content.Type.OBJECT,
|
| 114 |
-
required=req_keys,
|
| 115 |
-
properties=properties
|
| 116 |
-
)
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
prompt = rating_prompt.replace("{{markdown}}", markdown)
|
| 119 |
response = self.llm_response_wrapper([img, prompt], response_schema)
|
| 120 |
assert all([k in response for k in req_keys]), f"Missing keys in response: {response}"
|
|
@@ -124,23 +123,23 @@ class LLMScorer(BaseScorer):
|
|
| 124 |
}
|
| 125 |
|
| 126 |
def llm_response_wrapper(self, prompt, response_schema, depth=0):
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
|
|
|
| 130 |
try:
|
| 131 |
-
responses =
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
"temperature": 0,
|
| 136 |
"response_schema": response_schema,
|
| 137 |
"response_mime_type": "application/json",
|
| 138 |
},
|
| 139 |
-
request_options={'timeout': 60}
|
| 140 |
)
|
| 141 |
output = responses.candidates[0].content.parts[0].text
|
| 142 |
return json.loads(output)
|
| 143 |
-
except
|
| 144 |
print(f"Hit Gemini rate limit, waiting 120 seconds")
|
| 145 |
time.sleep(120)
|
| 146 |
if depth > 2:
|
|
|
|
| 4 |
from typing import List
|
| 5 |
|
| 6 |
from PIL import Image
|
| 7 |
+
from google.genai.errors import APIError
|
| 8 |
+
from google import genai
|
| 9 |
import pypdfium2 as pdfium
|
| 10 |
|
| 11 |
from benchmarks.overall.scorers import BaseScorer, BlockScores
|
|
|
|
| 106 |
req_keys = text_keys + score_keys
|
| 107 |
properties = {}
|
| 108 |
for key in req_keys:
|
| 109 |
+
content_type = "INTEGER" if key in score_keys else "STRING"
|
| 110 |
+
properties[key] = {"type": content_type}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
response_schema = {
|
| 113 |
+
"required": req_keys,
|
| 114 |
+
"properties": properties,
|
| 115 |
+
"type": "OBJECT"
|
| 116 |
+
}
|
| 117 |
prompt = rating_prompt.replace("{{markdown}}", markdown)
|
| 118 |
response = self.llm_response_wrapper([img, prompt], response_schema)
|
| 119 |
assert all([k in response for k in req_keys]), f"Missing keys in response: {response}"
|
|
|
|
| 123 |
}
|
| 124 |
|
| 125 |
def llm_response_wrapper(self, prompt, response_schema, depth=0):
|
| 126 |
+
client = genai.Client(
|
| 127 |
+
api_key=settings.GOOGLE_API_KEY,
|
| 128 |
+
http_options={"timeout": 60000}
|
| 129 |
+
)
|
| 130 |
try:
|
| 131 |
+
responses = client.models.generate_content(
|
| 132 |
+
model="gemini-2.0-flash",
|
| 133 |
+
contents=prompt,
|
| 134 |
+
config={
|
| 135 |
"temperature": 0,
|
| 136 |
"response_schema": response_schema,
|
| 137 |
"response_mime_type": "application/json",
|
| 138 |
},
|
|
|
|
| 139 |
)
|
| 140 |
output = responses.candidates[0].content.parts[0].text
|
| 141 |
return json.loads(output)
|
| 142 |
+
except APIError as e:
|
| 143 |
print(f"Hit Gemini rate limit, waiting 120 seconds")
|
| 144 |
time.sleep(120)
|
| 145 |
if depth > 2:
|
benchmarks/table/gemini.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
| 1 |
import json
|
| 2 |
from PIL import Image
|
| 3 |
-
|
| 4 |
-
from google.
|
|
|
|
|
|
|
|
|
|
| 5 |
from marker.settings import settings
|
| 6 |
|
| 7 |
prompt = """
|
|
@@ -19,30 +22,26 @@ Guidelines:
|
|
| 19 |
3. Output only the HTML for the table, starting with the <table> tag and ending with the </table> tag.
|
| 20 |
""".strip()
|
| 21 |
|
| 22 |
-
|
|
|
|
| 23 |
|
| 24 |
def gemini_table_rec(image: Image.Image):
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
properties={
|
| 29 |
-
"table_html": content.Schema(
|
| 30 |
-
type=content.Type.STRING,
|
| 31 |
-
)
|
| 32 |
-
}
|
| 33 |
)
|
| 34 |
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
-
responses =
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
"temperature": 0,
|
| 42 |
-
"response_schema":
|
| 43 |
"response_mime_type": "application/json",
|
| 44 |
},
|
| 45 |
-
request_options={'timeout': 60}
|
| 46 |
)
|
| 47 |
|
| 48 |
output = responses.candidates[0].content.parts[0].text
|
|
|
|
| 1 |
import json
|
| 2 |
from PIL import Image
|
| 3 |
+
from google import genai
|
| 4 |
+
from google.genai import types
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
from pydantic import BaseModel
|
| 7 |
+
|
| 8 |
from marker.settings import settings
|
| 9 |
|
| 10 |
prompt = """
|
|
|
|
| 22 |
3. Output only the HTML for the table, starting with the <table> tag and ending with the </table> tag.
|
| 23 |
""".strip()
|
| 24 |
|
| 25 |
+
class TableSchema(BaseModel):
|
| 26 |
+
table_html: str
|
| 27 |
|
| 28 |
def gemini_table_rec(image: Image.Image):
|
| 29 |
+
client = genai.Client(
|
| 30 |
+
api_key=settings.GOOGLE_API_KEY,
|
| 31 |
+
http_options={"timeout": 60000}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
)
|
| 33 |
|
| 34 |
+
image_bytes = BytesIO()
|
| 35 |
+
image.save(image_bytes, format="PNG")
|
| 36 |
|
| 37 |
+
responses = client.models.generate_content(
|
| 38 |
+
model="gemini-2.0-flash",
|
| 39 |
+
contents=[types.Part.from_bytes(data=image_bytes.getvalue(), mime_type="image/png"), prompt], # According to gemini docs, it performs better if the image is the first element
|
| 40 |
+
config={
|
| 41 |
"temperature": 0,
|
| 42 |
+
"response_schema": TableSchema,
|
| 43 |
"response_mime_type": "application/json",
|
| 44 |
},
|
|
|
|
| 45 |
)
|
| 46 |
|
| 47 |
output = responses.candidates[0].content.parts[0].text
|
marker/builders/llm_layout.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 2 |
from typing import Annotated
|
| 3 |
|
| 4 |
-
from google.ai.generativelanguage_v1beta.types import content
|
| 5 |
from surya.layout import LayoutPredictor
|
| 6 |
from surya.ocr_error import OCRErrorPredictor
|
| 7 |
from tqdm import tqdm
|
|
|
|
| 8 |
|
| 9 |
from marker.builders.layout import LayoutBuilder
|
| 10 |
from marker.processors.llm import GoogleModel
|
|
@@ -41,7 +41,7 @@ class LLMLayoutBuilder(LayoutBuilder):
|
|
| 41 |
max_retries: Annotated[
|
| 42 |
int,
|
| 43 |
"The maximum number of retries to use for the Gemini model.",
|
| 44 |
-
] =
|
| 45 |
max_concurrency: Annotated[
|
| 46 |
int,
|
| 47 |
"The maximum number of concurrent requests to make to the Gemini model.",
|
|
@@ -158,21 +158,15 @@ Respond only with one of `Figure`, `Picture`, `ComplexRegion`, `Table`, or `Form
|
|
| 158 |
|
| 159 |
def process_block_relabeling(self, document: Document, page: PageGroup, block: Block, prompt: str):
|
| 160 |
image = self.extract_image(document, block)
|
| 161 |
-
response_schema = content.Schema(
|
| 162 |
-
type=content.Type.OBJECT,
|
| 163 |
-
enum=[],
|
| 164 |
-
required=["image_description", "label"],
|
| 165 |
-
properties={
|
| 166 |
-
"image_description": content.Schema(
|
| 167 |
-
type=content.Type.STRING,
|
| 168 |
-
),
|
| 169 |
-
"label": content.Schema(
|
| 170 |
-
type=content.Type.STRING,
|
| 171 |
-
),
|
| 172 |
-
},
|
| 173 |
-
)
|
| 174 |
|
| 175 |
-
response = self.model.generate_response(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
generated_label = None
|
| 177 |
if response and "label" in response:
|
| 178 |
generated_label = response["label"]
|
|
@@ -188,3 +182,8 @@ Respond only with one of `Figure`, `Picture`, `ComplexRegion`, `Table`, or `Form
|
|
| 188 |
|
| 189 |
def extract_image(self, document: Document, image_block: Block, expand: float = 0.01):
|
| 190 |
return image_block.get_image(document, highres=False, expansion=(expand, expand))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 2 |
from typing import Annotated
|
| 3 |
|
|
|
|
| 4 |
from surya.layout import LayoutPredictor
|
| 5 |
from surya.ocr_error import OCRErrorPredictor
|
| 6 |
from tqdm import tqdm
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
|
| 9 |
from marker.builders.layout import LayoutBuilder
|
| 10 |
from marker.processors.llm import GoogleModel
|
|
|
|
| 41 |
max_retries: Annotated[
|
| 42 |
int,
|
| 43 |
"The maximum number of retries to use for the Gemini model.",
|
| 44 |
+
] = 2
|
| 45 |
max_concurrency: Annotated[
|
| 46 |
int,
|
| 47 |
"The maximum number of concurrent requests to make to the Gemini model.",
|
|
|
|
| 158 |
|
| 159 |
def process_block_relabeling(self, document: Document, page: PageGroup, block: Block, prompt: str):
|
| 160 |
image = self.extract_image(document, block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
+
response = self.model.generate_response(
|
| 163 |
+
prompt,
|
| 164 |
+
image,
|
| 165 |
+
block,
|
| 166 |
+
LayoutSchema,
|
| 167 |
+
max_retries=self.max_retries,
|
| 168 |
+
timeout=self.timeout
|
| 169 |
+
)
|
| 170 |
generated_label = None
|
| 171 |
if response and "label" in response:
|
| 172 |
generated_label = response["label"]
|
|
|
|
| 182 |
|
| 183 |
def extract_image(self, document: Document, image_block: Block, expand: float = 0.01):
|
| 184 |
return image_block.get_image(document, highres=False, expansion=(expand, expand))
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
class LayoutSchema(BaseModel):
|
| 188 |
+
image_description: str
|
| 189 |
+
label: str
|
marker/converters/pdf.py
CHANGED
|
@@ -76,7 +76,6 @@ class PdfConverter(BaseConverter):
|
|
| 76 |
LLMTableMergeProcessor,
|
| 77 |
LLMFormProcessor,
|
| 78 |
TextProcessor,
|
| 79 |
-
LLMTextProcessor,
|
| 80 |
LLMComplexRegionProcessor,
|
| 81 |
LLMImageDescriptionProcessor,
|
| 82 |
LLMEquationProcessor,
|
|
|
|
| 76 |
LLMTableMergeProcessor,
|
| 77 |
LLMFormProcessor,
|
| 78 |
TextProcessor,
|
|
|
|
| 79 |
LLMComplexRegionProcessor,
|
| 80 |
LLMImageDescriptionProcessor,
|
| 81 |
LLMEquationProcessor,
|
marker/processors/llm/llm_complex.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
import markdown2
|
|
|
|
| 2 |
|
| 3 |
from marker.processors.llm import BaseLLMProcessor
|
| 4 |
|
| 5 |
-
from google.ai.generativelanguage_v1beta.types import content
|
| 6 |
-
|
| 7 |
from marker.schema import BlockTypes
|
| 8 |
from marker.schema.blocks import Block
|
| 9 |
from marker.schema.document import Document
|
|
@@ -55,18 +54,8 @@ Output:
|
|
| 55 |
text = block.raw_text(document)
|
| 56 |
prompt = self.complex_region_prompt.replace("{extracted_text}", text)
|
| 57 |
image = self.extract_image(document, block)
|
| 58 |
-
response_schema = content.Schema(
|
| 59 |
-
type=content.Type.OBJECT,
|
| 60 |
-
enum=[],
|
| 61 |
-
required=["corrected_markdown"],
|
| 62 |
-
properties={
|
| 63 |
-
"corrected_markdown": content.Schema(
|
| 64 |
-
type=content.Type.STRING
|
| 65 |
-
)
|
| 66 |
-
},
|
| 67 |
-
)
|
| 68 |
|
| 69 |
-
response = self.model.generate_response(prompt, image, block,
|
| 70 |
|
| 71 |
if not response or "corrected_markdown" not in response:
|
| 72 |
block.update_metadata(llm_error_count=1)
|
|
@@ -85,4 +74,7 @@ Output:
|
|
| 85 |
|
| 86 |
# Convert LLM markdown to html
|
| 87 |
corrected_markdown = corrected_markdown.strip().lstrip("```markdown").rstrip("```").strip()
|
| 88 |
-
block.html = markdown2.markdown(corrected_markdown, extras=["tables"])
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import markdown2
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
|
| 4 |
from marker.processors.llm import BaseLLMProcessor
|
| 5 |
|
|
|
|
|
|
|
| 6 |
from marker.schema import BlockTypes
|
| 7 |
from marker.schema.blocks import Block
|
| 8 |
from marker.schema.document import Document
|
|
|
|
| 54 |
text = block.raw_text(document)
|
| 55 |
prompt = self.complex_region_prompt.replace("{extracted_text}", text)
|
| 56 |
image = self.extract_image(document, block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
response = self.model.generate_response(prompt, image, block, ComplexSchema)
|
| 59 |
|
| 60 |
if not response or "corrected_markdown" not in response:
|
| 61 |
block.update_metadata(llm_error_count=1)
|
|
|
|
| 74 |
|
| 75 |
# Convert LLM markdown to html
|
| 76 |
corrected_markdown = corrected_markdown.strip().lstrip("```markdown").rstrip("```").strip()
|
| 77 |
+
block.html = markdown2.markdown(corrected_markdown, extras=["tables"])
|
| 78 |
+
|
| 79 |
+
class ComplexSchema(BaseModel):
|
| 80 |
+
corrected_markdown: str
|
marker/processors/llm/llm_equation.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
from
|
| 2 |
|
| 3 |
-
from
|
| 4 |
|
| 5 |
from marker.schema import BlockTypes
|
| 6 |
from marker.schema.blocks import Equation
|
|
@@ -67,18 +67,8 @@ Output:
|
|
| 67 |
prompt = self.equation_latex_prompt.replace("{equation}", text)
|
| 68 |
|
| 69 |
image = self.extract_image(document, block)
|
| 70 |
-
response_schema = content.Schema(
|
| 71 |
-
type=content.Type.OBJECT,
|
| 72 |
-
enum=[],
|
| 73 |
-
required=["html_equation"],
|
| 74 |
-
properties={
|
| 75 |
-
"html_equation": content.Schema(
|
| 76 |
-
type=content.Type.STRING
|
| 77 |
-
)
|
| 78 |
-
},
|
| 79 |
-
)
|
| 80 |
|
| 81 |
-
response = self.model.generate_response(prompt, image, block,
|
| 82 |
|
| 83 |
if not response or "html_equation" not in response:
|
| 84 |
block.update_metadata(llm_error_count=1)
|
|
@@ -89,3 +79,6 @@ Output:
|
|
| 89 |
block.update_metadata(llm_error_count=1)
|
| 90 |
return
|
| 91 |
block.html = html_equation
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
|
| 3 |
+
from marker.processors.llm import BaseLLMProcessor
|
| 4 |
|
| 5 |
from marker.schema import BlockTypes
|
| 6 |
from marker.schema.blocks import Equation
|
|
|
|
| 67 |
prompt = self.equation_latex_prompt.replace("{equation}", text)
|
| 68 |
|
| 69 |
image = self.extract_image(document, block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
+
response = self.model.generate_response(prompt, image, block, EquationSchema)
|
| 72 |
|
| 73 |
if not response or "html_equation" not in response:
|
| 74 |
block.update_metadata(llm_error_count=1)
|
|
|
|
| 79 |
block.update_metadata(llm_error_count=1)
|
| 80 |
return
|
| 81 |
block.html = html_equation
|
| 82 |
+
|
| 83 |
+
class EquationSchema(BaseModel):
|
| 84 |
+
html_equation: str
|
marker/processors/llm/llm_form.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
from
|
| 2 |
|
| 3 |
-
from
|
| 4 |
|
| 5 |
from marker.schema import BlockTypes
|
| 6 |
from marker.schema.blocks import Block
|
|
@@ -73,18 +73,8 @@ Output:
|
|
| 73 |
prompt = self.form_rewriting_prompt.replace("{block_html}", block_html)
|
| 74 |
|
| 75 |
image = self.extract_image(document, block)
|
| 76 |
-
response_schema = content.Schema(
|
| 77 |
-
type=content.Type.OBJECT,
|
| 78 |
-
enum=[],
|
| 79 |
-
required=["corrected_html"],
|
| 80 |
-
properties={
|
| 81 |
-
"corrected_html": content.Schema(
|
| 82 |
-
type=content.Type.STRING
|
| 83 |
-
)
|
| 84 |
-
},
|
| 85 |
-
)
|
| 86 |
|
| 87 |
-
response = self.model.generate_response(prompt, image, block,
|
| 88 |
|
| 89 |
if not response or "corrected_html" not in response:
|
| 90 |
block.update_metadata(llm_error_count=1)
|
|
@@ -102,4 +92,7 @@ Output:
|
|
| 102 |
return
|
| 103 |
|
| 104 |
corrected_html = corrected_html.strip().lstrip("```html").rstrip("```").strip()
|
| 105 |
-
block.html = corrected_html
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
|
| 3 |
+
from marker.processors.llm import BaseLLMProcessor
|
| 4 |
|
| 5 |
from marker.schema import BlockTypes
|
| 6 |
from marker.schema.blocks import Block
|
|
|
|
| 73 |
prompt = self.form_rewriting_prompt.replace("{block_html}", block_html)
|
| 74 |
|
| 75 |
image = self.extract_image(document, block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
response = self.model.generate_response(prompt, image, block, FormSchema)
|
| 78 |
|
| 79 |
if not response or "corrected_html" not in response:
|
| 80 |
block.update_metadata(llm_error_count=1)
|
|
|
|
| 92 |
return
|
| 93 |
|
| 94 |
corrected_html = corrected_html.strip().lstrip("```html").rstrip("```").strip()
|
| 95 |
+
block.html = corrected_html
|
| 96 |
+
|
| 97 |
+
class FormSchema(BaseModel):
|
| 98 |
+
corrected_html: str
|
marker/processors/llm/llm_handwriting.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
import markdown2
|
|
|
|
| 2 |
|
| 3 |
from marker.processors.llm import BaseLLMProcessor
|
| 4 |
|
| 5 |
-
from google.ai.generativelanguage_v1beta.types import content
|
| 6 |
-
|
| 7 |
from marker.schema import BlockTypes
|
| 8 |
from marker.schema.blocks import Handwriting, Text
|
| 9 |
from marker.schema.document import Document
|
|
@@ -49,18 +48,8 @@ Formatting should be in markdown, with the following rules:
|
|
| 49 |
prompt = self.handwriting_generation_prompt
|
| 50 |
|
| 51 |
image = self.extract_image(document, block)
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
enum=[],
|
| 55 |
-
required=["markdown"],
|
| 56 |
-
properties={
|
| 57 |
-
"markdown": content.Schema(
|
| 58 |
-
type=content.Type.STRING
|
| 59 |
-
)
|
| 60 |
-
},
|
| 61 |
-
)
|
| 62 |
-
|
| 63 |
-
response = self.model.generate_response(prompt, image, block, response_schema)
|
| 64 |
|
| 65 |
if not response or "markdown" not in response:
|
| 66 |
block.update_metadata(llm_error_count=1)
|
|
@@ -73,3 +62,6 @@ Formatting should be in markdown, with the following rules:
|
|
| 73 |
|
| 74 |
markdown = markdown.strip().lstrip("```markdown").rstrip("```").strip()
|
| 75 |
block.html = markdown2.markdown(markdown, extras=["tables"])
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import markdown2
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
|
| 4 |
from marker.processors.llm import BaseLLMProcessor
|
| 5 |
|
|
|
|
|
|
|
| 6 |
from marker.schema import BlockTypes
|
| 7 |
from marker.schema.blocks import Handwriting, Text
|
| 8 |
from marker.schema.document import Document
|
|
|
|
| 48 |
prompt = self.handwriting_generation_prompt
|
| 49 |
|
| 50 |
image = self.extract_image(document, block)
|
| 51 |
+
|
| 52 |
+
response = self.model.generate_response(prompt, image, block, HandwritingSchema)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
if not response or "markdown" not in response:
|
| 55 |
block.update_metadata(llm_error_count=1)
|
|
|
|
| 62 |
|
| 63 |
markdown = markdown.strip().lstrip("```markdown").rstrip("```").strip()
|
| 64 |
block.html = markdown2.markdown(markdown, extras=["tables"])
|
| 65 |
+
|
| 66 |
+
class HandwritingSchema(BaseModel):
|
| 67 |
+
markdown: str
|
marker/processors/llm/llm_image_description.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
from
|
| 2 |
|
| 3 |
-
from
|
| 4 |
|
| 5 |
from marker.schema import BlockTypes
|
| 6 |
from marker.schema.blocks import Block
|
|
@@ -49,18 +49,8 @@ In this figure, a bar chart titled "Fruit Preference Survey" is showing the numb
|
|
| 49 |
|
| 50 |
prompt = self.image_description_prompt.replace("{raw_text}", block.raw_text(document))
|
| 51 |
image = self.extract_image(document, block)
|
| 52 |
-
response_schema = content.Schema(
|
| 53 |
-
type=content.Type.OBJECT,
|
| 54 |
-
enum=[],
|
| 55 |
-
required=["image_description"],
|
| 56 |
-
properties={
|
| 57 |
-
"image_description": content.Schema(
|
| 58 |
-
type=content.Type.STRING
|
| 59 |
-
)
|
| 60 |
-
},
|
| 61 |
-
)
|
| 62 |
|
| 63 |
-
response = self.model.generate_response(prompt, image, block,
|
| 64 |
|
| 65 |
if not response or "image_description" not in response:
|
| 66 |
block.update_metadata(llm_error_count=1)
|
|
@@ -72,3 +62,6 @@ In this figure, a bar chart titled "Fruit Preference Survey" is showing the numb
|
|
| 72 |
return
|
| 73 |
|
| 74 |
block.description = image_description
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
|
| 3 |
+
from marker.processors.llm import BaseLLMProcessor
|
| 4 |
|
| 5 |
from marker.schema import BlockTypes
|
| 6 |
from marker.schema.blocks import Block
|
|
|
|
| 49 |
|
| 50 |
prompt = self.image_description_prompt.replace("{raw_text}", block.raw_text(document))
|
| 51 |
image = self.extract_image(document, block)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
+
response = self.model.generate_response(prompt, image, block, ImageSchema)
|
| 54 |
|
| 55 |
if not response or "image_description" not in response:
|
| 56 |
block.update_metadata(llm_error_count=1)
|
|
|
|
| 62 |
return
|
| 63 |
|
| 64 |
block.description = image_description
|
| 65 |
+
|
| 66 |
+
class ImageSchema(BaseModel):
|
| 67 |
+
image_description: str
|
marker/processors/llm/llm_table.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
from typing import Annotated, List, Tuple
|
| 2 |
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
-
from google.ai.generativelanguage_v1beta.types import content
|
| 5 |
from PIL import Image
|
|
|
|
| 6 |
|
| 7 |
from marker.processors.llm import BaseLLMProcessor
|
| 8 |
from marker.schema import BlockTypes
|
|
@@ -133,18 +133,7 @@ No corrections needed.
|
|
| 133 |
def rewrite_single_chunk(self, page: PageGroup, block: Block, block_html: str, children: List[TableCell], image: Image.Image):
|
| 134 |
prompt = self.table_rewriting_prompt.replace("{block_html}", block_html)
|
| 135 |
|
| 136 |
-
|
| 137 |
-
type=content.Type.OBJECT,
|
| 138 |
-
enum=[],
|
| 139 |
-
required=["corrected_html"],
|
| 140 |
-
properties={
|
| 141 |
-
"corrected_html": content.Schema(
|
| 142 |
-
type=content.Type.STRING
|
| 143 |
-
)
|
| 144 |
-
},
|
| 145 |
-
)
|
| 146 |
-
|
| 147 |
-
response = self.model.generate_response(prompt, image, block, response_schema)
|
| 148 |
|
| 149 |
if not response or "corrected_html" not in response:
|
| 150 |
block.update_metadata(llm_error_count=1)
|
|
@@ -246,3 +235,6 @@ No corrections needed.
|
|
| 246 |
cur_col += colspan
|
| 247 |
|
| 248 |
return cells
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from typing import Annotated, List, Tuple
|
| 2 |
|
| 3 |
from bs4 import BeautifulSoup
|
|
|
|
| 4 |
from PIL import Image
|
| 5 |
+
from pydantic import BaseModel
|
| 6 |
|
| 7 |
from marker.processors.llm import BaseLLMProcessor
|
| 8 |
from marker.schema import BlockTypes
|
|
|
|
| 133 |
def rewrite_single_chunk(self, page: PageGroup, block: Block, block_html: str, children: List[TableCell], image: Image.Image):
|
| 134 |
prompt = self.table_rewriting_prompt.replace("{block_html}", block_html)
|
| 135 |
|
| 136 |
+
response = self.model.generate_response(prompt, image, block, TableSchema)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
if not response or "corrected_html" not in response:
|
| 139 |
block.update_metadata(llm_error_count=1)
|
|
|
|
| 235 |
cur_col += colspan
|
| 236 |
|
| 237 |
return cells
|
| 238 |
+
|
| 239 |
+
class TableSchema(BaseModel):
|
| 240 |
+
correct_html: str
|
marker/processors/llm/llm_table_merge.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 2 |
from typing import Annotated, List, Tuple, Literal
|
| 3 |
|
| 4 |
-
from
|
| 5 |
from tqdm import tqdm
|
| 6 |
from PIL import Image
|
| 7 |
|
|
@@ -234,36 +234,11 @@ Table 2
|
|
| 234 |
|
| 235 |
prompt = self.table_merge_prompt.replace("{{table1}}", start_html).replace("{{table2}}", curr_html)
|
| 236 |
|
| 237 |
-
response_schema = content.Schema(
|
| 238 |
-
type=content.Type.OBJECT,
|
| 239 |
-
enum=[],
|
| 240 |
-
required=["table1_description", "table2_description", "explanation", "merge", "direction"],
|
| 241 |
-
properties={
|
| 242 |
-
"table1_description": content.Schema(
|
| 243 |
-
type=content.Type.STRING
|
| 244 |
-
),
|
| 245 |
-
"table2_description": content.Schema(
|
| 246 |
-
type=content.Type.STRING
|
| 247 |
-
),
|
| 248 |
-
"explanation": content.Schema(
|
| 249 |
-
type=content.Type.STRING
|
| 250 |
-
),
|
| 251 |
-
"merge": content.Schema(
|
| 252 |
-
type=content.Type.STRING,
|
| 253 |
-
enum=["true", "false"]
|
| 254 |
-
),
|
| 255 |
-
"direction": content.Schema(
|
| 256 |
-
type=content.Type.STRING,
|
| 257 |
-
enum=["bottom", "right"]
|
| 258 |
-
),
|
| 259 |
-
},
|
| 260 |
-
)
|
| 261 |
-
|
| 262 |
response = self.model.generate_response(
|
| 263 |
prompt,
|
| 264 |
[start_image, curr_image],
|
| 265 |
curr_block,
|
| 266 |
-
|
| 267 |
)
|
| 268 |
|
| 269 |
if not response or ("direction" not in response or "merge" not in response):
|
|
@@ -335,4 +310,12 @@ Table 2
|
|
| 335 |
new_img = Image.new('RGB', (new_width, new_height), 'white')
|
| 336 |
new_img.paste(image1, (0, 0))
|
| 337 |
new_img.paste(image2, (0, h1))
|
| 338 |
-
return new_img
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 2 |
from typing import Annotated, List, Tuple, Literal
|
| 3 |
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
from tqdm import tqdm
|
| 6 |
from PIL import Image
|
| 7 |
|
|
|
|
| 234 |
|
| 235 |
prompt = self.table_merge_prompt.replace("{{table1}}", start_html).replace("{{table2}}", curr_html)
|
| 236 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
response = self.model.generate_response(
|
| 238 |
prompt,
|
| 239 |
[start_image, curr_image],
|
| 240 |
curr_block,
|
| 241 |
+
MergeSchema,
|
| 242 |
)
|
| 243 |
|
| 244 |
if not response or ("direction" not in response or "merge" not in response):
|
|
|
|
| 310 |
new_img = Image.new('RGB', (new_width, new_height), 'white')
|
| 311 |
new_img.paste(image1, (0, 0))
|
| 312 |
new_img.paste(image2, (0, h1))
|
| 313 |
+
return new_img
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
class MergeSchema(BaseModel):
|
| 317 |
+
table1_description: str
|
| 318 |
+
table2_description: str
|
| 319 |
+
explanation: str
|
| 320 |
+
merge: Literal["true", "false"]
|
| 321 |
+
direction: Literal["bottom", "right"]
|
marker/processors/llm/llm_text.py
DELETED
|
@@ -1,153 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import textwrap
|
| 3 |
-
|
| 4 |
-
from marker.processors.llm import BaseLLMProcessor
|
| 5 |
-
from bs4 import BeautifulSoup
|
| 6 |
-
from google.ai.generativelanguage_v1beta.types import content
|
| 7 |
-
from marker.schema import BlockTypes
|
| 8 |
-
from marker.schema.blocks import Block
|
| 9 |
-
from marker.schema.document import Document
|
| 10 |
-
from marker.schema.groups.page import PageGroup
|
| 11 |
-
from marker.schema.registry import get_block_class
|
| 12 |
-
from marker.schema.text.span import Span
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
class LLMTextProcessor(BaseLLMProcessor):
|
| 16 |
-
block_types = (BlockTypes.TextInlineMath,)
|
| 17 |
-
text_math_rewriting_prompt = """You are a text correction expert specializing in accurately reproducing text from images.
|
| 18 |
-
You will receive an image of a text block and a set of extracted lines corresponding to the text in the image.
|
| 19 |
-
Your task is to correct any errors in the extracted lines, including math, formatting, and other inaccuracies, and output the corrected lines in a JSON format.
|
| 20 |
-
The number of output lines MUST match the number of input lines. Stay as faithful to the original text as possible.
|
| 21 |
-
|
| 22 |
-
**Instructions:**
|
| 23 |
-
|
| 24 |
-
1. Carefully examine the provided text block image .
|
| 25 |
-
2. Analyze the extracted lines.
|
| 26 |
-
3. For each extracted line, compare it to the corresponding line in the image.
|
| 27 |
-
4. Correct any errors in the extracted line, including:
|
| 28 |
-
* Inline math: Ensure all mathematical expressions are correctly formatted and rendered.
|
| 29 |
-
* Formatting: Maintain consistent formatting with the text block image, including spacing, indentation, and special characters.
|
| 30 |
-
* Other inaccuracies: If the image is handwritten then you may correct any spelling errors, or other discrepancies.
|
| 31 |
-
5. Do not remove any formatting i.e bold, italics, etc from the extracted lines unless it is necessary to correct the error.
|
| 32 |
-
6. Ensure that inline math is properly with inline math tags.
|
| 33 |
-
7. The number of corrected lines in the output MUST equal the number of extracted lines provided in the input. Do not add or remove lines.
|
| 34 |
-
8. Output the corrected lines in JSON format with a "lines" field, as shown in the example below.
|
| 35 |
-
|
| 36 |
-
**Example:**
|
| 37 |
-
|
| 38 |
-
Input:
|
| 39 |
-
```
|
| 40 |
-
{
|
| 41 |
-
"extracted_lines": [
|
| 42 |
-
"Adversarial training (AT) [23], which aims to minimize\n",
|
| 43 |
-
"the model's risk under the worst-case perturbations, is cur-\n",
|
| 44 |
-
"rently the most effective approach for improving the robust-\n",
|
| 45 |
-
"ness of deep neural networks. For a given neural network\n",
|
| 46 |
-
"f(x, w) with parameters w, the optimization objective of\n",
|
| 47 |
-
"AT can be formulated as follows:\n"
|
| 48 |
-
]
|
| 49 |
-
}
|
| 50 |
-
```
|
| 51 |
-
|
| 52 |
-
Output:
|
| 53 |
-
|
| 54 |
-
```json
|
| 55 |
-
{
|
| 56 |
-
"corrected_lines": [
|
| 57 |
-
"Adversarial training (AT) [23], which aims to minimize\n",
|
| 58 |
-
"the model's risk under the worst-case perturbations, is cur-\n",
|
| 59 |
-
"rently the most effective approach for improving the robust-\n",
|
| 60 |
-
"ness of deep neural networks. For a given neural network\n",
|
| 61 |
-
"<math>f(x, w)</math> with parameters <math>w</math>, the optimization objective of\n",
|
| 62 |
-
"AT can be formulated as follows:\n"
|
| 63 |
-
]
|
| 64 |
-
}
|
| 65 |
-
```
|
| 66 |
-
|
| 67 |
-
**Input:**
|
| 68 |
-
```json
|
| 69 |
-
{extracted_lines}
|
| 70 |
-
```
|
| 71 |
-
"""
|
| 72 |
-
|
| 73 |
-
def process_rewriting(self, document: Document, page: PageGroup, block: Block):
|
| 74 |
-
SpanClass: Span = get_block_class(BlockTypes.Span)
|
| 75 |
-
|
| 76 |
-
text_lines = block.contained_blocks(document, (BlockTypes.Line,))
|
| 77 |
-
extracted_lines = [line.formatted_text(document) for line in text_lines]
|
| 78 |
-
|
| 79 |
-
prompt = self.text_math_rewriting_prompt.replace("{extracted_lines}", json.dumps({"extracted_lines": extracted_lines}, indent=2))
|
| 80 |
-
image = self.extract_image(document, block)
|
| 81 |
-
response_schema = content.Schema(
|
| 82 |
-
type=content.Type.OBJECT,
|
| 83 |
-
enum=[],
|
| 84 |
-
required=["corrected_lines"],
|
| 85 |
-
properties={
|
| 86 |
-
"corrected_lines": content.Schema(
|
| 87 |
-
type=content.Type.ARRAY,
|
| 88 |
-
items=content.Schema(
|
| 89 |
-
type=content.Type.STRING,
|
| 90 |
-
),
|
| 91 |
-
)
|
| 92 |
-
},
|
| 93 |
-
)
|
| 94 |
-
|
| 95 |
-
response = self.model.generate_response(prompt, image, block, response_schema)
|
| 96 |
-
if not response or "corrected_lines" not in response:
|
| 97 |
-
block.update_metadata(llm_error_count=1)
|
| 98 |
-
return
|
| 99 |
-
|
| 100 |
-
corrected_lines = response["corrected_lines"]
|
| 101 |
-
if not corrected_lines or len(corrected_lines) != len(extracted_lines):
|
| 102 |
-
block.update_metadata(llm_error_count=1)
|
| 103 |
-
return
|
| 104 |
-
|
| 105 |
-
for text_line, corrected_text in zip(text_lines, corrected_lines):
|
| 106 |
-
text_line.structure = []
|
| 107 |
-
corrected_spans = self.text_to_spans(corrected_text)
|
| 108 |
-
|
| 109 |
-
for span_idx, span in enumerate(corrected_spans):
|
| 110 |
-
if span_idx == len(corrected_spans) - 1:
|
| 111 |
-
span['content'] += "\n"
|
| 112 |
-
|
| 113 |
-
span_block = page.add_full_block(
|
| 114 |
-
SpanClass(
|
| 115 |
-
polygon=text_line.polygon,
|
| 116 |
-
text=span['content'],
|
| 117 |
-
font='Unknown',
|
| 118 |
-
font_weight=0,
|
| 119 |
-
font_size=0,
|
| 120 |
-
minimum_position=0,
|
| 121 |
-
maximum_position=0,
|
| 122 |
-
formats=[span['type']],
|
| 123 |
-
page_id=text_line.page_id,
|
| 124 |
-
text_extraction_method="gemini",
|
| 125 |
-
)
|
| 126 |
-
)
|
| 127 |
-
text_line.structure.append(span_block.id)
|
| 128 |
-
|
| 129 |
-
def text_to_spans(self, text):
|
| 130 |
-
soup = BeautifulSoup(text, 'html.parser')
|
| 131 |
-
|
| 132 |
-
tag_types = {
|
| 133 |
-
'b': 'bold',
|
| 134 |
-
'i': 'italic',
|
| 135 |
-
'math': 'math'
|
| 136 |
-
}
|
| 137 |
-
spans = []
|
| 138 |
-
|
| 139 |
-
for element in soup.descendants:
|
| 140 |
-
if not len(list(element.parents)) == 1:
|
| 141 |
-
continue
|
| 142 |
-
if element.name in tag_types:
|
| 143 |
-
spans.append({
|
| 144 |
-
'type': tag_types[element.name],
|
| 145 |
-
'content': element.get_text()
|
| 146 |
-
})
|
| 147 |
-
elif element.string:
|
| 148 |
-
spans.append({
|
| 149 |
-
'type': 'plain',
|
| 150 |
-
'content': element.string
|
| 151 |
-
})
|
| 152 |
-
|
| 153 |
-
return spans
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
marker/processors/llm/utils.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
| 1 |
import json
|
| 2 |
import time
|
|
|
|
| 3 |
from typing import List
|
| 4 |
|
| 5 |
import PIL
|
| 6 |
-
|
| 7 |
-
from google.
|
| 8 |
-
from google.
|
|
|
|
| 9 |
|
| 10 |
from marker.schema.blocks import Block
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
class GoogleModel:
|
|
@@ -17,45 +20,59 @@ class GoogleModel:
|
|
| 17 |
|
| 18 |
self.api_key = api_key
|
| 19 |
self.model_name = model_name
|
| 20 |
-
self.model = self.configure_google_model()
|
| 21 |
|
| 22 |
-
def
|
| 23 |
-
genai.
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def generate_response(
|
| 27 |
self,
|
| 28 |
prompt: str,
|
| 29 |
image: PIL.Image.Image | List[PIL.Image.Image],
|
| 30 |
block: Block,
|
| 31 |
-
response_schema:
|
| 32 |
-
max_retries: int =
|
| 33 |
timeout: int = 60
|
| 34 |
):
|
| 35 |
if not isinstance(image, list):
|
| 36 |
image = [image]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
tries = 0
|
| 38 |
while tries < max_retries:
|
| 39 |
try:
|
| 40 |
-
responses =
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
"temperature": 0,
|
| 45 |
"response_schema": response_schema,
|
| 46 |
"response_mime_type": "application/json",
|
| 47 |
-
}
|
| 48 |
-
request_options={'timeout': timeout}
|
| 49 |
)
|
| 50 |
output = responses.candidates[0].content.parts[0].text
|
| 51 |
total_tokens = responses.usage_metadata.total_token_count
|
| 52 |
block.update_metadata(llm_tokens_used=total_tokens, llm_request_count=1)
|
| 53 |
return json.loads(output)
|
| 54 |
-
except
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
except Exception as e:
|
| 60 |
print(e)
|
| 61 |
break
|
|
|
|
| 1 |
import json
|
| 2 |
import time
|
| 3 |
+
from io import BytesIO
|
| 4 |
from typing import List
|
| 5 |
|
| 6 |
import PIL
|
| 7 |
+
from google import genai
|
| 8 |
+
from google.genai import types
|
| 9 |
+
from google.genai.errors import APIError
|
| 10 |
+
from pydantic import BaseModel
|
| 11 |
|
| 12 |
from marker.schema.blocks import Block
|
| 13 |
+
from marker.settings import settings
|
| 14 |
|
| 15 |
|
| 16 |
class GoogleModel:
|
|
|
|
| 20 |
|
| 21 |
self.api_key = api_key
|
| 22 |
self.model_name = model_name
|
|
|
|
| 23 |
|
| 24 |
+
def get_google_client(self, timeout: int = 60):
|
| 25 |
+
return genai.Client(
|
| 26 |
+
api_key=settings.GOOGLE_API_KEY,
|
| 27 |
+
http_options={"timeout": timeout * 1000} # Convert to milliseconds
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
def img_to_bytes(self, img: PIL.Image.Image):
|
| 31 |
+
image_bytes = BytesIO()
|
| 32 |
+
img.save(image_bytes, format="PNG")
|
| 33 |
+
return image_bytes.getvalue()
|
| 34 |
|
| 35 |
def generate_response(
|
| 36 |
self,
|
| 37 |
prompt: str,
|
| 38 |
image: PIL.Image.Image | List[PIL.Image.Image],
|
| 39 |
block: Block,
|
| 40 |
+
response_schema: type[BaseModel],
|
| 41 |
+
max_retries: int = 2,
|
| 42 |
timeout: int = 60
|
| 43 |
):
|
| 44 |
if not isinstance(image, list):
|
| 45 |
image = [image]
|
| 46 |
+
|
| 47 |
+
client = self.get_google_client(timeout=timeout)
|
| 48 |
+
image_parts = [types.Part.from_bytes(data=self.img_to_bytes(img), mime_type="image/png") for img in image]
|
| 49 |
+
|
| 50 |
tries = 0
|
| 51 |
while tries < max_retries:
|
| 52 |
try:
|
| 53 |
+
responses = client.models.generate_content(
|
| 54 |
+
model="gemini-2.0-flash",
|
| 55 |
+
contents=image_parts + [prompt], # According to gemini docs, it performs better if the image is the first element
|
| 56 |
+
config={
|
| 57 |
"temperature": 0,
|
| 58 |
"response_schema": response_schema,
|
| 59 |
"response_mime_type": "application/json",
|
| 60 |
+
}
|
|
|
|
| 61 |
)
|
| 62 |
output = responses.candidates[0].content.parts[0].text
|
| 63 |
total_tokens = responses.usage_metadata.total_token_count
|
| 64 |
block.update_metadata(llm_tokens_used=total_tokens, llm_request_count=1)
|
| 65 |
return json.loads(output)
|
| 66 |
+
except APIError as e:
|
| 67 |
+
if e.code == 429:
|
| 68 |
+
# Rate limit exceeded
|
| 69 |
+
tries += 1
|
| 70 |
+
wait_time = tries * 3
|
| 71 |
+
print(f"APIError: {e}. Retrying in {wait_time} seconds... (Attempt {tries}/{max_retries})")
|
| 72 |
+
time.sleep(wait_time)
|
| 73 |
+
else:
|
| 74 |
+
print(e)
|
| 75 |
+
break
|
| 76 |
except Exception as e:
|
| 77 |
print(e)
|
| 78 |
break
|
poetry.lock
CHANGED
|
@@ -13,87 +13,92 @@ files = [
|
|
| 13 |
|
| 14 |
[[package]]
|
| 15 |
name = "aiohttp"
|
| 16 |
-
version = "3.11.
|
| 17 |
description = "Async http client/server framework (asyncio)"
|
| 18 |
optional = false
|
| 19 |
python-versions = ">=3.9"
|
| 20 |
files = [
|
| 21 |
-
{file = "aiohttp-3.11.
|
| 22 |
-
{file = "aiohttp-3.11.
|
| 23 |
-
{file = "aiohttp-3.11.
|
| 24 |
-
{file = "aiohttp-3.11.
|
| 25 |
-
{file = "aiohttp-3.11.
|
| 26 |
-
{file = "aiohttp-3.11.
|
| 27 |
-
{file = "aiohttp-3.11.
|
| 28 |
-
{file = "aiohttp-3.11.
|
| 29 |
-
{file = "aiohttp-3.11.
|
| 30 |
-
{file = "aiohttp-3.11.
|
| 31 |
-
{file = "aiohttp-3.11.
|
| 32 |
-
{file = "aiohttp-3.11.
|
| 33 |
-
{file = "aiohttp-3.11.
|
| 34 |
-
{file = "aiohttp-3.11.
|
| 35 |
-
{file = "aiohttp-3.11.
|
| 36 |
-
{file = "aiohttp-3.11.
|
| 37 |
-
{file = "aiohttp-3.11.
|
| 38 |
-
{file = "aiohttp-3.11.
|
| 39 |
-
{file = "aiohttp-3.11.
|
| 40 |
-
{file = "aiohttp-3.11.
|
| 41 |
-
{file = "aiohttp-3.11.
|
| 42 |
-
{file = "aiohttp-3.11.
|
| 43 |
-
{file = "aiohttp-3.11.
|
| 44 |
-
{file = "aiohttp-3.11.
|
| 45 |
-
{file = "aiohttp-3.11.
|
| 46 |
-
{file = "aiohttp-3.11.
|
| 47 |
-
{file = "aiohttp-3.11.
|
| 48 |
-
{file = "aiohttp-3.11.
|
| 49 |
-
{file = "aiohttp-3.11.
|
| 50 |
-
{file = "aiohttp-3.11.
|
| 51 |
-
{file = "aiohttp-3.11.
|
| 52 |
-
{file = "aiohttp-3.11.
|
| 53 |
-
{file = "aiohttp-3.11.
|
| 54 |
-
{file = "aiohttp-3.11.
|
| 55 |
-
{file = "aiohttp-3.11.
|
| 56 |
-
{file = "aiohttp-3.11.
|
| 57 |
-
{file = "aiohttp-3.11.
|
| 58 |
-
{file = "aiohttp-3.11.
|
| 59 |
-
{file = "aiohttp-3.11.
|
| 60 |
-
{file = "aiohttp-3.11.
|
| 61 |
-
{file = "aiohttp-3.11.
|
| 62 |
-
{file = "aiohttp-3.11.
|
| 63 |
-
{file = "aiohttp-3.11.
|
| 64 |
-
{file = "aiohttp-3.11.
|
| 65 |
-
{file = "aiohttp-3.11.
|
| 66 |
-
{file = "aiohttp-3.11.
|
| 67 |
-
{file = "aiohttp-3.11.
|
| 68 |
-
{file = "aiohttp-3.11.
|
| 69 |
-
{file = "aiohttp-3.11.
|
| 70 |
-
{file = "aiohttp-3.11.
|
| 71 |
-
{file = "aiohttp-3.11.
|
| 72 |
-
{file = "aiohttp-3.11.
|
| 73 |
-
{file = "aiohttp-3.11.
|
| 74 |
-
{file = "aiohttp-3.11.
|
| 75 |
-
{file = "aiohttp-3.11.
|
| 76 |
-
{file = "aiohttp-3.11.
|
| 77 |
-
{file = "aiohttp-3.11.
|
| 78 |
-
{file = "aiohttp-3.11.
|
| 79 |
-
{file = "aiohttp-3.11.
|
| 80 |
-
{file = "aiohttp-3.11.
|
| 81 |
-
{file = "aiohttp-3.11.
|
| 82 |
-
{file = "aiohttp-3.11.
|
| 83 |
-
{file = "aiohttp-3.11.
|
| 84 |
-
{file = "aiohttp-3.11.
|
| 85 |
-
{file = "aiohttp-3.11.
|
| 86 |
-
{file = "aiohttp-3.11.
|
| 87 |
-
{file = "aiohttp-3.11.
|
| 88 |
-
{file = "aiohttp-3.11.
|
| 89 |
-
{file = "aiohttp-3.11.
|
| 90 |
-
{file = "aiohttp-3.11.
|
| 91 |
-
{file = "aiohttp-3.11.
|
| 92 |
-
{file = "aiohttp-3.11.
|
| 93 |
-
{file = "aiohttp-3.11.
|
| 94 |
-
{file = "aiohttp-3.11.
|
| 95 |
-
{file = "aiohttp-3.11.
|
| 96 |
-
{file = "aiohttp-3.11.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
]
|
| 98 |
|
| 99 |
[package.dependencies]
|
|
@@ -339,31 +344,32 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
|
|
| 339 |
|
| 340 |
[[package]]
|
| 341 |
name = "babel"
|
| 342 |
-
version = "2.
|
| 343 |
description = "Internationalization utilities"
|
| 344 |
optional = false
|
| 345 |
python-versions = ">=3.8"
|
| 346 |
files = [
|
| 347 |
-
{file = "babel-2.
|
| 348 |
-
{file = "babel-2.
|
| 349 |
]
|
| 350 |
|
| 351 |
[package.extras]
|
| 352 |
-
dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
|
| 353 |
|
| 354 |
[[package]]
|
| 355 |
name = "beautifulsoup4"
|
| 356 |
-
version = "4.
|
| 357 |
description = "Screen-scraping library"
|
| 358 |
optional = false
|
| 359 |
-
python-versions = ">=3.
|
| 360 |
files = [
|
| 361 |
-
{file = "beautifulsoup4-4.
|
| 362 |
-
{file = "beautifulsoup4-4.
|
| 363 |
]
|
| 364 |
|
| 365 |
[package.dependencies]
|
| 366 |
soupsieve = ">1.2"
|
|
|
|
| 367 |
|
| 368 |
[package.extras]
|
| 369 |
cchardet = ["cchardet"]
|
|
@@ -1060,79 +1066,6 @@ gitdb = ">=4.0.1,<5"
|
|
| 1060 |
doc = ["sphinx (>=7.1.2,<7.2)", "sphinx-autodoc-typehints", "sphinx_rtd_theme"]
|
| 1061 |
test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"]
|
| 1062 |
|
| 1063 |
-
[[package]]
|
| 1064 |
-
name = "google-ai-generativelanguage"
|
| 1065 |
-
version = "0.6.15"
|
| 1066 |
-
description = "Google Ai Generativelanguage API client library"
|
| 1067 |
-
optional = false
|
| 1068 |
-
python-versions = ">=3.7"
|
| 1069 |
-
files = [
|
| 1070 |
-
{file = "google_ai_generativelanguage-0.6.15-py3-none-any.whl", hash = "sha256:5a03ef86377aa184ffef3662ca28f19eeee158733e45d7947982eb953c6ebb6c"},
|
| 1071 |
-
{file = "google_ai_generativelanguage-0.6.15.tar.gz", hash = "sha256:8f6d9dc4c12b065fe2d0289026171acea5183ebf2d0b11cefe12f3821e159ec3"},
|
| 1072 |
-
]
|
| 1073 |
-
|
| 1074 |
-
[package.dependencies]
|
| 1075 |
-
google-api-core = {version = ">=1.34.1,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]}
|
| 1076 |
-
google-auth = ">=2.14.1,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0dev"
|
| 1077 |
-
proto-plus = [
|
| 1078 |
-
{version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""},
|
| 1079 |
-
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
|
| 1080 |
-
]
|
| 1081 |
-
protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0dev"
|
| 1082 |
-
|
| 1083 |
-
[[package]]
|
| 1084 |
-
name = "google-api-core"
|
| 1085 |
-
version = "2.24.1"
|
| 1086 |
-
description = "Google API client core library"
|
| 1087 |
-
optional = false
|
| 1088 |
-
python-versions = ">=3.7"
|
| 1089 |
-
files = [
|
| 1090 |
-
{file = "google_api_core-2.24.1-py3-none-any.whl", hash = "sha256:bc78d608f5a5bf853b80bd70a795f703294de656c096c0968320830a4bc280f1"},
|
| 1091 |
-
{file = "google_api_core-2.24.1.tar.gz", hash = "sha256:f8b36f5456ab0dd99a1b693a40a31d1e7757beea380ad1b38faaf8941eae9d8a"},
|
| 1092 |
-
]
|
| 1093 |
-
|
| 1094 |
-
[package.dependencies]
|
| 1095 |
-
google-auth = ">=2.14.1,<3.0.dev0"
|
| 1096 |
-
googleapis-common-protos = ">=1.56.2,<2.0.dev0"
|
| 1097 |
-
grpcio = [
|
| 1098 |
-
{version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
|
| 1099 |
-
{version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
|
| 1100 |
-
]
|
| 1101 |
-
grpcio-status = [
|
| 1102 |
-
{version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""},
|
| 1103 |
-
{version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""},
|
| 1104 |
-
]
|
| 1105 |
-
proto-plus = [
|
| 1106 |
-
{version = ">=1.22.3,<2.0.0dev", markers = "python_version < \"3.13\""},
|
| 1107 |
-
{version = ">=1.25.0,<2.0.0dev", markers = "python_version >= \"3.13\""},
|
| 1108 |
-
]
|
| 1109 |
-
protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
|
| 1110 |
-
requests = ">=2.18.0,<3.0.0.dev0"
|
| 1111 |
-
|
| 1112 |
-
[package.extras]
|
| 1113 |
-
async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.dev0)"]
|
| 1114 |
-
grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev)", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0)"]
|
| 1115 |
-
grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
|
| 1116 |
-
grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
|
| 1117 |
-
|
| 1118 |
-
[[package]]
|
| 1119 |
-
name = "google-api-python-client"
|
| 1120 |
-
version = "2.160.0"
|
| 1121 |
-
description = "Google API Client Library for Python"
|
| 1122 |
-
optional = false
|
| 1123 |
-
python-versions = ">=3.7"
|
| 1124 |
-
files = [
|
| 1125 |
-
{file = "google_api_python_client-2.160.0-py2.py3-none-any.whl", hash = "sha256:63d61fb3e4cf3fb31a70a87f45567c22f6dfe87bbfa27252317e3e2c42900db4"},
|
| 1126 |
-
{file = "google_api_python_client-2.160.0.tar.gz", hash = "sha256:a8ccafaecfa42d15d5b5c3134ced8de08380019717fc9fb1ed510ca58eca3b7e"},
|
| 1127 |
-
]
|
| 1128 |
-
|
| 1129 |
-
[package.dependencies]
|
| 1130 |
-
google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0.dev0"
|
| 1131 |
-
google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0.dev0"
|
| 1132 |
-
google-auth-httplib2 = ">=0.2.0,<1.0.0"
|
| 1133 |
-
httplib2 = ">=0.19.0,<1.dev0"
|
| 1134 |
-
uritemplate = ">=3.0.1,<5"
|
| 1135 |
-
|
| 1136 |
[[package]]
|
| 1137 |
name = "google-auth"
|
| 1138 |
version = "2.38.0"
|
|
@@ -1158,59 +1091,21 @@ reauth = ["pyu2f (>=0.1.5)"]
|
|
| 1158 |
requests = ["requests (>=2.20.0,<3.0.0.dev0)"]
|
| 1159 |
|
| 1160 |
[[package]]
|
| 1161 |
-
name = "google-
|
| 1162 |
-
version = "0.
|
| 1163 |
-
description = "
|
| 1164 |
-
optional = false
|
| 1165 |
-
python-versions = "*"
|
| 1166 |
-
files = [
|
| 1167 |
-
{file = "google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05"},
|
| 1168 |
-
{file = "google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d"},
|
| 1169 |
-
]
|
| 1170 |
-
|
| 1171 |
-
[package.dependencies]
|
| 1172 |
-
google-auth = "*"
|
| 1173 |
-
httplib2 = ">=0.19.0"
|
| 1174 |
-
|
| 1175 |
-
[[package]]
|
| 1176 |
-
name = "google-generativeai"
|
| 1177 |
-
version = "0.8.4"
|
| 1178 |
-
description = "Google Generative AI High level API client library and tools."
|
| 1179 |
optional = false
|
| 1180 |
python-versions = ">=3.9"
|
| 1181 |
files = [
|
| 1182 |
-
{file = "
|
|
|
|
| 1183 |
]
|
| 1184 |
|
| 1185 |
[package.dependencies]
|
| 1186 |
-
google-
|
| 1187 |
-
|
| 1188 |
-
|
| 1189 |
-
|
| 1190 |
-
protobuf = "*"
|
| 1191 |
-
pydantic = "*"
|
| 1192 |
-
tqdm = "*"
|
| 1193 |
-
typing-extensions = "*"
|
| 1194 |
-
|
| 1195 |
-
[package.extras]
|
| 1196 |
-
dev = ["Pillow", "absl-py", "black", "ipython", "nose2", "pandas", "pytype", "pyyaml"]
|
| 1197 |
-
|
| 1198 |
-
[[package]]
|
| 1199 |
-
name = "googleapis-common-protos"
|
| 1200 |
-
version = "1.66.0"
|
| 1201 |
-
description = "Common protobufs used in Google APIs"
|
| 1202 |
-
optional = false
|
| 1203 |
-
python-versions = ">=3.7"
|
| 1204 |
-
files = [
|
| 1205 |
-
{file = "googleapis_common_protos-1.66.0-py2.py3-none-any.whl", hash = "sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed"},
|
| 1206 |
-
{file = "googleapis_common_protos-1.66.0.tar.gz", hash = "sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c"},
|
| 1207 |
-
]
|
| 1208 |
-
|
| 1209 |
-
[package.dependencies]
|
| 1210 |
-
protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0"
|
| 1211 |
-
|
| 1212 |
-
[package.extras]
|
| 1213 |
-
grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"]
|
| 1214 |
|
| 1215 |
[[package]]
|
| 1216 |
name = "greenlet"
|
|
@@ -1298,89 +1193,6 @@ files = [
|
|
| 1298 |
docs = ["Sphinx", "furo"]
|
| 1299 |
test = ["objgraph", "psutil"]
|
| 1300 |
|
| 1301 |
-
[[package]]
|
| 1302 |
-
name = "grpcio"
|
| 1303 |
-
version = "1.70.0"
|
| 1304 |
-
description = "HTTP/2-based RPC framework"
|
| 1305 |
-
optional = false
|
| 1306 |
-
python-versions = ">=3.8"
|
| 1307 |
-
files = [
|
| 1308 |
-
{file = "grpcio-1.70.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:95469d1977429f45fe7df441f586521361e235982a0b39e33841549143ae2851"},
|
| 1309 |
-
{file = "grpcio-1.70.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:ed9718f17fbdb472e33b869c77a16d0b55e166b100ec57b016dc7de9c8d236bf"},
|
| 1310 |
-
{file = "grpcio-1.70.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:374d014f29f9dfdb40510b041792e0e2828a1389281eb590df066e1cc2b404e5"},
|
| 1311 |
-
{file = "grpcio-1.70.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2af68a6f5c8f78d56c145161544ad0febbd7479524a59c16b3e25053f39c87f"},
|
| 1312 |
-
{file = "grpcio-1.70.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce7df14b2dcd1102a2ec32f621cc9fab6695effef516efbc6b063ad749867295"},
|
| 1313 |
-
{file = "grpcio-1.70.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c78b339869f4dbf89881e0b6fbf376313e4f845a42840a7bdf42ee6caed4b11f"},
|
| 1314 |
-
{file = "grpcio-1.70.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:58ad9ba575b39edef71f4798fdb5c7b6d02ad36d47949cd381d4392a5c9cbcd3"},
|
| 1315 |
-
{file = "grpcio-1.70.0-cp310-cp310-win32.whl", hash = "sha256:2b0d02e4b25a5c1f9b6c7745d4fa06efc9fd6a611af0fb38d3ba956786b95199"},
|
| 1316 |
-
{file = "grpcio-1.70.0-cp310-cp310-win_amd64.whl", hash = "sha256:0de706c0a5bb9d841e353f6343a9defc9fc35ec61d6eb6111802f3aa9fef29e1"},
|
| 1317 |
-
{file = "grpcio-1.70.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:17325b0be0c068f35770f944124e8839ea3185d6d54862800fc28cc2ffad205a"},
|
| 1318 |
-
{file = "grpcio-1.70.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:dbe41ad140df911e796d4463168e33ef80a24f5d21ef4d1e310553fcd2c4a386"},
|
| 1319 |
-
{file = "grpcio-1.70.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:5ea67c72101d687d44d9c56068328da39c9ccba634cabb336075fae2eab0d04b"},
|
| 1320 |
-
{file = "grpcio-1.70.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb5277db254ab7586769e490b7b22f4ddab3876c490da0a1a9d7c695ccf0bf77"},
|
| 1321 |
-
{file = "grpcio-1.70.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7831a0fc1beeeb7759f737f5acd9fdcda520e955049512d68fda03d91186eea"},
|
| 1322 |
-
{file = "grpcio-1.70.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:27cc75e22c5dba1fbaf5a66c778e36ca9b8ce850bf58a9db887754593080d839"},
|
| 1323 |
-
{file = "grpcio-1.70.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d63764963412e22f0491d0d32833d71087288f4e24cbcddbae82476bfa1d81fd"},
|
| 1324 |
-
{file = "grpcio-1.70.0-cp311-cp311-win32.whl", hash = "sha256:bb491125103c800ec209d84c9b51f1c60ea456038e4734688004f377cfacc113"},
|
| 1325 |
-
{file = "grpcio-1.70.0-cp311-cp311-win_amd64.whl", hash = "sha256:d24035d49e026353eb042bf7b058fb831db3e06d52bee75c5f2f3ab453e71aca"},
|
| 1326 |
-
{file = "grpcio-1.70.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:ef4c14508299b1406c32bdbb9fb7b47612ab979b04cf2b27686ea31882387cff"},
|
| 1327 |
-
{file = "grpcio-1.70.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:aa47688a65643afd8b166928a1da6247d3f46a2784d301e48ca1cc394d2ffb40"},
|
| 1328 |
-
{file = "grpcio-1.70.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:880bfb43b1bb8905701b926274eafce5c70a105bc6b99e25f62e98ad59cb278e"},
|
| 1329 |
-
{file = "grpcio-1.70.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e654c4b17d07eab259d392e12b149c3a134ec52b11ecdc6a515b39aceeec898"},
|
| 1330 |
-
{file = "grpcio-1.70.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2394e3381071045a706ee2eeb6e08962dd87e8999b90ac15c55f56fa5a8c9597"},
|
| 1331 |
-
{file = "grpcio-1.70.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b3c76701428d2df01964bc6479422f20e62fcbc0a37d82ebd58050b86926ef8c"},
|
| 1332 |
-
{file = "grpcio-1.70.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ac073fe1c4cd856ebcf49e9ed6240f4f84d7a4e6ee95baa5d66ea05d3dd0df7f"},
|
| 1333 |
-
{file = "grpcio-1.70.0-cp312-cp312-win32.whl", hash = "sha256:cd24d2d9d380fbbee7a5ac86afe9787813f285e684b0271599f95a51bce33528"},
|
| 1334 |
-
{file = "grpcio-1.70.0-cp312-cp312-win_amd64.whl", hash = "sha256:0495c86a55a04a874c7627fd33e5beaee771917d92c0e6d9d797628ac40e7655"},
|
| 1335 |
-
{file = "grpcio-1.70.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa573896aeb7d7ce10b1fa425ba263e8dddd83d71530d1322fd3a16f31257b4a"},
|
| 1336 |
-
{file = "grpcio-1.70.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:d405b005018fd516c9ac529f4b4122342f60ec1cee181788249372524e6db429"},
|
| 1337 |
-
{file = "grpcio-1.70.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f32090238b720eb585248654db8e3afc87b48d26ac423c8dde8334a232ff53c9"},
|
| 1338 |
-
{file = "grpcio-1.70.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dfa089a734f24ee5f6880c83d043e4f46bf812fcea5181dcb3a572db1e79e01c"},
|
| 1339 |
-
{file = "grpcio-1.70.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f19375f0300b96c0117aca118d400e76fede6db6e91f3c34b7b035822e06c35f"},
|
| 1340 |
-
{file = "grpcio-1.70.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:7c73c42102e4a5ec76608d9b60227d917cea46dff4d11d372f64cbeb56d259d0"},
|
| 1341 |
-
{file = "grpcio-1.70.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:0a5c78d5198a1f0aa60006cd6eb1c912b4a1520b6a3968e677dbcba215fabb40"},
|
| 1342 |
-
{file = "grpcio-1.70.0-cp313-cp313-win32.whl", hash = "sha256:fe9dbd916df3b60e865258a8c72ac98f3ac9e2a9542dcb72b7a34d236242a5ce"},
|
| 1343 |
-
{file = "grpcio-1.70.0-cp313-cp313-win_amd64.whl", hash = "sha256:4119fed8abb7ff6c32e3d2255301e59c316c22d31ab812b3fbcbaf3d0d87cc68"},
|
| 1344 |
-
{file = "grpcio-1.70.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:8058667a755f97407fca257c844018b80004ae8035565ebc2812cc550110718d"},
|
| 1345 |
-
{file = "grpcio-1.70.0-cp38-cp38-macosx_10_14_universal2.whl", hash = "sha256:879a61bf52ff8ccacbedf534665bb5478ec8e86ad483e76fe4f729aaef867cab"},
|
| 1346 |
-
{file = "grpcio-1.70.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:0ba0a173f4feacf90ee618fbc1a27956bfd21260cd31ced9bc707ef551ff7dc7"},
|
| 1347 |
-
{file = "grpcio-1.70.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558c386ecb0148f4f99b1a65160f9d4b790ed3163e8610d11db47838d452512d"},
|
| 1348 |
-
{file = "grpcio-1.70.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:412faabcc787bbc826f51be261ae5fa996b21263de5368a55dc2cf824dc5090e"},
|
| 1349 |
-
{file = "grpcio-1.70.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3b0f01f6ed9994d7a0b27eeddea43ceac1b7e6f3f9d86aeec0f0064b8cf50fdb"},
|
| 1350 |
-
{file = "grpcio-1.70.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:7385b1cb064734005204bc8994eed7dcb801ed6c2eda283f613ad8c6c75cf873"},
|
| 1351 |
-
{file = "grpcio-1.70.0-cp38-cp38-win32.whl", hash = "sha256:07269ff4940f6fb6710951116a04cd70284da86d0a4368fd5a3b552744511f5a"},
|
| 1352 |
-
{file = "grpcio-1.70.0-cp38-cp38-win_amd64.whl", hash = "sha256:aba19419aef9b254e15011b230a180e26e0f6864c90406fdbc255f01d83bc83c"},
|
| 1353 |
-
{file = "grpcio-1.70.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:4f1937f47c77392ccd555728f564a49128b6a197a05a5cd527b796d36f3387d0"},
|
| 1354 |
-
{file = "grpcio-1.70.0-cp39-cp39-macosx_10_14_universal2.whl", hash = "sha256:0cd430b9215a15c10b0e7d78f51e8a39d6cf2ea819fd635a7214fae600b1da27"},
|
| 1355 |
-
{file = "grpcio-1.70.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:e27585831aa6b57b9250abaf147003e126cd3a6c6ca0c531a01996f31709bed1"},
|
| 1356 |
-
{file = "grpcio-1.70.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1af8e15b0f0fe0eac75195992a63df17579553b0c4af9f8362cc7cc99ccddf4"},
|
| 1357 |
-
{file = "grpcio-1.70.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbce24409beaee911c574a3d75d12ffb8c3e3dd1b813321b1d7a96bbcac46bf4"},
|
| 1358 |
-
{file = "grpcio-1.70.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ff4a8112a79464919bb21c18e956c54add43ec9a4850e3949da54f61c241a4a6"},
|
| 1359 |
-
{file = "grpcio-1.70.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5413549fdf0b14046c545e19cfc4eb1e37e9e1ebba0ca390a8d4e9963cab44d2"},
|
| 1360 |
-
{file = "grpcio-1.70.0-cp39-cp39-win32.whl", hash = "sha256:b745d2c41b27650095e81dea7091668c040457483c9bdb5d0d9de8f8eb25e59f"},
|
| 1361 |
-
{file = "grpcio-1.70.0-cp39-cp39-win_amd64.whl", hash = "sha256:a31d7e3b529c94e930a117b2175b2efd179d96eb3c7a21ccb0289a8ab05b645c"},
|
| 1362 |
-
{file = "grpcio-1.70.0.tar.gz", hash = "sha256:8d1584a68d5922330025881e63a6c1b54cc8117291d382e4fa69339b6d914c56"},
|
| 1363 |
-
]
|
| 1364 |
-
|
| 1365 |
-
[package.extras]
|
| 1366 |
-
protobuf = ["grpcio-tools (>=1.70.0)"]
|
| 1367 |
-
|
| 1368 |
-
[[package]]
|
| 1369 |
-
name = "grpcio-status"
|
| 1370 |
-
version = "1.70.0"
|
| 1371 |
-
description = "Status proto mapping for gRPC"
|
| 1372 |
-
optional = false
|
| 1373 |
-
python-versions = ">=3.8"
|
| 1374 |
-
files = [
|
| 1375 |
-
{file = "grpcio_status-1.70.0-py3-none-any.whl", hash = "sha256:fc5a2ae2b9b1c1969cc49f3262676e6854aa2398ec69cb5bd6c47cd501904a85"},
|
| 1376 |
-
{file = "grpcio_status-1.70.0.tar.gz", hash = "sha256:0e7b42816512433b18b9d764285ff029bde059e9d41f8fe10a60631bd8348101"},
|
| 1377 |
-
]
|
| 1378 |
-
|
| 1379 |
-
[package.dependencies]
|
| 1380 |
-
googleapis-common-protos = ">=1.5.5"
|
| 1381 |
-
grpcio = ">=1.70.0"
|
| 1382 |
-
protobuf = ">=5.26.1,<6.0dev"
|
| 1383 |
-
|
| 1384 |
[[package]]
|
| 1385 |
name = "h11"
|
| 1386 |
version = "0.14.0"
|
|
@@ -1413,20 +1225,6 @@ http2 = ["h2 (>=3,<5)"]
|
|
| 1413 |
socks = ["socksio (==1.*)"]
|
| 1414 |
trio = ["trio (>=0.22.0,<1.0)"]
|
| 1415 |
|
| 1416 |
-
[[package]]
|
| 1417 |
-
name = "httplib2"
|
| 1418 |
-
version = "0.22.0"
|
| 1419 |
-
description = "A comprehensive HTTP client library."
|
| 1420 |
-
optional = false
|
| 1421 |
-
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
| 1422 |
-
files = [
|
| 1423 |
-
{file = "httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc"},
|
| 1424 |
-
{file = "httplib2-0.22.0.tar.gz", hash = "sha256:d7a10bc5ef5ab08322488bde8c726eeee5c8618723fdb399597ec58f3d82df81"},
|
| 1425 |
-
]
|
| 1426 |
-
|
| 1427 |
-
[package.dependencies]
|
| 1428 |
-
pyparsing = {version = ">=2.4.2,<3.0.0 || >3.0.0,<3.0.1 || >3.0.1,<3.0.2 || >3.0.2,<3.0.3 || >3.0.3,<4", markers = "python_version > \"3.0\""}
|
| 1429 |
-
|
| 1430 |
[[package]]
|
| 1431 |
name = "httpx"
|
| 1432 |
version = "0.28.1"
|
|
@@ -1545,13 +1343,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio
|
|
| 1545 |
|
| 1546 |
[[package]]
|
| 1547 |
name = "ipython"
|
| 1548 |
-
version = "8.
|
| 1549 |
description = "IPython: Productive Interactive Computing"
|
| 1550 |
optional = false
|
| 1551 |
python-versions = ">=3.10"
|
| 1552 |
files = [
|
| 1553 |
-
{file = "ipython-8.
|
| 1554 |
-
{file = "ipython-8.
|
| 1555 |
]
|
| 1556 |
|
| 1557 |
[package.dependencies]
|
|
@@ -1818,17 +1616,18 @@ test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"
|
|
| 1818 |
|
| 1819 |
[[package]]
|
| 1820 |
name = "jupyter-events"
|
| 1821 |
-
version = "0.
|
| 1822 |
description = "Jupyter Event System library"
|
| 1823 |
optional = false
|
| 1824 |
python-versions = ">=3.9"
|
| 1825 |
files = [
|
| 1826 |
-
{file = "jupyter_events-0.
|
| 1827 |
-
{file = "jupyter_events-0.
|
| 1828 |
]
|
| 1829 |
|
| 1830 |
[package.dependencies]
|
| 1831 |
jsonschema = {version = ">=4.18.0", extras = ["format-nongpl"]}
|
|
|
|
| 1832 |
python-json-logger = ">=2.0.4"
|
| 1833 |
pyyaml = ">=5.3"
|
| 1834 |
referencing = "*"
|
|
@@ -2468,13 +2267,13 @@ dill = ">=0.3.8"
|
|
| 2468 |
|
| 2469 |
[[package]]
|
| 2470 |
name = "narwhals"
|
| 2471 |
-
version = "1.
|
| 2472 |
description = "Extremely lightweight compatibility layer between dataframe libraries"
|
| 2473 |
optional = false
|
| 2474 |
python-versions = ">=3.8"
|
| 2475 |
files = [
|
| 2476 |
-
{file = "narwhals-1.
|
| 2477 |
-
{file = "narwhals-1.
|
| 2478 |
]
|
| 2479 |
|
| 2480 |
[package.extras]
|
|
@@ -3176,23 +2975,23 @@ type = ["mypy (>=1.11.2)"]
|
|
| 3176 |
|
| 3177 |
[[package]]
|
| 3178 |
name = "playwright"
|
| 3179 |
-
version = "1.
|
| 3180 |
description = "A high-level API to automate web browsers"
|
| 3181 |
optional = false
|
| 3182 |
python-versions = ">=3.9"
|
| 3183 |
files = [
|
| 3184 |
-
{file = "playwright-1.
|
| 3185 |
-
{file = "playwright-1.
|
| 3186 |
-
{file = "playwright-1.
|
| 3187 |
-
{file = "playwright-1.
|
| 3188 |
-
{file = "playwright-1.
|
| 3189 |
-
{file = "playwright-1.
|
| 3190 |
-
{file = "playwright-1.
|
| 3191 |
]
|
| 3192 |
|
| 3193 |
[package.dependencies]
|
| 3194 |
-
greenlet = "3.1.1"
|
| 3195 |
-
pyee = "12
|
| 3196 |
|
| 3197 |
[[package]]
|
| 3198 |
name = "pluggy"
|
|
@@ -3328,23 +3127,6 @@ files = [
|
|
| 3328 |
{file = "propcache-0.2.1.tar.gz", hash = "sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64"},
|
| 3329 |
]
|
| 3330 |
|
| 3331 |
-
[[package]]
|
| 3332 |
-
name = "proto-plus"
|
| 3333 |
-
version = "1.26.0"
|
| 3334 |
-
description = "Beautiful, Pythonic protocol buffers"
|
| 3335 |
-
optional = false
|
| 3336 |
-
python-versions = ">=3.7"
|
| 3337 |
-
files = [
|
| 3338 |
-
{file = "proto_plus-1.26.0-py3-none-any.whl", hash = "sha256:bf2dfaa3da281fc3187d12d224c707cb57214fb2c22ba854eb0c105a3fb2d4d7"},
|
| 3339 |
-
{file = "proto_plus-1.26.0.tar.gz", hash = "sha256:6e93d5f5ca267b54300880fff156b6a3386b3fa3f43b1da62e680fc0c586ef22"},
|
| 3340 |
-
]
|
| 3341 |
-
|
| 3342 |
-
[package.dependencies]
|
| 3343 |
-
protobuf = ">=3.19.0,<6.0.0dev"
|
| 3344 |
-
|
| 3345 |
-
[package.extras]
|
| 3346 |
-
testing = ["google-api-core (>=1.31.5)"]
|
| 3347 |
-
|
| 3348 |
[[package]]
|
| 3349 |
name = "protobuf"
|
| 3350 |
version = "5.29.3"
|
|
@@ -3683,13 +3465,13 @@ jupyter = ["ipykernel (>=5.1.2)", "ipython (>=5.8.0)", "ipywidgets (>=7,<8)", "t
|
|
| 3683 |
|
| 3684 |
[[package]]
|
| 3685 |
name = "pyee"
|
| 3686 |
-
version = "12.
|
| 3687 |
description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own"
|
| 3688 |
optional = false
|
| 3689 |
python-versions = ">=3.8"
|
| 3690 |
files = [
|
| 3691 |
-
{file = "pyee-12.
|
| 3692 |
-
{file = "pyee-12.
|
| 3693 |
]
|
| 3694 |
|
| 3695 |
[package.dependencies]
|
|
@@ -3712,20 +3494,6 @@ files = [
|
|
| 3712 |
[package.extras]
|
| 3713 |
windows-terminal = ["colorama (>=0.4.6)"]
|
| 3714 |
|
| 3715 |
-
[[package]]
|
| 3716 |
-
name = "pyparsing"
|
| 3717 |
-
version = "3.2.1"
|
| 3718 |
-
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
|
| 3719 |
-
optional = false
|
| 3720 |
-
python-versions = ">=3.9"
|
| 3721 |
-
files = [
|
| 3722 |
-
{file = "pyparsing-3.2.1-py3-none-any.whl", hash = "sha256:506ff4f4386c4cec0590ec19e6302d3aedb992fdc02c761e90416f158dacf8e1"},
|
| 3723 |
-
{file = "pyparsing-3.2.1.tar.gz", hash = "sha256:61980854fd66de3a90028d679a954d5f2623e83144b5afe5ee86f43d762e5f0a"},
|
| 3724 |
-
]
|
| 3725 |
-
|
| 3726 |
-
[package.extras]
|
| 3727 |
-
diagrams = ["jinja2", "railroad-diagrams"]
|
| 3728 |
-
|
| 3729 |
[[package]]
|
| 3730 |
name = "pypdfium2"
|
| 3731 |
version = "4.30.0"
|
|
@@ -3880,17 +3648,18 @@ files = [
|
|
| 3880 |
|
| 3881 |
[[package]]
|
| 3882 |
name = "pywinpty"
|
| 3883 |
-
version = "2.0.
|
| 3884 |
description = "Pseudo terminal support for Windows from Python."
|
| 3885 |
optional = false
|
| 3886 |
-
python-versions = ">=3.
|
| 3887 |
files = [
|
| 3888 |
-
{file = "pywinpty-2.0.
|
| 3889 |
-
{file = "pywinpty-2.0.
|
| 3890 |
-
{file = "pywinpty-2.0.
|
| 3891 |
-
{file = "pywinpty-2.0.
|
| 3892 |
-
{file = "pywinpty-2.0.
|
| 3893 |
-
{file = "pywinpty-2.0.
|
|
|
|
| 3894 |
]
|
| 3895 |
|
| 3896 |
[[package]]
|
|
@@ -4752,13 +4521,13 @@ full = ["httpx (>=0.27.0,<0.29.0)", "itsdangerous", "jinja2", "python-multipart
|
|
| 4752 |
|
| 4753 |
[[package]]
|
| 4754 |
name = "streamlit"
|
| 4755 |
-
version = "1.
|
| 4756 |
description = "A faster way to build and share data apps"
|
| 4757 |
optional = false
|
| 4758 |
python-versions = "!=3.9.7,>=3.9"
|
| 4759 |
files = [
|
| 4760 |
-
{file = "streamlit-1.
|
| 4761 |
-
{file = "streamlit-1.
|
| 4762 |
]
|
| 4763 |
|
| 4764 |
[package.dependencies]
|
|
@@ -4779,11 +4548,11 @@ rich = ">=10.14.0,<14"
|
|
| 4779 |
tenacity = ">=8.1.0,<10"
|
| 4780 |
toml = ">=0.10.1,<2"
|
| 4781 |
tornado = ">=6.0.3,<7"
|
| 4782 |
-
typing-extensions = ">=4.
|
| 4783 |
watchdog = {version = ">=2.1.5,<7", markers = "platform_system != \"Darwin\""}
|
| 4784 |
|
| 4785 |
[package.extras]
|
| 4786 |
-
snowflake = ["snowflake-connector-python (>=
|
| 4787 |
|
| 4788 |
[[package]]
|
| 4789 |
name = "surya-ocr"
|
|
@@ -5235,17 +5004,6 @@ files = [
|
|
| 5235 |
[package.extras]
|
| 5236 |
dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake8-commas", "flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-modern-annotations", "flake8-noqa", "flake8-pyproject", "flake8-requirements", "flake8-typechecking-import", "flake8-use-fstring", "mypy", "pep8-naming", "types-PyYAML"]
|
| 5237 |
|
| 5238 |
-
[[package]]
|
| 5239 |
-
name = "uritemplate"
|
| 5240 |
-
version = "4.1.1"
|
| 5241 |
-
description = "Implementation of RFC 6570 URI Templates"
|
| 5242 |
-
optional = false
|
| 5243 |
-
python-versions = ">=3.6"
|
| 5244 |
-
files = [
|
| 5245 |
-
{file = "uritemplate-4.1.1-py2.py3-none-any.whl", hash = "sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e"},
|
| 5246 |
-
{file = "uritemplate-4.1.1.tar.gz", hash = "sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0"},
|
| 5247 |
-
]
|
| 5248 |
-
|
| 5249 |
[[package]]
|
| 5250 |
name = "urllib3"
|
| 5251 |
version = "2.3.0"
|
|
@@ -5373,6 +5131,84 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"]
|
|
| 5373 |
optional = ["python-socks", "wsaccel"]
|
| 5374 |
test = ["websockets"]
|
| 5375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5376 |
[[package]]
|
| 5377 |
name = "widgetsnbextension"
|
| 5378 |
version = "4.0.13"
|
|
@@ -5615,4 +5451,4 @@ propcache = ">=0.2.0"
|
|
| 5615 |
[metadata]
|
| 5616 |
lock-version = "2.0"
|
| 5617 |
python-versions = "^3.10"
|
| 5618 |
-
content-hash = "
|
|
|
|
| 13 |
|
| 14 |
[[package]]
|
| 15 |
name = "aiohttp"
|
| 16 |
+
version = "3.11.12"
|
| 17 |
description = "Async http client/server framework (asyncio)"
|
| 18 |
optional = false
|
| 19 |
python-versions = ">=3.9"
|
| 20 |
files = [
|
| 21 |
+
{file = "aiohttp-3.11.12-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:aa8a8caca81c0a3e765f19c6953416c58e2f4cc1b84829af01dd1c771bb2f91f"},
|
| 22 |
+
{file = "aiohttp-3.11.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:84ede78acde96ca57f6cf8ccb8a13fbaf569f6011b9a52f870c662d4dc8cd854"},
|
| 23 |
+
{file = "aiohttp-3.11.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:584096938a001378484aa4ee54e05dc79c7b9dd933e271c744a97b3b6f644957"},
|
| 24 |
+
{file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:392432a2dde22b86f70dd4a0e9671a349446c93965f261dbaecfaf28813e5c42"},
|
| 25 |
+
{file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:88d385b8e7f3a870146bf5ea31786ef7463e99eb59e31db56e2315535d811f55"},
|
| 26 |
+
{file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b10a47e5390c4b30a0d58ee12581003be52eedd506862ab7f97da7a66805befb"},
|
| 27 |
+
{file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b5263dcede17b6b0c41ef0c3ccce847d82a7da98709e75cf7efde3e9e3b5cae"},
|
| 28 |
+
{file = "aiohttp-3.11.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50c5c7b8aa5443304c55c262c5693b108c35a3b61ef961f1e782dd52a2f559c7"},
|
| 29 |
+
{file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d1c031a7572f62f66f1257db37ddab4cb98bfaf9b9434a3b4840bf3560f5e788"},
|
| 30 |
+
{file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:7e44eba534381dd2687be50cbd5f2daded21575242ecfdaf86bbeecbc38dae8e"},
|
| 31 |
+
{file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:145a73850926018ec1681e734cedcf2716d6a8697d90da11284043b745c286d5"},
|
| 32 |
+
{file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:2c311e2f63e42c1bf86361d11e2c4a59f25d9e7aabdbdf53dc38b885c5435cdb"},
|
| 33 |
+
{file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:ea756b5a7bac046d202a9a3889b9a92219f885481d78cd318db85b15cc0b7bcf"},
|
| 34 |
+
{file = "aiohttp-3.11.12-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:526c900397f3bbc2db9cb360ce9c35134c908961cdd0ac25b1ae6ffcaa2507ff"},
|
| 35 |
+
{file = "aiohttp-3.11.12-cp310-cp310-win32.whl", hash = "sha256:b8d3bb96c147b39c02d3db086899679f31958c5d81c494ef0fc9ef5bb1359b3d"},
|
| 36 |
+
{file = "aiohttp-3.11.12-cp310-cp310-win_amd64.whl", hash = "sha256:7fe3d65279bfbee8de0fb4f8c17fc4e893eed2dba21b2f680e930cc2b09075c5"},
|
| 37 |
+
{file = "aiohttp-3.11.12-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:87a2e00bf17da098d90d4145375f1d985a81605267e7f9377ff94e55c5d769eb"},
|
| 38 |
+
{file = "aiohttp-3.11.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b34508f1cd928ce915ed09682d11307ba4b37d0708d1f28e5774c07a7674cac9"},
|
| 39 |
+
{file = "aiohttp-3.11.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:936d8a4f0f7081327014742cd51d320296b56aa6d324461a13724ab05f4b2933"},
|
| 40 |
+
{file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de1378f72def7dfb5dbd73d86c19eda0ea7b0a6873910cc37d57e80f10d64e1"},
|
| 41 |
+
{file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b9d45dbb3aaec05cf01525ee1a7ac72de46a8c425cb75c003acd29f76b1ffe94"},
|
| 42 |
+
{file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:930ffa1925393381e1e0a9b82137fa7b34c92a019b521cf9f41263976666a0d6"},
|
| 43 |
+
{file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8340def6737118f5429a5df4e88f440746b791f8f1c4ce4ad8a595f42c980bd5"},
|
| 44 |
+
{file = "aiohttp-3.11.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4016e383f91f2814e48ed61e6bda7d24c4d7f2402c75dd28f7e1027ae44ea204"},
|
| 45 |
+
{file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c0600bcc1adfaaac321422d615939ef300df81e165f6522ad096b73439c0f58"},
|
| 46 |
+
{file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:0450ada317a65383b7cce9576096150fdb97396dcfe559109b403c7242faffef"},
|
| 47 |
+
{file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:850ff6155371fd802a280f8d369d4e15d69434651b844bde566ce97ee2277420"},
|
| 48 |
+
{file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:8fd12d0f989c6099e7b0f30dc6e0d1e05499f3337461f0b2b0dadea6c64b89df"},
|
| 49 |
+
{file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:76719dd521c20a58a6c256d058547b3a9595d1d885b830013366e27011ffe804"},
|
| 50 |
+
{file = "aiohttp-3.11.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:97fe431f2ed646a3b56142fc81d238abcbaff08548d6912acb0b19a0cadc146b"},
|
| 51 |
+
{file = "aiohttp-3.11.12-cp311-cp311-win32.whl", hash = "sha256:e10c440d142fa8b32cfdb194caf60ceeceb3e49807072e0dc3a8887ea80e8c16"},
|
| 52 |
+
{file = "aiohttp-3.11.12-cp311-cp311-win_amd64.whl", hash = "sha256:246067ba0cf5560cf42e775069c5d80a8989d14a7ded21af529a4e10e3e0f0e6"},
|
| 53 |
+
{file = "aiohttp-3.11.12-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e392804a38353900c3fd8b7cacbea5132888f7129f8e241915e90b85f00e3250"},
|
| 54 |
+
{file = "aiohttp-3.11.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8fa1510b96c08aaad49303ab11f8803787c99222288f310a62f493faf883ede1"},
|
| 55 |
+
{file = "aiohttp-3.11.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dc065a4285307607df3f3686363e7f8bdd0d8ab35f12226362a847731516e42c"},
|
| 56 |
+
{file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddb31f8474695cd61fc9455c644fc1606c164b93bff2490390d90464b4655df"},
|
| 57 |
+
{file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9dec0000d2d8621d8015c293e24589d46fa218637d820894cb7356c77eca3259"},
|
| 58 |
+
{file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e3552fe98e90fdf5918c04769f338a87fa4f00f3b28830ea9b78b1bdc6140e0d"},
|
| 59 |
+
{file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dfe7f984f28a8ae94ff3a7953cd9678550dbd2a1f9bda5dd9c5ae627744c78e"},
|
| 60 |
+
{file = "aiohttp-3.11.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a481a574af914b6e84624412666cbfbe531a05667ca197804ecc19c97b8ab1b0"},
|
| 61 |
+
{file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1987770fb4887560363b0e1a9b75aa303e447433c41284d3af2840a2f226d6e0"},
|
| 62 |
+
{file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:a4ac6a0f0f6402854adca4e3259a623f5c82ec3f0c049374133bcb243132baf9"},
|
| 63 |
+
{file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c96a43822f1f9f69cc5c3706af33239489a6294be486a0447fb71380070d4d5f"},
|
| 64 |
+
{file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a5e69046f83c0d3cb8f0d5bd9b8838271b1bc898e01562a04398e160953e8eb9"},
|
| 65 |
+
{file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:68d54234c8d76d8ef74744f9f9fc6324f1508129e23da8883771cdbb5818cbef"},
|
| 66 |
+
{file = "aiohttp-3.11.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9fd9dcf9c91affe71654ef77426f5cf8489305e1c66ed4816f5a21874b094b9"},
|
| 67 |
+
{file = "aiohttp-3.11.12-cp312-cp312-win32.whl", hash = "sha256:0ed49efcd0dc1611378beadbd97beb5d9ca8fe48579fc04a6ed0844072261b6a"},
|
| 68 |
+
{file = "aiohttp-3.11.12-cp312-cp312-win_amd64.whl", hash = "sha256:54775858c7f2f214476773ce785a19ee81d1294a6bedc5cc17225355aab74802"},
|
| 69 |
+
{file = "aiohttp-3.11.12-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:413ad794dccb19453e2b97c2375f2ca3cdf34dc50d18cc2693bd5aed7d16f4b9"},
|
| 70 |
+
{file = "aiohttp-3.11.12-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4a93d28ed4b4b39e6f46fd240896c29b686b75e39cc6992692e3922ff6982b4c"},
|
| 71 |
+
{file = "aiohttp-3.11.12-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d589264dbba3b16e8951b6f145d1e6b883094075283dafcab4cdd564a9e353a0"},
|
| 72 |
+
{file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5148ca8955affdfeb864aca158ecae11030e952b25b3ae15d4e2b5ba299bad2"},
|
| 73 |
+
{file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:525410e0790aab036492eeea913858989c4cb070ff373ec3bc322d700bdf47c1"},
|
| 74 |
+
{file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bd8695be2c80b665ae3f05cb584093a1e59c35ecb7d794d1edd96e8cc9201d7"},
|
| 75 |
+
{file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0203433121484b32646a5f5ea93ae86f3d9559d7243f07e8c0eab5ff8e3f70e"},
|
| 76 |
+
{file = "aiohttp-3.11.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40cd36749a1035c34ba8d8aaf221b91ca3d111532e5ccb5fa8c3703ab1b967ed"},
|
| 77 |
+
{file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a7442662afebbf7b4c6d28cb7aab9e9ce3a5df055fc4116cc7228192ad6cb484"},
|
| 78 |
+
{file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:8a2fb742ef378284a50766e985804bd6adb5adb5aa781100b09befdbfa757b65"},
|
| 79 |
+
{file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2cee3b117a8d13ab98b38d5b6bdcd040cfb4181068d05ce0c474ec9db5f3c5bb"},
|
| 80 |
+
{file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f6a19bcab7fbd8f8649d6595624856635159a6527861b9cdc3447af288a00c00"},
|
| 81 |
+
{file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e4cecdb52aaa9994fbed6b81d4568427b6002f0a91c322697a4bfcc2b2363f5a"},
|
| 82 |
+
{file = "aiohttp-3.11.12-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:30f546358dfa0953db92ba620101fefc81574f87b2346556b90b5f3ef16e55ce"},
|
| 83 |
+
{file = "aiohttp-3.11.12-cp313-cp313-win32.whl", hash = "sha256:ce1bb21fc7d753b5f8a5d5a4bae99566386b15e716ebdb410154c16c91494d7f"},
|
| 84 |
+
{file = "aiohttp-3.11.12-cp313-cp313-win_amd64.whl", hash = "sha256:f7914ab70d2ee8ab91c13e5402122edbc77821c66d2758abb53aabe87f013287"},
|
| 85 |
+
{file = "aiohttp-3.11.12-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7c3623053b85b4296cd3925eeb725e386644fd5bc67250b3bb08b0f144803e7b"},
|
| 86 |
+
{file = "aiohttp-3.11.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:67453e603cea8e85ed566b2700efa1f6916aefbc0c9fcb2e86aaffc08ec38e78"},
|
| 87 |
+
{file = "aiohttp-3.11.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6130459189e61baac5a88c10019b21e1f0c6d00ebc770e9ce269475650ff7f73"},
|
| 88 |
+
{file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9060addfa4ff753b09392efe41e6af06ea5dd257829199747b9f15bfad819460"},
|
| 89 |
+
{file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:34245498eeb9ae54c687a07ad7f160053911b5745e186afe2d0c0f2898a1ab8a"},
|
| 90 |
+
{file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8dc0fba9a74b471c45ca1a3cb6e6913ebfae416678d90529d188886278e7f3f6"},
|
| 91 |
+
{file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a478aa11b328983c4444dacb947d4513cb371cd323f3845e53caeda6be5589d5"},
|
| 92 |
+
{file = "aiohttp-3.11.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c160a04283c8c6f55b5bf6d4cad59bb9c5b9c9cd08903841b25f1f7109ef1259"},
|
| 93 |
+
{file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:edb69b9589324bdc40961cdf0657815df674f1743a8d5ad9ab56a99e4833cfdd"},
|
| 94 |
+
{file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:4ee84c2a22a809c4f868153b178fe59e71423e1f3d6a8cd416134bb231fbf6d3"},
|
| 95 |
+
{file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:bf4480a5438f80e0f1539e15a7eb8b5f97a26fe087e9828e2c0ec2be119a9f72"},
|
| 96 |
+
{file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:e6b2732ef3bafc759f653a98881b5b9cdef0716d98f013d376ee8dfd7285abf1"},
|
| 97 |
+
{file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f752e80606b132140883bb262a457c475d219d7163d996dc9072434ffb0784c4"},
|
| 98 |
+
{file = "aiohttp-3.11.12-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ab3247d58b393bda5b1c8f31c9edece7162fc13265334217785518dd770792b8"},
|
| 99 |
+
{file = "aiohttp-3.11.12-cp39-cp39-win32.whl", hash = "sha256:0d5176f310a7fe6f65608213cc74f4228e4f4ce9fd10bcb2bb6da8fc66991462"},
|
| 100 |
+
{file = "aiohttp-3.11.12-cp39-cp39-win_amd64.whl", hash = "sha256:74bd573dde27e58c760d9ca8615c41a57e719bff315c9adb6f2a4281a28e8798"},
|
| 101 |
+
{file = "aiohttp-3.11.12.tar.gz", hash = "sha256:7603ca26d75b1b86160ce1bbe2787a0b706e592af5b2504e12caa88a217767b0"},
|
| 102 |
]
|
| 103 |
|
| 104 |
[package.dependencies]
|
|
|
|
| 344 |
|
| 345 |
[[package]]
|
| 346 |
name = "babel"
|
| 347 |
+
version = "2.17.0"
|
| 348 |
description = "Internationalization utilities"
|
| 349 |
optional = false
|
| 350 |
python-versions = ">=3.8"
|
| 351 |
files = [
|
| 352 |
+
{file = "babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2"},
|
| 353 |
+
{file = "babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d"},
|
| 354 |
]
|
| 355 |
|
| 356 |
[package.extras]
|
| 357 |
+
dev = ["backports.zoneinfo", "freezegun (>=1.0,<2.0)", "jinja2 (>=3.0)", "pytest (>=6.0)", "pytest-cov", "pytz", "setuptools", "tzdata"]
|
| 358 |
|
| 359 |
[[package]]
|
| 360 |
name = "beautifulsoup4"
|
| 361 |
+
version = "4.13.3"
|
| 362 |
description = "Screen-scraping library"
|
| 363 |
optional = false
|
| 364 |
+
python-versions = ">=3.7.0"
|
| 365 |
files = [
|
| 366 |
+
{file = "beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16"},
|
| 367 |
+
{file = "beautifulsoup4-4.13.3.tar.gz", hash = "sha256:1bd32405dacc920b42b83ba01644747ed77456a65760e285fbc47633ceddaf8b"},
|
| 368 |
]
|
| 369 |
|
| 370 |
[package.dependencies]
|
| 371 |
soupsieve = ">1.2"
|
| 372 |
+
typing-extensions = ">=4.0.0"
|
| 373 |
|
| 374 |
[package.extras]
|
| 375 |
cchardet = ["cchardet"]
|
|
|
|
| 1066 |
doc = ["sphinx (>=7.1.2,<7.2)", "sphinx-autodoc-typehints", "sphinx_rtd_theme"]
|
| 1067 |
test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"]
|
| 1068 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1069 |
[[package]]
|
| 1070 |
name = "google-auth"
|
| 1071 |
version = "2.38.0"
|
|
|
|
| 1091 |
requests = ["requests (>=2.20.0,<3.0.0.dev0)"]
|
| 1092 |
|
| 1093 |
[[package]]
|
| 1094 |
+
name = "google-genai"
|
| 1095 |
+
version = "1.0.0"
|
| 1096 |
+
description = "GenAI Python SDK"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1097 |
optional = false
|
| 1098 |
python-versions = ">=3.9"
|
| 1099 |
files = [
|
| 1100 |
+
{file = "google_genai-1.0.0-py3-none-any.whl", hash = "sha256:e9c3abd48f46ecb2b0a51efa7f65c6830b50f9784df603a91019b43918a7531f"},
|
| 1101 |
+
{file = "google_genai-1.0.0.tar.gz", hash = "sha256:15712abb808f891a14eafc9edf21b8cf92ea952f627dd0e2e939657efd234acd"},
|
| 1102 |
]
|
| 1103 |
|
| 1104 |
[package.dependencies]
|
| 1105 |
+
google-auth = ">=2.14.1,<3.0.0dev"
|
| 1106 |
+
pydantic = ">=2.0.0,<3.0.0dev"
|
| 1107 |
+
requests = ">=2.28.1,<3.0.0dev"
|
| 1108 |
+
websockets = ">=13.0,<15.0dev"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1109 |
|
| 1110 |
[[package]]
|
| 1111 |
name = "greenlet"
|
|
|
|
| 1193 |
docs = ["Sphinx", "furo"]
|
| 1194 |
test = ["objgraph", "psutil"]
|
| 1195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
[[package]]
|
| 1197 |
name = "h11"
|
| 1198 |
version = "0.14.0"
|
|
|
|
| 1225 |
socks = ["socksio (==1.*)"]
|
| 1226 |
trio = ["trio (>=0.22.0,<1.0)"]
|
| 1227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1228 |
[[package]]
|
| 1229 |
name = "httpx"
|
| 1230 |
version = "0.28.1"
|
|
|
|
| 1343 |
|
| 1344 |
[[package]]
|
| 1345 |
name = "ipython"
|
| 1346 |
+
version = "8.32.0"
|
| 1347 |
description = "IPython: Productive Interactive Computing"
|
| 1348 |
optional = false
|
| 1349 |
python-versions = ">=3.10"
|
| 1350 |
files = [
|
| 1351 |
+
{file = "ipython-8.32.0-py3-none-any.whl", hash = "sha256:cae85b0c61eff1fc48b0a8002de5958b6528fa9c8defb1894da63f42613708aa"},
|
| 1352 |
+
{file = "ipython-8.32.0.tar.gz", hash = "sha256:be2c91895b0b9ea7ba49d33b23e2040c352b33eb6a519cca7ce6e0c743444251"},
|
| 1353 |
]
|
| 1354 |
|
| 1355 |
[package.dependencies]
|
|
|
|
| 1616 |
|
| 1617 |
[[package]]
|
| 1618 |
name = "jupyter-events"
|
| 1619 |
+
version = "0.12.0"
|
| 1620 |
description = "Jupyter Event System library"
|
| 1621 |
optional = false
|
| 1622 |
python-versions = ">=3.9"
|
| 1623 |
files = [
|
| 1624 |
+
{file = "jupyter_events-0.12.0-py3-none-any.whl", hash = "sha256:6464b2fa5ad10451c3d35fabc75eab39556ae1e2853ad0c0cc31b656731a97fb"},
|
| 1625 |
+
{file = "jupyter_events-0.12.0.tar.gz", hash = "sha256:fc3fce98865f6784c9cd0a56a20644fc6098f21c8c33834a8d9fe383c17e554b"},
|
| 1626 |
]
|
| 1627 |
|
| 1628 |
[package.dependencies]
|
| 1629 |
jsonschema = {version = ">=4.18.0", extras = ["format-nongpl"]}
|
| 1630 |
+
packaging = "*"
|
| 1631 |
python-json-logger = ">=2.0.4"
|
| 1632 |
pyyaml = ">=5.3"
|
| 1633 |
referencing = "*"
|
|
|
|
| 2267 |
|
| 2268 |
[[package]]
|
| 2269 |
name = "narwhals"
|
| 2270 |
+
version = "1.25.2"
|
| 2271 |
description = "Extremely lightweight compatibility layer between dataframe libraries"
|
| 2272 |
optional = false
|
| 2273 |
python-versions = ">=3.8"
|
| 2274 |
files = [
|
| 2275 |
+
{file = "narwhals-1.25.2-py3-none-any.whl", hash = "sha256:e645f7fc1f8c0a3563a6cdcd0191586cdf88470ad90f0818abba7ceb6c181b00"},
|
| 2276 |
+
{file = "narwhals-1.25.2.tar.gz", hash = "sha256:37594746fc06fe4a588967a34a2974b1f3a7ad6ff1571b6e31ac5e58c9591000"},
|
| 2277 |
]
|
| 2278 |
|
| 2279 |
[package.extras]
|
|
|
|
| 2975 |
|
| 2976 |
[[package]]
|
| 2977 |
name = "playwright"
|
| 2978 |
+
version = "1.50.0"
|
| 2979 |
description = "A high-level API to automate web browsers"
|
| 2980 |
optional = false
|
| 2981 |
python-versions = ">=3.9"
|
| 2982 |
files = [
|
| 2983 |
+
{file = "playwright-1.50.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:f36d754a6c5bd9bf7f14e8f57a2aea6fd08f39ca4c8476481b9c83e299531148"},
|
| 2984 |
+
{file = "playwright-1.50.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:40f274384591dfd27f2b014596250b2250c843ed1f7f4ef5d2960ecb91b4961e"},
|
| 2985 |
+
{file = "playwright-1.50.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:9922ef9bcd316995f01e220acffd2d37a463b4ad10fd73e388add03841dfa230"},
|
| 2986 |
+
{file = "playwright-1.50.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:8fc628c492d12b13d1f347137b2ac6c04f98197ff0985ef0403a9a9ee0d39131"},
|
| 2987 |
+
{file = "playwright-1.50.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcff35f72db2689a79007aee78f1b0621a22e6e3d6c1f58aaa9ac805bf4497c"},
|
| 2988 |
+
{file = "playwright-1.50.0-py3-none-win32.whl", hash = "sha256:3b906f4d351260016a8c5cc1e003bb341651ae682f62213b50168ed581c7558a"},
|
| 2989 |
+
{file = "playwright-1.50.0-py3-none-win_amd64.whl", hash = "sha256:1859423da82de631704d5e3d88602d755462b0906824c1debe140979397d2e8d"},
|
| 2990 |
]
|
| 2991 |
|
| 2992 |
[package.dependencies]
|
| 2993 |
+
greenlet = ">=3.1.1,<4.0.0"
|
| 2994 |
+
pyee = ">=12,<13"
|
| 2995 |
|
| 2996 |
[[package]]
|
| 2997 |
name = "pluggy"
|
|
|
|
| 3127 |
{file = "propcache-0.2.1.tar.gz", hash = "sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64"},
|
| 3128 |
]
|
| 3129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3130 |
[[package]]
|
| 3131 |
name = "protobuf"
|
| 3132 |
version = "5.29.3"
|
|
|
|
| 3465 |
|
| 3466 |
[[package]]
|
| 3467 |
name = "pyee"
|
| 3468 |
+
version = "12.1.1"
|
| 3469 |
description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own"
|
| 3470 |
optional = false
|
| 3471 |
python-versions = ">=3.8"
|
| 3472 |
files = [
|
| 3473 |
+
{file = "pyee-12.1.1-py3-none-any.whl", hash = "sha256:18a19c650556bb6b32b406d7f017c8f513aceed1ef7ca618fb65de7bd2d347ef"},
|
| 3474 |
+
{file = "pyee-12.1.1.tar.gz", hash = "sha256:bbc33c09e2ff827f74191e3e5bbc6be7da02f627b7ec30d86f5ce1a6fb2424a3"},
|
| 3475 |
]
|
| 3476 |
|
| 3477 |
[package.dependencies]
|
|
|
|
| 3494 |
[package.extras]
|
| 3495 |
windows-terminal = ["colorama (>=0.4.6)"]
|
| 3496 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3497 |
[[package]]
|
| 3498 |
name = "pypdfium2"
|
| 3499 |
version = "4.30.0"
|
|
|
|
| 3648 |
|
| 3649 |
[[package]]
|
| 3650 |
name = "pywinpty"
|
| 3651 |
+
version = "2.0.15"
|
| 3652 |
description = "Pseudo terminal support for Windows from Python."
|
| 3653 |
optional = false
|
| 3654 |
+
python-versions = ">=3.9"
|
| 3655 |
files = [
|
| 3656 |
+
{file = "pywinpty-2.0.15-cp310-cp310-win_amd64.whl", hash = "sha256:8e7f5de756a615a38b96cd86fa3cd65f901ce54ce147a3179c45907fa11b4c4e"},
|
| 3657 |
+
{file = "pywinpty-2.0.15-cp311-cp311-win_amd64.whl", hash = "sha256:9a6bcec2df2707aaa9d08b86071970ee32c5026e10bcc3cc5f6f391d85baf7ca"},
|
| 3658 |
+
{file = "pywinpty-2.0.15-cp312-cp312-win_amd64.whl", hash = "sha256:83a8f20b430bbc5d8957249f875341a60219a4e971580f2ba694fbfb54a45ebc"},
|
| 3659 |
+
{file = "pywinpty-2.0.15-cp313-cp313-win_amd64.whl", hash = "sha256:ab5920877dd632c124b4ed17bc6dd6ef3b9f86cd492b963ffdb1a67b85b0f408"},
|
| 3660 |
+
{file = "pywinpty-2.0.15-cp313-cp313t-win_amd64.whl", hash = "sha256:a4560ad8c01e537708d2790dbe7da7d986791de805d89dd0d3697ca59e9e4901"},
|
| 3661 |
+
{file = "pywinpty-2.0.15-cp39-cp39-win_amd64.whl", hash = "sha256:d261cd88fcd358cfb48a7ca0700db3e1c088c9c10403c9ebc0d8a8b57aa6a117"},
|
| 3662 |
+
{file = "pywinpty-2.0.15.tar.gz", hash = "sha256:312cf39153a8736c617d45ce8b6ad6cd2107de121df91c455b10ce6bba7a39b2"},
|
| 3663 |
]
|
| 3664 |
|
| 3665 |
[[package]]
|
|
|
|
| 4521 |
|
| 4522 |
[[package]]
|
| 4523 |
name = "streamlit"
|
| 4524 |
+
version = "1.42.0"
|
| 4525 |
description = "A faster way to build and share data apps"
|
| 4526 |
optional = false
|
| 4527 |
python-versions = "!=3.9.7,>=3.9"
|
| 4528 |
files = [
|
| 4529 |
+
{file = "streamlit-1.42.0-py2.py3-none-any.whl", hash = "sha256:edf333fd3525b7c64b19e1156b483a1a93cbdb09a3a06f26478388d68f971090"},
|
| 4530 |
+
{file = "streamlit-1.42.0.tar.gz", hash = "sha256:8c48494ccfad33e7d0bc5873151800b203cb71203bfd42bc7418940710ca4970"},
|
| 4531 |
]
|
| 4532 |
|
| 4533 |
[package.dependencies]
|
|
|
|
| 4548 |
tenacity = ">=8.1.0,<10"
|
| 4549 |
toml = ">=0.10.1,<2"
|
| 4550 |
tornado = ">=6.0.3,<7"
|
| 4551 |
+
typing-extensions = ">=4.4.0,<5"
|
| 4552 |
watchdog = {version = ">=2.1.5,<7", markers = "platform_system != \"Darwin\""}
|
| 4553 |
|
| 4554 |
[package.extras]
|
| 4555 |
+
snowflake = ["snowflake-connector-python (>=3.3.0)", "snowflake-snowpark-python[modin] (>=1.17.0)"]
|
| 4556 |
|
| 4557 |
[[package]]
|
| 4558 |
name = "surya-ocr"
|
|
|
|
| 5004 |
[package.extras]
|
| 5005 |
dev = ["flake8", "flake8-annotations", "flake8-bandit", "flake8-bugbear", "flake8-commas", "flake8-comprehensions", "flake8-continuation", "flake8-datetimez", "flake8-docstrings", "flake8-import-order", "flake8-literal", "flake8-modern-annotations", "flake8-noqa", "flake8-pyproject", "flake8-requirements", "flake8-typechecking-import", "flake8-use-fstring", "mypy", "pep8-naming", "types-PyYAML"]
|
| 5006 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5007 |
[[package]]
|
| 5008 |
name = "urllib3"
|
| 5009 |
version = "2.3.0"
|
|
|
|
| 5131 |
optional = ["python-socks", "wsaccel"]
|
| 5132 |
test = ["websockets"]
|
| 5133 |
|
| 5134 |
+
[[package]]
|
| 5135 |
+
name = "websockets"
|
| 5136 |
+
version = "14.2"
|
| 5137 |
+
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
|
| 5138 |
+
optional = false
|
| 5139 |
+
python-versions = ">=3.9"
|
| 5140 |
+
files = [
|
| 5141 |
+
{file = "websockets-14.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e8179f95323b9ab1c11723e5d91a89403903f7b001828161b480a7810b334885"},
|
| 5142 |
+
{file = "websockets-14.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0d8c3e2cdb38f31d8bd7d9d28908005f6fa9def3324edb9bf336d7e4266fd397"},
|
| 5143 |
+
{file = "websockets-14.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:714a9b682deb4339d39ffa674f7b674230227d981a37d5d174a4a83e3978a610"},
|
| 5144 |
+
{file = "websockets-14.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2e53c72052f2596fb792a7acd9704cbc549bf70fcde8a99e899311455974ca3"},
|
| 5145 |
+
{file = "websockets-14.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fbd68850c837e57373d95c8fe352203a512b6e49eaae4c2f4088ef8cf21980"},
|
| 5146 |
+
{file = "websockets-14.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b27ece32f63150c268593d5fdb82819584831a83a3f5809b7521df0685cd5d8"},
|
| 5147 |
+
{file = "websockets-14.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4daa0faea5424d8713142b33825fff03c736f781690d90652d2c8b053345b0e7"},
|
| 5148 |
+
{file = "websockets-14.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:bc63cee8596a6ec84d9753fd0fcfa0452ee12f317afe4beae6b157f0070c6c7f"},
|
| 5149 |
+
{file = "websockets-14.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a570862c325af2111343cc9b0257b7119b904823c675b22d4ac547163088d0d"},
|
| 5150 |
+
{file = "websockets-14.2-cp310-cp310-win32.whl", hash = "sha256:75862126b3d2d505e895893e3deac0a9339ce750bd27b4ba515f008b5acf832d"},
|
| 5151 |
+
{file = "websockets-14.2-cp310-cp310-win_amd64.whl", hash = "sha256:cc45afb9c9b2dc0852d5c8b5321759cf825f82a31bfaf506b65bf4668c96f8b2"},
|
| 5152 |
+
{file = "websockets-14.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3bdc8c692c866ce5fefcaf07d2b55c91d6922ac397e031ef9b774e5b9ea42166"},
|
| 5153 |
+
{file = "websockets-14.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c93215fac5dadc63e51bcc6dceca72e72267c11def401d6668622b47675b097f"},
|
| 5154 |
+
{file = "websockets-14.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1c9b6535c0e2cf8a6bf938064fb754aaceb1e6a4a51a80d884cd5db569886910"},
|
| 5155 |
+
{file = "websockets-14.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a52a6d7cf6938e04e9dceb949d35fbdf58ac14deea26e685ab6368e73744e4c"},
|
| 5156 |
+
{file = "websockets-14.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9f05702e93203a6ff5226e21d9b40c037761b2cfb637187c9802c10f58e40473"},
|
| 5157 |
+
{file = "websockets-14.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22441c81a6748a53bfcb98951d58d1af0661ab47a536af08920d129b4d1c3473"},
|
| 5158 |
+
{file = "websockets-14.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd9b868d78b194790e6236d9cbc46d68aba4b75b22497eb4ab64fa640c3af56"},
|
| 5159 |
+
{file = "websockets-14.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1a5a20d5843886d34ff8c57424cc65a1deda4375729cbca4cb6b3353f3ce4142"},
|
| 5160 |
+
{file = "websockets-14.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34277a29f5303d54ec6468fb525d99c99938607bc96b8d72d675dee2b9f5bf1d"},
|
| 5161 |
+
{file = "websockets-14.2-cp311-cp311-win32.whl", hash = "sha256:02687db35dbc7d25fd541a602b5f8e451a238ffa033030b172ff86a93cb5dc2a"},
|
| 5162 |
+
{file = "websockets-14.2-cp311-cp311-win_amd64.whl", hash = "sha256:862e9967b46c07d4dcd2532e9e8e3c2825e004ffbf91a5ef9dde519ee2effb0b"},
|
| 5163 |
+
{file = "websockets-14.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1f20522e624d7ffbdbe259c6b6a65d73c895045f76a93719aa10cd93b3de100c"},
|
| 5164 |
+
{file = "websockets-14.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:647b573f7d3ada919fd60e64d533409a79dcf1ea21daeb4542d1d996519ca967"},
|
| 5165 |
+
{file = "websockets-14.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6af99a38e49f66be5a64b1e890208ad026cda49355661549c507152113049990"},
|
| 5166 |
+
{file = "websockets-14.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:091ab63dfc8cea748cc22c1db2814eadb77ccbf82829bac6b2fbe3401d548eda"},
|
| 5167 |
+
{file = "websockets-14.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b374e8953ad477d17e4851cdc66d83fdc2db88d9e73abf755c94510ebddceb95"},
|
| 5168 |
+
{file = "websockets-14.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a39d7eceeea35db85b85e1169011bb4321c32e673920ae9c1b6e0978590012a3"},
|
| 5169 |
+
{file = "websockets-14.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0a6f3efd47ffd0d12080594f434faf1cd2549b31e54870b8470b28cc1d3817d9"},
|
| 5170 |
+
{file = "websockets-14.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:065ce275e7c4ffb42cb738dd6b20726ac26ac9ad0a2a48e33ca632351a737267"},
|
| 5171 |
+
{file = "websockets-14.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e9d0e53530ba7b8b5e389c02282f9d2aa47581514bd6049d3a7cffe1385cf5fe"},
|
| 5172 |
+
{file = "websockets-14.2-cp312-cp312-win32.whl", hash = "sha256:20e6dd0984d7ca3037afcb4494e48c74ffb51e8013cac71cf607fffe11df7205"},
|
| 5173 |
+
{file = "websockets-14.2-cp312-cp312-win_amd64.whl", hash = "sha256:44bba1a956c2c9d268bdcdf234d5e5ff4c9b6dc3e300545cbe99af59dda9dcce"},
|
| 5174 |
+
{file = "websockets-14.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f1372e511c7409a542291bce92d6c83320e02c9cf392223272287ce55bc224e"},
|
| 5175 |
+
{file = "websockets-14.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4da98b72009836179bb596a92297b1a61bb5a830c0e483a7d0766d45070a08ad"},
|
| 5176 |
+
{file = "websockets-14.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8a86a269759026d2bde227652b87be79f8a734e582debf64c9d302faa1e9f03"},
|
| 5177 |
+
{file = "websockets-14.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86cf1aaeca909bf6815ea714d5c5736c8d6dd3a13770e885aafe062ecbd04f1f"},
|
| 5178 |
+
{file = "websockets-14.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9b0f6c3ba3b1240f602ebb3971d45b02cc12bd1845466dd783496b3b05783a5"},
|
| 5179 |
+
{file = "websockets-14.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669c3e101c246aa85bc8534e495952e2ca208bd87994650b90a23d745902db9a"},
|
| 5180 |
+
{file = "websockets-14.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eabdb28b972f3729348e632ab08f2a7b616c7e53d5414c12108c29972e655b20"},
|
| 5181 |
+
{file = "websockets-14.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2066dc4cbcc19f32c12a5a0e8cc1b7ac734e5b64ac0a325ff8353451c4b15ef2"},
|
| 5182 |
+
{file = "websockets-14.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ab95d357cd471df61873dadf66dd05dd4709cae001dd6342edafc8dc6382f307"},
|
| 5183 |
+
{file = "websockets-14.2-cp313-cp313-win32.whl", hash = "sha256:a9e72fb63e5f3feacdcf5b4ff53199ec8c18d66e325c34ee4c551ca748623bbc"},
|
| 5184 |
+
{file = "websockets-14.2-cp313-cp313-win_amd64.whl", hash = "sha256:b439ea828c4ba99bb3176dc8d9b933392a2413c0f6b149fdcba48393f573377f"},
|
| 5185 |
+
{file = "websockets-14.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7cd5706caec1686c5d233bc76243ff64b1c0dc445339bd538f30547e787c11fe"},
|
| 5186 |
+
{file = "websockets-14.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ec607328ce95a2f12b595f7ae4c5d71bf502212bddcea528290b35c286932b12"},
|
| 5187 |
+
{file = "websockets-14.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:da85651270c6bfb630136423037dd4975199e5d4114cae6d3066641adcc9d1c7"},
|
| 5188 |
+
{file = "websockets-14.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ecadc7ce90accf39903815697917643f5b7cfb73c96702318a096c00aa71f5"},
|
| 5189 |
+
{file = "websockets-14.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1979bee04af6a78608024bad6dfcc0cc930ce819f9e10342a29a05b5320355d0"},
|
| 5190 |
+
{file = "websockets-14.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dddacad58e2614a24938a50b85969d56f88e620e3f897b7d80ac0d8a5800258"},
|
| 5191 |
+
{file = "websockets-14.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:89a71173caaf75fa71a09a5f614f450ba3ec84ad9fca47cb2422a860676716f0"},
|
| 5192 |
+
{file = "websockets-14.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6af6a4b26eea4fc06c6818a6b962a952441e0e39548b44773502761ded8cc1d4"},
|
| 5193 |
+
{file = "websockets-14.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:80c8efa38957f20bba0117b48737993643204645e9ec45512579132508477cfc"},
|
| 5194 |
+
{file = "websockets-14.2-cp39-cp39-win32.whl", hash = "sha256:2e20c5f517e2163d76e2729104abc42639c41cf91f7b1839295be43302713661"},
|
| 5195 |
+
{file = "websockets-14.2-cp39-cp39-win_amd64.whl", hash = "sha256:b4c8cef610e8d7c70dea92e62b6814a8cd24fbd01d7103cc89308d2bfe1659ef"},
|
| 5196 |
+
{file = "websockets-14.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d7d9cafbccba46e768be8a8ad4635fa3eae1ffac4c6e7cb4eb276ba41297ed29"},
|
| 5197 |
+
{file = "websockets-14.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c76193c1c044bd1e9b3316dcc34b174bbf9664598791e6fb606d8d29000e070c"},
|
| 5198 |
+
{file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd475a974d5352390baf865309fe37dec6831aafc3014ffac1eea99e84e83fc2"},
|
| 5199 |
+
{file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c6c0097a41968b2e2b54ed3424739aab0b762ca92af2379f152c1aef0187e1c"},
|
| 5200 |
+
{file = "websockets-14.2-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7ff794c8b36bc402f2e07c0b2ceb4a2424147ed4785ff03e2a7af03711d60a"},
|
| 5201 |
+
{file = "websockets-14.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dec254fcabc7bd488dab64846f588fc5b6fe0d78f641180030f8ea27b76d72c3"},
|
| 5202 |
+
{file = "websockets-14.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:bbe03eb853e17fd5b15448328b4ec7fb2407d45fb0245036d06a3af251f8e48f"},
|
| 5203 |
+
{file = "websockets-14.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:a3c4aa3428b904d5404a0ed85f3644d37e2cb25996b7f096d77caeb0e96a3b42"},
|
| 5204 |
+
{file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:577a4cebf1ceaf0b65ffc42c54856214165fb8ceeba3935852fc33f6b0c55e7f"},
|
| 5205 |
+
{file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad1c1d02357b7665e700eca43a31d52814ad9ad9b89b58118bdabc365454b574"},
|
| 5206 |
+
{file = "websockets-14.2-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f390024a47d904613577df83ba700bd189eedc09c57af0a904e5c39624621270"},
|
| 5207 |
+
{file = "websockets-14.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3c1426c021c38cf92b453cdf371228d3430acd775edee6bac5a4d577efc72365"},
|
| 5208 |
+
{file = "websockets-14.2-py3-none-any.whl", hash = "sha256:7a6ceec4ea84469f15cf15807a747e9efe57e369c384fa86e022b3bea679b79b"},
|
| 5209 |
+
{file = "websockets-14.2.tar.gz", hash = "sha256:5059ed9c54945efb321f097084b4c7e52c246f2c869815876a69d1efc4ad6eb5"},
|
| 5210 |
+
]
|
| 5211 |
+
|
| 5212 |
[[package]]
|
| 5213 |
name = "widgetsnbextension"
|
| 5214 |
version = "4.0.13"
|
|
|
|
| 5451 |
[metadata]
|
| 5452 |
lock-version = "2.0"
|
| 5453 |
python-versions = "^3.10"
|
| 5454 |
+
content-hash = "0ab5205db01e1abea947536074593b29b16347a16ca5e9489c024a2c3a05df8f"
|
pyproject.toml
CHANGED
|
@@ -31,10 +31,10 @@ regex = "^2024.4.28"
|
|
| 31 |
pdftext = "~0.5.1"
|
| 32 |
markdownify = "^0.13.1"
|
| 33 |
click = "^8.1.7"
|
| 34 |
-
google-generativeai = "^0.8.3"
|
| 35 |
markdown2 = "^2.5.2"
|
| 36 |
filetype = "^1.2.0"
|
| 37 |
scikit-learn = "^1.6.1"
|
|
|
|
| 38 |
|
| 39 |
[tool.poetry.group.dev.dependencies]
|
| 40 |
jupyter = "^1.0.0"
|
|
|
|
| 31 |
pdftext = "~0.5.1"
|
| 32 |
markdownify = "^0.13.1"
|
| 33 |
click = "^8.1.7"
|
|
|
|
| 34 |
markdown2 = "^2.5.2"
|
| 35 |
filetype = "^1.2.0"
|
| 36 |
scikit-learn = "^1.6.1"
|
| 37 |
+
google-genai = "^1.0.0"
|
| 38 |
|
| 39 |
[tool.poetry.group.dev.dependencies]
|
| 40 |
jupyter = "^1.0.0"
|
tests/processors/test_llm_processors.py
CHANGED
|
@@ -6,7 +6,6 @@ from marker.processors.llm.llm_complex import LLMComplexRegionProcessor
|
|
| 6 |
from marker.processors.llm.llm_form import LLMFormProcessor
|
| 7 |
from marker.processors.llm.llm_image_description import LLMImageDescriptionProcessor
|
| 8 |
from marker.processors.llm.llm_table import LLMTableProcessor
|
| 9 |
-
from marker.processors.llm.llm_text import LLMTextProcessor
|
| 10 |
from marker.processors.table import TableProcessor
|
| 11 |
from marker.renderers.markdown import MarkdownRenderer
|
| 12 |
from marker.schema import BlockTypes
|
|
@@ -97,25 +96,6 @@ def test_llm_table_processor(pdf_document, detection_model, table_rec_model, rec
|
|
| 97 |
assert "Value 1 $x$" in markdown
|
| 98 |
|
| 99 |
|
| 100 |
-
@pytest.mark.filename("adversarial.pdf")
|
| 101 |
-
@pytest.mark.config({"page_range": [0]})
|
| 102 |
-
def test_llm_text_processor(pdf_document, mocker):
|
| 103 |
-
inline_math_block = pdf_document.contained_blocks((BlockTypes.TextInlineMath,))[0]
|
| 104 |
-
text_lines = inline_math_block.contained_blocks(pdf_document, (BlockTypes.Line,))
|
| 105 |
-
corrected_lines = ["<i>Text</i>"] * len(text_lines)
|
| 106 |
-
|
| 107 |
-
mock_cls = Mock()
|
| 108 |
-
mock_cls.return_value.generate_response.return_value = {"corrected_lines": corrected_lines}
|
| 109 |
-
mocker.patch("marker.processors.llm.GoogleModel", mock_cls)
|
| 110 |
-
|
| 111 |
-
processor = LLMTextProcessor({"use_llm": True, "google_api_key": "test"})
|
| 112 |
-
processor(pdf_document)
|
| 113 |
-
|
| 114 |
-
contained_spans = text_lines[0].contained_blocks(pdf_document, (BlockTypes.Span,))
|
| 115 |
-
assert contained_spans[0].text == "Text\n" # Newline inserted at end of line
|
| 116 |
-
assert contained_spans[0].formats == ["italic"]
|
| 117 |
-
|
| 118 |
-
|
| 119 |
@pytest.mark.filename("A17_FlightPlan.pdf")
|
| 120 |
@pytest.mark.config({"page_range": [0]})
|
| 121 |
def test_llm_caption_processor_disabled(pdf_document):
|
|
|
|
| 6 |
from marker.processors.llm.llm_form import LLMFormProcessor
|
| 7 |
from marker.processors.llm.llm_image_description import LLMImageDescriptionProcessor
|
| 8 |
from marker.processors.llm.llm_table import LLMTableProcessor
|
|
|
|
| 9 |
from marker.processors.table import TableProcessor
|
| 10 |
from marker.renderers.markdown import MarkdownRenderer
|
| 11 |
from marker.schema import BlockTypes
|
|
|
|
| 96 |
assert "Value 1 $x$" in markdown
|
| 97 |
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
@pytest.mark.filename("A17_FlightPlan.pdf")
|
| 100 |
@pytest.mark.config({"page_range": [0]})
|
| 101 |
def test_llm_caption_processor_disabled(pdf_document):
|