mistral_pdf_ocr / app.py
mimoha's picture
Update app.py
6f4a157 verified
import json
import os
from pathlib import Path

import gradio as gr
from mistralai import Mistral, DocumentURLChunk
from mistralai.models import OCRResponse
# The Mistral API key is a secret and must not live in source control. It is
# read from the MISTRAL_API_KEY environment variable instead.
# NOTE(review): the key that used to be embedded on this line has been exposed
# and should be revoked/rotated.
_api_key = os.environ.get("MISTRAL_API_KEY")
if not _api_key:
    # Fail at startup with an actionable message rather than on first request.
    raise RuntimeError(
        "MISTRAL_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )
client = Mistral(api_key=_api_key)
def replace_images_in_markdown(markdown_str: str, images_dict: dict) -> str:
    """Inline images into Markdown by pointing image references at data URIs.

    Every ``![name](name)`` reference whose ``name`` is a key of
    ``images_dict`` is rewritten so its target is the image data itself.

    Args:
        markdown_str: Markdown text containing ``![name](name)`` references.
        images_dict: Mapping of image name to its image payload. A value that
            already starts with ``data:`` is used as-is; any other string is
            treated as raw base64 and wrapped as ``data:image/png;base64,...``.
            Entries whose value is ``None`` are skipped.

    Returns:
        The Markdown text with all matching references replaced.
    """
    for img_name, base64_str in images_dict.items():
        if base64_str is None:
            # No payload available: leave the reference untouched instead of
            # embedding the literal text "None".
            continue
        if base64_str.startswith("data:"):
            # Already a complete data URI; adding another prefix would yield
            # an invalid "data:...,data:..." target.
            data_uri = base64_str
        else:
            data_uri = f"data:image/png;base64,{base64_str}"
        markdown_str = markdown_str.replace(
            f"![{img_name}]({img_name})", f"![{img_name}]({data_uri})"
        )
    return markdown_str
def get_combined_markdown(ocr_response: OCRResponse) -> str:
    """Merge all pages of an OCR response into one Markdown string.

    For each page, the page's images are gathered into an ``id -> image_base64``
    mapping and inlined into that page's Markdown through
    ``replace_images_in_markdown``. The per-page results are joined with a
    blank line between them.

    Args:
        ocr_response: OCR result exposing ``pages``; each page exposes
            ``markdown`` and ``images`` (items with ``id`` and
            ``image_base64``).

    Returns:
        The combined Markdown for every page, in page order.
    """

    def render_page(page) -> str:
        # Map each image id on this page to its base64 payload.
        images_by_id = {image.id: image.image_base64 for image in page.images}
        return replace_images_in_markdown(page.markdown, images_by_id)

    rendered_pages = [render_page(page) for page in ocr_response.pages]
    return "\n\n".join(rendered_pages)
def process_pdf(pdf_path):
    """Run Mistral OCR on a PDF file and return the result as Markdown.

    Steps: upload the file's bytes with purpose ``"ocr"``, request a signed
    URL for the uploaded file, pass that URL to the ``mistral-ocr-latest``
    model with image data included, then combine the pages into one Markdown
    string via ``get_combined_markdown``.

    Args:
        pdf_path: Filesystem path of the PDF to process.

    Returns:
        Markdown text for the whole document.
    """
    source = Path(pdf_path)

    # The upload is named after the file's stem (the name without extension).
    upload = client.files.upload(
        file={"file_name": source.stem, "content": source.read_bytes()},
        purpose="ocr",
    )

    url_info = client.files.get_signed_url(file_id=upload.id, expiry=1)

    ocr_result = client.ocr.process(
        document=DocumentURLChunk(document_url=url_info.url),
        model="mistral-ocr-latest",
        include_image_base64=True,
    )
    return get_combined_markdown(ocr_result)
# Web UI: a single file-upload input wired to process_pdf, with the returned
# Markdown rendered as the output. The labels are user-facing Arabic strings
# ("Upload a PDF file" / "Output").
pdf_input = gr.File(type="filepath", label="ارفع ملف PDF")
markdown_output = gr.Markdown(label="الناتج")

demo = gr.Interface(
    fn=process_pdf,
    inputs=pdf_input,
    outputs=markdown_output,
)

# Start the server; share=True and show_error=True are passed through to Gradio.
demo.launch(share=True, show_error=True)