Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from typing import Dict | |
| import os | |
| import base64 | |
| from magic_pdf.user_api import parse_union_pdf | |
| from magic_pdf.rw import BaseReaderWriter | |
| from loguru import logger | |
| from io import BytesIO | |
class InMemoryReaderWriter(BaseReaderWriter):
    """Keep written files in a plain dict instead of on disk.

    ``write`` stores content verbatim under its path and hands back a
    ``memory://`` URI; ``read`` accepts either the bare path or the
    prefixed URI form.
    """

    def __init__(self):
        # Maps path -> raw content handed to write().
        self.storage = {}

    def write(self, content, path, content_type=None):
        """Store *content* under *path*; return its memory:// URI."""
        self.storage[path] = content
        return f"memory://{path}"

    def read(self, path):
        """Return content previously written under *path*.

        Raises:
            FileNotFoundError: nothing was ever stored at *path*.
        """
        # Accept the URI form produced by write() as well as bare paths.
        key = path[9:] if path.startswith("memory://") else path
        if key not in self.storage:
            raise FileNotFoundError(f"File not found in memory storage: {key}")
        return self.storage[key]
def get_storage():
    """Create and return a fresh in-memory reader/writer instance."""
    return InMemoryReaderWriter()
def inference(inputs: Dict):
    """Serverless API entry point.

    Args:
        inputs: request payload. Required key ``"pdf_bytes"`` holds a
            base64-encoded PDF. Optional keys: ``"lang"`` (default "zh"),
            ``"layout_model"``, ``"formula_enable"``, ``"table_enable"``
            (all default True).

    Returns:
        ``{"status": "success", "data": ...}`` on success, otherwise
        ``{"status": "error", "message": ...}``. Never raises: the outer
        handler converts unexpected failures into an error response.
    """
    try:
        # Validate input presence.
        if "pdf_bytes" not in inputs:
            return {"status": "error", "message": "No PDF data provided"}

        # Base64-decode the PDF content.
        try:
            pdf_bytes = base64.b64decode(inputs["pdf_bytes"])
        except Exception as e:
            return {"status": "error", "message": f"Invalid PDF data: {str(e)}"}

        # b64decode is lenient by default (non-alphabet characters are
        # discarded), so an empty or garbage-only payload decodes to b""
        # without raising. Reject it here rather than handing an empty
        # document to the parser.
        if not pdf_bytes:
            return {"status": "error", "message": "Invalid PDF data: decoded content is empty"}

        # In-memory writer so extracted images never touch the filesystem.
        image_writer = get_storage()

        # Optional knobs forwarded to the parser, with API defaults.
        kwargs = {
            "lang": inputs.get("lang", "zh"),
            "layout_model": inputs.get("layout_model", True),
            "formula_enable": inputs.get("formula_enable", True),
            "table_enable": inputs.get("table_enable", True),
            "input_model_is_empty": True,  # no precomputed model output supplied
        }

        # Process using parse_union_pdf with built-in models.
        result = parse_union_pdf(
            pdf_bytes=pdf_bytes,
            pdf_models=[],  # use built-in models
            imageWriter=image_writer,
            **kwargs,
        )
        return {
            "status": "success",
            "data": result,
        }
    except Exception as e:
        logger.exception("Error processing PDF")
        return {
            "status": "error",
            "message": str(e),
        }
| # Create Gradio interface | |
def process_pdf_ui(pdf_file, lang="zh", layout_model=True, formula_enable=True, table_enable=True):
    """Gradio UI handler: forward an uploaded PDF to ``inference``.

    Args:
        pdf_file: the upload from gr.File. Depending on the Gradio
            version/config this may be raw bytes, a filesystem path (str —
            the default "filepath" type), a file-like object, or a file
            wrapper exposing ``.name``.
        lang, layout_model, formula_enable, table_enable: forwarded to
            the inference payload unchanged.

    Returns:
        The result dict produced by ``inference``.
    """
    if pdf_file is None:
        return {"status": "error", "message": "No PDF file provided"}

    # Normalize the upload to raw bytes. The original code assumed bytes,
    # but gr.File's default type delivers a path string, which would make
    # base64.b64encode raise TypeError — accept all common shapes instead.
    if isinstance(pdf_file, bytes):
        pdf_bytes = pdf_file
    elif isinstance(pdf_file, str):
        with open(pdf_file, "rb") as f:
            pdf_bytes = f.read()
    elif hasattr(pdf_file, "read"):
        pdf_bytes = pdf_file.read()
    else:
        # Gradio file wrappers expose the temp path via .name — TODO confirm
        # against the installed Gradio version.
        with open(pdf_file.name, "rb") as f:
            pdf_bytes = f.read()

    # Convert to base64 for the JSON API payload.
    encoded_pdf = base64.b64encode(pdf_bytes).decode('utf-8')

    # Call the inference function with the same payload shape as the API.
    return inference({
        "pdf_bytes": encoded_pdf,
        "lang": lang,
        "layout_model": layout_model,
        "formula_enable": formula_enable,
        "table_enable": table_enable,
    })
# Create Gradio interface with API
# Top-level Blocks app: a demo tab wired to process_pdf_ui plus a static
# API-documentation tab. Launching it exposes both the UI and the HTTP
# prediction endpoint.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Processing API")
    with gr.Tab("UI Demo"):
        with gr.Row():
            with gr.Column():
                # Input widgets mirror the parameters of process_pdf_ui.
                pdf_input = gr.File(label="Upload PDF")
                lang = gr.Dropdown(["zh", "en"], label="Language", value="zh")
                layout_model = gr.Checkbox(label="Use Layout Model", value=True)
                formula_enable = gr.Checkbox(label="Enable Formula Detection", value=True)
                table_enable = gr.Checkbox(label="Enable Table Detection", value=True)
                submit_btn = gr.Button("Process PDF")
            with gr.Column():
                # Raw result dict from inference is rendered as JSON.
                output = gr.JSON(label="Result")
        # Wire the button to the handler.
        submit_btn.click(
            fn=process_pdf_ui,
            inputs=[pdf_input, lang, layout_model, formula_enable, table_enable],
            outputs=output
        )
    with gr.Tab("API Documentation"):
        # Static usage docs shown in the second tab (runtime string —
        # rendered as markdown, not executed).
        gr.Markdown("""
        ## API Usage
        ### Endpoint
        `POST https://marcosremar2-apimineru.hf.space/api/predict`
        ### Request Format
        ```json
        {
            "pdf_bytes": "base64 encoded PDF content",
            "lang": "zh",  // Optional, default "zh"
            "layout_model": true,  // Optional, default true
            "formula_enable": true,  // Optional, default true
            "table_enable": true  // Optional, default true
        }
        ```
        ### Python Example
        ```python
        from huggingface_hub import InferenceClient
        import base64
        def process_pdf(pdf_path: str, hf_token: str):
            # Create client
            client = InferenceClient(
                model="marcosremar2/apimineru",
                token=hf_token
            )
            # Read and encode PDF
            with open(pdf_path, 'rb') as f:
                pdf_bytes = base64.b64encode(f.read()).decode()
            # Send request
            response = client.post(json={
                "pdf_bytes": pdf_bytes,
                "lang": "zh",
                "layout_model": True,
                "formula_enable": True,
                "table_enable": True
            })
            return response
        ```
        """)
# This exposes both the UI and API endpoints
demo.queue().launch()