Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| import os | |
| import json | |
| import base64 | |
| import requests | |
| import gradio as gr | |
| from PIL import Image | |
| from io import BytesIO | |
| import pypdfium2 as pdfium | |
| from pathlib import Path | |
# The completion endpoint URL and the model name are supplied through the
# environment; the module refuses to load unless both are present and non-empty.
ENDPOINT, MODEL = (os.getenv(name) for name in ("VLLM_ENDPOINT", "VLLM_MODEL"))
if not (ENDPOINT and MODEL):
    raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set.")
def image_to_base64(image):
    """Encode an image as the base64 text of its PNG serialisation.

    Args:
        image: A PIL image (any object exposing ``mode``, ``convert`` and
            ``save`` is sufficient).

    Returns:
        The PNG bytes of the image, base64-encoded, as a ``str`` without any
        ``data:`` URL prefix.
    """
    # Modes that are written to PNG unchanged. RGBA is left out on purpose:
    # this function has always flattened it to RGB. Every other mode (for
    # example CMYK or YCbCr coming from a JPEG) cannot be written as PNG and
    # used to make ``save`` raise, so it is converted to RGB as well.
    passthrough_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB"}
    if image.mode not in passthrough_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
def render_pdf_page(page, max_resolution=1280, scale=2.77):
    """Render one PDF page to a PIL image, capping its pixel dimensions.

    The page is rendered at ``scale`` unless that would make either side
    exceed ``max_resolution`` pixels, in which case the scale is reduced so
    the larger side lands on ``max_resolution``.

    Args:
        page: A pypdfium2 page object (must provide ``get_size()`` and
            ``render()``).
        max_resolution: Upper bound in pixels applied to both width and height.
        scale: Render scale used when the page already fits the bound; it is
            multiplied with the size reported by ``page.get_size()``.

    Returns:
        The rendered page converted to a PIL image.
    """
    width, height = page.get_size()
    pixel_width = width * scale
    pixel_height = height * scale
    # Cap the factor at 1 so it can only shrink. Without the cap ``scale``
    # cancels out of ``target_scale`` entirely and small pages are blown up
    # to ``max_resolution`` instead of being rendered at ``scale``.
    resize_factor = min(1, max_resolution / pixel_width, max_resolution / pixel_height)
    target_scale = scale * resize_factor
    return page.render(scale=target_scale, rev_byteorder=True).to_pil()
def process_pdf(pdf_path, max_pages=5):
    """Render the leading pages of a PDF to PIL images.

    Args:
        pdf_path: Path of the PDF file to open.
        max_pages: Maximum number of pages to render, starting from the
            first page of the document.

    Returns:
        A tuple ``(images, total_pages)``: the rendered pages in document
        order, and the total number of pages in the PDF (which may exceed
        ``len(images)``).
    """
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        total_pages = len(pdf)
        page_count = min(total_pages, max_pages)
        images = [render_pdf_page(pdf[index]) for index in range(page_count)]
    finally:
        # Close the document even when loading or rendering a page fails,
        # so the handle is not leaked on the error path.
        pdf.close()
    return images, total_pages
def process_single_page(pdf_path, page_number):
    """Render a single, 1-based page of a PDF to a PIL image.

    Args:
        pdf_path: Path of the PDF file to open.
        page_number: 1-based number of the page to render.

    Returns:
        A tuple ``(image, total_pages)``. ``image`` is ``None`` when
        ``page_number`` lies outside ``1..total_pages``; ``total_pages`` is
        always the document's page count.
    """
    pdf = pdfium.PdfDocument(pdf_path)
    try:
        total_pages = len(pdf)
        if not 1 <= page_number <= total_pages:
            return None, total_pages
        # Pages are indexed from 0 in the document, from 1 for the caller.
        return render_pdf_page(pdf[page_number - 1]), total_pages
    finally:
        # Single exit point for cleanup: runs for the out-of-range return,
        # the normal return, and any rendering failure alike.
        pdf.close()
# (connect, read) timeouts in seconds for the completion request. The read
# timeout bounds the wait for each chunk of the stream, so it is generous
# enough for a slow first token while still stopping a stalled server from
# blocking the request forever (requests applies no timeout by default).
_REQUEST_TIMEOUT = (10, 300)


def _iter_content_deltas(lines):
    """Yield the non-empty ``content`` fragments found in streamed SSE lines.

    ``lines`` is an iterable of raw ``bytes`` lines. A ``data: `` prefix is
    stripped when present, iteration stops at the ``[DONE]`` marker, and
    lines that are not valid JSON are skipped.
    """
    for raw_line in lines:
        if not raw_line:
            continue
        data = raw_line.decode("utf-8")
        if data.startswith("data: "):
            data = data[6:]
        if data.strip() == "[DONE]":
            return
        try:
            chunk = json.loads(data)
        except json.JSONDecodeError:
            continue
        choices = chunk.get("choices") if isinstance(chunk, dict) else None
        if not choices:
            continue
        delta = choices[0].get("delta") or {}
        text = delta.get("content", "")
        if text:
            yield text


def process_input(file_input, temperature, page_number):
    """Send an uploaded image or PDF to the model and stream back its text.

    Files whose name ends in ``.pdf`` are rendered to images first: a
    positive ``page_number`` selects that single page, anything else selects
    the first five pages. Any other file is opened directly as an image. All
    images are sent in one chat-completion request to ``ENDPOINT``.

    Args:
        file_input: Path of the uploaded file, an object with a ``name``
            attribute holding that path, or ``None`` when nothing is uploaded.
        temperature: Sampling temperature forwarded in the request payload.
        page_number: 1-based PDF page to process; ``0`` (or ``None``) means
            "first five pages". Ignored for image files.

    Yields:
        3-tuples ``(rendered_text, raw_text, page_info)``. While streaming,
        the first two items both hold the text accumulated so far. On failure
        a single tuple carrying an error message is yielded instead.
    """
    if file_input is None:
        yield "Please upload an image or PDF first.", "", ""
        return

    # Treat a missing page number like 0 rather than letting ``None > 0``
    # surface as a confusing TypeError message; int() also makes a
    # float-valued number usable as a page index.
    page_number = int(page_number or 0)

    page_info = ""
    file_path = file_input if isinstance(file_input, str) else file_input.name

    if file_path.lower().endswith('.pdf'):
        try:
            if page_number > 0:
                img, total_pages = process_single_page(file_path, page_number)
                if img is None:
                    yield f"Error: Page {page_number} does not exist. PDF has {total_pages} pages.", "", ""
                    return
                images_to_process = [img]
                page_info = f"Processing page {page_number} of {total_pages}"
            else:
                images_to_process, total_pages = process_pdf(file_path, max_pages=5)
                if len(images_to_process) == 0:
                    yield "Error: Could not extract pages from PDF.", "", ""
                    return
                page_info = f"Processing first {len(images_to_process)} pages of {total_pages}"
        except Exception as e:
            yield f"Error processing PDF: {str(e)}", "", ""
            return
    else:
        try:
            images_to_process = [Image.open(file_path)]
        except Exception as e:
            yield f"Error opening image: {str(e)}", "", ""
            return

    if not all(isinstance(img, Image.Image) for img in images_to_process):
        yield "Error: Invalid image format.", "", ""
        return

    # The message starts with an empty text part, followed by one image part
    # per page, each embedded as a base64 PNG data URL.
    content = [{"type": "text", "text": ""}]
    try:
        for img in images_to_process:
            content.append({
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_to_base64(img)}"},
            })
    except Exception as e:
        yield f"Error encoding image: {str(e)}", "", ""
        return

    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": content}],
        "temperature": temperature,
        "stream": True,
    }

    try:
        # The context manager closes the streamed response, releasing the
        # connection, on normal completion, on error, and when the consumer
        # abandons this generator early.
        with requests.post(
            ENDPOINT,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            stream=True,
            timeout=_REQUEST_TIMEOUT,
        ) as response:
            response.raise_for_status()
            accumulated_response = ""
            for text in _iter_content_deltas(response.iter_lines()):
                accumulated_response += text
                yield accumulated_response, accumulated_response, page_info
    except Exception as e:
        # UI boundary: report any request or stream failure to the user
        # instead of letting it propagate.
        error_msg = f"Error: {str(e)}"
        yield error_msg, error_msg, page_info
# Gradio UI: an upload/controls column, a rendered-output column, and a raw
# text box, wired to process_input.
# NOTE(review): the nesting of the layout contexts below was reconstructed
# from flattened source - confirm it against the intended layout.
# NOTE(review): the "π…" sequences in the title, heading and labels look like
# emoji that were mis-decoded somewhere upstream; they are left untouched
# here - restore them from the original file rather than guessing.
with gr.Blocks(title="π Image/PDF OCR", theme=gr.themes.Soft()) as demo:
    # Intro text. The Markdown body is kept flush-left so that no line of
    # the string carries leading indentation.
    gr.Markdown(
        """
# π Image/PDF to Text Extraction
**π‘ How to use:**
1. Upload an image OR a PDF (max 5 pages)
2. Adjust temperature if needed
3. Click "Extract Text" to process
The model will extract and format text from your document.
"""
    )
    with gr.Row():
        # Left column: file upload, page selection, sampling control, actions.
        with gr.Column(scale=1):
            # type="filepath" hands process_input a path string.
            file_input = gr.File(
                label="πΌοΈ Upload Image or PDF",
                file_types=[".pdf", ".png", ".jpg", ".jpeg"],
                type="filepath"
            )
            # 0 selects the "first 5 pages" mode of process_input; a positive
            # value selects that single page.
            page_number = gr.Number(
                label="PDF: Page Number (0 = first 5 pages)",
                value=0,
                minimum=0,
                step=1,
                precision=0
            )
            # Read-only status line; receives the third value yielded by
            # process_input.
            page_info = gr.Textbox(
                label="Page Info",
                value="",
                interactive=False
            )
            gr.Markdown("*Upload an image (PNG/JPG) or PDF. For PDF: 0 = first 5 pages, or specify page number*")
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.2,
                step=0.05,
                label="Temperature"
            )
            submit_btn = gr.Button("Extract Text", variant="primary")
            clear_btn = gr.Button("Clear", variant="secondary")
        # Right column: the model output rendered as Markdown. The initial
        # value is an HTML placeholder shown until the first result arrives.
        with gr.Column(scale=2):
            output_text = gr.Markdown(
                label="π Extracted Text (Rendered)",
                value="<div style='min-height: 600px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>",
                height=600
            )
    with gr.Row():
        with gr.Column():
            # Same text as output_text, unrendered, with a copy button.
            raw_output = gr.Textbox(
                label="Raw Markdown Output",
                placeholder="Raw text will appear here...",
                lines=20,
                max_lines=30,
                show_copy_button=True
            )
    # process_input is a generator, so the three outputs are updated on
    # every tuple it yields.
    submit_btn.click(
        fn=process_input,
        inputs=[file_input, temperature, page_number],
        outputs=[output_text, raw_output, page_info]
    )
    # Reset every field; the tuple order matches the outputs list. Note that
    # output_text is reset to an empty string, not to its initial placeholder.
    clear_btn.click(
        fn=lambda: (None, "", "", 0, ""),
        outputs=[file_input, output_text, raw_output, page_number, page_info]
    )
# Start the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()