#!/usr/bin/env python3
"""Gradio UI that sends an uploaded image to a chat-completions endpoint.

The endpoint URL and model name are read from the ``VLLM_ENDPOINT`` and
``VLLM_MODEL`` environment variables. The model's streamed reply is shown
both as rendered Markdown and as raw text.
"""
import base64
import json
import os
from io import BytesIO

import gradio as gr
import requests
from PIL import Image

ENDPOINT = os.environ.get("VLLM_ENDPOINT")
MODEL = os.environ.get("VLLM_MODEL")

if not ENDPOINT or not MODEL:
    raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set. Please add them as secrets in your Space settings.")

# (connect, read) timeouts in seconds. The read timeout bounds the gap between
# two streamed chunks, not the total generation time, so a stalled endpoint
# cannot block a worker forever.
REQUEST_TIMEOUT = (10, 300)

# Image modes the Pillow PNG writer can store directly; anything else
# (for example CMYK) is converted to RGB before encoding.
_PNG_WRITABLE_MODES = frozenset({"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"})

INTRO_MARKDOWN = """
# 📖 Image to Text Extraction

**💡 How to use:**
1. Upload an image using the upload box
2. Adjust temperature if needed
3. Click "Extract Text" to process

The model will extract and format text from your image.
"""


def image_to_base64(image: Image.Image) -> str:
    """Return *image* encoded as a base64 string of its PNG bytes.

    Args:
        image: A PIL image (the UI below requests ``type="pil"``).

    Returns:
        The PNG file contents, base64-encoded and decoded to ``str``.
    """
    if image.mode not in _PNG_WRITABLE_MODES:
        # Saving e.g. a CMYK image as PNG raises; normalise first.
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _iter_content_deltas(lines):
    """Yield each non-empty ``delta.content`` string from streamed lines.

    Args:
        lines: Iterable of raw ``bytes`` lines from the streaming response.

    Blank lines and lines that are not valid JSON are skipped. Iteration
    stops at the ``[DONE]`` sentinel.
    """
    for raw_line in lines:
        if not raw_line:
            continue
        text = raw_line.decode("utf-8")
        # Accept both "data: {...}" and "data:{...}".
        if text.startswith("data:"):
            text = text[len("data:"):]
        text = text.strip()
        if text == "[DONE]":
            return
        try:
            chunk = json.loads(text)
        except json.JSONDecodeError:
            continue
        if not isinstance(chunk, dict):
            continue
        choices = chunk.get("choices")
        if not choices:
            continue
        # ``delta`` (or its ``content``) may be missing or null in some chunks.
        delta = choices[0].get("delta") or {}
        content = delta.get("content")
        if content:
            yield content


def process_image(image, temperature):
    """Stream the model's reply for *image* to the two output widgets.

    Args:
        image: PIL image from the upload widget, or ``None`` if nothing
            was uploaded.
        temperature: Sampling temperature forwarded in the request payload.

    Yields:
        ``(rendered_markdown, raw_text)`` tuples. While streaming, both items
        hold the text accumulated so far; on failure both hold an
        ``"Error: ..."`` message.
    """
    if image is None:
        yield "Please upload an image first.", ""
        return

    b64_image = image_to_base64(image)
    payload = {
        "model": MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ""},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}},
                ],
            }
        ],
        "temperature": temperature,
        "stream": True,
    }

    try:
        # The context manager releases the connection even when the stream
        # ends early or the caller abandons this generator.
        with requests.post(
            ENDPOINT,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            stream=True,
            timeout=REQUEST_TIMEOUT,
        ) as response:
            response.raise_for_status()
            accumulated_response = ""
            for content in _iter_content_deltas(response.iter_lines()):
                accumulated_response += content
                yield accumulated_response, accumulated_response
    except Exception as e:  # UI boundary: report the failure instead of crashing
        error_msg = f"Error: {str(e)}"
        yield error_msg, error_msg


with gr.Blocks(title="📖 Image OCR", theme=gr.themes.Soft()) as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(
                type="pil",
                label="🖼️ Upload Image",
                sources=["upload", "clipboard"],
                height=600,
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.15,
                step=0.05,
                label="Temperature",
            )
            submit_btn = gr.Button("Extract Text", variant="primary")
            clear_btn = gr.Button("Clear", variant="secondary")

        with gr.Column():
            output_text = gr.Markdown(
                label="📄 Extracted Text (Rendered)",
                value="\nExtracted text will appear here...\n",
                height=600,
            )

    with gr.Row():
        with gr.Column():
            raw_output = gr.Textbox(
                label="Raw Markdown Output",
                placeholder="Raw text will appear here...",
                lines=30,
                show_copy_button=True,
            )

    submit_btn.click(
        fn=process_image,
        inputs=[image_input, temperature],
        outputs=[output_text, raw_output],
    )

    # Reset the image and both output widgets.
    clear_btn.click(
        fn=lambda: (None, "", ""),
        outputs=[image_input, output_text, raw_output],
    )

if __name__ == "__main__":
    demo.launch()