# (Hugging Face Spaces status header removed from scraped page: "Spaces: Running")
| #!/usr/bin/env python3 | |
| import os | |
| import json | |
| import base64 | |
| import requests | |
| import gradio as gr | |
| from PIL import Image | |
| from io import BytesIO | |
# Endpoint configuration is injected via HF Spaces secrets; fail fast at
# startup if either value is missing so the misconfiguration is obvious.
ENDPOINT = os.environ.get("VLLM_ENDPOINT")
MODEL = os.environ.get("VLLM_MODEL")
if not (ENDPOINT and MODEL):
    raise ValueError("VLLM_ENDPOINT and VLLM_MODEL environment variables must be set. Please add them as secrets in your Space settings.")
def image_to_base64(image):
    """Encode an image as a base64 PNG string.

    Args:
        image: a PIL Image (any object exposing ``save(buffer, format=...)``).

    Returns:
        str: the PNG bytes, base64-encoded and ASCII-decoded.
    """
    buffer = BytesIO()
    image.save(buffer, format="PNG")
    raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")
def process_image(image, temperature):
    """Stream extracted text for *image* from the vLLM chat-completions endpoint.

    Args:
        image: PIL Image to OCR, or ``None`` (yields a user-facing prompt).
        temperature: float sampling temperature forwarded to the model.

    Yields:
        str: the accumulated response text so far (grows chunk by chunk),
        or a single ``"Error: ..."`` message if the request fails.
    """
    if image is None:
        # BUGFIX: this was `return "..."` — inside a generator that stuffs the
        # string into StopIteration and the user never sees it. Yield instead.
        yield "Please upload an image first."
        return

    b64_image = image_to_base64(image)

    # OpenAI-compatible chat payload: a single user turn with an empty text
    # part plus the image as a data URL (image-only OCR prompt).
    payload = {
        "model": MODEL,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": ""},
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64_image}"}}
                ]
            }
        ],
        "temperature": temperature,
        "stream": True
    }

    try:
        # BUGFIX: added a timeout (10s connect, 300s between streamed chunks)
        # so a hung endpoint cannot block the Gradio worker forever.
        # `json=payload` also sets the Content-Type header automatically.
        response = requests.post(
            ENDPOINT,
            headers={"Content-Type": "application/json"},
            json=payload,
            stream=True,
            timeout=(10, 300)
        )
        response.raise_for_status()

        accumulated_response = ""
        # The endpoint streams Server-Sent Events: "data: {json}" lines,
        # terminated by a literal "data: [DONE]".
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if line.startswith("data: "):
                line = line[6:]  # strip the SSE "data: " prefix
            if line.strip() == "[DONE]":
                break
            try:
                chunk = json.loads(line)
            except json.JSONDecodeError:
                continue  # skip keep-alives / non-JSON lines
            choices = chunk.get("choices") or []
            if choices:
                content = choices[0].get("delta", {}).get("content", "")
                if content:
                    accumulated_response += content
                    yield accumulated_response
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: surface any
        # network / HTTP / decoding failure to the user instead of crashing.
        yield f"Error: {str(e)}"
def _stream_to_both_outputs(img, temp):
    """Run one OCR pass and mirror each partial result to both output widgets.

    BUGFIX: the previous click handler called process_image twice (issuing two
    API requests per click) and returned a tuple of *generator objects*, which
    Gradio renders as their reprs instead of streaming. Streaming to multiple
    outputs requires a single generator that yields tuples, as done here.
    """
    for partial in process_image(img, temp):
        yield partial, partial


# Build the Gradio Interface
with gr.Blocks(title="π Image OCR", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # π Image to Text Extraction
        **π‘ How to use:**
        1. Upload an image using the upload box
        2. Adjust temperature if needed
        3. Click "Extract Text" to process
        The model will extract and format text from your image.
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="πΌοΈ Upload Image",
                sources=["upload", "clipboard"]
            )
            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.15,
                step=0.05,
                label="Temperature"
            )
            submit_btn = gr.Button("Extract Text", variant="primary")
            clear_btn = gr.Button("Clear", variant="secondary")
        with gr.Column(scale=2):
            output_text = gr.Markdown(
                label="π Extracted Text",
                value="<div style='min-height: 400px; padding: 10px; border: 1px solid #e0e0e0; border-radius: 4px; background-color: #f9f9f9;'><em>Extracted text will appear here...</em></div>"
            )
            with gr.Row():
                raw_output = gr.Textbox(
                    label="Raw Output",
                    placeholder="Raw text will appear here...",
                    lines=10,
                    show_copy_button=True
                )

    # Event handlers: one streamed OCR pass feeds both the rendered markdown
    # panel and the copyable raw-text box.
    submit_btn.click(
        fn=_stream_to_both_outputs,
        inputs=[image_input, temperature],
        outputs=[output_text, raw_output]
    )
    clear_btn.click(
        fn=lambda: (None, "", ""),
        outputs=[image_input, output_text, raw_output]
    )

    gr.Markdown("""
    ---
    **Note:** Configure endpoint via `VLLM_ENDPOINT` and `VLLM_MODEL` environment variables.
    """)

if __name__ == "__main__":
    demo.launch()