Spaces:
Running
Running
| import gradio as gr | |
| import requests | |
| import json | |
| import base64 | |
| from PIL import Image | |
| import io | |
| import os | |
| from dotenv import load_dotenv | |
# Pull any settings from a local .env file into the process environment
# before the key is looked up.
load_dotenv()

# Key sent with every request to the inference endpoint; the app refuses to
# start without it.
API_KEY = os.environ.get("API_KEY")
if not API_KEY:
    raise ValueError("API_KEY environment variable must be set")
# Tokens dropped when they show up at the very start of the stream, before
# any answer text has been emitted.
_PREAMBLE_TOKENS = frozenset({" ", " \n", "\n", "<|im_start|>", "assistant"})
# Upper bound on how many leading tokens may be dropped.
_MAX_PREAMBLE_TOKENS = 3
# Prefix carried by every event line that holds a JSON payload.
_EVENT_PREFIX = "data: "
# (connect, read) timeouts in seconds. requests waits forever when no timeout
# is given, which would pin a queue worker if the server stalls.
_REQUEST_TIMEOUT = (10, 120)


def _accumulate_stream(lines):
    """Turn raw event lines into successive snapshots of the response text.

    Args:
        lines: Iterable of ``bytes`` lines as produced by
            ``requests.Response.iter_lines()``.

    Yields:
        The response text accumulated so far, once per generated token, or a
        single ``"Error: ..."`` message if the stream reports an error.
    """
    response_text = ""
    skipped = 0
    for raw_line in lines:
        if not raw_line:
            continue
        line = raw_line.decode("utf-8")
        if not line.startswith(_EVENT_PREFIX):
            continue
        try:
            event = json.loads(line[len(_EVENT_PREFIX):])
        except json.JSONDecodeError:
            # Malformed payloads are ignored rather than aborting the stream.
            continue
        if not isinstance(event, dict):
            continue

        status = event.get("status")
        if status == "generating":
            token = event.get("token", "")
            # Only drop preamble tokens while nothing has been emitted yet;
            # the same whitespace tokens are legitimate inside the answer.
            if (
                not response_text
                and skipped < _MAX_PREAMBLE_TOKENS
                and token in _PREAMBLE_TOKENS
            ):
                skipped += 1
                continue
            response_text += token
            yield response_text
        elif status == "complete":
            return
        elif status == "error":
            yield f"Error: {event.get('error', 'unknown error')}"
            return


def process_image_stream(image_path, prompt, max_tokens=512):
    """Send an image and prompt to the inference endpoint and stream the reply.

    Args:
        image_path: Path of the image file to upload. A falsy value produces
            a prompt asking the user to upload an image.
        prompt: Text sent in the ``prompt`` form field.
        max_tokens: Value sent in the ``max_tokens`` form field.

    Yields:
        The response text accumulated so far after each generated token, or a
        single human-readable error message. Failures are reported as yielded
        text instead of raised, because this generator feeds the UI directly.
    """
    if not image_path:
        yield "Please upload an image first."
        return

    try:
        # The upload body is sent in full before post() returns, so the file
        # can be closed as soon as the request has been made.
        with open(image_path, "rb") as img_file:
            files = {"image": ("image.jpg", img_file, "image/jpeg")}
            form = {
                "prompt": prompt,
                "task": "instruct",
                "max_tokens": max_tokens,
            }
            headers = {"X-API-Key": API_KEY}
            response = requests.post(
                "https://nexa-omni.nexa4ai.com/process-image/",
                files=files,
                data=form,
                headers=headers,
                stream=True,
                timeout=_REQUEST_TIMEOUT,
            )

        # A streamed response keeps its connection open until it is closed;
        # the context manager releases it on every exit path, including the
        # consumer abandoning this generator midway.
        with response:
            if response.status_code != 200:
                yield f"Error: Server returned status code {response.status_code}"
                return
            yield from _accumulate_stream(response.iter_lines())
    except Exception as e:
        # UI boundary: surface any failure as text in the output box.
        yield f"Error processing request: {str(e)}"
# Build the Gradio UI: three inputs (image, question, token budget) wired to
# the streaming handler, with a read-only textbox for the answer.
_image_input = gr.Image(type="filepath", label="Upload Image")
_question_input = gr.Textbox(
    label="Question",
    placeholder="Ask a question about the image...",
    value="Describe this image",
)
_max_tokens_input = gr.Slider(
    minimum=50,
    maximum=200,
    value=200,
    step=1,
    label="Max Tokens",
)
_response_output = gr.Textbox(label="Response", interactive=False)

# Text shown under the title.
_DESCRIPTION = """
Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniVLM-968M">NexaAIDev/OmniVLM-968M</a>
*Model updated on Nov 21, 2024\n
Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
"""

# Each example row supplies values for the inputs in order:
# image path, question, max tokens.
_EXAMPLES = [
    ["example_images/example_1.jpg", "What kind of cat is this?", 128],
    ["example_images/example_2.jpg", "What color is this dress? ", 128],
    ["example_images/example_3.jpg", "What is this image about?", 128],
]

demo = gr.Interface(
    fn=process_image_stream,
    inputs=[_image_input, _question_input, _max_tokens_input],
    outputs=_response_output,
    title="NEXA OmniVLM-968M",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
)
if __name__ == "__main__":
    # Enable the request queue, then serve on all interfaces at port 7860.
    queued_demo = demo.queue()
    queued_demo.launch(server_name="0.0.0.0", server_port=7860)