Spaces:

NexaAI
/

omnivlm-dpo-demo

Running

App Files Files Community

PerryCheng614 committed on Dec 17, 2024

Commit

a2bfb71

1 Parent(s): 776bb63

change back to wss

Browse files

Files changed (1) hide show

app.py +44 -63

app.py CHANGED Viewed

@@ -1,79 +1,67 @@
 import gradio as gr
-import requests
 import json
 import os
-import time
# Key sent to the backend as the X-API-Key header; refuse to start without it.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable must be set")
def process_image_stream(image_path, prompt, max_tokens=512):
    """
    Process image with streaming response via HTTP

    The image is uploaded together with the prompt, and the reply is read
    line by line. Lines starting with ``data: `` carry a JSON object whose
    "status" is "generating" (with a "token"), "complete", or "error"
    (with an "error" message).

    Args:
        image_path: Path of the image file to upload. A falsy value yields
            a request to upload an image and stops.
        prompt: Text sent as the "prompt" form field.
        max_tokens: Value sent as the "max_tokens" form field.

    Yields:
        The answer accumulated so far after every accepted token, or a
        single human-readable error message.
    """
    if not image_path:
        yield "Please upload an image first."
        return

    # Whitespace / chat-template tokens that are dropped when they appear
    # among the first three tokens of the stream.
    leading_noise = (" ", " \n", "\n", "<|im_start|>", "assistant")

    try:
        # Read and prepare image file; the upload is complete once post()
        # returns, so the file can be closed before the reply is consumed.
        with open(image_path, 'rb') as img_file:
            response = requests.post(
                'https://nexa-omni.nexa4ai.com/process-image/',
                files={'image': ('image.jpg', img_file, 'image/jpeg')},
                data={
                    'prompt': prompt,
                    'task': 'instruct',
                    'max_tokens': max_tokens,
                },
                headers={'X-API-Key': API_KEY},
                stream=True,
                # (connect, read) seconds: without a timeout a stalled
                # server would block this generator forever.
                timeout=(10, 120),
            )

        # A streamed response holds its connection until closed; the
        # context manager releases it on every exit path.
        with response:
            if response.status_code != 200:
                yield f"Error: Server returned status code {response.status_code}"
                return

            response_text = ""
            tokens_seen = 0
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue
                line = raw_line.decode('utf-8')
                if not line.startswith('data: '):
                    continue
                try:
                    event = json.loads(line[6:])  # Skip 'data: ' prefix
                except json.JSONDecodeError:
                    continue
                if not isinstance(event, dict):
                    # Not an event object; ignore it like any malformed line.
                    continue

                status = event.get("status")
                if status == "generating":
                    token = event.get("token", "")
                    # Count every token so the noise filter is limited to
                    # the first three positions of the stream and never
                    # removes whitespace from the middle of the answer.
                    tokens_seen += 1
                    if tokens_seen <= 3 and token in leading_noise:
                        continue
                    response_text += token
                    yield response_text
                    # Short pause between updates (presumably to pace the UI).
                    time.sleep(0.01)
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {event.get('error', 'unknown error')}"
                    break
    except Exception as e:
        # Top-level boundary: surface any failure as text in the UI.
        yield f"Error processing request: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
@@ -97,7 +85,6 @@ demo = gr.Interface(
     title="NEXA OmniVLM-968M",
     description=f"""
     Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniVLM-968M">NexaAIDev/OmniVLM-968M</a>
     *Model updated on Nov 21, 2024\n
     Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
     """,
@@ -109,10 +96,4 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
     # Turn on request queuing (queue size capped at 20) and then serve on
     # all interfaces, port 7860.
     queued_app = demo.queue(max_size=20)
     queued_app.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import websockets
+import asyncio
 import json
+import base64
+from PIL import Image
+import io
 import os
# Key appended to the WebSocket URL as api_key; refuse to start without it.
API_KEY = os.environ.get('API_KEY')
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY must be set in environment variables")
# Whitespace / chat-template tokens that are dropped when they appear among
# the first _LEADING_NOISE_WINDOW tokens of a stream.
_LEADING_NOISE_TOKENS = (" ", " \n", "\n", "<|im_start|>", "assistant")
_LEADING_NOISE_WINDOW = 3


def _image_to_data_uri(image_path):
    """Return the image at *image_path* re-encoded as a base64 JPEG data URI."""
    with Image.open(image_path) as img:
        buffer = io.BytesIO()
        # Convert to RGB first so images with alpha or a palette can be
        # written as JPEG.
        img.convert('RGB').save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f"data:image/jpeg;base64,{encoded}"


async def process_image_stream(image_path, prompt, max_tokens=512):
    """
    Process image with streaming response via WebSocket

    The image is re-encoded as JPEG and sent with the prompt in a single
    JSON message. Every reply message is a JSON object whose "status" is
    "generating" (with a "token"), "complete", or "error" (with an "error"
    message).

    Args:
        image_path: Path of the image file to send. A falsy value yields a
            request to upload an image and stops.
        prompt: Text sent as the "prompt" field.
        max_tokens: Value sent as the "max_tokens" field.

    Yields:
        The answer accumulated so far after every accepted token, or a
        single human-readable error message.
    """
    if not image_path:
        yield "Please upload an image first."
        return

    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    try:
        image_data_uri = _image_to_data_uri(image_path)

        # Percent-encode the key so characters such as '&', '+' or '#'
        # cannot corrupt the query string.
        query = urlencode({"api_key": API_KEY})
        url = f"wss://nexa-omni.nexa4ai.com/ws/process-image/?{query}"

        async with websockets.connect(url) as websocket:
            # Send image data and parameters as JSON
            await websocket.send(json.dumps({
                "image": image_data_uri,
                "prompt": prompt,
                "task": "instruct",  # Fixed to instruct
                "max_tokens": max_tokens,
            }))

            response = ""
            tokens_seen = 0
            async for message in websocket:
                try:
                    data = json.loads(message)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict):
                    # Not an event object; ignore it like any malformed frame.
                    continue

                status = data.get("status")
                if status == "generating":
                    token = data.get("token", "")
                    # Count every token so the noise filter is limited to
                    # the first positions of the stream and never removes
                    # whitespace from the middle of the answer.
                    tokens_seen += 1
                    if (tokens_seen <= _LEADING_NOISE_WINDOW
                            and token in _LEADING_NOISE_TOKENS):
                        continue
                    response += token
                    yield response
                elif status == "complete":
                    break
                elif status == "error":
                    yield f"Error: {data.get('error', 'unknown error')}"
                    break
    except Exception as e:
        # Top-level boundary: surface any failure as text in the UI.
        yield f"Error connecting to server: {str(e)}"
 # Create Gradio interface
 demo = gr.Interface(
     title="NEXA OmniVLM-968M",
     description=f"""
     Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniVLM-968M">NexaAIDev/OmniVLM-968M</a>
     *Model updated on Nov 21, 2024\n
     Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
     """,
 )
 if __name__ == "__main__":
     # Enable queuing, then serve on all interfaces at port 7860.
     launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
     demo.queue().launch(**launch_options)