# Hugging Face Space app (page status at capture time: Running)
| #!/usr/bin/env python3 | |
| import os | |
| import json | |
| import base64 | |
| import requests | |
| import gradio as gr | |
| from PIL import Image | |
| from io import BytesIO | |
# vLLM connection settings are injected as secrets in the Space settings.
ENDPOINT = os.environ.get("VLLM_ENDPOINT")
MODEL = os.environ.get("VLLM_MODEL")

# Fail fast at startup if either secret is missing or empty.
if not (ENDPOINT and MODEL):
    raise ValueError(
        "VLLM_ENDPOINT and VLLM_MODEL environment variables must be set. "
        "Please add them as secrets in your Space settings."
    )
def image_to_base64(image):
    """Encode *image* (a PIL Image) as a base64 string of its PNG bytes.

    The image is serialized into an in-memory PNG buffer; the buffer's
    raw bytes are then base64-encoded and returned as UTF-8 text.
    """
    buf = BytesIO()
    image.save(buf, format="PNG")
    raw_png = buf.getvalue()
    return base64.b64encode(raw_png).decode("utf-8")
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion (with optional images) from the vLLM endpoint.

    Args:
        message: Either a plain string, or a Gradio multimodal dict with
            "text" and "files" keys.
        history: Prior turns as OpenAI-style {"role", "content"} dicts.
        system_message: System prompt prepended to the conversation.
        max_tokens: Generation cap forwarded to the endpoint.
        temperature: Sampling temperature forwarded to the endpoint.
        top_p: Nucleus-sampling mass forwarded to the endpoint.

    Yields:
        The accumulated assistant text after each streamed SSE chunk, or a
        single "Error: ..." string if the request fails.
    """
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append(_build_user_message(message))

    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "stream": True,
    }
    try:
        # `json=` serializes and sets Content-Type in one step.  The timeout
        # (connect, read) keeps a dead endpoint from hanging the UI forever;
        # `with` guarantees the streaming connection is closed.
        with requests.post(
            ENDPOINT,
            json=payload,
            stream=True,
            timeout=(10, 300),
        ) as response:
            response.raise_for_status()
            accumulated = ""
            for raw_line in response.iter_lines():
                if not raw_line:
                    continue  # SSE keep-alive blank lines
                line = raw_line.decode("utf-8")
                if line.startswith("data: "):
                    line = line[6:]  # strip SSE "data: " prefix
                if line.strip() == "[DONE]":
                    break
                try:
                    chunk = json.loads(line)
                except json.JSONDecodeError:
                    continue  # non-JSON line (comments, partial data)
                choices = chunk.get("choices") or []
                if choices:
                    delta = choices[0].get("delta", {})
                    piece = delta.get("content", "")
                    if piece:
                        accumulated += piece
                        yield accumulated
    except Exception as e:
        # Surface transport/HTTP errors directly in the chat window.
        yield f"Error: {str(e)}"


def _build_user_message(message):
    """Convert a Gradio message (str or multimodal dict) to a user turn.

    Images are inlined as base64 data URLs per the OpenAI vision format.
    Unreadable files are logged and skipped rather than aborting the turn.
    """
    # isinstance guard avoids the original latent TypeError when `message`
    # is a plain string that happens to contain the substring "files".
    if isinstance(message, dict) and message.get("files"):
        parts = []
        if message.get("text", "").strip():
            parts.append({"type": "text", "text": message["text"]})
        for file_info in message["files"]:
            try:
                image = Image.open(file_info)
                b64_image = image_to_base64(image)
                parts.append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{b64_image}"},
                })
            except Exception as e:
                print(f"Error processing image: {e}")
        return {"role": "user", "content": parts}
    # Text-only message.
    text_content = message if isinstance(message, str) else message.get("text", "")
    return {"role": "user", "content": text_content}
# --- Gradio UI -----------------------------------------------------------
# NOTE(review): the emoji in the title/markdown strings look mojibake-
# mangled ("π¬" etc.); kept byte-identical here — confirm intended glyphs
# against the deployed Space.
with gr.Blocks(title="π¬ Vision Chat", theme=gr.themes.Soft()) as demo:
    # Usage instructions rendered above the chat widget.
    gr.Markdown(
        """
# π¬ Vision-Enabled Chat Interface
**π‘ How to use:**
1. Type your message in the chat box
2. Optionally upload images by clicking the π icon
3. Adjust parameters in the accordion below if needed
4. Press Enter or click Send
The model can understand both text and images!
"""
    )
    # Multimodal chat wired to respond(); the additional inputs appear in an
    # accordion under the chat box and are passed positionally to respond()
    # after (message, history).
    chatbot = gr.ChatInterface(
        respond,
        type="messages",   # history arrives as {"role", "content"} dicts
        multimodal=True,   # enables image upload in the chat textbox
        additional_inputs=[
            gr.Textbox(
                value="You are a helpful AI assistant with vision capabilities. You can understand and analyze images.",
                label="System message"
            ),
            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p (nucleus sampling)",
            ),
        ],
    )
    # Explicitly place the ChatInterface (itself a Blocks app) inside this
    # Blocks layout at this position.
    chatbot.render()
    # Footer note on required configuration.
    gr.Markdown("""
---
**Note:** Configure endpoint via `VLLM_ENDPOINT` and `VLLM_MODEL` environment variables.
""")

# Launch the server when run directly (HF Spaces executes this module).
if __name__ == "__main__":
    demo.launch()