File size: 6,082 Bytes
f5258bc
 
 
2f6bb04
f5258bc
 
 
 
2f6bb04
f5258bc
 
 
 
 
2f6bb04
 
 
 
 
 
 
 
 
 
 
 
e650e3a
f5258bc
 
 
 
 
 
2f6bb04
2195645
 
 
 
 
f5258bc
 
 
2fe8f38
f5258bc
 
2fe8f38
f5258bc
 
2f6bb04
 
 
 
 
 
 
f5258bc
2fe8f38
f5258bc
2195645
2f6bb04
 
2195645
f5258bc
 
 
 
 
 
 
2f6bb04
f5258bc
 
2195645
 
 
f5258bc
 
 
2195645
 
 
f5258bc
 
 
2f6bb04
2195645
f5258bc
2f6bb04
 
f5258bc
2f6bb04
36ddc12
 
 
 
 
2f6bb04
 
 
 
 
f5258bc
 
 
 
2f6bb04
f5258bc
 
 
2f6bb04
2d072ef
 
f5258bc
 
 
2f6bb04
f5258bc
 
2f6bb04
 
 
f5258bc
 
 
2f6bb04
f5258bc
2f6bb04
 
f5258bc
 
 
2f6bb04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f5258bc
 
2f6bb04
 
 
 
 
 
 
 
 
2195645
2f6bb04
f5258bc
2f6bb04
73eca71
2f6bb04
f5258bc
 
 
 
2f6bb04
73eca71
2f6bb04
f5258bc
 
 
2f6bb04
f5258bc
 
 
 
 
2f6bb04
 
 
 
 
f5258bc
 
 
 
 
 
ee175f9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
import gradio as gr
import os
from openai import OpenAI
import base64

# Initialize OpenAI client.
# The Hugging Face inference router is used as an OpenAI-compatible endpoint;
# authentication comes from the HF_TOKEN environment variable (the "" fallback
# means requests fail with an auth error, not a KeyError, when it is unset).
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ.get("HF_TOKEN", ""),
    default_headers={
        # Bill inference usage to the "huggingface" organization account.
        "X-HF-Bill-To": "huggingface"
    }
)

def encode_image(image_path):
    """Encode an image file as a base64 string for the chat API.

    Args:
        image_path: Filesystem path to the image, or None when no image
            was uploaded.

    Returns:
        The base64-encoded file contents as a str, or None when no path
        was given or the file could not be read.
    """
    if image_path is None:
        return None

    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    # Only file I/O can raise here, so catch OSError rather than a blanket
    # Exception that would also hide programming errors.
    except OSError as e:
        # Best-effort: log and degrade to "no image" instead of crashing the UI.
        print(f"Error encoding image: {e}")
        return None

def process_message(message, history, image):
    """Send the user's message (and optional image) to the model, streaming
    back updated chat histories.

    Args:
        message: The user's text input; may be empty when only an image is sent.
        history: Existing chat history as a list of ``{"role", "content"}``
            dicts (Gradio "messages" format).
        image: Filesystem path to an uploaded image, or None.

    Yields:
        Updated history lists as response tokens stream in. The user's turn is
        yielded immediately so the UI updates even if the stream produces no
        content.
    """
    # Rebuild the API conversation from prior turns; ignore any other roles.
    messages = []
    for msg_dict in history:
        if msg_dict["role"] == "user":
            messages.append({"role": "user", "content": msg_dict["content"]})
        elif msg_dict["role"] == "assistant":
            messages.append({"role": "assistant", "content": msg_dict["content"]})

    # Append the current turn as multi-part content (text and/or image).
    if message or image:
        current_message = {"role": "user", "content": []}

        if message:
            current_message["content"].append({"type": "text", "text": message})

        if image:
            # encode_image returns None on read failure; skip the part then.
            base64_image = encode_image(image)
            if base64_image:
                current_message["content"].append({
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                })

        messages.append(current_message)

    # Show the user's message in the chat immediately, before the model
    # replies — previously nothing was yielded until the first token arrived,
    # so an empty stream left the UI unchanged.
    user_content = message if message else "[Image uploaded]"
    new_history = history + [{"role": "user", "content": user_content}]
    yield new_history

    response = ""
    try:
        stream = client.chat.completions.create(
            model="zai-org/GLM-4.6V-Flash:zai-org",
            messages=messages,
            stream=True,
            max_tokens=2048,
        )

        # Placeholder assistant message, filled in as tokens arrive.
        new_history.append({"role": "assistant", "content": ""})

        for chunk in stream:
            if chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                # Update the assistant's message in place and push to the UI.
                new_history[-1]["content"] = response
                yield new_history

        # Final state (covers streams that end without a trailing delta).
        yield new_history

    except Exception as e:
        # Surface the failure in the chat. Reuse the streaming placeholder if
        # it already exists so one turn never shows two assistant bubbles.
        error_msg = f"Error: {str(e)}"
        if new_history and new_history[-1]["role"] == "assistant":
            new_history[-1]["content"] = error_msg
        else:
            new_history.append({"role": "assistant", "content": error_msg})
        yield new_history

# Create Gradio interface with Gradio 6 syntax.
# Declarative UI: a markdown header, a chat pane, a text+image input row,
# send/clear buttons, example prompts, and the event wiring at the bottom.
with gr.Blocks(fill_height=True) as demo:
    # Model description and attribution shown above the chat.
    gr.Markdown(
        """
        # GLM-4.6V-Flash
        
        GLM-4.6V series model includes two versions: **GLM-4.6V (106B)**, a foundation model designed for cloud and high-performance cluster scenarios, and **GLM-4.6V-Flash (9B)**, a lightweight model optimized for local deployment and low-latency applications.
        
        Upload an image and ask questions about it using this powerful vision-language model!
        
        <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="text-decoration: none;">
        <span style="color: #4F46E5; font-weight: bold;">Built with anycoder</span>
        </a>
        """
    )
    
    # Conversation pane: (user, assistant) avatar icons, bubble layout, and a
    # per-message copy button.
    chatbot = gr.Chatbot(
        label="Conversation",
        height=500,
        avatar_images=(
            "https://cdn-icons-png.flaticon.com/512/147/147144.png",
            "https://cdn-icons-png.flaticon.com/512/4712/4712025.png"
        ),
        layout="bubble",
        buttons=["copy"]
    )
    
    # Input row: wide text box on the left, optional image upload on the right.
    with gr.Row():
        with gr.Column(scale=4):
            msg = gr.Textbox(
                label="Your message",
                placeholder="Type your message here or upload an image...",
                lines=2,
                autofocus=True,
            )
        with gr.Column(scale=1):
            img = gr.Image(
                label="Upload image (optional)",
                type="filepath",  # handlers receive a path, not a numpy array
                height=150,
                sources=["upload", "clipboard"],
            )
    
    with gr.Row():
        submit_btn = gr.Button("Send 📤", variant="primary", scale=1)
        # ClearButton resets all three components client-side; no callback needed.
        clear_btn = gr.ClearButton(
            components=[msg, img, chatbot], 
            value="Clear Chat 🗑️",
            scale=1
        )
    
    # Clickable starter prompts that populate the text box.
    gr.Examples(
        examples=[
            ["What's in this image?"],
            ["Describe this image in detail"],
            ["What colors do you see?"],
        ],
        inputs=msg,
        label="Example prompts"
    )
    
    # Set up the chat interface
    def submit_and_clear(message, history, image):
        """Stream updated chat history while clearing the text and image inputs.

        Yields (history, "", None) tuples so the outputs [chatbot, msg, img]
        are updated together on every streamed token. Empty submissions
        (no text and no image) are a no-op that just clears the inputs.
        """
        if not message and not image:
            yield history, "", None
            return
        
        # Process the message
        for updated_history in process_message(message, history, image):
            yield updated_history, "", None
    
    # Handle submission: Enter in the textbox and the Send button share the
    # same streaming handler and outputs.
    msg.submit(
        fn=submit_and_clear,
        inputs=[msg, chatbot, img],
        outputs=[chatbot, msg, img],
        api_visibility="public"
    )
    
    submit_btn.click(
        fn=submit_and_clear,
        inputs=[msg, chatbot, img],
        outputs=[chatbot, msg, img],
        api_visibility="public"
    )

# Launch the app with Gradio 6 syntax
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces (containers / HF Spaces)
        server_port=7860,
        share=False,
        # NOTE(review): `theme` is conventionally a gr.Blocks() constructor
        # argument, not a launch() kwarg — confirm the pinned Gradio version
        # accepts it here, otherwise this raises a TypeError at startup.
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="indigo",
            font=gr.themes.GoogleFont("Inter"),
        ),
        # NOTE(review): verify `footer_links` exists on launch() in the
        # targeted Gradio release.
        footer_links=[
            {
                "label": "Built with anycoder",
                "url": "https://huggingface.co/spaces/akhaliq/anycoder"
            }
        ]
    )