"""Gradio demo: caption an uploaded image with a hosted GLM vision model.

The user signs in with Hugging Face OAuth; the resulting token is used to call
the Inference API, so requests are made with the user's own credentials.
"""

import base64
from io import BytesIO

import gradio as gr
from huggingface_hub import InferenceClient


def encode_image(image) -> str:
    """Return *image* as a base64 JPEG ``data:`` URL.

    Args:
        image: A PIL image (anything exposing ``mode``, ``convert`` and
            ``save`` the way ``PIL.Image.Image`` does).

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    # JPEG has no alpha channel or palette; saving e.g. an RGBA or P image
    # raises OSError in Pillow, so normalise those modes to RGB first.
    # RGB and L (greyscale) are written as-is.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    payload = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{payload}"


def caption_image(image, oauth_token: gr.OAuthToken | None) -> str:
    """Ask the vision model for a one-sentence description of *image*.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.
        oauth_token: Supplied by Gradio based on the type annotation (it is
            not listed in the event's ``inputs``); ``None`` when the user is
            not logged in.

    Returns:
        The model's reply, a prompt telling the user what is missing, or an
        ``"Error: ..."`` message if the inference request raised.
    """
    # Check if user is logged in
    if oauth_token is None:
        return "Please log in using the button on the sidebar to use the model."

    if image is None:
        return "Please upload an image."

    # Initialize client with the user's OAuth token
    client = InferenceClient(token=oauth_token.token)
    base64_image = encode_image(image)

    try:
        completion = client.chat.completions.create(
            model="zai-org/GLM-4.6V-Flash:novita",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Describe this image in one sentence.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": base64_image},
                        },
                    ],
                }
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # UI boundary: surface the failure in the caption box instead of
        # letting the exception propagate out of the event handler.
        return f"Error: {str(e)}"


# Define the Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Sidebar():
        gr.Markdown("### Authentication")
        gr.LoginButton()
        gr.Markdown("Logging in allows you to use your own HF quota for the Inference API.")

    gr.Markdown("# 📸 GLM Image Captioner (OAuth)")

    with gr.Row():
        with gr.Column():
            input_img = gr.Image(type="pil", label="Upload Image")
            submit_btn = gr.Button("Generate Caption", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(label="Caption", lines=10)

    # Pass the image and the implicit OAuth token to the function
    submit_btn.click(
        fn=caption_image,
        inputs=[input_img],
        outputs=[output_text],
    )

if __name__ == "__main__":
    demo.launch()