| import gradio as gr | |
| import openai | |
| import os | |
| import json | |
| import numpy as np | |
| import torch | |
| from transformers import AutoProcessor, AutoModelForCausalLM | |
# OpenAI client settings come from the environment so no secret is hard-coded.
openai.organization = os.getenv("API_ORG")
openai.api_key = os.getenv("API_KEY")

# Credentials for the Gradio login prompt (handed to `demo.launch(auth=...)`).
app_password = os.getenv("APP_PASSWORD")
app_username = os.getenv("APP_USERNAME")

# NOTE(review): this used to be "openai/clip-vit-base-patch32". CLIP is a
# contrastive image/text encoder without a causal-LM head, so
# AutoModelForCausalLM.from_pretrained() rejects its config and the script
# could not start. The caption path (`model.generate(pixel_values=...)`
# followed by `processor.batch_decode`) is the GIT captioning recipe, hence
# the GIT base checkpoint. Swap in a fine-tuned GIT checkpoint if one exists.
checkpoint = "microsoft/git-base"
processor = AutoProcessor.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

# Put the weights on the GPU when one is present; the caption function sends
# its inputs to CUDA in that case and both must live on the same device.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
def generate(input_image):
    """Return a generated text caption for ``input_image``.

    Args:
        input_image: The image to caption. The Gradio input component is
            configured with ``type="pil"``, so this is a PIL image, or
            ``None`` when the user submits without providing one.

    Returns:
        The first decoded caption, with special tokens stripped.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_image is None:
        # Surface a readable message in the UI instead of an opaque
        # preprocessing failure.
        raise gr.Error("Please provide an image to caption.")

    # Follow the model's actual device rather than re-deriving "cuda"/"cpu":
    # sending inputs to CUDA while the weights sit on the CPU (or vice versa)
    # fails with a device-mismatch error inside `generate`.
    device = model.device
    inputs = processor(images=input_image, return_tensors="pt").to(device)

    # max_length=50 caps the length of the generated token sequence.
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)

    # A single image goes in, so only the first decoded sequence is returned.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
# Single-function UI: one PIL image in, one caption string out.
demo = gr.Interface(
    fn=generate,
    inputs=gr.Image(label="Input", elem_id="input_image", type="pil"),
    outputs=gr.Text(label="Generated Caption"),
    # Empty option list; presumably intended to hide the flag button.
    flagging_options=[],
)

# Refuse to start without login credentials. With either value missing the
# auth tuple would contain None, which still enables the login wall but can
# never match what a user types, leaving the app silently unusable.
if not app_username or not app_password:
    raise RuntimeError(
        "APP_USERNAME and APP_PASSWORD must be set to launch the captioning demo."
    )

# share=False keeps the app on the local server (no public share link).
demo.launch(share=False, auth=(app_username, app_password))