# Hugging Face Space: image-caption -> Stable Diffusion feedback-loop demo.
# NOTE(review): the original lines here ("Spaces:" / "Build error") were
# build-log residue pasted into the source, not code; kept as a comment.
# --- Library imports and image-captioning model setup -------------------------
import requests  # kept: present in the original file
import accelerate
import transformers
import torch
import streamlit as st
from transformers import AutoModelForCausalLM, AutoProcessor

# Print library versions — handy when diagnosing Space build failures.
print(accelerate.__version__)
print(transformers.__version__)

# Image Captioning: run on the GPU when one is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# GIT (GenerativeImage2Text) checkpoint used for captioning.
checkpoint = "microsoft/git-base"
# Images need a processor rather than the plain tokenizer used for text.
processor = AutoProcessor.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
# --- Streamlit UI: page header and image uploader ------------------------------
st.title("Flower Type Demo")
# BUG FIX: the original subheader read "See how Chinese qisper works" —
# garbled text unrelated to this app. Describe what the demo actually does.
st.subheader("Upload an image and see how the caption-to-image loop works")
upload_file = st.file_uploader('Upload an Image')
# --- Stable Diffusion text-to-image pipeline setup -----------------------------
from PIL import Image
import torch
from diffusers import StableDiffusionPipeline
import time

# Start the clock for the "loading + inference" timing printed in the loop.
t1 = time.time()

model_id = "CompVis/stable-diffusion-v1-4"
# BUG FIX: the original hard-coded device = "cpu" (clobbering the CUDA-aware
# value chosen above, which the caption model was already moved to) while still
# requesting float16 weights. Half precision is not supported for CPU
# inference, and the device mismatch broke the captioning calls on GPU
# machines. Re-derive the device consistently and pick a dtype that matches.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
pipe = pipe.to(device)
if upload_file:
    # Caption the uploaded image once to seed the feedback loop.
    test_sample = Image.open(upload_file)
    inputs = processor(images=test_sample, return_tensors="pt").to(device)
    pixel_values = inputs.pixel_values.to(device)
    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Feedback loop: caption -> generated image -> new caption, 10 rounds.
    for i in range(10):
        st.write('New Caption is :')
        st.write(generated_caption)

        # Generate an image from the current caption.
        image = pipe(generated_caption).images[0]
        # BUG FIX: the original called `display(image)`, an IPython/notebook
        # helper that is undefined in a plain script (NameError); the image is
        # shown via st.image below instead.
        print("Model Loading + Inference time = " + str(time.time() - t1) + " seconds")
        st.write("Showing the Image")
        # BUG FIX: the original passed `caption=name` where `name` was never
        # defined (NameError); use the caption that produced this image.
        st.image(image, caption=generated_caption, width=None,
                 use_column_width=None, clamp=False, channels='RGB',
                 output_format='auto')

        # Re-caption the freshly generated image to feed the next iteration.
        inputs = processor(images=image, return_tensors="pt").to(device)
        pixel_values = inputs.pixel_values.to(device)
        generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]