Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
| import open_clip | |
| import gradio as gr | |
| import pickle | |
# Load the pre-trained CLIP model.
# create_model_and_transforms returns (model, train_transform, eval_transform);
# the third value is the image preprocessing pipeline, not a text tokenizer.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')
model.eval()
# The text tokenizer has to be requested separately.
tokenizer = open_clip.get_tokenizer('ViT-L-14')

# Maximum number of gallery images returned per query.
TOP_K = 5


def load_features(pickle_file):
    """Load and return the object stored in *pickle_file*.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    deserializing. Only point this at feature files you produced yourself
    or otherwise trust.
    """
    with open(pickle_file, 'rb') as f:
        return pickle.load(f)


def calculate_similarity(image_features, text_feature, lambda_val=0.5, query_image_feature=None):
    """Score every gallery feature against a composed (image + text) query.

    Args:
        image_features: Gallery feature matrix; assumed to be (N, D) with
            one row per gallery image -- TODO confirm against the pickle.
        text_feature: Query text feature(s), shape (Q, D).
        lambda_val: Weight of the text term; the image term is weighted
            by ``1 - lambda_val``.
        query_image_feature: Query image feature(s), shape (Q, D). When
            omitted, only the text term can be computed and it is returned
            unweighted (a constant weight would not change the ranking).

    Returns:
        Tensor of shape (N, Q) with the combined similarity scores.
    """
    # Both terms compare the *gallery* against one query modality.
    text_similarities = image_features @ text_feature.T
    if query_image_feature is None:
        return text_similarities
    image_similarities = image_features @ query_image_feature.T
    return (1 - lambda_val) * image_similarities + lambda_val * text_similarities


def _normalize(features):
    """Return *features* as float32 with unit L2 norm along the last axis."""
    features = features.float()
    return features / features.norm(dim=-1, keepdim=True)


def _load_image(path):
    """Read the image at *path* fully into memory and close the file."""
    with Image.open(path) as img:
        return img.copy()


# Load precomputed gallery features. The pickle is expected to provide
# 'feats' (features) and 'paths' (image file paths, index-aligned with 'feats').
features = load_features('features/patternnet_clip.pkl')
# Cast to float32 so the matmul with freshly encoded query features cannot
# fail on a dtype mismatch (e.g. features stored as float16/float64).
# NOTE(review): presumably the stored features are already L2-normalized;
# confirm against the script that produced the pickle.
image_features = torch.as_tensor(features['feats'], dtype=torch.float32)
image_paths = features['paths']


def image_text_retrieval(image, text, lambda_val):
    """Retrieve the gallery images that best match an image + text query.

    Args:
        image: Query image as a PIL image, or None when nothing was uploaded.
        text: Query text; may be empty or None.
        lambda_val: Weight of the text similarity in [0, 1]; the image
            similarity is weighted by ``1 - lambda_val``.

    Returns:
        A list of up to ``TOP_K`` PIL images ordered from most to least
        similar. Empty when neither an image nor a text query is given.
        When only one modality is provided, ranking uses that modality alone.
    """
    has_image = image is not None
    has_text = bool(text and text.strip())
    if not has_image and not has_text:
        return []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        image_feature = None
        if has_image:
            pixels = preprocess(image).unsqueeze(0)
            image_feature = _normalize(model.encode_image(pixels)).cpu()
        text_feature = None
        if has_text:
            # The tokenizer already returns a (batch, context_length) tensor,
            # so no extra unsqueeze is needed.
            tokens = tokenizer([text])
            text_feature = _normalize(model.encode_text(tokens)).cpu()

    if text_feature is None:
        similarities = image_features @ image_feature.T
    else:
        similarities = calculate_similarity(
            image_features, text_feature, lambda_val, image_feature
        )

    # Flatten (N, 1) -> (N,) so topk ranks across gallery images rather than
    # across the singleton query axis, and never ask for more than N results.
    scores = similarities.reshape(-1)
    k = min(TOP_K, scores.numel())
    top_indices = scores.topk(k).indices.tolist()

    # Retrieve top images
    return [_load_image(image_paths[i]) for i in top_indices]
| # Create Gradio interface | |
def demo(image, text, lambda_val):
    """Gradio callback: forward the UI inputs to ``image_text_retrieval``.

    Returns whatever ``image_text_retrieval`` returns for the given query
    image, query text and lambda weight.
    """
    retrieved = image_text_retrieval(image, text, lambda_val)
    return retrieved
# Assemble the web UI: three inputs (query image, query text, lambda slider)
# feed `demo`, and the retrieved images are shown in a gallery.
iface = gr.Interface(
    title="Composed Image Retrieval for Remote Sensing",
    description="Upload a query image, enter a text query, and adjust the lambda value to retrieve images based on both image and text inputs.",
    fn=demo,
    inputs=[
        gr.Image(label="Query Image", type="pil"),
        gr.Textbox(label="Text Query", lines=2, placeholder="Enter text query..."),
        gr.Slider(label="Lambda Value (Image-Text Weight)", minimum=0, maximum=1, value=0.5),
    ],
    outputs=gr.Gallery(label="Retrieved Images"),
)

# Start serving the interface.
iface.launch()