!pip install -U adapter-transformers
!pip install -U transformers
| import gradio as gr | |
| from transformers import CLIPProcessor, CLIPModel | |
| from PIL import Image | |
| import torch | |
| # Load the model and processor | |
| model = CLIPModel.from_pretrained("Taarhoinc/TaarhoGen1") | |
| processor = CLIPProcessor.from_pretrained("Taarhoinc/TaarhoGen1") | |
| # Define the function to describe a floor plan | |
| def describe_floorplan(floorplan_image: Image.Image, top_k: int = 3): | |
| """Describes a floor plan drawing by listing components.""" | |
| # Define a list of common floor plan components | |
| components = [ | |
| "bedroom", | |
| "kitchen", | |
| "bathroom", | |
| "living room", | |
| "dining room", | |
| "hallway", | |
| "garage", | |
| "balcony", | |
| "stairs", | |
| "door", | |
| "window", | |
| ] | |
| # Preprocess the image and text prompts | |
| inputs = processor( | |
| text=components, images=floorplan_image, return_tensors="pt", padding=True | |
| ) | |
| # Get the logits (similarity scores) | |
| with torch.no_grad(): | |
| outputs = model(**inputs) | |
| logits_per_image = outputs.logits_per_image | |
| # Get the predicted probabilities | |
| probs = logits_per_image.softmax(dim=1).cpu().numpy()[0] | |
| # Get the indices of the top-k components | |
| top_k_indices = probs.argsort()[-top_k:][::-1] | |
| # Get the top-k components | |
| detected_components = [components[i] for i in top_k_indices] | |
| return ", ".join(detected_components) # Return as a comma-separated string | |
| # Create the Gradio interface | |
| gr.Interface( | |
| fn=describe_floorplan, | |
| inputs=[ | |
| gr.Image(label="Upload a floor plan drawing", type="pil"), | |
| gr.Slider(1, 10, step=1, value=3, label="Number of components to detect"), | |
| ], | |
| outputs=gr.Label(label="Detected Components"), | |
| title="Floor Plan Description with TaarhoGen1", | |
| description="Upload a floor plan drawing to get a list of detected components.", | |
| ).launch() |