# Hugging Face Spaces status shown on the page this file was captured from: "Runtime error".
import gradio as gr
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
# -- Model (loaded once at startup) --
# A single checkpoint id feeds both from_pretrained() calls so the model and
# its processor are always loaded from the same source.
MODEL_ID = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(MODEL_ID)
processor = CLIPProcessor.from_pretrained(MODEL_ID)
# Put the module in evaluation mode; this app only runs inference.
model.eval()
# -- Curated car labels --
# Candidate classes for zero-shot classification. The position of a label in
# this list is the index used to look up its score in classify_car(), so the
# order must stay in sync with the pre-computed text features.
CAR_LABELS = [
    # Sedans
    "2020 Toyota Camry",
    "2021 Honda Accord",
    "2019 Hyundai Sonata",
    "2022 Nissan Altima",
    "2021 Kia K5",
    "2020 Mazda 6",
    "2019 Volkswagen Passat",
    "2021 Subaru Legacy",
    # SUVs & Crossovers
    "2021 Toyota RAV4",
    "2022 Honda CR-V",
    "2020 Ford Escape",
    "2021 Chevrolet Equinox",
    "2022 Jeep Cherokee",
    "2021 Hyundai Tucson",
    "2020 Kia Sportage",
    "2022 Mazda CX-5",
    "2021 Subaru Forester",
    "2021 Volkswagen Tiguan",
    "2022 Nissan Rogue",
    # Trucks
    "2022 Ford F-150",
    "2021 Chevrolet Silverado 1500",
    "2020 RAM 1500",
    "2021 GMC Sierra",
    "2022 Toyota Tacoma",
    "2021 Nissan Frontier",
    # Muscle & Sports Cars
    "2021 Ford Mustang GT",
    "2022 Chevrolet Camaro SS",
    "2020 Dodge Challenger",
    "2021 Dodge Charger Hellcat",
    "2020 Subaru WRX STI",
    # Luxury Sedans
    "2021 BMW 3 Series",
    "2022 Mercedes-Benz C-Class",
    "2021 Audi A4",
    "2020 Lexus ES",
    "2022 Genesis G70",
    "2021 Cadillac CT5",
    "2020 Volvo S60",
    "2022 Infiniti Q50",
    # Luxury SUVs
    "2021 BMW X5",
    "2022 Mercedes-Benz GLC",
    "2021 Audi Q5",
    "2020 Lexus RX 350",
    "2022 Volvo XC90",
    "2021 Cadillac Escalade",
    "2022 Lincoln Navigator",
    # EVs & Hybrids
    "2022 Tesla Model 3",
    "2021 Tesla Model Y",
    "2022 Tesla Model S",
    "2021 Chevrolet Bolt EV",
    "2022 Ford Mustang Mach-E",
    "2021 Toyota Prius",
    "2022 Hyundai Ioniq 5",
    "2021 Kia EV6",
    # Supercars
    "2020 Ferrari 488",
    "2021 Lamborghini Huracan",
    "2021 McLaren 720S",
    "2022 Ferrari F8",
    # Porsche (expanded)
    "2020 Porsche 911 Carrera",
    "2021 Porsche 911 Carrera S",
    "2022 Porsche 911 GT3",
    "2019 Porsche 911 Turbo S",
    "2023 Porsche 911 Targa 4S",
    "2021 Porsche 718 Cayman",
    "2022 Porsche 718 Boxster",
    "2021 Porsche Cayenne",
    "2022 Porsche Cayenne GTS",
    "2021 Porsche Macan",
    "2022 Porsche Panamera",
    "2021 Porsche Taycan",
    "2022 Porsche Taycan 4S",
    "2023 Porsche Taycan Turbo S Cross Turismo",
]
# -- Prompt templates (ensemble improves CLIP accuracy significantly) --
# Each template has exactly one "{}" placeholder that is filled with a car
# label; the resulting text embeddings are averaged per label.
PROMPT_TEMPLATES = [
    "a photo of a {}",
    "a photograph of a {}",
    "a {} driving on the road",
    "a {} parked in a driveway",
    "a side view of a {}",
]
def build_text_features() -> torch.Tensor:
    """Pre-compute averaged text embeddings for all labels across all templates.

    Every label in ``CAR_LABELS`` is rendered through each template in
    ``PROMPT_TEMPLATES`` and encoded with the CLIP text encoder. Each
    per-template batch of embeddings is L2-normalised, the batches are
    averaged across templates, and the mean is normalised again so it can be
    compared with normalised image embeddings via a dot product.

    Returns:
        The ensembled, unit-length text embeddings. Rows are expected to line
        up with ``CAR_LABELS`` (one row per label, same order), which is how
        ``classify_car`` maps scores back to labels.
    """
    per_template = []
    with torch.no_grad():
        for template in PROMPT_TEMPLATES:
            texts = [template.format(label) for label in CAR_LABELS]
            inputs = processor(
                text=texts, return_tensors="pt", padding=True, truncation=True
            )
            emb = model.get_text_features(**inputs)
            # NOTE(review): some transformers releases appear to return a
            # model-output object here instead of a bare tensor; accept both so
            # the `.norm()` call below cannot fail on the return type. Confirm
            # against the installed transformers version.
            if not torch.is_tensor(emb):
                emb = emb.pooler_output
            per_template.append(emb / emb.norm(dim=-1, keepdim=True))

        # Average over the template axis, then re-normalise: the mean of unit
        # vectors is generally not unit length.
        avg_emb = torch.stack(per_template).mean(dim=0)
        return avg_emb / avg_emb.norm(dim=-1, keepdim=True)
# Pre-compute once at startup so that each classification request only has to
# encode the uploaded image, not the label prompts.
TEXT_FEATURES = build_text_features()
def classify_car(image: "Image.Image | None") -> "dict[str, float]":
    """Run ensembled CLIP classification and return top-5 predictions.

    Args:
        image: The uploaded picture as a PIL image, or ``None`` when the
            input component is empty (e.g. the image was cleared).

    Returns:
        A mapping from car label to softmax probability for the highest
        scoring labels (at most five), or an empty dict when ``image`` is
        ``None``.
    """
    if image is None:
        return {}

    # Everything runs under no_grad, including the multiplication by
    # `model.logit_scale` (a model attribute), so no autograd graph is built.
    with torch.no_grad():
        inputs = processor(images=image, return_tensors="pt")
        img_features = model.get_image_features(**inputs)
        # NOTE(review): some transformers releases appear to return a
        # model-output object here instead of a bare tensor; accept both.
        # Confirm against the installed transformers version.
        if not torch.is_tensor(img_features):
            img_features = img_features.pooler_output
        img_features = img_features / img_features.norm(dim=-1, keepdim=True)

        # Cosine similarity against every label embedding, scaled by the
        # model's logit scale before the softmax over labels.
        logits = (img_features @ TEXT_FEATURES.T) * model.logit_scale.exp()
        probs = logits.softmax(dim=-1)[0]

    top = probs.topk(min(5, len(CAR_LABELS)))
    return {
        CAR_LABELS[idx]: score
        for idx, score in zip(top.indices.tolist(), top.values.tolist())
    }
# -- Gradio UI --
# Layout: a centred header, then one row with two equal columns (image upload
# plus button on the left, predictions on the right), followed by the examples
# section and an explanatory note.
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
    ),
    title="Car Classifier",
    css="""
    #header { text-align: center; margin-bottom: 10px; }
    #header h1 { font-size: 2.4rem; font-weight: 800; }
    #header p { color: #6b7280; font-size: 1rem; }
    #result-label .label-container { font-size: 1.05rem; }
    footer { display: none !important; }
    """,
) as demo:
    gr.HTML("""
    <div id="header">
    <h1>Car Classifier</h1>
    <p>Upload a photo of any car -- get the brand, model & year instantly.</p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # type="pil" makes Gradio hand classify_car a PIL image.
            image_input = gr.Image(
                type="pil",
                label="Upload Car Image",
                sources=["upload", "clipboard"],
                height=320,
            )
            classify_btn = gr.Button("Classify", variant="primary", size="lg")
        with gr.Column(scale=1):
            output_label = gr.Label(
                num_top_classes=5,
                label="Top-5 Predictions",
                elem_id="result-label",
            )

    # No example images are configured yet (the list is empty).
    gr.Examples(
        examples=[],
        inputs=image_input,
        label="Example Images",
    )

    gr.Markdown(
        """
        ---
        **How it works** - Uses [CLIP](https://huggingface.co/openai/clip-vit-base-patch32)
        (zero-shot vision-language model) with **prompt ensembling** (5 templates averaged)
        to match your image against ~70 curated car labels.
        No GPU required - runs entirely on CPU.
        """,
        elem_id="footer-note",
    )

    # Classification runs both on an explicit button press and whenever the
    # image component's value changes.
    classify_btn.click(fn=classify_car, inputs=image_input, outputs=output_label)
    image_input.change(fn=classify_car, inputs=image_input, outputs=output_label)
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()