| Hugging Face's logo | |
| Hugging Face | |
| Search models, datasets, users... | |
| Models | |
| Datasets | |
| Spaces | |
| Posts | |
| Docs | |
| Solutions | |
| Pricing | |
| Spaces: | |
| Satyacoder | |
| / | |
| vision_test | |
| like | |
| 0 | |
| App | |
| Files | |
| Community | |
| vision_test | |
| / | |
| app.py | |
| Satyacoder's picture | |
| Satyacoder | |
| Update app.py | |
| 8602d39 | |
| 5 months ago | |
| raw | |
| history | |
| blame | |
| contribute | |
| delete | |
| No virus | |
| 1.72 kB | |
| from transformers import DetrImageProcessor, DetrForObjectDetection | |
| from transformers import BlipProcessor, BlipForConditionalGeneration | |
| import torch | |
| from PIL import Image | |
| import requests | |
| import gradio as gr | |
# Hub checkpoint ids, named once so each processor/model pair stays in sync.
_DETECTION_CHECKPOINT = "facebook/detr-resnet-50"
_CAPTION_CHECKPOINT = "Salesforce/blip-image-captioning-large"

# Object detection: DETR image processor and model.
box_processor = DetrImageProcessor.from_pretrained(_DETECTION_CHECKPOINT)
box_model = DetrForObjectDetection.from_pretrained(_DETECTION_CHECKPOINT)

# Image captioning: BLIP processor and model.
caption_processor = BlipProcessor.from_pretrained(_CAPTION_CHECKPOINT)
caption_model = BlipForConditionalGeneration.from_pretrained(_CAPTION_CHECKPOINT)
def predict_bounding_boxes(imageurl: str):
    """Download an image, detect objects in it and generate a caption.

    Args:
        imageurl: HTTP(S) URL of the image to analyse.

    Returns:
        On success, a dict with two keys:
          * ``"image label"``: the caption decoded from ``caption_model``.
          * ``"detections"``: a list of dicts, one per detection scoring at
            least 0.70, each with ``"score"`` (float), ``"label"`` (looked up
            in ``box_model.config.id2label``) and ``"box"`` (list of
            coordinates as returned by the post-processor).
        On any failure, ``{"error": <message>}`` -- exceptions are reported
        to the caller as data rather than raised.
    """
    # Bound the download so a stalled host cannot hang the worker forever.
    request_timeout_seconds = 30
    score_threshold = 0.70

    try:
        # NOTE(security): imageurl is caller-supplied and fetched server-side,
        # so this endpoint can be pointed at internal addresses (SSRF).
        # Restrict the allowed hosts if the app is exposed to untrusted users.
        with requests.get(
            imageurl, stream=True, timeout=request_timeout_seconds
        ) as response:
            response.raise_for_status()
            # The raw stream is not content-decoded by default; without this a
            # gzip/deflate-encoded body would reach PIL still compressed.
            response.raw.decode_content = True
            # convert() forces the pixel data to be read while the connection
            # is still open, and gives both models a 3-channel image
            # (RGBA, palette and grayscale inputs included).
            image = Image.open(response.raw).convert("RGB")

        # --- Object detection -------------------------------------------
        detection_inputs = box_processor(images=image, return_tensors="pt")
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            detection_outputs = box_model(**detection_inputs)
        # PIL reports (width, height); the post-processor expects
        # (height, width), hence the reversal.
        target_sizes = torch.tensor([image.size[::-1]])
        results = box_processor.post_process_object_detection(
            detection_outputs,
            target_sizes=target_sizes,
            threshold=score_threshold,
        )[0]
        detections = [
            {
                "score": score.item(),
                "label": box_model.config.id2label[label_id.item()],
                "box": box.tolist(),
            }
            for score, label_id, box in zip(
                results["scores"], results["labels"], results["boxes"]
            )
        ]

        # --- Captioning -------------------------------------------------
        caption_inputs = caption_processor(image, return_tensors="pt")
        generated = caption_model.generate(**caption_inputs)
        caption = caption_processor.decode(generated[0], skip_special_tokens=True)

        return {"image label": caption, "detections": detections}
    except Exception as e:
        # UI boundary: surface the failure in the JSON output instead of
        # crashing the request.
        return {"error": str(e)}
# Expose the predictor through Gradio: one text input (the image URL) and a
# JSON output carrying the caption and detections (or an error message).
app = gr.Interface(
    fn=predict_bounding_boxes,
    inputs="text",
    outputs="json",
)
# NOTE(review): presumably meant to enable API access; it is unclear whether
# Gradio reads an `api` attribute on the interface -- confirm.
app.api = True
app.launch()