Spaces:

Hayloo9838
/

uno-recognizer

Sleeping

App Files Files Community

Hayloo9838 committed on Dec 22, 2024

Commit

c3b28d7

verified ·

1 Parent(s): a3d0c64

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -1

app.py CHANGED Viewed

@@ -9,7 +9,62 @@ import requests
 import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
-# ... (rest of your code remains the same)
 def process_image_classification(image):
     model, processor, reverse_mapping, device = load_model()
@@ -38,6 +93,27 @@ def process_image_classification(image):
     return visualization_rgb, card_name, confidence
 def gradio_interface():
     gr_interface = gr.Interface(
         fn=process_image_classification,

 import matplotlib.pyplot as plt
 from huggingface_hub import hf_hub_download
+MODEL_PATH = "pytorch_model.bin"  # checkpoint filename fetched from the Hub by load_model()
+REPO_ID = "Hayloo9838/uno-recognizer"  # Hub repository the checkpoint is downloaded from
+MAPANDSTUFF = "mapandstuff.pth"  # NOTE(review): not referenced by any active code visible here
+class CLIPVisionClassifier(nn.Module):
+    def __init__(self, num_labels):
+        super().__init__()
+        self.vision_model = CLIPVisionModel.from_pretrained('openai/clip-vit-large-patch14',
+                                                          attn_implementation="eager")
+        self.classifier = nn.Linear(self.vision_model.config.hidden_size, num_labels, bias=False)
+        self.dropout = nn.Dropout(0.1)
+    def forward(self, pixel_values, output_attentions=False):
+        outputs = self.vision_model(pixel_values, output_attentions=output_attentions)
+        pooled_output = outputs.pooler_output
+        logits = self.classifier(pooled_output)
+        if output_attentions:
+            return logits, outputs.attentions
+        return logits
+def get_attention_map(attentions):
+    attention = attentions[-1]
+    attention = attention.mean(dim=1)
+    attention = attention[0, 0, 1:]
+    num_patches = int(np.sqrt(attention.shape[0]))
+    attention_map = attention.reshape(num_patches, num_patches)
+    attention_map = attention_map.cpu().numpy()
+    attention_map = (attention_map - attention_map.min()) / (attention_map.max() - attention_map.min())
+    return attention_map
+def apply_heatmap(image, attention_map, new_size=None):
+    heatmap = cv2.applyColorMap(np.uint8(255 * attention_map), cv2.COLORMAP_JET)
+    if isinstance(image, Image.Image):
+        image = np.array(image)
+        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+    if new_size is not None:
+        image_resized = cv2.resize(image, new_size)
+        attention_map_resized = cv2.resize(attention_map, image_resized.shape[:2][::-1] , interpolation=cv2.INTER_LINEAR)
+        attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
+        heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
+        output = cv2.addWeighted(image_resized, 0.7, heatmap_resized, 0.3, 0)
+    else:
+        attention_map_resized = cv2.resize(attention_map, image.shape[:2][::-1] , interpolation=cv2.INTER_LINEAR)
+        attention_map_resized = (attention_map_resized - attention_map_resized.min()) / (attention_map_resized.max() - attention_map_resized.min())
+        heatmap_resized = cv2.applyColorMap(np.uint8(255 * attention_map_resized), cv2.COLORMAP_JET)
+        output = cv2.addWeighted(image, 0.7, heatmap_resized, 0.3, 0)
+    return output
 def process_image_classification(image):
     model, processor, reverse_mapping, device = load_model()
     return visualization_rgb, card_name, confidence
+def load_model():
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # Download model weights and label mapping from Hugging Face Hub
+    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_PATH)
+    #mapandstuff_path = hf_hub_download(repo_id=REPO_ID, filename=MAPANDSTUFF)
+    checkpoint = torch.load(model_path, map_location=device)
+    label_mapping = checkpoint['label_mapping']
+    reverse_mapping = {v: k for k, v in label_mapping.items()}
+    model = CLIPVisionClassifier(len(label_mapping))
+    model_state_dict = checkpoint["model_state_dict"]
+    model.load_state_dict(model_state_dict)
+    model = model.to(device)
+    model.eval()
+    processor = CLIPProcessor.from_pretrained('openai/clip-vit-large-patch14')
+    return model, processor, reverse_mapping, device
 def gradio_interface():
     gr_interface = gr.Interface(
         fn=process_image_classification,