Spaces:

Johnny-Z
/

dan_tagger

Sleeping

App Files Files Community

Johnny-Z committed on Nov 6, 2025

Commit

d52be25

verified ·

1 Parent(s): beefff8

Upload app.py

Browse files

Files changed (1) hide show

app.py +53 -0

app.py CHANGED Viewed

@@ -127,6 +127,25 @@ class MLP(nn.Module):
         x = self.sigmoid(x)
         return x
 with open(os.path.join(repo_dir, 'general_tag_dict.json'), 'r', encoding='utf-8') as f:
     general_dict = json.load(f)
@@ -167,6 +186,10 @@ mlp_artist = MLP(2048, artist_class)
 mlp_artist.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_artist.pth"), map_location=device, weights_only=True))
 mlp_artist.to(device).to(dtype).eval()
 def prediction_to_tag(prediction, tag_dict, class_num):
     prediction = prediction.view(class_num)
     predicted_ids = (prediction >= 0.2).nonzero(as_tuple=True)[0].cpu().numpy() + 1
@@ -202,6 +225,29 @@ def prediction_to_tag(prediction, tag_dict, class_num):
     return general, character, artist, date, rating
 def process_image(image):
     try:
         image = image.convert('RGBA')
@@ -227,10 +273,17 @@ def process_image(image):
         character_ = prediction_to_tag(character_prediction, character_dict, character_class)
         character_tags = character_[1]
         artist_prediction = mlp_artist(embedding)
         artist_ = prediction_to_tag(artist_prediction, artist_dict, artist_class)
         artist_tags = artist_[2]
         date = artist_[3]
     combined_tags = {**general_tags}

         x = self.sigmoid(x)
         return x
+class MLP_Retrieval(nn.Module):
+    """Two-layer MLP whose output is normalised as two independent softmax groups.
+
+    The network maps ``input_size`` features to ``class_num`` logits through a
+    hidden layer of ``input_size // 2`` units with SiLU activation. The logits
+    are then split column-wise at ``head_split``; each part is passed through
+    its own softmax, and the two parts are concatenated back in the original
+    column order.
+
+    NOTE(review): the first group presumably covers the date classes and the
+    second the artist classes -- confirm against the tag dictionary.
+
+    Args:
+        input_size: Width of the input feature vector.
+        class_num: Total number of output columns (both groups together).
+        head_split: Number of leading columns that form the first softmax
+            group. Defaults to 15, the value previously hard-coded in
+            ``forward``.
+    """
+
+    def __init__(self, input_size, class_num, head_split=15):
+        super().__init__()
+        # The submodule attribute names are kept unchanged because they
+        # determine the keys expected by ``load_state_dict``.
+        self.mlp_layer0 = nn.Sequential(
+            nn.Linear(input_size, input_size // 2),
+            nn.SiLU()
+        )
+        self.mlp_layer1 = nn.Linear(input_size // 2, class_num)
+        # Plain int attribute: not a parameter or buffer, so it does not
+        # appear in the state dict.
+        self.head_split = head_split
+
+    def forward(self, x):
+        """Return per-group probabilities for a 2-D batch ``x``.
+
+        ``x`` is indexed as ``x[:, ...]`` and softmax is taken over ``dim=1``,
+        so it must have a batch dimension followed by the feature dimension.
+        Each of the two column groups in the result sums to 1 per row.
+        """
+        x = self.mlp_layer0(x)
+        x = self.mlp_layer1(x)
+        split = self.head_split
+        x1, x2 = x[:, :split], x[:, split:]
+        # Normalise each group separately so that they do not compete for
+        # probability mass.
+        x1 = torch.softmax(x1, dim=1)
+        x2 = torch.softmax(x2, dim=1)
+        x = torch.cat([x1, x2], dim=1)
+        return x
 with open(os.path.join(repo_dir, 'general_tag_dict.json'), 'r', encoding='utf-8') as f:
     general_dict = json.load(f)
 mlp_artist.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_artist.pth"), map_location=device, weights_only=True))
 mlp_artist.to(device).to(dtype).eval()
+# Build the artist retrieval head (2048 input features, artist_class outputs),
+# load its weights from cls_predictor_artist_retrieval.pth in repo_dir, then
+# move it to the target device/dtype and switch it to eval mode.
+mlp_artist_retrieval = MLP_Retrieval(2048, artist_class)
+mlp_artist_retrieval.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_artist_retrieval.pth"), map_location=device, weights_only=True))
+mlp_artist_retrieval.to(device).to(dtype).eval()
 def prediction_to_tag(prediction, tag_dict, class_num):
     prediction = prediction.view(class_num)
     predicted_ids = (prediction >= 0.2).nonzero(as_tuple=True)[0].cpu().numpy() + 1
     return general, character, artist, date, rating
+def prediction_to_retrieval(prediction, tag_dict, class_num, top_k):
+    """Convert a retrieval score vector into top artist tags and the best date tag.
+
+    Args:
+        prediction: Tensor holding ``class_num`` scores; it is flattened with
+            ``view(class_num)``.
+        tag_dict: Mapping of tag name to a sequence where index 1 is the
+            category string (``"artist"`` or ``"date"``) and index 2 is the
+            tag's 1-based position in ``prediction``.
+        class_num: Number of entries in ``prediction``.
+        top_k: Maximum number of artist tags to return.
+
+    Returns:
+        A tuple ``(artist, date)``. ``artist`` maps up to ``top_k`` artist
+        tags to their scores (rounded to 6 decimals), ordered from highest to
+        lowest score. ``date`` holds the single highest-scoring date tag, or
+        is empty when no date tag reaches the threshold.
+    """
+    prediction = prediction.view(class_num)
+    # Collect the 1-based ids of every score at or above the 0.005 threshold.
+    # A set gives O(1) membership checks in the loop below instead of a
+    # linear array scan per tag.
+    hit_positions = (prediction >= 0.005).nonzero(as_tuple=True)[0].cpu().tolist()
+    predicted_ids = {position + 1 for position in hit_positions}
+    artist = {}
+    date = {}
+    for tag, value in tag_dict.items():
+        tag_id = value[2]
+        if tag_id not in predicted_ids:
+            continue
+        tag_value = round(prediction[tag_id - 1].item(), 6)
+        if value[1] == "artist":
+            artist[tag] = tag_value
+        elif value[1] == "date":
+            date[tag] = tag_value
+    # sorted() is stable, so ties keep their tag_dict order.
+    ranked = sorted(artist.items(), key=lambda item: item[1], reverse=True)
+    artist = dict(ranked[:top_k])
+    if date:
+        best_date = max(date, key=date.get)
+        date = {best_date: date[best_date]}
+    return artist, date
 def process_image(image):
     try:
         image = image.convert('RGBA')
         character_ = prediction_to_tag(character_prediction, character_dict, character_class)
         character_tags = character_[1]
+        """
         artist_prediction = mlp_artist(embedding)
         artist_ = prediction_to_tag(artist_prediction, artist_dict, artist_class)
         artist_tags = artist_[2]
         date = artist_[3]
+        """
+        artist_retrieval_prediction = mlp_artist_retrieval(embedding)
+        artist_retrieval_ = prediction_to_retrieval(artist_retrieval_prediction, artist_dict, artist_class, 10)
+        artist_tags = artist_retrieval_[0]
+        date = artist_retrieval_[1]
     combined_tags = {**general_tags}