Spaces:

Johnny-Z
/

dan_tagger

Running

App Files Files Community

Johnny-Z committed on Dec 16, 2025

Commit

1aee7bf

verified ·

1 Parent(s): d52be25

Upload app.py

Browse files

Files changed (1) hide show

app.py +29 -56

app.py CHANGED Viewed

@@ -10,41 +10,44 @@ from huggingface_hub import login, snapshot_download
 TITLE = "Danbooru Tagger"
 DESCRIPTION = """
 ## Dataset
-- Source: Cleaned Danbooru
-## Metrics
 - Validation Split: 10% of Dataset
-- Validation Results:
 ### General
 | Metric          | Value       |
 |-----------------|-------------|
-| Macro F1        | 0.4678      |
-| Macro Precision | 0.4605      |
-| Macro Recall    | 0.5229      |
-| Micro F1        | 0.6661      |
-| Micro Precision | 0.6049      |
-| Micro Recall    | 0.7411      |
 ### Character
 | Metric          | Value       |
 |-----------------|-------------|
-| Macro F1        | 0.8925      |
-| Macro Precision | 0.9099      |
-| Macro Recall    | 0.8935      |
-| Micro F1        | 0.9232      |
-| Micro Precision | 0.9264      |
-| Micro Recall    | 0.9199      |
 ### Artist
 | Metric          | Value       |
 |-----------------|-------------|
-| Macro F1        | 0.7904      |
-| Macro Precision | 0.8286      |
-| Macro Recall    | 0.7904      |
-| Micro F1        | 0.5989      |
-| Micro Precision | 0.5975      |
-| Micro Recall    | 0.6004      |
 """
 kaomojis = [
@@ -78,7 +81,7 @@ if hf_token:
 else:
     raise ValueError("environment variable HF_TOKEN not found.")
-repo_id = "Johnny-Z/vit-e4"
 repo_dir = snapshot_download(repo_id)
 model = AutoModel.from_pretrained(repo_id, dtype=dtype, trust_remote_code=True, device_map=device)
@@ -127,25 +130,6 @@ class MLP(nn.Module):
         x = self.sigmoid(x)
         return x
-class MLP_Retrieval(nn.Module):
-    def __init__(self, input_size, class_num):
-        super().__init__()
-        self.mlp_layer0 = nn.Sequential(
-            nn.Linear(input_size, input_size // 2),
-            nn.SiLU()
-        )
-        self.mlp_layer1 = nn.Linear(input_size // 2, class_num)
-    def forward(self, x):
-        x = self.mlp_layer0(x)
-        x = self.mlp_layer1(x)
-        x1, x2 = x[:, :15], x[:, 15:]
-        x1 = torch.softmax(x1, dim=1)
-        x2 = torch.softmax(x2, dim=1)
-        x = torch.cat([x1, x2], dim=1)
-        return x
 with open(os.path.join(repo_dir, 'general_tag_dict.json'), 'r', encoding='utf-8') as f:
     general_dict = json.load(f)
@@ -171,25 +155,21 @@ model_map = MultiheadAttentionPoolingHead(2048)
 model_map.load_state_dict(torch.load(os.path.join(repo_dir, "map_head.pth"), map_location=device, weights_only=True))
 model_map.to(device).to(dtype).eval()
-general_class = 9775
 mlp_general = MLP(2048, general_class)
 mlp_general.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_general.pth"), map_location=device, weights_only=True))
 mlp_general.to(device).to(dtype).eval()
-character_class = 7568
 mlp_character = MLP(2048, character_class)
 mlp_character.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_character.pth"), map_location=device, weights_only=True))
 mlp_character.to(device).to(dtype).eval()
-artist_class = 13957
 mlp_artist = MLP(2048, artist_class)
 mlp_artist.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_artist.pth"), map_location=device, weights_only=True))
 mlp_artist.to(device).to(dtype).eval()
-mlp_artist_retrieval = MLP_Retrieval(2048, artist_class)
-mlp_artist_retrieval.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_artist_retrieval.pth"), map_location=device, weights_only=True))
-mlp_artist_retrieval.to(device).to(dtype).eval()
 def prediction_to_tag(prediction, tag_dict, class_num):
     prediction = prediction.view(class_num)
     predicted_ids = (prediction >= 0.2).nonzero(as_tuple=True)[0].cpu().numpy() + 1
@@ -273,17 +253,10 @@ def process_image(image):
         character_ = prediction_to_tag(character_prediction, character_dict, character_class)
         character_tags = character_[1]
-        """
         artist_prediction = mlp_artist(embedding)
         artist_ = prediction_to_tag(artist_prediction, artist_dict, artist_class)
         artist_tags = artist_[2]
         date = artist_[3]
-        """
-        artist_retrieval_prediction = mlp_artist_retrieval(embedding)
-        artist_retrieval_ = prediction_to_retrieval(artist_retrieval_prediction, artist_dict, artist_class, 10)
-        artist_tags = artist_retrieval_[0]
-        date = artist_retrieval_[1]
     combined_tags = {**general_tags}

 TITLE = "Danbooru Tagger"
 DESCRIPTION = """
 ## Dataset
+- Source: Danbooru
+- Cutoff Date: 2025-11-27
 - Validation Split: 10% of Dataset
+## Validation Results
 ### General
+Tags Count: 11046
 | Metric          | Value       |
 |-----------------|-------------|
+| Macro F1        | 0.4439      |
+| Macro Precision | 0.4168      |
+| Macro Recall    | 0.4964      |
+| Micro F1        | 0.6595      |
+| Micro Precision | 0.5982      |
+| Micro Recall    | 0.7349      |
 ### Character
+Tags Count: 9148
 | Metric          | Value       |
 |-----------------|-------------|
+| Macro F1        | 0.8646      |
+| Macro Precision | 0.8897      |
+| Macro Recall    | 0.8492      |
+| Micro F1        | 0.9092      |
+| Micro Precision | 0.9195      |
+| Micro Recall    | 0.8991      |
 ### Artist
+Tags Count: 17171
 | Metric          | Value       |
 |-----------------|-------------|
+| Macro F1        | 0.8008      |
+| Macro Precision | 0.8669      |
+| Macro Recall    | 0.7641      |
+| Micro F1        | 0.8596      |
+| Micro Precision | 0.8948      |
+| Micro Recall    | 0.8271      |
 """
 kaomojis = [
 else:
     raise ValueError("environment variable HF_TOKEN not found.")
+repo_id = "Johnny-Z/danbooru_vfm"
 repo_dir = snapshot_download(repo_id)
 model = AutoModel.from_pretrained(repo_id, dtype=dtype, trust_remote_code=True, device_map=device)
         x = self.sigmoid(x)
         return x
 with open(os.path.join(repo_dir, 'general_tag_dict.json'), 'r', encoding='utf-8') as f:
     general_dict = json.load(f)
 model_map.load_state_dict(torch.load(os.path.join(repo_dir, "map_head.pth"), map_location=device, weights_only=True))
 model_map.to(device).to(dtype).eval()
+general_class = 11046
 mlp_general = MLP(2048, general_class)
 mlp_general.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_general.pth"), map_location=device, weights_only=True))
 mlp_general.to(device).to(dtype).eval()
+character_class = 9148
 mlp_character = MLP(2048, character_class)
 mlp_character.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_character.pth"), map_location=device, weights_only=True))
 mlp_character.to(device).to(dtype).eval()
+artist_class = 17171
 mlp_artist = MLP(2048, artist_class)
 mlp_artist.load_state_dict(torch.load(os.path.join(repo_dir, "cls_predictor_artist.pth"), map_location=device, weights_only=True))
 mlp_artist.to(device).to(dtype).eval()
 def prediction_to_tag(prediction, tag_dict, class_num):
     prediction = prediction.view(class_num)
     predicted_ids = (prediction >= 0.2).nonzero(as_tuple=True)[0].cpu().numpy() + 1
         character_ = prediction_to_tag(character_prediction, character_dict, character_class)
         character_tags = character_[1]
         artist_prediction = mlp_artist(embedding)
         artist_ = prediction_to_tag(artist_prediction, artist_dict, artist_class)
         artist_tags = artist_[2]
         date = artist_[3]
     combined_tags = {**general_tags}