michael-0acf4 committed on
Commit
54a6afb
·
2 Parent(s): d92c814721ccc4

Merge branch 'main' of https://huggingface.co/michael-0acf4/anitag2vec

Browse files
Files changed (1) hide show
  1. README.md +46 -44
README.md CHANGED
@@ -1,44 +1,46 @@
1
- ---
2
- license: mit
3
- ---
4
-
5
- # AniTag2Vec
6
-
7
- Training and inference examples are all available on [my github](https://github.com/michael-0acf4/anitag2vec).
8
-
9
- Implementation is detailed in [this blog post](https://blog.afmichael.dev/posts/2026/set-embeddings-and-anitag2vec/).
10
-
11
- ```python
12
- TOKENIZER_PATH = "./checkpoints/token_dataset_c7359727bcee4f8b_vocab_size_5000_freq_3.json"
13
- CONFIG_PATH = "./checkpoints/setup_params_8ea07c7d34b64b69_c7359727bcee4f8b.json"
14
- MODEL_PATH = "./checkpoints/anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e20_s60203_b256_p1871744.pth"
15
-
16
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
- cfg = SetupConfig.load_from_file(CONFIG_PATH)
18
- print(cfg)
19
- tagtok = TagBPETokenizer(vocab_size=cfg.HYPERP_TAGTOK_VOCAB_SIZE, min_frequency=cfg.HYPERP_TAGTOK_MIN_FREQ)
20
- tagtok.load(TOKENIZER_PATH)
21
-
22
- anitag2vec = AniTag2Vec(
23
- vocab_size=tagtok.vocab_size,
24
- max_len_cut=cfg.HYPERP_TAGTOK_MAX_TOKEN_CLAMP,
25
- d_model=cfg.HYPERP_TRANSFORMER_D_MODEL,
26
- n_heads=cfg.HYPERP_TRANSFORMER_N_HEADS,
27
- n_layers=cfg.HYPERP_TRANSFORMER_N_LAYERS,
28
- output_emb=cfg.HYPERP_OUTPUT_EMB,
29
- )
30
- anitag2vec.to(device)
31
- anitag2vec.load_state_dict(torch.load(MODEL_PATH))
32
- anitag2vec.eval()
33
- runner = AniTag2VecRunner(tagtok, anitag2vec)
34
-
35
- # Inference
36
- def compare(a: str, b: str):
37
- ax = runner.run_inference_human([a])
38
- bx = runner.run_inference_human([b])
39
- howmuch = ((F.normalize(ax) @ F.normalize(bx).T).item())
40
- print(f"{howmuch:.2f}: '{a}' vs '{b}'")
41
-
42
- compare("#1girl #1boy", "#1boy #1girl")
43
- # 1.00: '#1girl #1boy' vs '#1boy #1girl'
44
- ```
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
4
+
5
+ # AniTag2Vec
6
+
7
+ Generate vector embeddings from Danbooru-, Sakugabooru-, Pixiv-, and MAL-style tags.
8
+
9
+ Training and inference examples are all available on [my GitHub](https://github.com/michael-0acf4/anitag2vec).
10
+
11
+ Implementation is detailed in [this blog post](https://blog.afmichael.dev/posts/2026/set-embeddings-and-anitag2vec/).
12
+
13
+ ```python
14
+ TOKENIZER_PATH = "./checkpoints/token_dataset_c7359727bcee4f8b_vocab_size_5000_freq_3.json"
15
+ CONFIG_PATH = "./checkpoints/setup_params_8ea07c7d34b64b69_c7359727bcee4f8b.json"
16
+ MODEL_PATH = "./checkpoints/anitag2vec_8ea07c7d34b64b69_c7359727bcee4f8b_i128_e20_s60203_b256_p1871744.pth"
17
+
18
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
+ cfg = SetupConfig.load_from_file(CONFIG_PATH)
20
+ print(cfg)
21
+ tagtok = TagBPETokenizer(vocab_size=cfg.HYPERP_TAGTOK_VOCAB_SIZE, min_frequency=cfg.HYPERP_TAGTOK_MIN_FREQ)
22
+ tagtok.load(TOKENIZER_PATH)
23
+
24
+ anitag2vec = AniTag2Vec(
25
+ vocab_size=tagtok.vocab_size,
26
+ max_len_cut=cfg.HYPERP_TAGTOK_MAX_TOKEN_CLAMP,
27
+ d_model=cfg.HYPERP_TRANSFORMER_D_MODEL,
28
+ n_heads=cfg.HYPERP_TRANSFORMER_N_HEADS,
29
+ n_layers=cfg.HYPERP_TRANSFORMER_N_LAYERS,
30
+ output_emb=cfg.HYPERP_OUTPUT_EMB,
31
+ )
32
+ anitag2vec.to(device)
33
+ anitag2vec.load_state_dict(torch.load(MODEL_PATH))
34
+ anitag2vec.eval()
35
+ runner = AniTag2VecRunner(tagtok, anitag2vec)
36
+
37
+ # Inference
38
+ def compare(a: str, b: str):
39
+ ax = runner.run_inference_human([a])
40
+ bx = runner.run_inference_human([b])
41
+ howmuch = ((F.normalize(ax) @ F.normalize(bx).T).item())
42
+ print(f"{howmuch:.2f}: '{a}' vs '{b}'")
43
+
44
+ compare("#1girl #1boy", "#1boy #1girl")
45
+ # 1.00: '#1girl #1boy' vs '#1boy #1girl'
46
+ ```