Finalize Bertose IAR resolver release names
Browse files
README.md
CHANGED
|
@@ -10,29 +10,29 @@ tags:
|
|
| 10 |
- pytorch
|
| 11 |
---
|
| 12 |
|
| 13 |
-
# Bertose IAR
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
This repository contains the contrastive Bertose checkpoint used to score ambiguous WURCS BPE tokens and support iterative ambiguity resolution.
|
| 18 |
|
| 19 |
## Files
|
| 20 |
|
| 21 |
-
- `checkpoints/
|
| 22 |
- `vocab/bpe_vocabulary.json` - WURCS BPE vocabulary.
|
| 23 |
-
- `vocab/bpe_ambiguity_tokens.json` - ambiguous
|
| 24 |
-
- `src/
|
| 25 |
-
- `src/
|
| 26 |
- `src/wurcs_bpe_tokenizer.py` - WURCS BPE tokenizer.
|
| 27 |
|
| 28 |
-
##
|
| 29 |
|
| 30 |
-
|
| 31 |
|
| 32 |
## Output
|
| 33 |
|
| 34 |
Token-level ambiguity-resolution predictions with confidence scores. The companion notebook writes both summary and detail CSVs for batch runs.
|
| 35 |
|
| 36 |
-
##
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
|
|
|
|
| 10 |
- pytorch
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Bertose IAR Resolver
|
| 14 |
|
| 15 |
+
This repository contains the contrastively refined Bertose checkpoint used for iterative ambiguity resolution (IAR) over ambiguous WURCS BPE tokens.
|
|
|
|
|
|
|
| 16 |
|
| 17 |
## Files
|
| 18 |
|
| 19 |
+
- `checkpoints/bertose_iar_resolver.pt` - Bertose IAR checkpoint.
|
| 20 |
- `vocab/bpe_vocabulary.json` - WURCS BPE vocabulary.
|
| 21 |
+
- `vocab/bpe_ambiguity_tokens.json` - ambiguous-token map used by the resolver.
|
| 22 |
+
- `src/bertose_model.py` - Bertose model definition.
|
| 23 |
+
- `src/bertose_layers.py` - Transformer layers used by Bertose.
|
| 24 |
- `src/wurcs_bpe_tokenizer.py` - WURCS BPE tokenizer.
|
| 25 |
|
| 26 |
+
## Input
|
| 27 |
|
| 28 |
+
Provide one WURCS glycan string or a CSV batch with a WURCS column. The resolver is intended for glycans that already contain uncertainty markers in WURCS form.
|
| 29 |
|
| 30 |
## Output
|
| 31 |
|
| 32 |
Token-level ambiguity-resolution predictions with confidence scores. The companion notebook writes both summary and detail CSVs for batch runs.
|
| 33 |
|
| 34 |
+
## Scope
|
| 35 |
+
|
| 36 |
+
The resolver provides model-backed token updates and confidence values for ambiguous positions. It does not claim to reconstruct a final canonical WURCS string by itself, and it does not perform IUPAC-condensed to WURCS conversion.
|
| 37 |
|
| 38 |
+
The final license and citation text should be set before public release.
|
SHA256SUMS
CHANGED
|
@@ -1,34 +1,10 @@
|
|
| 1 |
-
684888c0ebb17f374298b65ee2807526c066094c701bcc7ebbe1c1095f494fc1 ./.cache/huggingface/.gitignore
|
| 2 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/.gitattributes.lock
|
| 3 |
-
3098e38608a2c2375ac1f78d4c4f52680796f4ff9c0dbaad6b4f0b110fbc7fc3 ./.cache/huggingface/upload/.gitattributes.metadata
|
| 4 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/README.md.lock
|
| 5 |
-
ecc75cccadd48cf2cc8d22daec846b6a760f492162ca145c4cfef3536dafcc2a ./.cache/huggingface/upload/README.md.metadata
|
| 6 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/SHA256SUMS.lock
|
| 7 |
-
aa2c2e921401dba265bdd190a662861cffd8ff05eaf6ae45a96a25385bd6c5e4 ./.cache/huggingface/upload/SHA256SUMS.metadata
|
| 8 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/checkpoints/best_v51_contrastive_model.pt.lock
|
| 9 |
-
0bc5904fe02b6a64df35829729c29d40f0c0a795d586b10d844fbee91e6fa0e7 ./.cache/huggingface/upload/checkpoints/best_v51_contrastive_model.pt.metadata
|
| 10 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/config.json.lock
|
| 11 |
-
9370200adedd2172ffd8459528e7fd47c5913bf9e791f5b731b0e16121ca3ebf ./.cache/huggingface/upload/config.json.metadata
|
| 12 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/requirements.txt.lock
|
| 13 |
-
fef169fb7e8af9c14c21240bb9034cd567bd18dc327ab39423d68ba3b2ee413a ./.cache/huggingface/upload/requirements.txt.metadata
|
| 14 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/src/multimodal_glycan_bert_v3.py.lock
|
| 15 |
-
65dcbe6e66d8bba618e4d22209bd2e83b73b5de767b892c1bbd43db1c9326f42 ./.cache/huggingface/upload/src/multimodal_glycan_bert_v3.py.metadata
|
| 16 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/src/wurcs_bpe_tokenizer.py.lock
|
| 17 |
-
28ca0e31a94c80afc124627b62a574125270a5f269bdff012fd36b465578dc82 ./.cache/huggingface/upload/src/wurcs_bpe_tokenizer.py.metadata
|
| 18 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/vocab/bpe_ambiguity_tokens.json.lock
|
| 19 |
-
eb200fe67e613751c0571950e9a7f22f9f44fde0f85b73a40d392189a203f465 ./.cache/huggingface/upload/vocab/bpe_ambiguity_tokens.json.metadata
|
| 20 |
-
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 ./.cache/huggingface/upload/vocab/bpe_vocabulary.json.lock
|
| 21 |
-
c00560217b399adfb341aacc38053299c7d4b33b4229e89e68275cd454bb7f5b ./.cache/huggingface/upload/vocab/bpe_vocabulary.json.metadata
|
| 22 |
622368f62c23e97e9137c277eaadcc93ee3901cbb420b591422bb1c2e19689a5 ./.gitattributes
|
| 23 |
-
|
| 24 |
-
ae468f4e8c06dc0c3848138a474dc43249aa6d14dfd0df8f58d68fcaad371152 ./checkpoints/best_v51_contrastive_model.pt
|
| 25 |
-
|
| 26 |
6a56e6f73b8f874470ecde6e538f3f5029ae23aa6c10559817d1c2a8b59b7c0f ./requirements.txt
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
62259d1fe3d8736e57cadf8ce5a8bf24a7b73368d4d653c2e0d56ac94b94fe76 ./src/__pycache__/wurcs_bpe_tokenizer.cpython-312.pyc
|
| 30 |
-
b69f14c9976951325e3a0a4e8107a16126e67d410e966650f513f1f538a732bb ./src/glycan_bert.py
|
| 31 |
-
0d9ce16bf90242f38621d64cd974ea5679bff4c2013bea8d7bffe1b8dd120794 ./src/multimodal_glycan_bert_v3.py
|
| 32 |
0bc54399362945601bcfd403441fc80968d173200dd0561f57568b2053a94839 ./src/wurcs_bpe_tokenizer.py
|
| 33 |
ae6ab1ee4f2be992099ee5766de073954c74ccb005c490179cc70418c587c5b7 ./vocab/bpe_ambiguity_tokens.json
|
| 34 |
6a572afdf53f1494ab96c896876b824ca7ea749777352606aa9f96bf270ceecc ./vocab/bpe_vocabulary.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
622368f62c23e97e9137c277eaadcc93ee3901cbb420b591422bb1c2e19689a5 ./.gitattributes
|
| 2 |
+
06de01e215e940f2633d47561e541b6574fe2c4366ffec9ef3014eef66fa6b6b ./README.md
|
| 3 |
+
ae468f4e8c06dc0c3848138a474dc43249aa6d14dfd0df8f58d68fcaad371152 ./checkpoints/bertose_iar_resolver.pt
|
| 4 |
+
99dc07803204bf4d8e9bc6f7d04e18204e9dbc6582ef3582904a80371cff09a8 ./config.json
|
| 5 |
6a56e6f73b8f874470ecde6e538f3f5029ae23aa6c10559817d1c2a8b59b7c0f ./requirements.txt
|
| 6 |
+
b69f14c9976951325e3a0a4e8107a16126e67d410e966650f513f1f538a732bb ./src/bertose_layers.py
|
| 7 |
+
f247a6c09132a61cb649acfe022b269b5b94c37a5069fcb62045f3340b96b191 ./src/bertose_model.py
|
|
|
|
|
|
|
|
|
|
| 8 |
0bc54399362945601bcfd403441fc80968d173200dd0561f57568b2053a94839 ./src/wurcs_bpe_tokenizer.py
|
| 9 |
ae6ab1ee4f2be992099ee5766de073954c74ccb005c490179cc70418c587c5b7 ./vocab/bpe_ambiguity_tokens.json
|
| 10 |
6a572afdf53f1494ab96c896876b824ca7ea749777352606aa9f96bf270ceecc ./vocab/bpe_vocabulary.json
|
checkpoints/{best_v51_contrastive_model.pt → bertose_iar_resolver.pt}
RENAMED
|
File without changes
|
config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"model_family": "Bertose",
|
| 3 |
-
"release_name": "bertose-iar-
|
| 4 |
-
"checkpoint": "checkpoints/best_v51_contrastive_model.pt",
|
| 5 |
"vocabulary": "vocab/bpe_vocabulary.json",
|
| 6 |
"ambiguity_tokens": "vocab/bpe_ambiguity_tokens.json",
|
| 7 |
"embedding_dim": 768,
|
|
|
|
| 1 |
{
|
| 2 |
"model_family": "Bertose",
|
| 3 |
+
"release_name": "bertose-iar-resolver",
|
| 4 |
+
"checkpoint": "checkpoints/bertose_iar_resolver.pt",
|
| 5 |
"vocabulary": "vocab/bpe_vocabulary.json",
|
| 6 |
"ambiguity_tokens": "vocab/bpe_ambiguity_tokens.json",
|
| 7 |
"embedding_dim": 768,
|
src/{glycan_bert.py → bertose_layers.py}
RENAMED
|
File without changes
|
src/{multimodal_glycan_bert_v3.py → bertose_model.py}
RENAMED
|
@@ -15,9 +15,9 @@ from typing import Dict, Optional, Tuple
|
|
| 15 |
import math
|
| 16 |
|
| 17 |
try:
|
| 18 |
-
from .glycan_bert import GlycanBERTConfig, GlycanBERTEmbeddings, GlycanBERTLayer
|
| 19 |
except ImportError:
|
| 20 |
-
from glycan_bert import GlycanBERTConfig, GlycanBERTEmbeddings, GlycanBERTLayer
|
| 21 |
|
| 22 |
|
| 23 |
class ConvGlycanBERTEmbeddings(nn.Module):
|
|
|
|
| 15 |
import math
|
| 16 |
|
| 17 |
try:
|
| 18 |
+
from .bertose_layers import GlycanBERTConfig, GlycanBERTEmbeddings, GlycanBERTLayer
|
| 19 |
except ImportError:
|
| 20 |
+
from bertose_layers import GlycanBERTConfig, GlycanBERTEmbeddings, GlycanBERTLayer
|
| 21 |
|
| 22 |
|
| 23 |
class ConvGlycanBERTEmbeddings(nn.Module):
|