| | --- |
| | license: apache-2.0 |
| | language: |
| | - en |
| | metrics: |
| | - bleu |
| | - rouge |
| | - meteor |
| | - exact_match |
| | base_model: |
| | - QizhiPei/biot5-plus-base |
| | pipeline_tag: text-generation |
| | library_name: transformers |
| | --- |
| | # Model Card for ChemAligner-T5-Pro |
| |
|
| |
|
| | ## How to Get Started with the Model |
| |
|
| | Below is an example of how to load this model and generate outputs with it: |
| |
|
| | ```python |
| | import torch |
| | import transformers |
| | from huggingface_hub import login |
| | from transformers import AutoTokenizer |
| | from transformers.models.t5 import T5ForConditionalGeneration |
| | import torch |
| | |
| | device = "cuda" if torch.cuda.is_available() else "cpu" |
| | |
| | tokenizer = AutoTokenizer.from_pretrained("Neeze/ChemAligner-T5-Pro") |
| | model = T5ForConditionalGeneration.from_pretrained("Neeze/ChemAligner-T5-Pro").to(device) |
| | |
| | sample_caption = ( |
| | "The molecule is a energy storage and a fat storage, which impacts cardiovascular " |
| | "disease, cancer, and metabolic syndrome, and is characterized as thyroxine treatment. " |
| | "The molecule is a membrane stabilizer and inflammatory, and it impacts pancreatitis. " |
| | "The molecule is a energy source and a nutrient, impacting both obesity and atherosclerosis." |
| | ) |
| | |
| | task_input = ( |
| | f"Task: Translate description to SELFIES representation.\n" |
| | f"Input: {sample_caption}\n" |
| | f"Output:" |
| | ) |
| | |
| | inputs = tokenizer( |
| | task_input, |
| | return_tensors="pt", |
| | truncation=True, |
| | max_length=512, |
| | ).to(device) |
| | |
| | with torch.no_grad(): |
| | outputs = model.generate( |
| | **inputs, |
| | max_length=512, |
| | num_beams=4, |
| | do_sample=False, |
| | early_stopping= True, |
| | temperature=1.0, |
| | no_repeat_ngram_size=0, |
| | length_penalty=1.0, |
| | decoder_start_token_id=0, |
| | eos_token_id=1, |
| | pad_token_id=0 |
| | ) |
| | |
| | outputs = [ |
| | s.replace("<unk>", "").replace("<pad>", "").replace("</s>", "").strip() |
| | for s in tokenizer.batch_decode(outputs) |
| | ] |
| | |
| | print(*outputs) |
| | |
| | ``` |
| |
|
| | ```bib |
| | @inproceedings{Phan2026ChemAlignerT5, |
| | title = {ChemAligner-T5: A Unified Text-to-Molecule Model via Representation Alignment}, |
| | author = {Nam, Van Hai Phan and |
| | Khoa, Minh Nguyen and |
| | Phu, Nguyen Ngoc Thien and |
| | Nguyen, Doan Hieu Nguyen and |
| | Tri, Minh Pham and |
| | Duc, Dang Ngoc Minh}, |
| | booktitle = {Proceedings of the 2nd International Conference on Computational Intelligence in Engineering Science}, |
| | year = {2026}, |
| | month = apr, |
| | address = {Nha Trang, Khanh Hoa, Vietnam} |
| | } |
| | ``` |
| |
|
| |
|