Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- Glydentify.py +501 -0
- requirements.txt +329 -0
Glydentify.py
ADDED
|
@@ -0,0 +1,501 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from IPython.display import clear_output
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import EsmForSequenceClassification, AdamW, AutoTokenizer
|
| 5 |
+
from torch.utils.data import DataLoader, TensorDataset, random_split
|
| 6 |
+
from sklearn.preprocessing import LabelEncoder
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
import numpy as np
|
| 9 |
+
import seaborn as sns
|
| 10 |
+
from sklearn.model_selection import train_test_split
|
| 11 |
+
import matplotlib.pyplot as plt
|
| 12 |
+
import pickle
|
| 13 |
+
import torch.nn.functional as F
|
| 14 |
+
import gradio as gr
|
| 15 |
+
import io
|
| 16 |
+
from PIL import Image
|
| 17 |
+
import Bio
|
| 18 |
+
from Bio import SeqIO
|
| 19 |
+
import zipfile
|
| 20 |
+
import os
|
| 21 |
+
|
| 22 |
+
# Load the family labels that the classifier head was trained on.
# NOTE: pickle.load can execute arbitrary code; only ship a trusted
# 'family_labels.pkl' alongside the app.
with open('family_labels.pkl', 'rb') as filefam:
    yfam = pickle.load(filefam)

tokenizerfam = AutoTokenizer.from_pretrained("facebook/esm2_t33_650M_UR50D")

# Re-fit the encoder on the pickled labels so class indices can be decoded.
label_encoderfam = LabelEncoder()
encoded_labelsfam = label_encoderfam.fit_transform(yfam)
labelsfam = torch.tensor(encoded_labelsfam)
# `device` is computed for reference only; the model below is pinned to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

modelfam = EsmForSequenceClassification.from_pretrained(
    "facebook/esm2_t33_650M_UR50D",
    num_labels=len(set(labelsfam.tolist())),
)
modelfam = modelfam.to('cpu')

# The checkpoint location used to be a fixed absolute path on one machine.
# It can now be overridden with GLYDENTIFY_FAMILY_WEIGHTS; the previous path
# stays as the default so existing setups keep working.
family_weights_path = os.environ.get(
    "GLYDENTIFY_FAMILY_WEIGHTS",
    "/home/aarya/Documents/paper3/model_650M.pth",
)
# map_location keeps loading working on CPU-only hosts even when the
# checkpoint was saved from a GPU.
modelfam.load_state_dict(torch.load(family_weights_path, map_location='cpu'))
modelfam.eval()

# Smoke test: run one known sequence through the family model at start-up.
x_testfam = ["""MAEVLRTLAGKPKCHALRPMILFLIMLVLVLFGYGVLSPRSLMPGSLERGFCMAVREPDH
LQRVSLPRMVYPQPKVLTPCRKDVLVVTPWLAPIVWEGTFNIDILNEQFRLQNTTIGLTV
FAIKKYVAFLKLFLETAEKHFMVGHRVHYYVFTDQPAAVPRVTLGTGRQLSVLEVRAYKR
WQDVSMRRMEMISDFCERRFLSEVDYLVCVDVDMEFRDHVGVEILTPLFGTLHPGFYGSS
REAFTYERRPQSQAYIPKDEGDFYYLGGFFGGSVQEVQRLTRACHQAMMVDQANGIEAVW
HDESHLNKYLLRHKPTKVLSPEYLWDQQLLGWPAVLRKLRFTAVPKNHQAVRNP
"""]

encoded_inputfam = tokenizerfam(x_testfam, padding=True, truncation=True, max_length=512, return_tensors="pt")
input_idsfam = encoded_inputfam["input_ids"]
attention_maskfam = encoded_inputfam["attention_mask"]

with torch.no_grad():
    outputfam = modelfam(input_idsfam, attention_mask=attention_maskfam)
logitsfam = outputfam.logits
probabilitiesfam = F.softmax(logitsfam, dim=1)
_, predicted_labelsfam = torch.max(logitsfam, dim=1)

decoded_labelsfam = label_encoderfam.inverse_transform(predicted_labelsfam.tolist())
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# Load the donor tokenizer, label encoder and fine-tuned classifier.
tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t12_35M_UR50D")

# NOTE: pickle.load can execute arbitrary code; only ship a trusted
# 'donorslabels.pkl' alongside the app. The pickle holds an already fitted
# encoder (its classes_ attribute is read below).
with open('donorslabels.pkl', 'rb') as file:
    label_encoder = pickle.load(file)

# `device` is computed for reference only; the model below is pinned to CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = EsmForSequenceClassification.from_pretrained(
    "facebook/esm2_t12_35M_UR50D",
    num_labels=len(label_encoder.classes_),
)
model = model.to('cpu')

# map_location keeps loading working on CPU-only hosts even when the
# checkpoint was saved from a GPU.
model.load_state_dict(torch.load("best_model_35M_t12_5v5.pth", map_location='cpu'))
model.eval()

# Smoke test: run one known sequence through the donor model at start-up.
x_test = ["""MAEVLRTLAGKPKCHALRPMILFLIMLVLVLFGYGVLSPRSLMPGSLERGFCMAVREPDH
LQRVSLPRMVYPQPKVLTPCRKDVLVVTPWLAPIVWEGTFNIDILNEQFRLQNTTIGLTV
FAIKKYVAFLKLFLETAEKHFMVGHRVHYYVFTDQPAAVPRVTLGTGRQLSVLEVRAYKR
WQDVSMRRMEMISDFCERRFLSEVDYLVCVDVDMEFRDHVGVEILTPLFGTLHPGFYGSS
REAFTYERRPQSQAYIPKDEGDFYYLGGFFGGSVQEVQRLTRACHQAMMVDQANGIEAVW
HDESHLNKYLLRHKPTKVLSPEYLWDQQLLGWPAVLRKLRFTAVPKNHQAVRNP
"""]

encoded_input = tokenizer(x_test, padding=True, truncation=True, max_length=512, return_tensors="pt")
input_ids = encoded_input["input_ids"]
attention_mask = encoded_input["attention_mask"]

with torch.no_grad():
    output = model(input_ids, attention_mask=attention_mask)
logits = output.logits
probabilities = F.softmax(logits, dim=1)
_, predicted_labels = torch.max(logits, dim=1)

decoded_labels = label_encoder.inverse_transform(predicted_labels.tolist())
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
# Lookup table keyed by the family labels the classifier can emit. Every
# entry shares an empty 'Alternative Name' and Fold 'A'; only the CAZy name,
# mechanism and clade vary per row. The 'More Info' link is the CAZy page of
# the family prefix of the key (the text before the first '-').
# Padded values such as 'GT2 ' and '8 ' are reproduced exactly as stored.
glycosyltransferase_db = {
    label: {
        'CAZy Name': cazy_name,
        'Alternative Name': '',
        'Fold': 'A',
        'Mechanism': mechanism,
        'Clade': clade,
        'More Info': 'http://www.cazy.org/{}.html'.format(label.split('-')[0]),
    }
    for label, cazy_name, mechanism, clade in (
        ("GT31-chsy", 'GT31', 'Inverting', '8 '),
        ("GT2-CesA2", 'GT2 ', 'Inverting', '1 '),
        ("GT43-arath", 'GT43', 'Inverting', 'N/A'),
        ("GT8-Met1", 'GT8 ', 'Retaining', '9 '),
        ("GT32-higher", 'GT32', 'Retaining', 'N/A'),
        ("GT40", 'GT40', 'Inverting', 'N/A'),
        ("GT16", 'GT16', 'Inverting', '6 '),
        ("GT27", 'GT27', 'Retaining', '5 '),
        ("GT55", 'GT55', 'Retaining', '2 '),
        ("GT8-Glycogenin", 'GT8 ', 'Retaining', '9 '),
        ("GT8-1", 'GT8 ', 'Retaining', '9 '),
        ("GT25", 'GT25', 'Inverting', '6 '),
        ("GT2-DPM_like", 'GT2 ', 'Inverting', '2 '),
        ("GT31-fringe", 'GT31', 'Inverting', '8 '),
        ("GT2-Bact_puta", 'GT2 ', 'Inverting', 'N/A'),
        ("GT84", 'GT84', 'Retaining', '1 '),
        ("GT13", 'GT13', 'Inverting', '6 '),
        ("GT43-cele", 'GT43', 'Inverting', 'N/A'),
        # NOTE(review): CAZy name 'GT92' disagrees with the key and the link
        # (both GT2); kept as stored -- confirm whether 'GT2 ' was intended.
        ("GT2-Bact_LPS1", 'GT92', 'Inverting', 'N/A'),
        ("GT2-Bact_Oant", 'GT2 ', 'Inverting', ' '),
        ("GT67", 'GT67', 'Inverting', '8 '),
        ("GT2-HAS", 'GT2 ', 'Inverting', '1 '),
        ("GT82", 'GT82', 'Inverting', '7 '),
        ("GT24", 'GT24', 'Retaining', '9 '),
        ("GT31-plant", 'GT31', 'Inverting', '8 '),
        ("GT81-Bact", 'GT81', 'Retaining', '2 '),
        ("GT2-Bact_gt25Me", 'GT2 ', 'Inverting', ' '),
        ("GT2-B3GntL", 'GT2 ', 'Inverting', '4 '),
        ("GT49", 'GT49', 'Inverting', 'N/A'),
        ("GT34", 'GT34', 'Retaining', 'N/A'),
        ("GT45", 'GT45', 'Retaining', 'N/A'),
        ("GT32-lower", 'GT32', 'Retaining', 'N/A'),
        ("GT88", 'GT88', 'Retaining', '9 '),
        ("GT21", 'GT21', 'Inverting', '1 '),
        ("GT2-DPG_synt", 'GT2 ', 'Inverting', '2 '),
        ("GT43-b3gat2", 'GT43', 'Inverting', 'N/A'),
        ("GT2-Chitin_synt", 'GT2 ', 'Inverting', '5 '),
        ("GT8-Bact", 'GT8 ', 'Retaining', 'N/A'),
        ("GT8-Met2", 'GT8 ', 'Retaining', 'N/A'),
        ("GT2-Bact_Chlor1", 'GT2 ', 'Inverting', ' '),
        ("GT54", 'GT54', 'Inverting', '6 '),
        ("GT2-Cel_bre3", 'GT2 ', 'Inverting', '1 '),
        ("GT2-Bact_Rham", 'GT2 ', 'Inverting', 'N/A'),
        ("GT6", 'GT6 ', 'Retaining', 'N/A'),
        ("GT2-Bact_puta2", 'GT2 ', 'Inverting', ' '),
        ("GT7-1", 'GT7 ', 'Inverting', '5 '),
        ("GT2-Csl", 'GT2 ', 'Inverting', '4 '),
        ("GT2-ExoU", 'GT2 ', 'Inverting', ' '),
        ("GT2-Csl2", 'GT2 ', 'Inverting', '4 '),
        ("GT64", 'GT64', 'Retaining', 'N/A'),
        ("GT2-Bact_Chlor2", 'GT2 ', 'Inverting', ' '),
        ("GT78", 'GT78', 'Retaining', '2 '),
        ("GT12", 'GT12', 'Inverting', 'N/A'),
        ("GT31-gnt", 'GT31', 'Inverting', '8 '),
        ("GT2-Bact_CHS", 'GT2 ', 'Inverting', '5 '),
        ("GT62", 'GT62', 'Retaining', '3 '),
        ("GT8-Met_Pla", 'GT8 ', 'Retaining', 'N/A'),
        ("GT15", 'GT15', 'Retaining', '8 '),
        ("GT43-b3gat1", 'GT43', 'Inverting', 'N/A'),
        ("GT31-b3glt", 'GT31', 'Inverting', '8 '),
        ("GT2-CesA1", 'GT2 ', 'Inverting', '1 '),
        ("GT60", 'GT60', 'Retaining', '5 '),
        ("GT14", 'GT14', 'Inverting', '7 '),
        ("GT2-Bact_DPM_sy", 'GT2 ', 'Inverting', '2 '),
        ("GT17", 'GT17', 'Inverting', '7 '),
        ("GT2-Bact_LPS2", 'GT2 ', 'Inverting', '3 '),
        ("GT77", 'GT77', 'Retaining', '9 '),
        ("GT2-Bact_EpsO", 'GT2 ', 'Inverting', ' '),
        ("GT43-b3gat3", 'GT43', 'Inverting', 'N/A'),
        ("GT8-Fun", 'GT8 ', 'Retaining', '9 '),
        ("GT75", 'GT75', 'Inverting', 'N/A'),
        ("GT2-Bact_GlfT", 'GT2 ', 'Inverting', 'N/A'),
    )
}
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def get_family_info(family_name, db=None):
    """Return a Markdown summary of the database entry for ``family_name``.

    Args:
        family_name: Key to look up, e.g. a predicted family label.
        db: Optional mapping of family name to a dict of field name/value
            pairs. Defaults to the module-level ``glycosyltransferase_db``.

    Returns:
        One ``**Field:** value`` line per field, each ended with a Markdown
        hard line break so the fields render on separate lines. The
        ``'More Info'`` field is rendered as clickable link(s). An unknown
        ``family_name`` yields an empty string.
    """
    if db is None:
        db = glycosyltransferase_db
    family_info = db.get(family_name, {})

    rendered_lines = []
    for key, value in family_info.items():
        if key == "More Info":
            # The stored value is a single URL string; a list/tuple of URLs is
            # accepted too. Iterating a bare string would yield characters.
            links = [value] if isinstance(value, str) else list(value)
            links = [link.strip() for link in links if link.strip()]
            text = ", ".join("[{0}]({0})".format(link) for link in links)
        else:
            # Stored values are padded for column alignment ('GT2 ', '8 ').
            text = value.strip() if isinstance(value, str) else value
        # Keys are already display-ready; str.title() would turn 'CAZy' into
        # 'Cazy'.
        rendered_lines.append("**{}:** {}".format(key, text))

    # Two trailing spaces before the newline force a Markdown line break.
    return "".join(line + "  \n" for line in rendered_lines)
|
| 195 |
+
|
| 196 |
+
def fig_to_img(fig):
    """Render a matplotlib figure to an in-memory PNG and open it with PIL.

    Args:
        fig: Figure object exposing ``savefig``.

    Returns:
        The object returned by ``Image.open`` on the PNG buffer.
    """
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format='png', bbox_inches='tight')
    # Rewind so the reader starts at the first byte of the PNG.
    png_buffer.seek(0)
    return Image.open(png_buffer)
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def process_family_sequence(protein_fasta):
    """Predict the glycosyltransferase family for one pasted sequence.

    Args:
        protein_fasta: Text holding a single protein sequence, optionally
            preceded by one ``>`` header line.

    Returns:
        A 4-tuple ``(label, image, None, info)``.

        * On success ``label`` is the predicted family, ``image`` is a PIL
          bar chart of the five most probable families and ``info`` is the
          Markdown produced by ``get_family_info`` (prefixed with a warning
          when the sequence has fewer than 100 residues).
        * When the input is rejected, ``label`` and ``image`` are ``None``
          and ``info`` carries the error message. Every path returns four
          items so callers can unpack the result uniformly.
    """
    # splitlines() + strip() tolerate Windows line endings and stray
    # indentation that would otherwise fail the residue check below.
    lines = [line.strip() for line in protein_fasta.splitlines()]

    if sum(line.startswith('>') for line in lines) > 1:
        return None, None, None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence."

    protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
    # Drop blanks typed inside the sequence itself.
    protein_sequence = ''.join(protein_sequence.split())

    # The 20 standard amino acids, accepted in either case.
    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")
    if not valid_characters.issuperset(protein_sequence):
        return None, None, None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?"

    # Residue tokens are upper-case letters; normalise so lower-case input is
    # scored the same as upper-case input.
    protein_sequence = protein_sequence.upper()

    # Use the tokenizer that belongs to the family model.
    encoded_input = tokenizerfam([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")

    with torch.no_grad():
        outputfam = modelfam(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
    logitsfam = outputfam.logits
    probabilitiesfam = F.softmax(logitsfam, dim=1)[0].tolist()
    _, predicted_labelsfam = torch.max(logitsfam, dim=1)

    decoded_labelsfam = label_encoderfam.inverse_transform(predicted_labelsfam.tolist())
    family_info = get_family_info(decoded_labelsfam[0])

    # Rank every class by probability and keep the five best.
    ranked = sorted(
        zip(label_encoderfam.classes_, probabilitiesfam),
        key=lambda pair: pair[1],
        reverse=True,
    )
    labels_top5, probabilities_top5 = zip(*ranked[:5])

    # Draw through the Axes object instead of pyplot's implicit
    # "current figure" so the chart cannot land on another figure.
    figfam, ax = plt.subplots(figsize=(10, 5))
    y_posfam = np.arange(len(labels_top5))
    ax.barh(y_posfam, [prob * 100 for prob in probabilities_top5], align='center', alpha=0.5)
    ax.set_yticks(y_posfam)
    ax.set_yticklabels(labels_top5)
    ax.set_xlabel('Probability (%)')
    ax.set_title('Top 5 Family Class Probabilities')
    ax.set_xlim(0, 100)

    img = fig_to_img(figfam)
    plt.close(figfam)

    if len(protein_sequence) < 100:
        return decoded_labelsfam[0], img, None, f"**Warning:** The sequence is relatively short. Fragmentary and partial sequences may result in incorrect predictions. \n\n {family_info}"

    return decoded_labelsfam[0], img, None, family_info
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def process_single_sequence(protein_fasta): #, protein_file
    """Predict the sugar donor for one pasted sequence.

    Args:
        protein_fasta: Text holding a single protein sequence, optionally
            preceded by one ``>`` header line.

    Returns:
        A 4-tuple ``(label, image, None, info)``.

        * On success ``label`` is the predicted donor and ``image`` is a PIL
          bar chart of the three most probable donors. ``info`` is ``None``,
          or a warning string when the sequence has fewer than 100 residues.
        * When the input is rejected, ``label`` and ``image`` are ``None``
          and ``info`` carries the error message. Every path returns four
          items so callers can unpack the result uniformly.
    """
    # splitlines() + strip() tolerate Windows line endings and stray
    # indentation that would otherwise fail the residue check below.
    lines = [line.strip() for line in protein_fasta.splitlines()]

    if sum(line.startswith('>') for line in lines) > 1:
        return None, None, None, "Multiple fasta sequences detected. Please upload a fasta file with multiple sequences, otherwise only include one fasta sequence."

    protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
    # Drop blanks typed inside the sequence itself.
    protein_sequence = ''.join(protein_sequence.split())

    # The 20 standard amino acids, accepted in either case.
    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")
    if not valid_characters.issuperset(protein_sequence):
        return None, None, None, "Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?"

    # Residue tokens are upper-case letters; normalise so lower-case input is
    # scored the same as upper-case input.
    protein_sequence = protein_sequence.upper()

    encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")

    with torch.no_grad():
        output = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
    logits = output.logits
    dprobabilities = F.softmax(logits, dim=1)[0].tolist()
    _, predicted_labels = torch.max(logits, dim=1)

    decoded_labels = label_encoder.inverse_transform(predicted_labels.tolist())

    # Rank every class by probability and keep the three best.
    ranked = sorted(
        zip(label_encoder.classes_, dprobabilities),
        key=lambda pair: pair[1],
        reverse=True,
    )
    labels_top3, probabilities_top3 = zip(*ranked[:3])

    # Draw through the Axes object instead of pyplot's implicit
    # "current figure" so the chart cannot land on another figure.
    fig, ax = plt.subplots(figsize=(10, 5))
    y_pos = np.arange(len(labels_top3))
    ax.barh(y_pos, [prob * 100 for prob in probabilities_top3], align='center', alpha=0.5)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(labels_top3)
    ax.set_xlabel('Probability (%)')
    ax.set_title('Top 3 Donor Class Probabilities')
    ax.set_xlim(0, 100)

    img = fig_to_img(fig)
    plt.close(fig)

    if len(protein_sequence) < 100:
        # NOTE(review): this looks a donor label up in the family database,
        # which presumably yields an empty string -- confirm the intent.
        family_info = get_family_info(decoded_labels[0])
        return decoded_labels[0], img, None, f"**Warning:** The sequence is relatively short. Fragmentary and partial sequences may result in incorrect predictions. \n\n {family_info}"

    return decoded_labels[0], img, None, None
|
| 322 |
+
|
| 323 |
+
def process_sequence_file(protein_file): # added progress parameter that is displayed in gradio #, progress=gr.Progress()
    """Predict donors for every record of an uploaded FASTA file.

    For record number N (1-based) the function writes
    ``results/result_N.txt`` and, for valid sequences,
    ``results/result_N.png``; the directory is then zipped.

    Args:
        protein_file: Upload object whose ``name`` attribute is the path of
            the FASTA file to parse.

    Returns:
        ``'predicted_results.zip'`` on success, or the exception text as a
        string when the file cannot be parsed.
    """
    import shutil  # local import: only needed to reset the results folder

    try:
        records = list(SeqIO.parse(protein_file.name, "fasta"))
    except Exception as e:
        # NOTE(review): the caller receives the error text where it otherwise
        # gets a file path -- confirm the UI handles that.
        return str(e)

    # Start from an empty folder so files left by an earlier upload are not
    # bundled into this archive.
    shutil.rmtree('results', ignore_errors=True)
    os.makedirs('results', exist_ok=True)

    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")

    for idx, record in enumerate(records, start=1):
        protein_sequence = str(record.seq)
        report_path = f'results/result_{idx}.txt'

        if not valid_characters.issuperset(protein_sequence):
            with open(report_path, 'w', encoding='utf-8') as report:
                report.write("Invalid protein sequence. It contains characters that are not one of the 20 standard amino acids. Does your sequence contain gaps?")
            continue

        label, img, _, info = process_single_sequence(protein_sequence)
        img.save(f'results/result_{idx}.png')

        report_text = f'Predicted Donor: {label}'
        # `info` is None unless there is a warning; avoid writing "None".
        if info:
            report_text += f'\n\n{info}'
        with open(report_path, 'w', encoding='utf-8') as report:
            report.write(report_text)

        # progress(idx/total) # Update the progress bar

    # Create a zip file w/ results -- To Do: Figure out how to improve compression for large files
    with zipfile.ZipFile('predicted_results.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk('results/'):
            for filename in files:
                zipf.write(os.path.join(root, filename))

    return 'predicted_results.zip'
|
| 357 |
+
|
| 358 |
+
# Function to mask a residue at a particular position
|
| 359 |
+
def mask_residue(sequence, position):
    """Return ``sequence`` with the item at ``position`` replaced by 'X'."""
    before = sequence[:position]
    after = sequence[position+1:]
    return before + 'X' + after
|
| 361 |
+
|
| 362 |
+
def generate_heatmap(protein_fasta):
    """Build an occlusion heatmap for the donor prediction of one sequence.

    The sequence is split into consecutive groups of ten residues. Each
    group in turn is replaced by 'X', the donor model is re-run, and the
    absolute change of every class probability against the unmasked
    prediction is recorded as that group's importance.

    Args:
        protein_fasta: Text holding a single protein sequence, optionally
            preceded by one ``>`` header line.

    Returns:
        A PIL image of the heatmap (rows: residue groups, columns: donor
        classes), or ``None`` when the input is empty, holds several FASTA
        records or contains characters outside the 20 standard amino acids.
        The result is always usable as a single image value.
    """
    # splitlines() + strip() tolerate Windows line endings and stray
    # indentation that would otherwise fail the residue check below.
    lines = [line.strip() for line in protein_fasta.splitlines()]

    if sum(line.startswith('>') for line in lines) > 1:
        return None

    protein_sequence = ''.join(line for line in lines if not line.startswith('>'))
    protein_sequence = ''.join(protein_sequence.split())

    # The 20 standard amino acids, accepted in either case.
    valid_characters = set("ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy")
    if not protein_sequence or not valid_characters.issuperset(protein_sequence):
        return None

    # Residue tokens are upper-case letters; normalise so lower-case input is
    # scored the same as upper-case input.
    protein_sequence = protein_sequence.upper()

    # Tokenize and predict for the original sequence.
    encoded_input = tokenizer([protein_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
    with torch.no_grad():
        original_output = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
    original_probabilities = F.softmax(original_output.logits, dim=1).cpu().numpy()[0]

    # Number of residues masked together.
    group_size = 10
    sequence_length = len(protein_sequence)
    group_starts = range(0, sequence_length, group_size)
    num_groups = len(group_starts)

    # One row per residue group, one column per class.
    importance_scores = np.zeros((num_groups, len(original_probabilities)))
    row_labels = []

    for group_index, start in enumerate(group_starts):
        stop = min(start + group_size, sequence_length)
        row_labels.append(f"{start}-{stop - 1}")

        # Mask the residues at positions [start, stop).
        masked_sequence = protein_sequence[:start] + 'X' * (stop - start) + protein_sequence[stop:]

        # The tokenizer truncates at max_length, so masking beyond that point
        # leaves the model input unchanged.
        encoded_input = tokenizer([masked_sequence], padding=True, truncation=True, max_length=512, return_tensors="pt")
        with torch.no_grad():
            masked_output = model(encoded_input["input_ids"], attention_mask=encoded_input["attention_mask"])
        masked_probabilities = F.softmax(masked_output.logits, dim=1).cpu().numpy()[0]

        # The change in probabilities is the importance score of the group.
        importance_scores[group_index, :] = np.abs(original_probabilities - masked_probabilities)

        progress = (group_index + 1) / num_groups * 100
        print(f"Progress: {progress:.2f}%")

    figmap, ax = plt.subplots(figsize=(20, 20))
    sns.heatmap(importance_scores, annot=True, cmap="coolwarm", xticklabels=label_encoder.classes_, yticklabels=row_labels, ax=ax)
    ax.set_xlabel("Predicted Labels")
    ax.set_ylabel("Residue Position Groups")

    img = fig_to_img(figmap)
    # Release the figure so repeated requests do not accumulate open figures.
    plt.close(figmap)

    return img
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def main_function_single(sequence):
    """Run every single-sequence step and bundle the results for the UI.

    Calls ``process_family_sequence``, ``process_single_sequence`` and
    ``generate_heatmap`` on the same input, in that order.

    Args:
        sequence: The protein sequence entered by the user.

    Returns:
        A 6-tuple ``(family_label, family_img, family_info, donor_label,
        donor_img, figmap)``. The order matches the ``outputs`` list given
        to the "Predict" button's click handler.
    """
    # The family helper must yield exactly four values; the third is unused.
    family_result = process_family_sequence(sequence)
    family_label, family_img, _unused, family_info = family_result

    # Only the first two donor values are needed; any extras are dropped.
    donor_result = process_single_sequence(sequence)
    donor_label, donor_img, *_extras = donor_result

    figmap = generate_heatmap(sequence)

    return (
        family_label,
        family_img,
        family_info,
        donor_label,
        donor_img,
        figmap,
    )
|
| 430 |
+
|
| 431 |
+
def main_function_upload(protein_file):
    """Handle an uploaded file by delegating to ``process_sequence_file``.

    Args:
        protein_file: The value Gradio passes from the upload component.

    Returns:
        Whatever ``process_sequence_file`` returns for that file.
    """
    # NOTE: progress reporting via gr.Progress() is not wired in here.
    predictions = process_sequence_file(protein_file)
    return predictions
|
| 433 |
+
|
| 434 |
+
# Output image components are created before the Blocks layout and are placed
# into it later with .render().
prediction_imagefam = gr.outputs.Image(type='pil', label="Family prediction graph")
prediction_imagedonor = gr.outputs.Image(type='pil', label="Donor prediction graph")
prediction_explain = gr.outputs.Image(type='pil', label="Donor prediction explaination")


# NOTE(review): the nesting of the layout below was inferred from the order of
# the statements -- confirm it against the rendered page.
with gr.Blocks() as app:
    gr.Markdown("# Glydentify")

    with gr.Tab("Single Sequence Prediction"):
        with gr.Row().style(equal_height=True):
            with gr.Column():
                sequence = gr.inputs.Textbox(
                    lines=15,
                    placeholder='Enter Protein Sequence Here...',
                    label="Protein Sequence",
                )
            with gr.Column():
                with gr.Accordion("Example:"):
                    # Raw string: the leading "\>" must reach Markdown as a
                    # literal backslash + ">" and "\>" is not a valid escape
                    # sequence in a normal string literal.
                    gr.Markdown(r"""
                    \>Q9LTZ9|GALS2_ARATH Galactan beta-1,4-galactosyltransferase GALS2
                    MAKERDQNTKDKNLLICFLWNFSAELKLALMALLVLCTLATLLPFLPSSFSISASELRFC
                    ISRIAVNSTSVNFTTVVEKPVLDNAVKLTEKPVLDNGVTKQPLTEEKVLNNGVIKRTFTG
                    YGWAAYNFVLMNAYRGGVNTFAVIGLSSKPLHVYSHPTYRCEWIPLNQSDNRILTDGTKI
                    LTDWGYGRVYTTVVVNCTFPSNTVINPKNTGGTLLLHATTGDTDRNITDSIPVLTETPNT
                    VDFALYESNLRRREKYDYLYCGSSLYGNLSPQRIREWIAYHVRFFGERSHFVLHDAGGIT
                    EEVFEVLKPWIELGRVTVHDIREQERFDGYYHNQFMVVNDCLHRYRFMAKWMFFFDVDEF
                    IYVPAKSSISSVMVSLEEYSQFTIEQMPMSSQLCYDGDGPARTYRKWGFEKLAYRDVKKV
                    PRRDRKYAVQPRNVFATGVHMSQHLQGKTYHRAEGKIRYFHYHGSISQRREPCRHLYNGT
                    RIVHENN
                    """)
                family_prediction = gr.outputs.Textbox(label="Predicted family")
                donor_prediction = gr.outputs.Textbox(label="Predicted donor")
                info_markdown = gr.Markdown()

        # Predict button
        with gr.Row().style(equal_height=True):
            with gr.Column():
                predict_button = gr.Button("Predict")
                # The outputs list must stay in the same order as the tuple
                # returned by main_function_single.
                predict_button.click(
                    main_function_single,
                    inputs=sequence,
                    outputs=[
                        family_prediction, prediction_imagefam, info_markdown,
                        donor_prediction, prediction_imagedonor, prediction_explain,
                    ],
                )

        # Family & Donor Section
        with gr.Row().style(equal_height=True):
            with gr.Column():
                with gr.Accordion("Prediction Bar Graphs:"):
                    prediction_imagefam.render()
                    prediction_imagedonor.render()

            # Explain Section
            with gr.Column():
                with gr.Accordion("Donor explanation"):
                    prediction_explain.render()

    with gr.Tab("Multiple Sequence Prediction"):
        with gr.Row().style(equal_height=True):
            with gr.Column():
                protein_file = gr.inputs.File(label="Upload FASTA file")
            with gr.Column():
                result_file = gr.outputs.File(label="Download predictions of uploaded sequences")
        with gr.Row().style(equal_height=True):
            with gr.Column():
                process_button = gr.Button("Process")
                process_button.click(main_function_upload, inputs=protein_file, outputs=[result_file])
            with gr.Column():
                clear = gr.Button("Clear")
                # Previously the handler had no outputs, so the button did
                # nothing. Returning None for each component resets both the
                # uploaded file and the downloadable result.
                clear.click(
                    lambda: (None, None),
                    inputs=None,
                    outputs=[protein_file, result_file],
                )

app.launch(show_error=True)
|
| 500 |
+
|
| 501 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: glydentify
|
| 2 |
+
channels:
|
| 3 |
+
- conda-forge
|
| 4 |
+
- bioconda
|
| 5 |
+
dependencies:
|
| 6 |
+
- _libgcc_mutex=0.1=conda_forge
|
| 7 |
+
- _openmp_mutex=4.5=2_gnu
|
| 8 |
+
- aiofiles=22.1.0=pyhd8ed1ab_0
|
| 9 |
+
- aiohttp=3.7.4.post0=py39h3811e60_1
|
| 10 |
+
- aiosqlite=0.18.0=pyhd8ed1ab_0
|
| 11 |
+
- altair=4.2.2=pyhd8ed1ab_0
|
| 12 |
+
- anyio=3.6.2=pyhd8ed1ab_0
|
| 13 |
+
- aom=3.5.0=h27087fc_0
|
| 14 |
+
- argon2-cffi=21.3.0=pyhd8ed1ab_0
|
| 15 |
+
- argon2-cffi-bindings=21.2.0=py39hb9d737c_3
|
| 16 |
+
- asttokens=2.2.1=pyhd8ed1ab_0
|
| 17 |
+
- async-timeout=3.0.1=py_1000
|
| 18 |
+
- attrs=22.2.0=pyh71513ae_0
|
| 19 |
+
- aws-c-auth=0.7.0=hf8751d9_2
|
| 20 |
+
- aws-c-cal=0.6.0=h93469e0_0
|
| 21 |
+
- aws-c-common=0.8.23=hd590300_0
|
| 22 |
+
- aws-c-compression=0.2.17=h862ab75_1
|
| 23 |
+
- aws-c-event-stream=0.3.1=h9599702_1
|
| 24 |
+
- aws-c-http=0.7.11=hbe98c3e_0
|
| 25 |
+
- aws-c-io=0.13.28=h3870b5a_0
|
| 26 |
+
- aws-c-mqtt=0.8.14=h2e270ba_2
|
| 27 |
+
- aws-c-s3=0.3.13=heb0bb06_2
|
| 28 |
+
- aws-c-sdkutils=0.1.11=h862ab75_1
|
| 29 |
+
- aws-checksums=0.1.16=h862ab75_1
|
| 30 |
+
- aws-crt-cpp=0.20.3=he9c0e7f_4
|
| 31 |
+
- aws-sdk-cpp=1.10.57=hbc2ea52_17
|
| 32 |
+
- babel=2.12.1=pyhd8ed1ab_1
|
| 33 |
+
- backcall=0.2.0=pyh9f0ad1d_0
|
| 34 |
+
- backports=1.0=pyhd8ed1ab_3
|
| 35 |
+
- backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0
|
| 36 |
+
- beautifulsoup4=4.12.2=pyha770c72_0
|
| 37 |
+
- biopython=1.81=py39h72bdee0_0
|
| 38 |
+
- bleach=6.0.0=pyhd8ed1ab_0
|
| 39 |
+
- brotli=1.0.9=h166bdaf_8
|
| 40 |
+
- brotli-bin=1.0.9=h166bdaf_8
|
| 41 |
+
- brotlipy=0.7.0=py39hb9d737c_1005
|
| 42 |
+
- bzip2=1.0.8=h7f98852_4
|
| 43 |
+
- c-ares=1.19.1=hd590300_0
|
| 44 |
+
- ca-certificates=2023.5.7=hbcca054_0
|
| 45 |
+
- certifi=2023.5.7=pyhd8ed1ab_0
|
| 46 |
+
- cffi=1.15.1=py39he91dace_3
|
| 47 |
+
- chardet=4.0.0=py39hf3d152e_3
|
| 48 |
+
- charset-normalizer=3.1.0=pyhd8ed1ab_0
|
| 49 |
+
- click=8.1.3=unix_pyhd8ed1ab_2
|
| 50 |
+
- colorama=0.4.6=pyhd8ed1ab_0
|
| 51 |
+
- comm=0.1.3=pyhd8ed1ab_0
|
| 52 |
+
- contourpy=1.0.7=py39h4b4f3f3_0
|
| 53 |
+
- cryptography=40.0.2=py39h079d5ae_0
|
| 54 |
+
- cycler=0.11.0=pyhd8ed1ab_0
|
| 55 |
+
- dataclasses=0.8=pyhc8e2a94_3
|
| 56 |
+
- datasets=2.13.1=pyhd8ed1ab_0
|
| 57 |
+
- debugpy=1.6.7=py39h227be39_0
|
| 58 |
+
- decorator=5.1.1=pyhd8ed1ab_0
|
| 59 |
+
- defusedxml=0.7.1=pyhd8ed1ab_0
|
| 60 |
+
- dill=0.3.6=pyhd8ed1ab_1
|
| 61 |
+
- entrypoints=0.4=pyhd8ed1ab_0
|
| 62 |
+
- executing=1.2.0=pyhd8ed1ab_0
|
| 63 |
+
- expat=2.5.0=hcb278e6_1
|
| 64 |
+
- fastapi=0.95.1=pyhd8ed1ab_0
|
| 65 |
+
- ffmpeg=5.1.2=gpl_h8dda1f0_106
|
| 66 |
+
- ffmpy=0.3.0=pyhb6f538c_0
|
| 67 |
+
- filelock=3.12.0=pyhd8ed1ab_0
|
| 68 |
+
- flit-core=3.8.0=pyhd8ed1ab_0
|
| 69 |
+
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
|
| 70 |
+
- font-ttf-inconsolata=3.000=h77eed37_0
|
| 71 |
+
- font-ttf-source-code-pro=2.038=h77eed37_0
|
| 72 |
+
- font-ttf-ubuntu=0.83=hab24e00_0
|
| 73 |
+
- fontconfig=2.14.2=h14ed4e7_0
|
| 74 |
+
- fonts-conda-ecosystem=1=0
|
| 75 |
+
- fonts-conda-forge=1=0
|
| 76 |
+
- fonttools=4.39.3=py39h72bdee0_0
|
| 77 |
+
- freetype=2.12.1=hca18f0e_1
|
| 78 |
+
- fsspec=2023.4.0=pyh1a96a4e_0
|
| 79 |
+
- gflags=2.2.2=he1b5a44_1004
|
| 80 |
+
- glog=0.6.0=h6f12383_0
|
| 81 |
+
- gmp=6.2.1=h58526e2_0
|
| 82 |
+
- gnutls=3.7.8=hf3e180e_0
|
| 83 |
+
- gradio=3.23.0=pyhd8ed1ab_0
|
| 84 |
+
- h11=0.14.0=pyhd8ed1ab_0
|
| 85 |
+
- h2=4.1.0=pyhd8ed1ab_0
|
| 86 |
+
- hpack=4.0.0=pyh9f0ad1d_0
|
| 87 |
+
- httpcore=0.17.0=pyhd8ed1ab_0
|
| 88 |
+
- httpx=0.24.0=pyhd8ed1ab_1
|
| 89 |
+
- huggingface_hub=0.16.4=pyhd8ed1ab_0
|
| 90 |
+
- hyperframe=6.0.1=pyhd8ed1ab_0
|
| 91 |
+
- icu=72.1=hcb278e6_0
|
| 92 |
+
- idna=3.4=pyhd8ed1ab_0
|
| 93 |
+
- importlib-metadata=6.5.0=pyha770c72_0
|
| 94 |
+
- importlib-resources=5.12.0=pyhd8ed1ab_0
|
| 95 |
+
- importlib_metadata=6.5.0=hd8ed1ab_0
|
| 96 |
+
- importlib_resources=5.12.0=pyhd8ed1ab_0
|
| 97 |
+
- ipykernel=6.22.0=pyh210e3f2_0
|
| 98 |
+
- ipython=8.12.0=pyh41d4057_0
|
| 99 |
+
- ipython_genutils=0.2.0=py_1
|
| 100 |
+
- jedi=0.18.2=pyhd8ed1ab_0
|
| 101 |
+
- jinja2=3.1.2=pyhd8ed1ab_1
|
| 102 |
+
- json5=0.9.5=pyh9f0ad1d_0
|
| 103 |
+
- jsonschema=4.17.3=pyhd8ed1ab_0
|
| 104 |
+
- jupyter_client=8.2.0=pyhd8ed1ab_0
|
| 105 |
+
- jupyter_core=5.3.0=py39hf3d152e_0
|
| 106 |
+
- jupyter_events=0.6.3=pyhd8ed1ab_0
|
| 107 |
+
- jupyter_server=2.5.0=pyhd8ed1ab_0
|
| 108 |
+
- jupyter_server_fileid=0.9.0=pyhd8ed1ab_0
|
| 109 |
+
- jupyter_server_terminals=0.4.4=pyhd8ed1ab_1
|
| 110 |
+
- jupyter_server_ydoc=0.8.0=pyhd8ed1ab_0
|
| 111 |
+
- jupyter_ydoc=0.2.3=pyhd8ed1ab_0
|
| 112 |
+
- jupyterlab=3.6.3=pyhd8ed1ab_0
|
| 113 |
+
- jupyterlab_pygments=0.2.2=pyhd8ed1ab_0
|
| 114 |
+
- jupyterlab_server=2.22.1=pyhd8ed1ab_0
|
| 115 |
+
- keyutils=1.6.1=h166bdaf_0
|
| 116 |
+
- kiwisolver=1.4.4=py39hf939315_1
|
| 117 |
+
- krb5=1.21.1=h659d440_0
|
| 118 |
+
- lame=3.100=h166bdaf_1003
|
| 119 |
+
- lcms2=2.15=haa2dc70_1
|
| 120 |
+
- ld_impl_linux-64=2.40=h41732ed_0
|
| 121 |
+
- lerc=4.0.0=h27087fc_0
|
| 122 |
+
- libabseil=20230125.3=cxx17_h59595ed_0
|
| 123 |
+
- libarrow=12.0.1=h657c46f_5_cpu
|
| 124 |
+
- libblas=3.9.0=16_linux64_openblas
|
| 125 |
+
- libbrotlicommon=1.0.9=h166bdaf_8
|
| 126 |
+
- libbrotlidec=1.0.9=h166bdaf_8
|
| 127 |
+
- libbrotlienc=1.0.9=h166bdaf_8
|
| 128 |
+
- libcblas=3.9.0=16_linux64_openblas
|
| 129 |
+
- libcrc32c=1.1.2=h9c3ff4c_0
|
| 130 |
+
- libcurl=8.2.0=hca28451_0
|
| 131 |
+
- libdeflate=1.18=h0b41bf4_0
|
| 132 |
+
- libdrm=2.4.114=h166bdaf_0
|
| 133 |
+
- libedit=3.1.20191231=he28a2e2_2
|
| 134 |
+
- libev=4.33=h516909a_1
|
| 135 |
+
- libevent=2.1.12=hf998b51_1
|
| 136 |
+
- libexpat=2.5.0=hcb278e6_1
|
| 137 |
+
- libffi=3.4.2=h7f98852_5
|
| 138 |
+
- libgcc-ng=12.2.0=h65d4601_19
|
| 139 |
+
- libgfortran-ng=12.2.0=h69a702a_19
|
| 140 |
+
- libgfortran5=12.2.0=h337968e_19
|
| 141 |
+
- libgomp=12.2.0=h65d4601_19
|
| 142 |
+
- libgoogle-cloud=2.12.0=h840a212_1
|
| 143 |
+
- libgrpc=1.56.2=h3905398_0
|
| 144 |
+
- libiconv=1.17=h166bdaf_0
|
| 145 |
+
- libidn2=2.1.0=h470a237_0
|
| 146 |
+
- libjpeg-turbo=2.1.5.1=h0b41bf4_0
|
| 147 |
+
- liblapack=3.9.0=16_linux64_openblas
|
| 148 |
+
- libnghttp2=1.52.0=h61bc06f_0
|
| 149 |
+
- libnsl=2.0.0=h7f98852_0
|
| 150 |
+
- libnuma=2.0.16=h0b41bf4_1
|
| 151 |
+
- libopenblas=0.3.21=pthreads_h78a6416_3
|
| 152 |
+
- libopus=1.3.1=h7f98852_1
|
| 153 |
+
- libpciaccess=0.17=h166bdaf_0
|
| 154 |
+
- libpng=1.6.39=h753d276_0
|
| 155 |
+
- libprotobuf=4.23.3=hd1fb520_0
|
| 156 |
+
- libsodium=1.0.18=h36c2ea0_1
|
| 157 |
+
- libsqlite=3.40.0=h753d276_0
|
| 158 |
+
- libssh2=1.11.0=h0841786_0
|
| 159 |
+
- libstdcxx-ng=12.2.0=h46fd767_19
|
| 160 |
+
- libtasn1=4.19.0=h166bdaf_0
|
| 161 |
+
- libthrift=0.18.1=h8fd135c_2
|
| 162 |
+
- libtiff=4.5.0=ha587672_6
|
| 163 |
+
- libutf8proc=2.8.0=h166bdaf_0
|
| 164 |
+
- libuuid=2.38.1=h0b41bf4_0
|
| 165 |
+
- libva=2.18.0=h0b41bf4_0
|
| 166 |
+
- libvpx=1.11.0=h9c3ff4c_3
|
| 167 |
+
- libwebp-base=1.3.0=h0b41bf4_0
|
| 168 |
+
- libxcb=1.13=h7f98852_1004
|
| 169 |
+
- libxml2=2.10.4=hfdac1af_0
|
| 170 |
+
- libzlib=1.2.13=h166bdaf_4
|
| 171 |
+
- linkify-it-py=2.0.0=pyhd8ed1ab_0
|
| 172 |
+
- lz4-c=1.9.4=hcb278e6_0
|
| 173 |
+
- markdown-it-py=2.2.0=pyhd8ed1ab_0
|
| 174 |
+
- markupsafe=2.1.2=py39h72bdee0_0
|
| 175 |
+
- matplotlib-base=3.7.1=py39he190548_0
|
| 176 |
+
- matplotlib-inline=0.1.6=pyhd8ed1ab_0
|
| 177 |
+
- mdit-py-plugins=0.3.3=pyhd8ed1ab_0
|
| 178 |
+
- mdurl=0.1.0=pyhd8ed1ab_0
|
| 179 |
+
- mistune=2.0.5=pyhd8ed1ab_0
|
| 180 |
+
- multidict=6.0.4=py39h72bdee0_0
|
| 181 |
+
- multiprocess=0.70.14=py39hb9d737c_3
|
| 182 |
+
- munkres=1.1.4=pyh9f0ad1d_0
|
| 183 |
+
- nbclassic=0.5.5=pyhb4ecaf3_1
|
| 184 |
+
- nbclient=0.7.3=pyhd8ed1ab_0
|
| 185 |
+
- nbconvert=7.3.1=pyhd8ed1ab_0
|
| 186 |
+
- nbconvert-core=7.3.1=pyhd8ed1ab_0
|
| 187 |
+
- nbconvert-pandoc=7.3.1=pyhd8ed1ab_0
|
| 188 |
+
- nbformat=5.8.0=pyhd8ed1ab_0
|
| 189 |
+
- ncurses=6.3=h27087fc_1
|
| 190 |
+
- nest-asyncio=1.5.6=pyhd8ed1ab_0
|
| 191 |
+
- nettle=3.8.1=hc379101_1
|
| 192 |
+
- notebook=6.5.4=pyha770c72_0
|
| 193 |
+
- notebook-shim=0.2.2=pyhd8ed1ab_0
|
| 194 |
+
- numpy=1.24.2=py39h7360e5f_0
|
| 195 |
+
- openh264=2.3.1=hcb278e6_2
|
| 196 |
+
- openjpeg=2.5.0=hfec8fc6_2
|
| 197 |
+
- openssl=3.1.1=hd590300_1
|
| 198 |
+
- orc=1.9.0=h385abfd_1
|
| 199 |
+
- orjson=3.8.10=py39hd8b4457_0
|
| 200 |
+
- p11-kit=0.24.1=hc5aa10d_0
|
| 201 |
+
- packaging=23.1=pyhd8ed1ab_0
|
| 202 |
+
- pandas=2.0.0=py39h2ad29b5_0
|
| 203 |
+
- pandoc=2.19.2=h32600fe_2
|
| 204 |
+
- pandocfilters=1.5.0=pyhd8ed1ab_0
|
| 205 |
+
- parso=0.8.3=pyhd8ed1ab_0
|
| 206 |
+
- patsy=0.5.3=pyhd8ed1ab_0
|
| 207 |
+
- pexpect=4.8.0=pyh1a96a4e_2
|
| 208 |
+
- pickleshare=0.7.5=py_1003
|
| 209 |
+
- pillow=9.5.0=py39h7207d5c_0
|
| 210 |
+
- pip=23.1=pyhd8ed1ab_0
|
| 211 |
+
- pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0
|
| 212 |
+
- platformdirs=3.2.0=pyhd8ed1ab_0
|
| 213 |
+
- pooch=1.7.0=pyha770c72_3
|
| 214 |
+
- prometheus_client=0.16.0=pyhd8ed1ab_0
|
| 215 |
+
- prompt-toolkit=3.0.38=pyha770c72_0
|
| 216 |
+
- prompt_toolkit=3.0.38=hd8ed1ab_0
|
| 217 |
+
- psutil=5.9.5=py39h72bdee0_0
|
| 218 |
+
- pthread-stubs=0.4=h36c2ea0_1001
|
| 219 |
+
- ptyprocess=0.7.0=pyhd3deb0d_0
|
| 220 |
+
- pure_eval=0.2.2=pyhd8ed1ab_0
|
| 221 |
+
- pyarrow=12.0.1=py39hfbd5978_5_cpu
|
| 222 |
+
- pycparser=2.21=pyhd8ed1ab_0
|
| 223 |
+
- pydantic=1.10.7=py39h72bdee0_0
|
| 224 |
+
- pydub=0.25.1=pyhd8ed1ab_0
|
| 225 |
+
- pygments=2.15.1=pyhd8ed1ab_0
|
| 226 |
+
- pyopenssl=23.1.1=pyhd8ed1ab_0
|
| 227 |
+
- pyparsing=3.0.9=pyhd8ed1ab_0
|
| 228 |
+
- pyrsistent=0.19.3=py39h72bdee0_0
|
| 229 |
+
- pysocks=1.7.1=pyha2e5f31_6
|
| 230 |
+
- python=3.9.16=h2782a2a_0_cpython
|
| 231 |
+
- python-dateutil=2.8.2=pyhd8ed1ab_0
|
| 232 |
+
- python-fastjsonschema=2.16.3=pyhd8ed1ab_0
|
| 233 |
+
- python-json-logger=2.0.7=pyhd8ed1ab_0
|
| 234 |
+
- python-multipart=0.0.6=pyhd8ed1ab_0
|
| 235 |
+
- python-tzdata=2023.3=pyhd8ed1ab_0
|
| 236 |
+
- python-xxhash=3.2.0=py39h72bdee0_0
|
| 237 |
+
- python_abi=3.9=3_cp39
|
| 238 |
+
- pytz=2023.3=pyhd8ed1ab_0
|
| 239 |
+
- pyyaml=6.0=py39hb9d737c_5
|
| 240 |
+
- pyzmq=25.0.2=py39h0be026e_0
|
| 241 |
+
- rdma-core=28.9=h59595ed_1
|
| 242 |
+
- re2=2023.03.02=h8c504da_0
|
| 243 |
+
- readline=8.2=h8228510_1
|
| 244 |
+
- regex=2023.6.3=py39hd1e30aa_0
|
| 245 |
+
- requests=2.28.2=pyhd8ed1ab_1
|
| 246 |
+
- responses=0.18.0=pyhd8ed1ab_0
|
| 247 |
+
- rfc3339-validator=0.1.4=pyhd8ed1ab_0
|
| 248 |
+
- rfc3986-validator=0.1.1=pyh9f0ad1d_0
|
| 249 |
+
- s2n=1.3.46=h06160fa_0
|
| 250 |
+
- sacremoses=0.0.53=pyhd8ed1ab_0
|
| 251 |
+
- safetensors=0.3.1=py39h9fdd4d6_0
|
| 252 |
+
- scipy=1.10.1=py39he83f1e1_0
|
| 253 |
+
- seaborn=0.12.2=hd8ed1ab_0
|
| 254 |
+
- seaborn-base=0.12.2=pyhd8ed1ab_0
|
| 255 |
+
- semantic_version=2.10.0=pyhd8ed1ab_0
|
| 256 |
+
- send2trash=1.8.0=pyhd8ed1ab_0
|
| 257 |
+
- setuptools=67.6.1=pyhd8ed1ab_0
|
| 258 |
+
- six=1.16.0=pyh6c4a22f_0
|
| 259 |
+
- snappy=1.1.10=h9fff704_0
|
| 260 |
+
- sniffio=1.3.0=pyhd8ed1ab_0
|
| 261 |
+
- soupsieve=2.3.2.post1=pyhd8ed1ab_0
|
| 262 |
+
- stack_data=0.6.2=pyhd8ed1ab_0
|
| 263 |
+
- starlette=0.26.1=pyhd8ed1ab_0
|
| 264 |
+
- statsmodels=0.13.5=py39h2ae25f5_2
|
| 265 |
+
- svt-av1=1.4.1=hcb278e6_0
|
| 266 |
+
- terminado=0.17.1=pyh41d4057_0
|
| 267 |
+
- tinycss2=1.2.1=pyhd8ed1ab_0
|
| 268 |
+
- tk=8.6.12=h27826a3_0
|
| 269 |
+
- tokenizers=0.13.3=py39h585fa2d_0
|
| 270 |
+
- tomli=2.0.1=pyhd8ed1ab_0
|
| 271 |
+
- toolz=0.12.0=pyhd8ed1ab_0
|
| 272 |
+
- tornado=6.3=py39h72bdee0_0
|
| 273 |
+
- tqdm=4.65.0=pyhd8ed1ab_1
|
| 274 |
+
- traitlets=5.9.0=pyhd8ed1ab_0
|
| 275 |
+
- transformers=4.31.0
|
| 276 |
+
- typing-extensions=4.5.0=hd8ed1ab_0
|
| 277 |
+
- typing_extensions=4.5.0=pyha770c72_0
|
| 278 |
+
- tzdata=2023c=h71feb2d_0
|
| 279 |
+
- uc-micro-py=1.0.1=pyhd8ed1ab_0
|
| 280 |
+
- ucx=1.14.1=hf587318_2
|
| 281 |
+
- unicodedata2=15.0.0=py39hb9d737c_0
|
| 282 |
+
- urllib3=1.26.15=pyhd8ed1ab_0
|
| 283 |
+
- uvicorn=0.21.1=py39hf3d152e_0
|
| 284 |
+
- wcwidth=0.2.6=pyhd8ed1ab_0
|
| 285 |
+
- webencodings=0.5.1=py_1
|
| 286 |
+
- websocket-client=1.5.1=pyhd8ed1ab_0
|
| 287 |
+
- websockets=11.0.2=py39h72bdee0_0
|
| 288 |
+
- wheel=0.40.0=pyhd8ed1ab_0
|
| 289 |
+
- wordcloud=1.8.2.2=py39hb9d737c_1
|
| 290 |
+
- x264=1!164.3095=h166bdaf_2
|
| 291 |
+
- x265=3.5=h924138e_3
|
| 292 |
+
- xorg-fixesproto=5.0=h7f98852_1002
|
| 293 |
+
- xorg-kbproto=1.0.7=h7f98852_1002
|
| 294 |
+
- xorg-libx11=1.8.4=h0b41bf4_0
|
| 295 |
+
- xorg-libxau=1.0.9=h7f98852_0
|
| 296 |
+
- xorg-libxdmcp=1.1.3=h7f98852_0
|
| 297 |
+
- xorg-libxext=1.3.4=h0b41bf4_2
|
| 298 |
+
- xorg-libxfixes=5.0.3=h7f98852_1004
|
| 299 |
+
- xorg-xextproto=7.3.0=h0b41bf4_1003
|
| 300 |
+
- xorg-xproto=7.0.31=h7f98852_1007
|
| 301 |
+
- xxhash=0.8.1=h0b41bf4_0
|
| 302 |
+
- xz=5.2.6=h166bdaf_0
|
| 303 |
+
- y-py=0.5.9=py39h50f1755_0
|
| 304 |
+
- yaml=0.2.5=h7f98852_2
|
| 305 |
+
- yarl=1.8.2=py39hb9d737c_0
|
| 306 |
+
- ypy-websocket=0.8.2=pyhd8ed1ab_0
|
| 307 |
+
- zeromq=4.3.4=h9c3ff4c_1
|
| 308 |
+
- zipp=3.15.0=pyhd8ed1ab_0
|
| 309 |
+
- zstd=1.5.2=h3eb15da_6
|
| 310 |
+
- pip:
|
| 311 |
+
- cmake==3.25.0
|
| 312 |
+
- cssselect2==0.7.0
|
| 313 |
+
- glypy==1.0.8
|
| 314 |
+
- hjson==3.1.0
|
| 315 |
+
- joblib==1.3.1
|
| 316 |
+
- lit==15.0.7
|
| 317 |
+
- lxml==4.9.2
|
| 318 |
+
- mpmath==1.2.1
|
| 319 |
+
- networkx==3.0
|
| 320 |
+
- pmw-py3==2.1
|
| 321 |
+
- preppy==4.2.1
|
| 322 |
+
- scikit-learn==1.3.0
|
| 323 |
+
- svglib==1.4.1
|
| 324 |
+
- sympy==1.11.1
|
| 325 |
+
- threadpoolctl==3.2.0
|
| 326 |
+
- torch==2.0.1
|
| 327 |
+
- torchaudio==2.0.2
|
| 328 |
+
- torchvision==0.15.2
|
| 329 |
+
- triton==2.0.0
|