Spaces:
No application file
No application file
Delete BioOracle_V14
Browse files- BioOracle_V14/app.py +0 -247
- BioOracle_V14/requirements.txt +0 -8
- BioOracle_V14/special_tokens_map.json +0 -7
- BioOracle_V14/tokenizer_config.json +0 -53
- BioOracle_V14/v14_weights.bin +0 -3
- BioOracle_V14/vocab.txt +0 -33
BioOracle_V14/app.py
DELETED
|
@@ -1,247 +0,0 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import torch
|
| 3 |
-
import numpy as np
|
| 4 |
-
import pandas as pd
|
| 5 |
-
from torch import nn
|
| 6 |
-
from transformers import AutoTokenizer, AutoModel
|
| 7 |
-
from peft import get_peft_model, LoraConfig, TaskType
|
| 8 |
-
import os
|
| 9 |
-
import plotly.express as px
|
| 10 |
-
from sklearn.preprocessing import MinMaxScaler
|
| 11 |
-
|
| 12 |
-
# ================= Configuration =================
# Streamlit page setup.
st.set_page_config(
    page_title="巨子智筛 BioOracle V14",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Path configuration: by default the weight file is expected in the current directory.
MODEL_DIR = "."
BASE_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"

# Output labels. The order must be exactly the same as during training.
LABELS = [
    'anti_acne',
    'anti_aging',
    'anti_inflammatory',
    'anti_oxidant',
    'repair',
    'whitening',
    'delivery',
    'negative',
]
|
| 26 |
-
|
| 27 |
-
# ================= 1. 核心组件复刻 (必须与训练代码一致) =================
|
| 28 |
-
# Per-residue hydropathy values keyed by one-letter amino-acid code.
# NOTE(review): the numbers look like the Kyte-Doolittle scale — confirm against the training code.
AA_PROPS = {
    'A': 1.8, 'R': -4.5, 'N': -3.5, 'D': -3.5, 'C': 2.5,
    'Q': -3.5, 'E': -3.5, 'G': -0.4, 'H': -3.2, 'I': 4.5,
    'L': 3.8, 'K': -3.9, 'M': 1.9, 'F': 2.8, 'P': -1.6,
    'S': -0.8, 'T': -0.7, 'W': -0.9, 'Y': -1.3, 'V': 4.2,
}

# Per-residue charge contribution; residues that are not listed count as 0.
AA_CHARGE = {'R': 1, 'K': 1, 'H': 0.1, 'D': -1, 'E': -1}


def compute_biophysics(seq):
    """Return the five hand-crafted biophysical features of a peptide sequence.

    The features, in order, are:

    1. mean hydropathy over all residues (``AA_PROPS``),
    2. net charge, i.e. the sum of ``AA_CHARGE`` over all residues,
    3. a simplified normalized molecular weight, ``len(seq) * 110 / 1000``,
    4. the hydropathy of the first residue,
    5. the charge of the last residue.

    Residues missing from a lookup table contribute 0.

    Args:
        seq: Peptide sequence as a string of one-letter amino-acid codes.

    Returns:
        A ``float32`` NumPy array of shape ``(5,)``. An empty sequence yields
        all zeros; this is returned as an array (not a list) so that callers
        can rely on array methods such as ``reshape`` for every input.
    """
    length = len(seq)
    if length == 0:
        return np.zeros(5, dtype=np.float32)

    hydro = sum(AA_PROPS.get(aa, 0) for aa in seq) / length
    charge = sum(AA_CHARGE.get(aa, 0) for aa in seq)
    weight = length * 110 / 1000.0  # simplified, normalized molecular weight
    n_term = AA_PROPS.get(seq[0], 0)
    c_term = AA_CHARGE.get(seq[-1], 0)
    return np.array([hydro, charge, weight, n_term, c_term], dtype=np.float32)
|
| 40 |
-
|
| 41 |
-
# V14 模型架构定义
|
| 42 |
-
class AdaptiveFusionModel(nn.Module):
    """Blend logits from an encoder head and a feature head with one learned gate.

    Two classifiers produce logits of size ``num_labels``: one from the
    encoder's first-token embedding and one from the extra feature vector.
    They are mixed as ``alpha * esm + (1 - alpha) * feat`` where
    ``alpha = sigmoid(gate_weight)``.

    The attribute names (``esm``, ``esm_classifier``, ``feature_classifier``,
    ``gate_weight``) and the layer order inside each ``nn.Sequential``
    determine the ``state_dict`` keys, so they must not change if existing
    checkpoints are to keep loading.
    """

    def __init__(self, base_model, num_labels, feature_dim=5,
                 feature_hidden_dim=64, dropout=0.1, gate_init=1.38):
        """Build both classifier heads around ``base_model``.

        Args:
            base_model: Encoder exposing ``config.hidden_size`` and returning
                an object with ``last_hidden_state`` when called.
            num_labels: Number of output logits per sample.
            feature_dim: Length of the extra feature vector.
            feature_hidden_dim: Hidden width of the feature head. The default
                (64) is the original hard-coded value.
            dropout: Dropout probability used in both heads. The default
                (0.1) is the original hard-coded value.
            gate_init: Initial raw value of the gate parameter before the
                sigmoid. The default (1.38) is the original hard-coded value.
        """
        super().__init__()
        self.esm = base_model
        self.num_labels = num_labels
        hidden_size = base_model.config.hidden_size

        self.esm_classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_labels),
        )
        self.feature_classifier = nn.Sequential(
            nn.Linear(feature_dim, feature_hidden_dim),
            nn.BatchNorm1d(feature_hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(feature_hidden_dim, num_labels),
        )
        self.gate_weight = nn.Parameter(torch.tensor([gate_init]))

    def forward(self, input_ids, attention_mask=None, extra_features=None, **kwargs):
        """Compute the fused logits.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Optional attention mask forwarded to the encoder.
            extra_features: Optional feature tensor for the feature head.
                # assumes shape (batch, feature_dim) — TODO confirm against the training code
            **kwargs: Additional keyword arguments forwarded to the encoder.

        Returns:
            A tuple ``(logits, alpha)``. ``alpha`` is the sigmoid of the gate
            parameter, or ``None`` when ``extra_features`` is not given and
            only the encoder head is used.
        """
        outputs = self.esm(input_ids=input_ids, attention_mask=attention_mask, **kwargs)
        # Use the hidden state at position 0 as the sequence embedding.
        cls_embedding = outputs.last_hidden_state[:, 0, :]
        logits_esm = self.esm_classifier(cls_embedding)

        if extra_features is None:
            # The original author notes this path should not happen in inference.
            return logits_esm, None

        logits_feat = self.feature_classifier(extra_features)
        alpha = torch.sigmoid(self.gate_weight)
        logits = alpha * logits_esm + (1 - alpha) * logits_feat
        return logits, alpha
|
| 66 |
-
|
| 67 |
-
# ================= 2. 加载引擎 (带缓存) =================
|
| 68 |
-
@st.cache_resource
def load_engine():
    """Build the V14 model, load its fine-tuned weights and return the inference bundle.

    The function loads the tokenizer and base encoder named by
    ``BASE_MODEL_NAME``, wraps the encoder with a LoRA adapter, builds an
    ``AdaptiveFusionModel`` on top, and loads ``v14_weights.bin`` from
    ``MODEL_DIR``. The result is cached by ``st.cache_resource``.

    Returns:
        A tuple ``(model, tokenizer, device)`` with the model moved to
        ``device`` and switched to eval mode.

    If the weight file is missing or any step raises, an error is shown with
    ``st.error`` and the script run is halted with ``st.stop()``.
    """
    # Prefer the GPU when one is available, otherwise fall back to the CPU.
    device_str = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_str)
    print(f"正在使用设备: {device_str} 加载模型...")

    try:
        # A. Load the tokenizer.
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

        # B. Load the base model and apply LoRA.
        # Per the original author's note, the first run downloads the ESM-2
        # 150M weights (about 600MB), so network access is required.
        base_model = AutoModel.from_pretrained(BASE_MODEL_NAME)
        peft_config = LoraConfig(
            task_type=TaskType.FEATURE_EXTRACTION,
            r=32,
            lora_alpha=64,
            lora_dropout=0.1,
            target_modules=["query", "key", "value", "dense"],
        )
        base_model = get_peft_model(base_model, peft_config)

        # C. Build the V14 fusion architecture.
        model = AdaptiveFusionModel(base_model, num_labels=len(LABELS))

        # D. Load the trained V14 weights.
        weights_path = os.path.join(MODEL_DIR, "v14_weights.bin")
        if not os.path.exists(weights_path):
            st.error("❌ 严重错误:找不到权重文件 `v14_weights.bin`!\n请确认你已解压 zip 文件,并且 `app.py` 和 `.bin` 文件在同一个文件夹内。")
            st.stop()

        # map_location lets weights trained on a GPU load on a CPU-only machine.
        # weights_only=True restricts unpickling to tensors and plain
        # containers, so a tampered weight file cannot execute arbitrary code.
        # NOTE(review): assumes the file is a plain state_dict of tensors — confirm.
        state_dict = torch.load(weights_path, map_location=device, weights_only=True)
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
        print("✅ 模型加载完成!")
        return model, tokenizer, device

    except Exception as e:
        # Top-level boundary: surface any loading failure in the UI and halt.
        st.error(f"模型加载失败: {e}")
        st.stop()
|
| 109 |
-
|
| 110 |
-
# ================= 3. 可视化辅助函数 =================
|
| 111 |
-
def plot_radar_chart(seq, feats):
    """Draw a radar chart of the five biophysical features of a peptide.

    The features are min-max scaled with preset empirical bounds and then
    clipped to [0, 1]. The scaling is for display only and does not affect
    the model prediction.

    Args:
        seq: Peptide sequence, used only in the chart title.
        feats: The five raw feature values in the order: mean hydropathy,
            net charge, estimated molecular weight, N-terminal hydropathy,
            C-terminal charge. Any array-like of length 5 is accepted.

    Returns:
        The Plotly figure produced by ``px.line_polar``.
    """
    categories = ['平均疏水性', '净电荷', '估算分子量', 'N端疏水性', 'C端电荷']

    # Preset empirical minimum / maximum of each feature, used for scaling.
    min_vals = np.array([-4.5, -3, 0.1, -4.5, -1], dtype=np.float64)
    max_vals = np.array([4.5, 3, 1.0, 4.5, 1], dtype=np.float64)

    values = np.asarray(feats, dtype=np.float64).reshape(-1)
    # Raw values can fall outside the preset bounds (for example the weight
    # feature passes 1.0 for peptides longer than 9 residues). Clip so every
    # point stays inside the fixed [0, 1] radial axis instead of being drawn
    # beyond it.
    normalized_feats = np.clip((values - min_vals) / (max_vals - min_vals), 0.0, 1.0)

    df = pd.DataFrame({"r": normalized_feats, "theta": categories})
    fig = px.line_polar(
        df,
        r='r',
        theta='theta',
        line_close=True,
        title=f"肽 {seq} 的生物物理指纹 (Biophysical Fingerprint)",
        range_r=[0, 1],
        markers=True,
    )
    fig.update_traces(fill='toself', line_color='#00CC96')
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
        showlegend=False,
    )
    return fig
|
| 134 |
-
|
| 135 |
-
# ================= 4. 网页 UI 主体 =================
|
| 136 |
-
|
| 137 |
-
# Load the model (a spinner is shown while loading).
with st.spinner('正在启动 V14 生物计算引擎 (首次加载可能需要1分钟)...'):
    model, tokenizer, device = load_engine()

# Sidebar information.
with st.sidebar:
    st.image("https://img.icons8.com/fluency/96/dna-helix.png", width=60)
    st.title("BioOracle V14")
    st.markdown("---")
    if model is not None:
        # Current gate value: the share given to the ESM head; the rest goes
        # to the biophysical-feature head.
        gate_val = torch.sigmoid(model.gate_weight).item()
        esm_weight = gate_val
        feat_weight = 1 - gate_val

        st.header("🧠 模型大脑状态")
        # Two progress bars give a visual comparison of the two weights.
        st.write(f"ESM-2 深度语义 (AI直觉): **{esm_weight:.1%}**")
        st.progress(esm_weight)
        st.write(f"生物物理特征 (理化规则): **{feat_weight:.1%}**")
        st.progress(feat_weight)

        st.info(
            "\n"
            "**自适应融合机制 (Adaptive Fusion)**:\n"
            "模型自动学会了主要依赖 ESM-2 大模型的深度理解,同时使用物理化学规则作为辅助校验,以防止死记硬背。\n"
        )
    st.markdown("---")
    st.markdown("Design for Giant Biogene Internship Project.")

# Main page.
st.write("# 🧬 巨子智筛:AI活性肽发现系统")
st.caption("Powered by ESM-2 150M Parameter Model & Biophysics Guided Learning")

# Input area.
col1, col2 = st.columns([3, 1])
with col1:
    seq_input = st.text_input(
        "请输入待筛选的肽序列 (支持单字母缩写,如 GHK)",
        value="GHK",
        help="输入氨基酸序列,模型将评估其潜在生物活性。",
    )
with col2:
    st.write("")  # spacer
    st.write("")
    run_button = st.button("🚀 开始演算 (Analyze)", type="primary", use_container_width=True)

if run_button and seq_input:
    seq = seq_input.strip().upper()
    # Basic validation: only the 20 standard one-letter amino-acid codes are accepted.
    valid_aa = set("ACDEFGHIKLMNPQRSTVWY")
    if not seq or not set(seq).issubset(valid_aa):
        st.error("请输入有效的氨基酸序列 (仅限20种标准氨基酸单字母缩写)。")
    else:
        # --- Prediction ---
        with st.spinner(f'正在分析序列 {seq} 的构象与活性...'):
            # 1. Prepare the inputs.
            inputs = tokenizer(seq, return_tensors="pt", padding="max_length", max_length=128).to(device)
            raw_feats = compute_biophysics(seq)
            # Build the (1, 5) feature batch directly from the array; wrapping
            # an ndarray in a Python list before torch.tensor() is slow.
            feats_tensor = torch.as_tensor(raw_feats, dtype=torch.float).unsqueeze(0).to(device)

            # 2. Run the model.
            with torch.no_grad():
                logits, _ = model(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask'],
                    extra_features=feats_tensor,
                )
            probs = torch.sigmoid(logits).cpu().numpy()[0]

        # --- Results ---
        st.divider()

        # Layout: radar chart on the left, prediction on the right.
        res_col1, res_col2 = st.columns([2, 3])

        with res_col1:
            st.subheader("🔬 生物物理指纹可视化")
            radar_fig = plot_radar_chart(seq, raw_feats)
            st.plotly_chart(radar_fig, use_container_width=True)
            st.caption("展示了该肽段在5个关键物理化学维度上的特征分布。不同功能的肽通常具有不同的指纹形状。")

        with res_col2:
            st.subheader("🎯 活性评估结论")

            # Rank the labels by confidence.
            df_res = pd.DataFrame({"功效标签": LABELS, "置信度": probs})
            df_res = df_res.sort_values(by="置信度", ascending=False).reset_index(drop=True)

            top_label = df_res.iloc[0]['功效标签']
            top_score = df_res.iloc[0]['置信度']

            # Verdict card.
            if top_label == 'negative':
                # 'negative' is one of the model's own output labels. When it
                # ranks first, a high score means "confidently inactive", so
                # it must not fall into the success branch below.
                st.error("### ❌ 疑似无效序列 (负样本)")
                st.metric(label="负样本置信度", value=f"{top_score:.2%}")
                st.write("模型判断该序列主要表现为负样本特征,建议剔除。")
            elif top_score > 0.8:
                st.success("### ✅ 高潜力活性肽")
                st.metric(label="主要预测功效", value=top_label, delta=f"置信度: {top_score:.2%}")
                st.write("模型强烈建议将此序列纳入后续湿实验验证流程。")
            elif top_score > 0.3:
                st.warning("### ⚠️ 中等潜力 / 需进一步改造")
                st.metric(label="主要预测功效", value=top_label, delta=f"置信度: {top_score:.2%}", delta_color="off")
                st.write("该序列可能具有一定活性,或是已知活性肽的突变体。建议结合结构生物学分析。")
            else:
                st.error("### ❌ 疑似无效序列 (负样本)")
                st.metric(label="最高置信度", value=f"{top_score:.2%}", delta="未达到活性阈值", delta_color="inverse")
                st.write("模型判断该序列主要表现为负样本特征,建议剔除。")

            st.divider()
            st.write("**Top 3 可能性分布:**")
            for _, row in df_res.head(3).iterrows():
                st.write(f"{row['功效标签']}")
                st.progress(float(row['置信度']))

        # Collapsible table with the full prediction data.
        with st.expander("查看完整预测数据表"):
            st.dataframe(df_res.style.format({"置信度": "{:.4%}"}), use_container_width=True)

# Footer.
st.divider()
st.caption("注:本系统基于 ESM-2 150M 大模型微调,预测结果仅供科研参考,实际功效需经实验验证。")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BioOracle_V14/requirements.txt
DELETED
|
@@ -1,8 +0,0 @@
|
|
| 1 |
-
torch
|
| 2 |
-
transformers
|
| 3 |
-
peft
|
| 4 |
-
streamlit
|
| 5 |
-
pandas
|
| 6 |
-
numpy
|
| 7 |
-
plotly
|
| 8 |
-
scikit-learn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BioOracle_V14/special_tokens_map.json
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cls_token": "<cls>",
|
| 3 |
-
"eos_token": "<eos>",
|
| 4 |
-
"mask_token": "<mask>",
|
| 5 |
-
"pad_token": "<pad>",
|
| 6 |
-
"unk_token": "<unk>"
|
| 7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BioOracle_V14/tokenizer_config.json
DELETED
|
@@ -1,53 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"added_tokens_decoder": {
|
| 3 |
-
"0": {
|
| 4 |
-
"content": "<cls>",
|
| 5 |
-
"lstrip": false,
|
| 6 |
-
"normalized": false,
|
| 7 |
-
"rstrip": false,
|
| 8 |
-
"single_word": false,
|
| 9 |
-
"special": true
|
| 10 |
-
},
|
| 11 |
-
"1": {
|
| 12 |
-
"content": "<pad>",
|
| 13 |
-
"lstrip": false,
|
| 14 |
-
"normalized": false,
|
| 15 |
-
"rstrip": false,
|
| 16 |
-
"single_word": false,
|
| 17 |
-
"special": true
|
| 18 |
-
},
|
| 19 |
-
"2": {
|
| 20 |
-
"content": "<eos>",
|
| 21 |
-
"lstrip": false,
|
| 22 |
-
"normalized": false,
|
| 23 |
-
"rstrip": false,
|
| 24 |
-
"single_word": false,
|
| 25 |
-
"special": true
|
| 26 |
-
},
|
| 27 |
-
"3": {
|
| 28 |
-
"content": "<unk>",
|
| 29 |
-
"lstrip": false,
|
| 30 |
-
"normalized": false,
|
| 31 |
-
"rstrip": false,
|
| 32 |
-
"single_word": false,
|
| 33 |
-
"special": true
|
| 34 |
-
},
|
| 35 |
-
"32": {
|
| 36 |
-
"content": "<mask>",
|
| 37 |
-
"lstrip": false,
|
| 38 |
-
"normalized": false,
|
| 39 |
-
"rstrip": false,
|
| 40 |
-
"single_word": false,
|
| 41 |
-
"special": true
|
| 42 |
-
}
|
| 43 |
-
},
|
| 44 |
-
"clean_up_tokenization_spaces": false,
|
| 45 |
-
"cls_token": "<cls>",
|
| 46 |
-
"eos_token": "<eos>",
|
| 47 |
-
"extra_special_tokens": {},
|
| 48 |
-
"mask_token": "<mask>",
|
| 49 |
-
"model_max_length": 1000000000000000019884624838656,
|
| 50 |
-
"pad_token": "<pad>",
|
| 51 |
-
"tokenizer_class": "EsmTokenizer",
|
| 52 |
-
"unk_token": "<unk>"
|
| 53 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
BioOracle_V14/v14_weights.bin
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c81b5a33d66a61483c771a635f88e586ff534d9380cb4c3eb52487151fc2cd2b
|
| 3 |
-
size 637522573
|
|
|
|
|
|
|
|
|
|
|
|
BioOracle_V14/vocab.txt
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
<cls>
|
| 2 |
-
<pad>
|
| 3 |
-
<eos>
|
| 4 |
-
<unk>
|
| 5 |
-
L
|
| 6 |
-
A
|
| 7 |
-
G
|
| 8 |
-
V
|
| 9 |
-
S
|
| 10 |
-
E
|
| 11 |
-
R
|
| 12 |
-
T
|
| 13 |
-
I
|
| 14 |
-
D
|
| 15 |
-
P
|
| 16 |
-
K
|
| 17 |
-
Q
|
| 18 |
-
N
|
| 19 |
-
F
|
| 20 |
-
Y
|
| 21 |
-
M
|
| 22 |
-
H
|
| 23 |
-
W
|
| 24 |
-
C
|
| 25 |
-
X
|
| 26 |
-
B
|
| 27 |
-
U
|
| 28 |
-
Z
|
| 29 |
-
O
|
| 30 |
-
.
|
| 31 |
-
-
|
| 32 |
-
<null_1>
|
| 33 |
-
<mask>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|