"""Gradio demo that returns an embedding summary for a protein sequence.

The script loads the ``aido_protein_rag_16b`` backbone through
``modelgenerator.tasks.Embed`` at import time, then serves a small web UI
with one text input (the amino-acid sequence) and one text output (the
embedding shape plus its first ten values).
"""

import gradio as gr
import numpy as np
import torch
from modelgenerator.tasks import Embed

# Shortest sequence (after whitespace removal) that is sent to the model.
MIN_SEQUENCE_LENGTH = 5

# Load the model. The first run downloads the large weights automatically,
# so this step can be slow.
print("Loading model...")
model = Embed.from_config({
    "model.backbone": "aido_protein_rag_16b",
}).eval()

# Raise the backbone's length limit so very long sequences are supported.
model.backbone.max_length = 12800


def predict_protein(sequence: str) -> str:
    """Embed a protein sequence and return a short textual summary.

    Args:
        sequence: Amino-acid sequence as typed or pasted by the user. Any
            whitespace (spaces, tabs, line breaks) is removed before the
            length check and before inference, so multi-line pastes work.

    Returns:
        A validation message when fewer than ``MIN_SEQUENCE_LENGTH``
        residues remain after whitespace removal; otherwise a string with
        the embedding's shape and its first ten flattened values.
    """
    # The textbox is multi-line, so pasted input often carries newlines or
    # spaces; those must neither count as residues nor reach the model.
    residues = "".join((sequence or "").split())
    if len(residues) < MIN_SEQUENCE_LENGTH:
        return "请输入有效的蛋白质序列(至少5个氨基酸)"

    # Sequence-only input. The original author notes that an 'msa'
    # (multiple sequence alignment) entry and a 'str_emb' (structure
    # embedding) entry are optional; neither is supplied here.
    data = {
        'sequences': [residues],
    }

    transformed_batch = model.transform(data)
    with torch.no_grad():
        embedding = model(transformed_batch)

    # NumPy has no bfloat16 dtype, so ``Tensor.numpy()`` fails on bfloat16
    # tensors; casting to float32 first is a no-op for float32 output.
    emb = embedding.float().cpu().numpy()

    # Report the shape and the first few values as a sample.
    return f"Embedding shape: {emb.shape}\n\n前10个值示例: {emb.flatten()[:10].tolist()}"


# Gradio interface definition.
iface = gr.Interface(
    fn=predict_protein,
    inputs=gr.Textbox(
        label="输入蛋白质序列 (e.g. ACDEFGHIKLMNPQRSTVWY)",
        lines=5,
        placeholder="请输入氨基酸序列...",
    ),
    outputs=gr.Textbox(label="模型输出 (Embedding)"),
    title="AIDO.Protein-RAG-16B Demo",
    description="输入蛋白序列,获取模型的嵌入表示。注意:16B 模型较大,首次加载需要时间。",
    examples=[["ACDEFGHIKLMNPQRSTVWY"], ["MTEITAAMVKELRESTGAGA"]],
    # NOTE(review): newer Gradio releases may prefer ``flagging_mode`` over
    # ``allow_flagging`` — confirm against the installed Gradio version.
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()