melvinalves committed on
Commit
b1ecb63
·
verified ·
1 Parent(s): a4287bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -51
app.py CHANGED
@@ -59,7 +59,7 @@ mlb = joblib.load(download_file("data/mlb_597.pkl"))
59
  GO = mlb.classes_
60
 
61
  # ——————————————————— UI ——————————————————— #
62
- st.title("🔬 Predição de Funções Moleculares de Proteínas")
63
 
64
  st.markdown(
65
  """
@@ -68,59 +68,64 @@ st.markdown(
68
  unsafe_allow_html=True,
69
  )
70
 
71
- fasta_input = st.text_area("Insere a sequência FASTA:", height=200)
72
  predict_clicked = st.button("Prever GO terms")
73
 
74
- if predict_clicked:
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- # ——— 1) PRÉ-PROCESSAMENTO FASTA ———
77
- lines = fasta_input.splitlines()
78
- header = next((l for l in lines if l.startswith(">")), None)
79
- seq = "".join(l.strip() for l in lines if not l.startswith(">")).replace(" ", "").upper()
80
 
81
- if not seq:
82
- st.warning("Por favor, insere primeiro uma sequência FASTA válida.")
83
  st.stop()
84
 
85
- if header:
86
- st.markdown(f"**🧬 ID da proteína:** `{header[1:].strip()}`")
87
-
88
- # ——— 2) EMBEDDINGS ———
89
- with st.spinner("⏳ A gerar embeddings…"):
90
- emb_pb = embed_seq("Rostlab/prot_bert", seq, CHUNK_PB)
91
- emb_bfd = embed_seq("Rostlab/prot_bert_bfd", seq, CHUNK_PB)
92
- emb_esm = embed_seq("facebook/esm2_t33_650M_UR50D", seq, CHUNK_ESM)
93
-
94
- # ——— 3) PREDIÇÕES ———
95
- with st.spinner("🧠 A fazer predições…"):
96
- y_pb = mlp_pb.predict(emb_pb)
97
- y_bfd = mlp_bfd.predict(emb_bfd)
98
- y_esm = mlp_esm.predict(emb_esm)[:, :597]
99
- X = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
100
- y_ens = stacking.predict(X)
101
-
102
- # ——— 4) RESULTADOS ———
103
- def mostrar(tag, y_pred):
104
- with st.expander(tag, expanded=(tag == "Ensemble (Stacking)")):
105
- hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
106
- st.markdown(f"**GO terms com prob ≥ {THRESH}**")
107
- if hits:
108
- for go_id in hits:
109
- name, defin = GO_INFO.get(go_id, ("— sem nome —", "— sem definição —"))
110
- st.write(f"**{go_id} — {name}**")
111
- st.caption(defin)
112
- else:
113
- st.code("— nenhum —")
114
-
115
- st.markdown(f"**Top {TOP_N} GO terms mais prováveis**")
116
- for idx in np.argsort(-y_pred[0])[:TOP_N]:
117
- go_id = GO[idx]
118
- name, _ = GO_INFO.get(go_id, ("", ""))
119
- st.write(f"{go_id} — {name} : {y_pred[0][idx]:.4f}")
120
-
121
- # ——— MOSTRAR APENAS O ENSEMBLE POR DEFEITO ———
122
-
123
- # mostrar("ProtBERT (MLP)", y_pb)
124
- # mostrar("ProtBERT-BFD (MLP)", y_bfd)
125
- # mostrar("ESM-2 (MLP)", y_esm)
126
- mostrar("Ensemble (Stacking)", y_ens)
 
59
  GO = mlb.classes_
60
 
61
  # ——————————————————— UI ——————————————————— #
62
+ st.title("Predição de Funções Moleculares de Proteínas")
63
 
64
  st.markdown(
65
  """
 
68
  unsafe_allow_html=True,
69
  )
70
 
71
+ fasta_input = st.text_area("Insere uma ou mais sequências FASTA:", height=300)
72
  predict_clicked = st.button("Prever GO terms")
73
 
74
+ # ——————————————————— PARSE DE MÚLTIPLAS SEQUÊNCIAS ——————————————————— #
75
def parse_fasta_multiple(fasta_str):
    """Parse a (multi-)FASTA string into a list of ``(header, sequence)`` tuples.

    The input is read line by line: a line whose first non-blank character is
    ``>`` starts a new record and the rest of that line (stripped) becomes the
    header; every other non-empty line is appended to the current record's
    sequence with all whitespace removed and letters upper-cased.

    Compared with splitting the whole text on ``>``, this keeps headers that
    themselves contain ``>`` intact, and it accepts residues typed before any
    header line (a bare sequence) instead of mistaking the first residue line
    for a header.

    Args:
        fasta_str: Raw text, possibly containing several FASTA records.
            ``None`` is treated like an empty string.

    Returns:
        A list of ``(header, sequence)`` tuples in input order. Records with
        an empty sequence are dropped. A record with no header text gets the
        placeholder ``seq_<n>``, where ``n`` is its 1-based position in the
        returned list.
    """
    # Each raw record is a two-item list: [header_text, [sequence_chunks]].
    raw_records = []
    for raw_line in (fasta_str or "").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(">"):
            raw_records.append([line[1:].strip(), []])
            continue
        if not raw_records:
            # Residues before any header line: open an anonymous record.
            raw_records.append(["", []])
        # str.split() with no argument drops tabs as well as spaces.
        raw_records[-1][1].append("".join(line.split()))

    parsed = []
    for header, chunks in raw_records:
        seq = "".join(chunks).upper()
        if not seq:
            continue
        parsed.append((header or f"seq_{len(parsed) + 1}", seq))
    return parsed
87
 
88
def mostrar(tag, y_pred):
    """Render the predictions for one protein inside an expanded expander.

    Two lists are written: the GO terms whose score reaches ``THRESH``
    (with name and definition looked up in ``GO_INFO``), followed by the
    ``TOP_N`` highest-scoring GO terms with their scores.

    Args:
        tag: Label shown on the expander.
        y_pred: Prediction scores; only the first row (``y_pred[0]``) is
            displayed. Assumes a 2-D array with one column per entry of
            ``GO`` — TODO confirm against the stacking model's output.
    """
    with st.expander(tag, expanded=True):
        # Binarise at THRESH and map the first row back to GO identifiers.
        hits = mlb.inverse_transform((y_pred >= THRESH).astype(int))[0]
        st.markdown(f"**GO terms com prob ≥ {THRESH}**")
        if hits:
            for go_id in hits:
                name, defin = GO_INFO.get(
                    go_id, ("— sem nome —", "— sem definição —")
                )
                st.write(f"**{go_id} — {name}**")
                st.caption(defin)
        else:
            st.code("— nenhum —")

        st.markdown(f"**Top {TOP_N} GO terms mais prováveis**")
        # Negating the scores makes argsort return indices in descending order.
        for idx in np.argsort(-y_pred[0])[:TOP_N]:
            go_id = GO[idx]
            name, _ = GO_INFO.get(go_id, ("", ""))
            st.write(f"{go_id} — {name} : {y_pred[0][idx]:.4f}")


if predict_clicked:
    parsed_seqs = parse_fasta_multiple(fasta_input)

    if not parsed_seqs:
        st.warning("Não foi possível encontrar nenhuma sequência válida.")
        st.stop()

    # One embedding + prediction pass per submitted sequence.
    for header, seq in parsed_seqs:
        with st.spinner(f"A processar {header}…"):
            emb_pb = embed_seq("Rostlab/prot_bert", seq, CHUNK_PB)
            emb_bfd = embed_seq("Rostlab/prot_bert_bfd", seq, CHUNK_PB)
            emb_esm = embed_seq("facebook/esm2_t33_650M_UR50D", seq, CHUNK_ESM)

            y_pb = mlp_pb.predict(emb_pb)
            y_bfd = mlp_bfd.predict(emb_bfd)
            # Only the first 597 columns of the ESM-2 output are kept;
            # presumably this matches the number of GO classes — TODO confirm.
            y_esm = mlp_esm.predict(emb_esm)[:, :597]
            # The stacking model takes the three base predictions side by side.
            X = np.concatenate([y_pb, y_bfd, y_esm], axis=1)
            y_ens = stacking.predict(X)

        # Choose which results to show: only the ensemble is displayed by
        # default; uncomment a line below to also show a base model.
        # mostrar(f"{header} — ProtBERT (MLP)", y_pb)
        # mostrar(f"{header} — ProtBERT-BFD (MLP)", y_bfd)
        # mostrar(f"{header} — ESM-2 (MLP)", y_esm)
        mostrar(header, y_ens)