Spaces:
Sleeping
Sleeping
| import torch | |
| from src.snapconfig import config | |
| def generate_percolator_input(l_pep_inds, l_pep_vals, pd_dataset, spec_dataset, res_type): | |
| assert res_type == "target" or res_type == "decoy" | |
| assert len(l_pep_inds) == len(l_pep_vals) | |
| pin_charge = config.get_config(section="search", key="charge") | |
| l_global_out = [] | |
| tot_count = 0 | |
| max_snap = torch.max(l_pep_vals).item() | |
| for idx, (pep_inds_row, pep_vals_row) in enumerate(zip(l_pep_inds, l_pep_vals)): | |
| # Reminder: pep_inds_row length is one less than pep_vals_row | |
| for iidx in range(len(pep_inds_row) - 1): | |
| pep_ind = pep_inds_row[iidx] | |
| pep_val = pep_vals_row[iidx] | |
| if pep_val.item() > 0: | |
| charge = [0] * pin_charge | |
| ch_idx = min(spec_dataset.spec_charge_list[idx], 5) | |
| spec_idx = spec_dataset.spec_ids[idx] | |
| charge[ch_idx - 1] = 1 | |
| label = 1 if res_type == "target" else -1 | |
| out_row = [f"{res_type}-{tot_count}", label, spec_idx, pep_val.item()] | |
| spec_mass = spec_dataset.spec_mass_list[idx] | |
| pep_mass = pd_dataset.pep_mass_list[pep_ind.item()] | |
| out_row.append(spec_mass) | |
| out_row.append(pep_mass) | |
| # out_row.append(((pep_val - pep_vals_row[iidx + 1]).item()) / max_snap) | |
| # out_row.append(((pep_val - pep_vals_row[-1]).item()) / max_snap) | |
| out_row.append(((pep_val - pep_vals_row[-3]).item()) / max(pep_val.item(), 1.0)) | |
| out_row.append(((pep_val - pep_vals_row[iidx + 1]).item()) / max(pep_val.item(), 1.0)) | |
| out_row.extend(charge) | |
| out_row.append(spec_mass - pep_mass) | |
| out_row.append(abs(spec_mass - pep_mass)) | |
| out_pep = pd_dataset.pep_list[pep_ind.item()] | |
| out_pep_array = [] | |
| for aa in out_pep: | |
| if aa.islower(): | |
| out_pep_array.append("["+str(config.AAMass[aa])+"]") | |
| else: | |
| out_pep_array.append(aa) | |
| out_pep = "".join(out_pep_array) | |
| out_row.append((out_pep.count("K") + out_pep.count("R")) - (out_pep.count("KP") + out_pep.count("RP")) - 1) | |
| out_prot = pd_dataset.prot_list[pep_ind.item()] | |
| pep_len = sum([a.isupper() for a in out_pep]) | |
| out_row.append(pep_len) | |
| out_row.append(out_pep) | |
| out_row.append(out_prot) | |
| l_global_out.append(out_row) | |
| tot_count += 1 | |
| return l_global_out |