| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForTokenClassification, AutoTokenizer | |
# UI copy for the Gradio demo.
# Fix: "Protien" -> "Protein" in the user-facing title (typo).
title = "Protein Token Classification 🧬."
description = "Finds the position of Helix and Beta strand in the Protein Sequence."
article = 'Created from finetuning ESM2_150M'
# Fine-tuned token-classification head loaded from a local checkpoint directory;
# the tokenizer comes from the base ESM2-150M hub repo (assumed to match the
# checkpoint's vocabulary — TODO confirm they were trained together).
model = AutoModelForTokenClassification.from_pretrained('./Model')
tokenizer = AutoTokenizer.from_pretrained('facebook/esm2_t30_150M_UR50D')
# Example protein sequences shown as clickable demo inputs in the UI.
example_list = ['MENFTALFGAQADPPPPPTALGFGPGKPPPPPPPPAGGGPGTAPPPTAATAPPGADKSGAGCGPFYLMRELPGSTELTGSTNLITHYNLEQAYNKFCGKKVKEKLSNFLPDLPGMIDLPGSHDNSSLRSLIEKPPILSSSFNPITGTMLAGFRLHTGPLPEQCRLMHIQPPKKKNKHKHKQSRTQDPVPPETPSDSDHKKKKKKKEEDPDRKRKKKEKKKKKNRHSPDHPGMGSSQASSSSSLR',
'MAFSDLTSRTVHLYDNWIKDADPRVEDWLLMSSPLPQTILLGFYVYFVTSLGPKLMENRKPFELKKAMITYNFFIVLFSVYMCYEFVMSGWGIGYSFRCDIVDYSRSPTALRMARTCWLYYFSKFIELLDTIFFVLRKKNSQVTFLHVFHHTIMPWTWWFGVKFAAGGLGTFHALLNTAVHVVMYSYYGLSALGPAYQKYLWWKKYLTSLQLVQFVIVAIHISQFFFMEDCKYQFPVFACIIMSYSFMFLLLFLHFWYRAYTKGQRLPKTVKNGTCKNKDN',
'MYPSNKKKKVWREEKERLLKMTLEERRKEYLRDYIPLNSILSWKEEMKGKGQNDEENTQETSQVKKSLTEKVSLYRGDITLLEVDAIVNAANASLLGGGGVDGCIHRAAGPCLLAECRNLNGCDTGHAKITCGYDLPAKYVIHTVGPIARGHINGSHKEDLANCYKSSLKLVKENNIRSVAFPCISTGIYGFPNEPAAVIALNTIKEWLAKNHHEVDRIIFCVFLEVDFKIYKKKMNEFFSVDDNNEEEEDVEMKEDSDENGPEEKQSVEEMEEQSQDADGVNTVTVPGPASEEAVEDCKDEDFAKDENITKGGEVTDHSVRDQDHPDGQENDSTKNEIKIETESQSSYMETEELSSNQEDAVIVEQPEVIPLTEDQEEKEGEKAPGEDTPRMPGKSEGSSDLENTPGPDAGAQDEAKEQRNGTK',
'MAGQHLPVPRLEGVSREQFMQHLYPQRKPLVLEGIDLGPCTSKWTVDYLSQVGGKKEVKIHVAAVAQMDFISKNFVYRTLPFDQLVQRAAEEKHKEFFVSEDEKYYLRSLGEDPRKDVADIRKQFPLLKGDIKFPEFFKEEQFFSSVFRISSPGLQLWTHYDVMDNLLIQVTGKKRVVLFSPRDAQYLYLKGTKSEVLNIDNPDLAKYPLFSKARRYECSLEAGDVLFIPALWFHNVISEEFGVGVNIFWKHLPSECYDKTDTYGNKDPTAASRAAQILDRALKTLAELPEEYRDFYARRMVLHIQDKAYSKNSE',
'MEAGPPGSARPAEPGPCLSGQRGADHTASASLQSVAGTEPGRHPQAVAAVLPAGGCGERMGVPTPKQFCPILERPLISYTLQALERVCWIKDIVVAVTGENMEVMKSIIQKYQHKRISLVEAGVTRHRSIFNGLKALAEDQINSKLSKPEVVIIHDAVRPFVEEGVLLKVVTAAKEHGAAGAIRPLVSTVVSPSADGCLDYSLERARHRASEMPQAFLFDVIYEAYQQCSDYDLEFGTECLQLALKYCCTKAKLVEGSPDLWKVTYKRDLYAAESIIKERISQEICVVMDTEEDNKHVGHLLEEVLKSELNHVKVTSEALGHAGRHLQQIILDQCYNFVCVNVTTSDFQETQKLLSMLEESSLCILYPVVVVSVHFLDFKLVPPSQKMENLMQIREFAKEVKERNILLYGLLISYPQDDQKLQESLRQGAIIIASLIKERNSGLIGQLLIA']
def count_helix(helix):
    """Collapse consecutive helix positions into (start, end) ranges.

    Args:
        helix: ascending list of integer residue positions predicted as helix.

    Returns:
        A list of ``(start, end)`` tuples, one per run of two or more
        consecutive positions. Isolated single positions are ignored
        (preserves the original behavior of reporting only runs).
    """
    final = []
    temp = []
    for x in range(1, len(helix)):
        if helix[x] == helix[x - 1] + 1:
            # Extend the current run; temp may hold duplicates, but only
            # temp[0] and temp[-1] are read when the run is flushed.
            temp.append(helix[x - 1])
            temp.append(helix[x])
        elif temp:
            final.append((temp[0], temp[-1]))
            temp = []
    # Bug fix: flush a run that extends to the end of the list — the
    # original dropped it (e.g. [1, 2, 3] returned []).
    if temp:
        final.append((temp[0], temp[-1]))
    return final
def count_strand(strand):
    """Collapse consecutive beta-strand positions into (start, end) ranges.

    Args:
        strand: ascending list of integer residue positions predicted as strand.

    Returns:
        A list of ``(start, end)`` tuples, one per run of two or more
        consecutive positions. Isolated single positions are ignored
        (preserves the original behavior of reporting only runs).
    """
    final = []
    temp = []
    for x in range(1, len(strand)):
        if strand[x] == strand[x - 1] + 1:
            # Extend the current run; temp may hold duplicates, but only
            # temp[0] and temp[-1] are read when the run is flushed.
            temp.append(strand[x - 1])
            temp.append(strand[x])
        elif temp:
            final.append((temp[0], temp[-1]))
            temp = []
    # Bug fix: flush a run that extends to the end of the list — the
    # original dropped it (e.g. [1, 2, 3] returned []).
    if temp:
        final.append((temp[0], temp[-1]))
    return final
def print_output1(helix):
    """Render helix ranges as display text.

    Converts the (start, end) runs from count_helix into a comma-separated
    string (the surrounding list brackets are stripped), or returns a
    fallback message when no helix runs were found.
    """
    ranges = count_helix(helix)
    if not ranges:
        return 'No Helix found.'
    # str([(a, b), ...]) then drop the leading '[' and trailing ']'.
    return str(ranges)[1:-1]
def print_output2(strand):
    """Render beta-strand ranges as display text.

    Converts the (start, end) runs from count_strand into a comma-separated
    string (the surrounding list brackets are stripped), or returns a
    fallback message when no strand runs were found.
    """
    ranges = count_strand(strand)
    if not ranges:
        return 'No Beta strand found.'
    # str([(a, b), ...]) then drop the leading '[' and trailing ']'.
    return str(ranges)[1:-1]
def predict(ProtienSequence):
    """Classify each token of a protein sequence and report helix/strand runs.

    Args:
        ProtienSequence: raw amino-acid sequence string. (Parameter name,
            typo included, is kept unchanged — Gradio derives the input
            label from it.)

    Returns:
        Tuple of two strings: formatted helix ranges and formatted
        beta-strand ranges (or "not found" messages).
    """
    # Renamed from `input` to avoid shadowing the builtin.
    encoded = tokenizer(ProtienSequence, return_tensors='pt')
    with torch.inference_mode():
        outputs = model(**encoded)
    # Per-token predicted class ids for the single (batch=1) sequence.
    # Mapping used below: 0 -> neither, 1 -> helix, anything else -> strand.
    # TODO confirm against the model's id2label config.
    labels = outputs.logits.argmax(dim=2)[0].numpy()
    helix = []
    strand = []
    for i, label in enumerate(labels):
        if label == 1:
            # 1-based positions; NOTE(review): the tokenizer likely prepends
            # a special token, which would shift positions — confirm offset.
            helix.append(i + 1)
        elif label != 0:
            strand.append(i + 1)
    return print_output1(helix), print_output2(strand)
# Wire the prediction function into a simple text-in / two-texts-out demo.
output_components = [
    gr.Text(label='Helix'),
    gr.Text(label='Beta Strand'),
]
iface = gr.Interface(
    fn=predict,
    inputs='text',
    outputs=output_components,
    title=title,
    description=description,
    article=article,
    examples=example_list,
)
iface.launch()