Spaces:
Runtime error
Runtime error
| import tensorflow as tf | |
| import gradio as gr | |
| from webScraping import parseURL | |
| import spacy | |
| ''' | |
| Preprocessing Abstract data from given url | |
| ''' | |
# One-hot depths for the positional features; they have to match the depths
# used when the model was trained so the input shapes line up.
LINE_NUMBER_DEPTH = 15
TOTAL_LINES_DEPTH = 20

# spaCy pipeline, created on first use and then shared by every call.
_NLP = None


def _get_sentence_splitter():
    """Return the spaCy pipeline, loading it only the first time it is needed."""
    global _NLP
    if _NLP is None:
        nlp = spacy.load('en_core_web_lg')
        # The sentencizer splits a paragraph into individual sentences.
        nlp.add_pipe("sentencizer")
        _NLP = nlp
    return _NLP


def preprocess_abstract_data(url):
    """Turn the abstract found at *url* into the four inputs the model takes.

    Args:
        url: Address handed to ``parseURL``; presumably a PubMed article
            page whose abstract text is returned (confirm in webScraping).

    Returns:
        A 4-tuple ``(sentences, characters, line_numbers_one_hot,
        total_lines_one_hot)``:

        * sentences -- list of sentence strings, in document order.
        * characters -- the same sentences with a space between every
          character.
        * line_numbers_one_hot -- one-hot of each sentence's 0-based index,
          depth ``LINE_NUMBER_DEPTH``.
        * total_lines_one_hot -- one-hot of ``len(sentences) - 1`` repeated
          for every sentence, depth ``TOTAL_LINES_DEPTH``.
    """
    abstract_paragraph = parseURL(url)
    doc = _get_sentence_splitter()(abstract_paragraph)
    abstract_sentences = [str(sentence) for sentence in doc.sents]

    sentence_count = len(abstract_sentences)
    # "Total lines" is the index of the last sentence (count - 1), because
    # line numbers start at 0.
    last_line_index = sentence_count - 1

    # Explicit int32 indices: an empty Python list would otherwise become a
    # float tensor, which tf.one_hot rejects.
    line_numbers = tf.constant(list(range(sentence_count)), dtype=tf.int32)
    total_lines = tf.constant([last_line_index] * sentence_count, dtype=tf.int32)

    # Indices at or beyond the depth encode as an all-zero row.
    abstract_line_numbers_one_hot = tf.one_hot(line_numbers, depth=LINE_NUMBER_DEPTH)
    abstract_total_lines_one_hot = tf.one_hot(total_lines, depth=TOTAL_LINES_DEPTH)

    # Character-level input: "abc" -> "a b c".
    abstract_characters = [' '.join(sentence) for sentence in abstract_sentences]

    return (
        abstract_sentences,
        abstract_characters,
        abstract_line_numbers_one_hot,
        abstract_total_lines_one_hot,
    )
| ''' | |
| Making Prediction | |
| ''' | |
# Label order must follow the class indices the model was trained to emit.
_CLASS_NAMES = ('BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS')

# Order in which the sections are handed back to the Gradio output boxes.
_OUTPUT_ORDER = ('OBJECTIVE', 'BACKGROUND', 'METHODS', 'RESULTS', 'CONCLUSIONS')

_SKIMLIT_MODEL_PATH = 'tribid_token_char_lineNo_totalLine_embedded_model'

# Keras model, loaded on first use and then shared by every request.
_SKIMLIT_MODEL = None


def _get_skimlit_model():
    """Return the saved model, reading it from disk only the first time."""
    global _SKIMLIT_MODEL
    if _SKIMLIT_MODEL is None:
        _SKIMLIT_MODEL = tf.keras.models.load_model(_SKIMLIT_MODEL_PATH)
    return _SKIMLIT_MODEL


def make_prediction(url):
    """Classify each abstract sentence at *url* and group the text by label.

    Args:
        url: Article address, passed straight to ``preprocess_abstract_data``.

    Returns:
        A list of five strings in the order OBJECTIVE, BACKGROUND, METHODS,
        RESULTS, CONCLUSIONS. Each string holds the sentences predicted for
        that label, in document order, separated by a single space. A label
        with no sentences yields an empty string.
    """
    (abstract_sentences,
     abstract_characters,
     abstract_line_numbers_one_hot,
     abstract_total_lines_one_hot) = preprocess_abstract_data(url)

    # Nothing to classify: skip the model instead of predicting on an
    # empty batch.
    if not abstract_sentences:
        return ["" for _ in _OUTPUT_ORDER]

    skimlit_model_pred_prob = _get_skimlit_model().predict(
        x=(
            tf.constant(abstract_sentences),
            tf.constant(abstract_characters),
            abstract_line_numbers_one_hot,
            abstract_total_lines_one_hot,
        )
    )
    # Highest-probability class index per sentence, as plain integers.
    predicted_indices = tf.argmax(skimlit_model_pred_prob, axis=1).numpy()

    sentences_by_label = {label: [] for label in _CLASS_NAMES}
    for text, class_index in zip(abstract_sentences, predicted_indices):
        sentences_by_label[_CLASS_NAMES[class_index]].append(text)

    # Join with a space so consecutive sentences do not run together
    # ("...end.Next ...").
    return [" ".join(sentences_by_label[label]) for label in _OUTPUT_ORDER]
# Input: one text field where the user pastes the article URL.
inputs = gr.Textbox(
    label="URL",
    placeholder="Paste your PubMed article URL here",
    interactive=True,
)

# Outputs: one text box per section, in the same order make_prediction
# returns its strings.
outputs = [
    gr.Textbox(label=section_label)
    for section_label in ("OBJECTIVE", "BACKGROUND", "METHODS", "RESULTS", "CONCLUSIONS")
]

# Clickable sample inputs; each example is a one-element row because the
# interface has a single input component.
examples = [
    [sample_url]
    for sample_url in (
        "https://pubmed.ncbi.nlm.nih.gov/20232240/",
        "https://pubmed.ncbi.nlm.nih.gov/22244707/",
    )
]

app = gr.Interface(
    fn=make_prediction,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    theme="soft",
    title="PubMed Article Abstract Skimming Tool",
    description="Classifies abstract sentences of your PubMed article into the role they play (e.g. objective, methods, results, etc) to get the overview of literature within seconds.",
)

# Start the web UI.
app.launch()