# Source: Hugging Face Space by puji4ml — app.py (commit d038b75, verified)
import tensorflow as tf
import gradio as gr
from webScraping import parseURL
import spacy
def get_nlp():
    """Load and cache the spaCy pipeline (loading ``en_core_web_lg`` is
    expensive, so do it once per process instead of once per request).

    A sentencizer is added to the pipeline, responsible for splitting a
    paragraph into individual sentences exposed via ``doc.sents``.
    """
    if not hasattr(get_nlp, "_nlp"):
        nlp = spacy.load('en_core_web_lg')
        nlp.add_pipe("sentencizer")
        get_nlp._nlp = nlp
    return get_nlp._nlp


def build_sentence_features(sentences):
    """Return one feature dict per sentence.

    Each dict carries ``line_number`` (zero-based index), ``text``,
    ``total_lines`` (index of the last sentence, i.e. ``len - 1`` because
    indexing starts from 0), and ``line_position`` ("i_of_total") —
    mirroring the encoding presumably used for the training data.
    """
    total_line_in_abstract = len(sentences) - 1  # zero-based last index
    return [
        {
            'line_number': index,
            'text': sentence,
            'total_lines': total_line_in_abstract,
            'line_position': str(index) + "_of_" + str(total_line_in_abstract),
        }
        for index, sentence in enumerate(sentences)
    ]


def preprocess_abstract_data(url):
    """Fetch the abstract at *url* and build the four model inputs.

    Returns a tuple of (sentence strings, space-separated character
    strings, one-hot line numbers, one-hot total-line counts).
    """
    abstract_paragraph = parseURL(url)
    doc = get_nlp()(abstract_paragraph)
    abstract_sentences = [str(sentence) for sentence in doc.sents]
    features = build_sentence_features(abstract_sentences)
    # One-hot depths (15 and 20) must match the depths used for the
    # training data so the model accepts the right input shape.
    line_numbers = [feat['line_number'] for feat in features]
    abstract_line_numbers_one_hot = tf.one_hot(line_numbers, depth=15)
    total_lines = [feat['total_lines'] for feat in features]
    abstract_total_lines_one_hot = tf.one_hot(total_lines, depth=20)
    # list(sentence) splits a sentence into single characters; joining with
    # spaces yields the character-level input the model expects.
    abstract_characters = [' '.join(list(sentence)) for sentence in abstract_sentences]
    return (abstract_sentences, abstract_characters,
            abstract_line_numbers_one_hot, abstract_total_lines_one_hot)
# Label order must match the encoding used when the model was trained.
CLASS_NAMES = ['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']


def load_skimlit_model():
    """Load and cache the saved Keras model (deserialising it from disk is
    expensive, so do it once per process instead of once per request)."""
    if not hasattr(load_skimlit_model, "_model"):
        load_skimlit_model._model = tf.keras.models.load_model(
            'tribid_token_char_lineNo_totalLine_embedded_model')
    return load_skimlit_model._model


def group_sentences_by_label(sentences, labels):
    """Concatenate the sentences predicted for each abstract section.

    Returns the five section texts in the Gradio display order:
    OBJECTIVE, BACKGROUND, METHODS, RESULTS, CONCLUSIONS.
    """
    sections = {name: [] for name in CLASS_NAMES}
    for text, label in zip(sentences, labels):
        sections[label].append(text)
    display_order = ('OBJECTIVE', 'BACKGROUND', 'METHODS', 'RESULTS', 'CONCLUSIONS')
    # Join with a space so consecutive sentences do not run together.
    return [' '.join(sections[name]) for name in display_order]


def make_prediction(url):
    """Classify each sentence of the abstract at *url* into its role and
    return the five section texts in [OBJECTIVE, BACKGROUND, METHODS,
    RESULTS, CONCLUSIONS] order — one string per Gradio output box."""
    (abstract_sentences, abstract_characters,
     line_numbers_one_hot, total_lines_one_hot) = preprocess_abstract_data(url)
    model = load_skimlit_model()
    pred_probs = model.predict(x=(tf.constant(abstract_sentences),
                                  tf.constant(abstract_characters),
                                  line_numbers_one_hot,
                                  total_lines_one_hot))
    predictions = tf.argmax(pred_probs, axis=1)
    predicted_labels = [CLASS_NAMES[pred] for pred in predictions]
    return group_sentences_by_label(abstract_sentences, predicted_labels)
# Gradio front end: one URL textbox in, five labelled section boxes out.
# Output order mirrors the list returned by make_prediction.
app = gr.Interface(
    fn=make_prediction,
    inputs=gr.Textbox(placeholder="Paste your PubMed article URL here",
                      interactive=True, label="URL"),
    outputs=[gr.Textbox(label=section)
             for section in ("OBJECTIVE", "BACKGROUND", "METHODS",
                             "RESULTS", "CONCLUSIONS")],
    title="PubMed Article Abstract Skimming Tool",
    description="Classifies abstract sentences of your PubMed article into the role they play (e.g. objective, methods, results, etc) to get the overview of literature within seconds.",
    theme="soft",
    examples=[["https://pubmed.ncbi.nlm.nih.gov/20232240/"],
              ["https://pubmed.ncbi.nlm.nih.gov/22244707/"]],
)
app.launch()