Spaces:

itskavya
/

TitleGeneration-KeywordExtraction

Sleeping

App Files Files Community

TitleGeneration-KeywordExtraction / app.py

itskavya

Update app.py

4597b4d verified about 1 year ago

raw

history blame contribute delete

1.71 kB

	import gradio as gr
	from keybert import KeyBERT
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	import nltk
	# Fetch the "punkt_tab" NLTK resource up front; predict() calls
	# nltk.sent_tokenize, which presumably depends on it (verify for the pinned
	# NLTK version).
	nltk.download("punkt_tab")

	# Inference device and the token budget applied when tokenizing the abstract
	# (inputs longer than this are truncated in predict()).
	device = "cpu"
	max_input_length = 512

	# Keyword extractor, constructed with KeyBERT's default arguments.
	kw_model = KeyBERT()

	# Title-generation checkpoint and its tokenizer, loaded by name from the
	# Hugging Face Hub and placed on the chosen device.
	tokenizer = AutoTokenizer.from_pretrained("itskavya/t5-small-finetuned-titlegen2")
	model = AutoModelForSeq2SeqLM.from_pretrained(
	    "itskavya/t5-small-finetuned-titlegen2"
	).to(device)

	def predict(text):
	    """Generate a title and a keyword list for a research abstract.

	    Args:
	        text: The abstract as a string (the value of the Gradio textbox).

	    Returns:
	        A ``(title, keywords)`` tuple of strings. ``title`` is the first
	        sentence of the generated text; ``keywords`` is up to five KeyBERT
	        keyphrases (1-2 words each) joined by ", ". Both are empty strings
	        when ``text`` is missing, empty, or whitespace only.
	    """
	    # Nothing to work with: skip the models instead of generating a title
	    # from the bare task prefix (or failing on None).
	    if not text or not text.strip():
	        return "", ""

	    # The model is prompted with the "summarize: " task prefix.
	    prompt = ["summarize: " + text]
	    inputs = tokenizer(
	        prompt,
	        max_length=max_input_length,
	        truncation=True,
	        return_tensors="pt",
	    ).to(device)

	    # num_beams=8 keeps eight candidate sequences; do_sample=True adds
	    # randomness, so repeated calls on the same abstract may differ.
	    output = model.generate(
	        **inputs,
	        num_beams=8,
	        do_sample=True,
	        min_length=10,
	        max_length=64,
	    )
	    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()

	    # Keep only the first sentence. If the tokenizer finds no sentences
	    # (e.g. the decoded text is empty), fall back to the decoded text itself
	    # rather than raising IndexError.
	    sentences = nltk.sent_tokenize(decoded_output)
	    predicted_title = sentences[0] if sentences else decoded_output

	    # Each KeyBERT result is indexed as (phrase, score); only the phrase is shown.
	    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=5)
	    formatted_keywords = ", ".join(kw[0] for kw in keywords)

	    return predicted_title, formatted_keywords

	# Gradio UI: one textbox for the abstract in, two textboxes (title and
	# keywords) out, all wired to predict().
	abstract_box = gr.Textbox(label="Enter abstract...")
	title_box = gr.Textbox(label="Title")
	keywords_box = gr.Textbox(label="Keywords")

	interface = gr.Interface(
	    fn=predict,
	    inputs=[abstract_box],
	    outputs=[title_box, keywords_box],
	    title="Automated Title and Keyword Extraction from Research Abstracts",
	    description="This app uses the abstract of a scientific research article to automatically generate relevant and impactful titles and keywords!",
	)

	# Start serving the app.
	# NOTE(review): share=True requests a public share link; this may be
	# unnecessary when hosted on Hugging Face Spaces - confirm before removing.
	interface.launch(share=True)