|
|
import gradio as gr |
|
|
from keybert import KeyBERT |
|
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
import nltk |
|
|
# Fetch the Punkt sentence-tokenizer data that nltk.sent_tokenize relies on.
nltk.download('punkt_tab')

# All tensors and the seq2seq model are placed on this device.
device = "cpu"

# Keyword extractor, constructed with KeyBERT's default settings.
kw_model = KeyBERT()

# Single source of truth for the checkpoint so the tokenizer and the model
# can never be loaded from two different repositories by accident.
MODEL_CHECKPOINT = "itskavya/t5-small-finetuned-titlegen2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)
model.to(device)

# Upper bound on input tokens; the tokenizer truncates anything longer.
max_input_length = 512
|
|
|
|
|
def predict(text):
    """Generate a title and a keyword list for a research abstract.

    Args:
        text: The abstract as plain text.

    Returns:
        A ``(title, keywords)`` tuple of strings. ``title`` is the first
        sentence of the model's decoded output; ``keywords`` is a
        comma-separated string of at most five key phrases of one or two
        words each. Both are empty strings when ``text`` is ``None``,
        empty, or only whitespace.
    """
    # Nothing to work with: return early instead of letting the model invent
    # a title from the bare "summarize: " prompt (and instead of raising
    # TypeError on None).
    if not text or not text.strip():
        return "", ""

    prompt = ["summarize: " + text]
    encoded = tokenizer(
        prompt,
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    output = model.generate(
        **encoded,
        num_beams=8,
        do_sample=True,
        min_length=10,
        max_length=64,
    )
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()

    # Keep only the first sentence. If the sentence splitter returns nothing
    # (e.g. the decoded text is empty), fall back to the decoded text rather
    # than raising IndexError.
    sentences = nltk.sent_tokenize(decoded_output)
    predicted_title = sentences[0] if sentences else decoded_output

    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=5)
    # Each entry's first element is the phrase; join them for display.
    formatted_keywords = ", ".join(kw[0] for kw in keywords)

    return predicted_title, formatted_keywords
|
|
|
|
|
|
|
|
# Web UI: one input box for the abstract, two output boxes that receive the
# (title, keywords) tuple returned by predict, in that order.
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Enter abstract..."),
    ],
    outputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Keywords"),
    ],
    title="Automated Title and Keyword Extraction from Research Abstracts",
    description=(
        "This app uses the abstract of a scientific research article to "
        "automatically generate relevant and impactful titles and keywords!"
    ),
)

# Start the app; share=True asks Gradio for a public share link as well.
interface.launch(share=True)