# Gradio app: automated title and keyword generation from research abstracts.
import gradio as gr
from keybert import KeyBERT
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
# --- One-time setup at import: tokenizer data, keyword model, and the
# --- fine-tuned T5 title-generation model. Runs network I/O on first launch.
nltk.download('punkt_tab')  # Punkt sentence-tokenizer data; used by nltk.sent_tokenize in predict()
device="cpu"  # inference device; no GPU assumed in this deployment
kw_model = KeyBERT()  # keyphrase-extraction model (default embedding backend)
tokenizer = AutoTokenizer.from_pretrained("itskavya/t5-small-finetuned-titlegen2") # HF Hub repo holding the fine-tuned checkpoint
model = AutoModelForSeq2SeqLM.from_pretrained("itskavya/t5-small-finetuned-titlegen2")
model.to(device)
max_input_length=512  # token budget for the T5 encoder; longer abstracts are truncated
def predict(text):
    """Generate a title and keyword list for a research abstract.

    Parameters
    ----------
    text : str
        The abstract to generate a title and keywords from.

    Returns
    -------
    tuple[str, str]
        ``(predicted_title, formatted_keywords)`` where the keywords are a
        comma-separated string of up to five keyphrases.
    """
    # The T5 model was fine-tuned with a "summarize:" task prefix.
    inputs = tokenizer(
        ["summarize: " + text],
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(device)
    # Beam search over 8 candidate sequences; do_sample=True injects
    # randomness, so the generated title can vary between calls.
    output = model.generate(**inputs, num_beams=8, do_sample=True, min_length=10, max_length=64)
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    # Keep only the first sentence as the title. Fall back to the raw decoded
    # text when sentence tokenization yields nothing (the original code
    # raised IndexError on an empty/whitespace-only decoding).
    sentences = nltk.sent_tokenize(decoded_output.strip())
    predicted_title = sentences[0] if sentences else decoded_output.strip()
    # Top-5 unigram/bigram keyphrases; extract_keywords returns (phrase, score) pairs.
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=5)
    formatted_keywords = ", ".join([kw[0] for kw in keywords])
    return predicted_title, formatted_keywords
# Create the Gradio interface wiring a single abstract textbox to the
# (title, keywords) outputs of predict().
interface = gr.Interface(
    fn=predict,  # callback mapping an abstract to (title, keywords)
    inputs=[
        gr.Textbox(label="Enter abstract..."),
    ],
    outputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Keywords"),
    ],
    title="Automated Title and Keyword Extraction from Research Abstracts",
    description="This app uses the abstract of a scientific research article to automatically generate relevant and impactful titles and keywords!",
)
# Launch the app; share=True additionally exposes a temporary public URL.
# (Removed a stray trailing "|" extraction artifact that made this line a
# syntax error.)
interface.launch(share=True)