# itskavya's picture
# Update app.py
# 4597b4d verified
import gradio as gr
from keybert import KeyBERT
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
# Fetch the NLTK "punkt_tab" resource at startup; predict() calls
# nltk.sent_tokenize on the generated text.
nltk.download("punkt_tab")

# Hugging Face Hub repository holding the fine-tuned title-generation model;
# the tokenizer and the seq2seq weights are both loaded from it.
_MODEL_REPO = "itskavya/t5-small-finetuned-titlegen2"

device = "cpu"
max_input_length = 512  # passed as max_length (with truncation) to the tokenizer

kw_model = KeyBERT()
tokenizer = AutoTokenizer.from_pretrained(_MODEL_REPO)
model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_REPO).to(device)
def predict(text):
    """Generate a title and a keyword list for a research abstract.

    Args:
        text: The abstract entered by the user. May be empty or ``None``.

    Returns:
        A ``(title, keywords)`` tuple of strings. ``title`` is the first
        sentence of the model's generated text; ``keywords`` is the
        extracted key phrases joined with ``", "``. Both are empty strings
        when ``text`` is empty, whitespace-only, or ``None``.
    """
    # Nothing to work with: without this guard the model would be prompted
    # with just the "summarize: " prefix and produce an unrelated title.
    if not text or not text.strip():
        return "", ""

    inputs = tokenizer(
        ["summarize: " + text],
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    # num_beams=8 explores 8 candidate sequences; do_sample=True introduces
    # randomness, so repeated calls may return different titles.
    output = model.generate(
        **inputs,
        num_beams=8,
        do_sample=True,
        min_length=10,
        max_length=64,
    )
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()

    # Keep only the first sentence. Fall back to the raw decoded text when the
    # tokenizer yields no sentences, instead of raising IndexError.
    sentences = nltk.sent_tokenize(decoded_output)
    predicted_title = sentences[0] if sentences else decoded_output

    # Up to 5 one- or two-word key phrases; the first element of each returned
    # entry is used as the phrase text.
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=5)
    formatted_keywords = ", ".join(kw[0] for kw in keywords)
    return predicted_title, formatted_keywords
# Build the Gradio UI: one text input for the abstract, two text outputs
# that receive the (title, keywords) tuple returned by predict().
interface = gr.Interface(
    fn=predict,
    inputs=[gr.Textbox(label="Enter abstract...")],
    outputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Keywords"),
    ],
    title="Automated Title and Keyword Extraction from Research Abstracts",
    description=(
        "This app uses the abstract of a scientific research article to "
        "automatically generate relevant and impactful titles and keywords!"
    ),
)

# Start the app; share=True asks Gradio for a shareable link.
interface.launch(share=True)