"""Gradio app that generates a title and keywords from a research abstract.

A fine-tuned T5 checkpoint produces the title; KeyBERT extracts the keywords.
"""

import gradio as gr
import nltk
from keybert import KeyBERT
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Sentence tokenizer data required by nltk.sent_tokenize in predict().
nltk.download('punkt_tab')

device = "cpu"

kw_model = KeyBERT()

# The fine-tuned checkpoint is hosted on the Hugging Face Hub and is loaded
# from there by name.
tokenizer = AutoTokenizer.from_pretrained("itskavya/t5-small-finetuned-titlegen2")
model = AutoModelForSeq2SeqLM.from_pretrained("itskavya/t5-small-finetuned-titlegen2")
model.to(device)

# Inputs longer than this many tokens are truncated by the tokenizer call.
max_input_length = 512


def predict(text):
    """Generate a title and a keyword list for an abstract.

    Args:
        text: The abstract as a plain string.

    Returns:
        A ``(title, keywords)`` tuple of strings. ``title`` is the first
        sentence of the generated text and ``keywords`` is a comma-separated
        list of up to five keyphrases. Both are empty strings when ``text``
        is ``None``, empty, or whitespace-only.
    """
    # Nothing meaningful can be generated from blank input; skip the model
    # calls rather than emitting a title for the bare "summarize: " prefix.
    if not text or not text.strip():
        return "", ""

    inputs = ["summarize: " + text]
    inputs = tokenizer(
        inputs,
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    # num_beams=8 keeps 8 candidate sequences during the search;
    # do_sample=True introduces randomness, so repeated calls may differ.
    output = model.generate(
        **inputs,
        num_beams=8,
        do_sample=True,
        min_length=10,
        max_length=64,
    )
    decoded_output = tokenizer.batch_decode(output, skip_special_tokens=True)[0]

    # Keep only the first sentence as the title. sent_tokenize returns an
    # empty list for an empty string, so fall back to the stripped text
    # instead of raising IndexError.
    stripped_output = decoded_output.strip()
    sentences = nltk.sent_tokenize(stripped_output)
    predicted_title = sentences[0] if sentences else stripped_output

    # Each extracted entry is indexed at [0] for the phrase itself.
    keywords = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        top_n=5,
    )
    formatted_keywords = ", ".join(kw[0] for kw in keywords)

    return predicted_title, formatted_keywords


# Create the Gradio interface.
interface = gr.Interface(
    fn=predict,  # function called for each prediction
    inputs=[  # inputs the user provides
        gr.Textbox(label="Enter abstract..."),
    ],
    outputs=[  # one output each for the title and the keywords
        gr.Textbox(label="Title"),
        gr.Textbox(label="Keywords"),
    ],
    title="Automated Title and Keyword Extraction from Research Abstracts",
    description="This app uses the abstract of a scientific research article to automatically generate relevant and impactful titles and keywords!"
)

# Launch the app only when run as a script, so importing this module
# (e.g. to reuse predict) does not start a server.
if __name__ == "__main__":
    interface.launch(share=True)