import gradio as gr
from keybert import KeyBERT
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import nltk
# Fetch the tokenizer data that nltk.sent_tokenize (used in predict) relies on.
nltk.download("punkt_tab")

# Hugging Face Hub repository that hosts the fine-tuned title-generation model;
# both the tokenizer and the model weights are loaded from it.
_TITLE_MODEL_ID = "itskavya/t5-small-finetuned-titlegen2"

device = "cpu"
max_input_length = 512  # token limit applied when encoding the abstract

kw_model = KeyBERT()
tokenizer = AutoTokenizer.from_pretrained(_TITLE_MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(_TITLE_MODEL_ID)
model = model.to(device)

def predict(text):
    """Generate a title and a keyword list for a research abstract.

    Args:
        text: The abstract as a plain string.

    Returns:
        A ``(title, keywords)`` tuple of strings. ``title`` is the first
        sentence of the model's generated output; ``keywords`` is up to five
        one- or two-word keyphrases joined by ``", "``. Both are empty
        strings when ``text`` is empty or contains only whitespace.
    """
    # A blank textbox would otherwise make the model generate from the bare
    # "summarize: " prefix and display an invented title, so skip inference.
    if not text or not text.strip():
        return "", ""

    encoded = tokenizer(
        ["summarize: " + text],
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(device)

    # Beam search over 8 candidate sequences; do_sample=True adds randomness,
    # so repeated calls on the same abstract may return different titles.
    generated = model.generate(
        **encoded, num_beams=8, do_sample=True, min_length=10, max_length=64
    )
    decoded_output = tokenizer.batch_decode(generated, skip_special_tokens=True)[0].strip()

    # Keep only the first sentence. sent_tokenize returns an empty list for
    # empty text, so fall back to the decoded string instead of indexing [0].
    sentences = nltk.sent_tokenize(decoded_output)
    predicted_title = sentences[0] if sentences else decoded_output

    # Each KeyBERT result is a (keyphrase, score) pair; only the phrase is shown.
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), top_n=5)
    formatted_keywords = ", ".join(keyword for keyword, _ in keywords)

    return predicted_title, formatted_keywords

# Build the Gradio UI: one text box for the abstract, two for the results.
abstract_box = gr.Textbox(label="Enter abstract...")
title_box = gr.Textbox(label="Title")
keywords_box = gr.Textbox(label="Keywords")

interface = gr.Interface(
    fn=predict,  # called with the abstract; returns (title, keywords)
    inputs=[abstract_box],
    outputs=[title_box, keywords_box],  # same order as predict's return tuple
    title="Automated Title and Keyword Extraction from Research Abstracts",
    description="This app uses the abstract of a scientific research article to automatically generate relevant and impactful titles and keywords!",
)

# Start the app; share=True asks Gradio for a publicly shareable link.
interface.launch(share=True)