import gradio as gr
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

checkpoint = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint)  # base GPT-2 tokenizer (the fine-tuned one below is what the app uses)

# Load the fine-tuned model and tokenizer
my_model = GPT2LMHeadModel.from_pretrained("jeevana/GenerativeQnASystem")
my_tokenizer = GPT2Tokenizer.from_pretrained("jeevana/GenerativeQnASystem")


def generate_response(model, tokenizer, prompt):
    # Encode the prompt, truncating overly long inputs
    input_ids = tokenizer.encode(prompt, return_tensors="pt", truncation=True, max_length=1000)

    # Create the attention mask and pad token id
    attention_mask = torch.ones_like(input_ids)
    pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        max_new_tokens=70,
        min_new_tokens=1,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
    )

    # Decode the full sequence, then strip the echoed prompt plus a fixed
    # 9-character offset (presumably an answer marker from the fine-tuning format)
    qna = tokenizer.decode(output[0], skip_special_tokens=True)
    answer = qna[len(prompt) + 9:]
    return answer


def generative_qna(question):
    return generate_response(my_model, my_tokenizer, question)

# def generative_qna(input):
#     print(input)
#     return input

app = gr.Interface(
    fn=generative_qna,
    inputs=[gr.Textbox(label="Question", lines=3)],
    outputs=[gr.Textbox(label="Answer", lines=6)],
    title="Generative QnA System",
    description="Generative QnA with GPT2",
)

app.launch(share=True, debug=True)
# gr.load("models/jeevana/GenerativeQnASystem").launch()
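
# A minimal smoke-test sketch (assumption: you want to try the fine-tuned model
# without the Gradio UI; the sample question below is illustrative only).
# Comment out app.launch() above and uncomment these lines to print one answer.
# sample_question = "What is machine learning?"
# print(generate_response(my_model, my_tokenizer, sample_question))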