# heramb04's picture
# Uploaded main app files
# 6b952ac verified
import os
import torch
import gradio as gr
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from dotenv import load_dotenv
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Identifier of the fine-tuned checkpoint handed to from_pretrained().
# The model and tokenizer must come from the same checkpoint, so both loads
# share this single constant instead of repeating the string.
model_path = "heramb04/GPT2-Azure-DevOps"

# Load the fine-tuned model and its matching tokenizer.
model = GPT2LMHeadModel.from_pretrained(model_path)
tokenizer = GPT2Tokenizer.from_pretrained(model_path)

# Fall back to the end-of-sequence token for padding when the tokenizer
# does not define a padding token of its own.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
def generate_response(prompt, max_new_tokens=100):
    """Generate sampled text from the fine-tuned model for ``prompt``.

    Args:
        prompt: Text the model is conditioned on.
        max_new_tokens: Upper bound on the number of tokens generated in
            addition to the prompt.

    Returns:
        The first generated sequence decoded to a string with special
        tokens removed, or an empty string when ``prompt`` is empty or
        ``None``.
    """
    # An empty prompt gives the model nothing to condition on; return early
    # instead of handing generate() an empty input.
    if not prompt:
        return ""

    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
        # Name the padding token explicitly so generate() does not have to
        # choose a fallback on its own.
        pad_token_id=tokenizer.pad_token_id,
        # Sampling settings: top-k and nucleus (top-p) filtering at
        # temperature 1.0.
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=1.0,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
# Build the web UI: one text input and one text output, both wired to
# generate_response.
demo = gr.Interface(
    generate_response,
    inputs="text",
    outputs="text",
    title="Fine-tuned GPT-2 Q&A",
    description="Try: what is Azure DevOps?.",
)

if __name__ == "__main__":
    # share=True asks Gradio for a public link while the app is running;
    # the server itself is bound to 0.0.0.0 on port 7860.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)