# Page residue from where this file was hosted (not part of the program):
# "mayankpuvvala's picture" / "Update app.py" / "5a481d2 verified"
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
# Checkpoint identifier handed to ``from_pretrained`` for the seq2seq weights.
model_name = "mayankpuvvala/peft_lora_t5_merged_model_pytorch_issues"

# Load everything up front. Any failure is reported on stdout and leaves
# ``model`` as None, which ``generate_response`` checks before doing any work.
try:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    # The tokenizer is taken from the stock "t5-small" checkpoint rather than
    # from ``model_name``.
    tokenizer = AutoTokenizer.from_pretrained("t5-small")  # adjust if needed
    model.eval()

    # Prefer the GPU when one is visible to torch, otherwise stay on the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    model.to(device)

    print("✅ Model loaded successfully.")
except Exception as load_error:
    print("❌ Error loading model:", load_error)
    model = None
def generate_response(prompt):
    """Generate text from the loaded seq2seq model for a user prompt.

    Args:
        prompt: Free-form text from the UI. ``None`` is treated as empty.

    Returns:
        The decoded model output on success. Otherwise a short human-readable
        message: the model failed to load, the prompt is empty or longer than
        150 whitespace-separated words, CUDA ran out of memory, or another
        error occurred during tokenization/generation.
    """
    if model is None:
        return "Model not loaded properly."

    # A missing value would otherwise raise AttributeError on ``.strip()``
    # outside the try block; handle it like an empty prompt instead.
    prompt = (prompt or "").strip()
    if not prompt:
        return "Empty prompt."
    if len(prompt.split()) > 150:
        return "Prompt too long. Limit to ~150 words."

    try:
        # The word-count guard above does not bound the token count (a single
        # whitespace-free string can be arbitrarily long), so also truncate to
        # the tokenizer's maximum input length.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
        ).to(device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                do_sample=True,
                temperature=0.90,
                top_k=50,  # sample only from the 50 most likely tokens
                top_p=0.95,  # ...further restricted to 95% cumulative probability
                eos_token_id=tokenizer.eos_token_id,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except torch.cuda.OutOfMemoryError:
        # Release cached blocks so a later, smaller request can still succeed.
        torch.cuda.empty_cache()
        return "CUDA out of memory."
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Unexpected error: {e}"
# Gradio interface: a single text box in, a single text box out, backed by
# ``generate_response``.
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="PyTorch Issue Generator",
    description="Enter a prompt describing a PyTorch issue scenario or mainly title.",
)

# Start the web server; ``share=True`` asks Gradio for a public share link.
interface.launch(share=True)