# esp-ai-lora / app.py
# DilipKY's picture
# Update app.py
# 3e29315 verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
# Load the tokenizer and model once at import time, using 4-bit NF4
# quantization with float16 compute.
# NOTE: Code Llama base checkpoints are published as 7B/13B/34B/70B only;
# "codellama/CodeLlama-3b-hf" does not exist on the Hub and makes
# from_pretrained() fail, so the smallest real checkpoint is used instead.
model_name = "codellama/CodeLlama-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)
# Create a text generation pipeline that wraps the quantized model and its
# tokenizer; `generator` is the object the request handler calls.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
# Define the callback that the Gradio interface invokes for each request
def generate_code(instruction: str) -> str:
    """Generate a model completion for a coding instruction.

    Args:
        instruction: Task description entered by the user.

    Returns:
        The text the model produced after the ``Response:`` marker, or an
        empty string when ``instruction`` is empty, ``None`` or whitespace
        only.
    """
    # Blank input carries no task; skip the expensive model call entirely.
    if not instruction or not instruction.strip():
        return ""

    prompt = f"Instruction: {instruction}\nResponse:"
    # max_new_tokens bounds only the generated continuation. The previous
    # max_length=100 also counted the prompt tokens, so a long instruction
    # left little or no budget for the answer.
    # return_full_text=False keeps the echoed prompt out of the result so the
    # output box shows only the generated text.
    output = generator(
        prompt,
        max_new_tokens=100,
        num_return_sequences=1,
        return_full_text=False,
    )
    return output[0]["generated_text"]
# Build the web UI: one text box for the task, one for the model's answer.
task_input = gr.Textbox(label="Enter your coding task:")
code_output = gr.Textbox(label="Generated Code")
demo = gr.Interface(
    fn=generate_code,
    inputs=task_input,
    outputs=code_output,
    title="Espa AI - Code Assistant",
)
# Start serving the interface.
demo.launch()