sedaklc's picture
Upload app.py with huggingface_hub
efe6fb2 verified
raw
history blame
3.66 kB
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
# Hub ids: the base checkpoint and the LoRA adapter applied on top of it.
MODEL_ID = "codellama/CodeLlama-7b-hf"
ADAPTER_ID = "sedaklc/codellama-7b-qlora-humaneval"

print("Loading model...")

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# The tokenizer is loaded from the adapter repo (not the base model repo);
# EOS doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Quantized base model first, then the adapter weights wrapped around it.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()  # inference only

print("Model ready.")
def generate_completion(docstring: str, temperature: float, max_new_tokens: int) -> str:
    """Generate code for a function description using the module-level model.

    The description is wrapped as ``[INST] <description> [/INST]`` followed by
    a newline, tokenized (truncated to 512 tokens), and passed to
    ``model.generate``.

    Args:
        docstring: Description of the function to implement. ``None``, empty,
            or whitespace-only input short-circuits to an empty result.
        temperature: Sampling temperature. Positive values sample with
            ``top_p=0.95``; zero or negative values fall back to greedy
            decoding instead of raising inside ``generate``.
        max_new_tokens: Maximum number of tokens to generate (coerced to int).

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        sliced off), with special tokens skipped; ``""`` for empty input.
    """
    description = (docstring or "").strip()
    if not description:
        return ""

    prompt = f"[INST] {description} [/INST]\n"
    # NOTE(review): if the tokenizer truncates on the right (the usual
    # default), a prompt longer than 512 tokens loses its closing [/INST] tag.
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(model.device)

    generation_kwargs = {
        "max_new_tokens": int(max_new_tokens),
        "pad_token_id": tokenizer.eos_token_id,
    }
    # UI sliders can deliver whole values as ints (e.g. 1 instead of 1.0), and
    # transformers requires a strictly positive *float* temperature when sampling.
    temperature = float(temperature)
    if temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.95)
    else:
        # Non-positive temperature is invalid for sampling; decode greedily.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        output = model.generate(**inputs, **generation_kwargs)

    # Keep only the tokens produced after the prompt.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = output[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Example rows for the demo: [docstring, temperature, max new tokens].
# Every example uses the same sampling settings (0.2, 256).
EXAMPLES = [
    [prompt, 0.2, 256]
    for prompt in (
        "Return n-th Fibonacci number.",
        "Filter an input list of strings only for ones that start with a given prefix.",
        (
            "Return True if list elements are monotonically increasing or decreasing."
            "\n>>> monotonic([1, 2, 4, 20])\nTrue"
            "\n>>> monotonic([1, 20, 4, 10])\nFalse"
        ),
        (
            "Return median of elements in the list l."
            "\n>>> median([3, 1, 2, 4, 5])\n3"
            "\n>>> median([-10, 4, 6, 1000, 10, 3])\n8.0"
        ),
        (
            "Return list of prime factors of given integer in the order from smallest to largest."
            "\n>>> factorize(8)\n[2, 2, 2]"
            "\n>>> factorize(25)\n[5, 5]"
        ),
    )
]
# NOTE(review): the nesting below is reconstructed from the order in which the
# components are created; confirm it matches the intended layout.
with gr.Blocks(title="CodeLlama-7B QLoRA — Python Code Completion") as demo:
    # Page header: model summary, reported scores, and a link to the model.
    gr.Markdown(
        """
# CodeLlama-7B QLoRA — Python Code Completion
Fine-tuned on CodeSearchNet Python with LoRA (rank=8) and evaluated on HumanEval.
**Results:** pass@1 = 26.83% · pass@5 = 35.91% · pass@10 = 38.41%
Model: [`sedaklc/codellama-7b-qlora-humaneval`](https://huggingface.co/sedaklc/codellama-7b-qlora-humaneval)
"""
    )

    with gr.Row():
        # Input side: prompt box, generation settings, submit button.
        with gr.Column():
            docstring = gr.Textbox(
                lines=6,
                label="Python function docstring",
                placeholder="Describe the function you want implemented...",
            )
            with gr.Row():
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.01,
                    maximum=1.0,
                    step=0.01,
                    value=0.2,
                )
                max_tokens = gr.Slider(
                    label="Max new tokens",
                    minimum=64,
                    maximum=512,
                    step=32,
                    value=256,
                )
            submit_btn = gr.Button("Generate", variant="primary")

        # Output side: the generated code.
        with gr.Column():
            output = gr.Textbox(label="Generated code", lines=16, show_copy_button=True)

    # Example rows feed the same three inputs; results are not cached.
    gr.Examples(
        examples=EXAMPLES,
        fn=generate_completion,
        inputs=[docstring, temperature, max_tokens],
        outputs=output,
        cache_examples=False,
    )

    submit_btn.click(
        fn=generate_completion,
        inputs=[docstring, temperature, max_tokens],
        outputs=output,
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()