sedaklc committed
Commit efe6fb2 · verified · 1 parent: 947fd08

Upload app.py with huggingface_hub
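
For context, an upload like the one named in this commit message is typically done with huggingface_hub's upload_file. This is a minimal sketch, not taken from the commit itself; the repo id is a placeholder, since the actual Space name is not shown here:

from huggingface_hub import upload_file

upload_file(
    path_or_fileobj="app.py",           # local file to push
    path_in_repo="app.py",              # destination path inside the repo
    repo_id="sedaklc/your-space-name",  # hypothetical: actual Space id not shown in this commit
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)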

Files changed (1): app.py (+99, -0)
app.py ADDED
@@ -0,0 +1,99 @@
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

MODEL_ID = "codellama/CodeLlama-7b-hf"
ADAPTER_ID = "sedaklc/codellama-7b-qlora-humaneval"

print("Loading model...")
# 4-bit NF4 quantization with nested (double) quantization, matching the QLoRA setup.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Attach the trained LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()
print("Model ready.")


def generate_completion(docstring: str, temperature: float, max_new_tokens: int) -> str:
    if not docstring.strip():
        return ""
    prompt = f"[INST] {docstring.strip()} [/INST]\n"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            temperature=temperature,
            top_p=0.95,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


EXAMPLES = [
    ["Return n-th Fibonacci number.", 0.2, 256],
    ["Filter an input list of strings only for ones that start with a given prefix.", 0.2, 256],
    ["Return True if list elements are monotonically increasing or decreasing.\n>>> monotonic([1, 2, 4, 20])\nTrue\n>>> monotonic([1, 20, 4, 10])\nFalse", 0.2, 256],
    ["Return median of elements in the list l.\n>>> median([3, 1, 2, 4, 5])\n3\n>>> median([-10, 4, 6, 1000, 10, 3])\n8.0", 0.2, 256],
    ["Return list of prime factors of given integer in the order from smallest to largest.\n>>> factorize(8)\n[2, 2, 2]\n>>> factorize(25)\n[5, 5]", 0.2, 256],
]

with gr.Blocks(title="CodeLlama-7B QLoRA — Python Code Completion") as demo:
    gr.Markdown(
        """
# CodeLlama-7B QLoRA — Python Code Completion

Fine-tuned on CodeSearchNet Python with LoRA (rank=8) and evaluated on HumanEval.
**Results:** pass@1 = 26.83% · pass@5 = 35.91% · pass@10 = 38.41%
Model: [`sedaklc/codellama-7b-qlora-humaneval`](https://huggingface.co/sedaklc/codellama-7b-qlora-humaneval)
"""
    )

    with gr.Row():
        with gr.Column():
            docstring = gr.Textbox(
                label="Python function docstring",
                placeholder="Describe the function you want implemented...",
                lines=6,
            )
            with gr.Row():
                temperature = gr.Slider(
                    minimum=0.01, maximum=1.0, value=0.2, step=0.01, label="Temperature"
                )
                max_tokens = gr.Slider(
                    minimum=64, maximum=512, value=256, step=32, label="Max new tokens"
                )
            submit_btn = gr.Button("Generate", variant="primary")

        with gr.Column():
            output = gr.Textbox(label="Generated code", lines=16, show_copy_button=True)

    gr.Examples(
        examples=EXAMPLES,
        inputs=[docstring, temperature, max_tokens],
        outputs=output,
        fn=generate_completion,
        cache_examples=False,
    )

    submit_btn.click(fn=generate_completion, inputs=[docstring, temperature, max_tokens], outputs=output)

if __name__ == "__main__":
    demo.launch()
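
Once the Space is running, it can usually also be queried programmatically with gradio_client. A minimal sketch, assuming a hypothetical Space id and the default /predict endpoint name, neither of which is confirmed by this commit:

from gradio_client import Client

client = Client("sedaklc/your-space-name")  # hypothetical: check the actual Space URL
result = client.predict(
    "Return n-th Fibonacci number.",  # docstring input
    0.2,                              # temperature
    256,                              # max new tokens
    api_name="/predict",              # assumed default name for the single click handler
)
print(result)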