Arko007 committed · verified
Commit 05fb6b7 · 1 Parent(s): 208db11

Create app.py

Files changed (1)
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
+ # ==============================================================================
+ # V5 GRADIO DEPLOYMENT SCRIPT
+ # ==============================================================================
+ # This script creates a web UI to test your v5 model.
+ #
+ # TO DEPLOY ON HUGGING FACE SPACES:
+ # 1. Create a new Space and choose the "Gradio" SDK.
+ # 2. Select the free "T4 small" GPU hardware.
+ # 3. Create a file named `app.py` and paste this code into it.
+ # 4. Create a `requirements.txt` file and add the libraries listed below.
+ # ==============================================================================
+
+ # requirements.txt file contents:
+ # gradio
+ # transformers
+ # peft
+ # accelerate
+ # bitsandbytes
+ # torch
+
+ import torch
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+ # --- Configuration ---
+ # Your final, v5 model on the Hugging Face Hub
+ MODEL_ID = "Arko007/my-awesome-code-assistant-v5"
+ BASE_MODEL_ID = "codellama/CodeLlama-7b-hf"
+
+ # --- Load the Model (Memory-Optimized) ---
+ print("Setting up 4-bit quantization...")
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+
+ print(f"Loading fine-tuned model: {MODEL_ID}...")
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
+ tokenizer.pad_token = tokenizer.eos_token
+
+ print("✅ Model loaded successfully!")
+
+ # --- Inference Function ---
+ def generate_code(instruction):
+     """
+     Generates code from an instruction using the v5 model.
+     """
+     prompt = f"""### Instruction:
+ {instruction}
+
+ ### Response:"""
+
+     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
+
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=1024,  # Give it plenty of room to write
+         do_sample=True,  # enable sampling so temperature/top_p actually take effect
+         temperature=0.1,
+         top_p=0.9,
+         eos_token_id=tokenizer.eos_token_id
+     )
+
+     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Extract only the code part of the response
+     code_part = response_text.split("### Response:")[1].strip()
+     return code_part
+
+ # --- Create and Launch the Gradio Web App ---
+ print("Launching Gradio app...")
+
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🤖 My Awesome Code Assistant (v5)")
+     gr.Markdown("Enter an instruction and I'll generate the code for you!")
+
+     with gr.Row():
+         instruction_box = gr.Textbox(lines=5, label="Instruction", placeholder="e.g., Write a Python function to sort a list of numbers.")
+         output_box = gr.Code(label="Generated Code", language="python")
+
+     generate_button = gr.Button("Generate Code", variant="primary")
+
+     generate_button.click(fn=generate_code, inputs=instruction_box, outputs=output_box)
+
+ # This will launch the app when deployed on Hugging Face Spaces
+ demo.launch()
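
Once the Space is running, the same click handler can also be called programmatically. The snippet below is a minimal sketch using the gradio_client package; the Space id "Arko007/my-awesome-code-assistant-v5" and the "/generate_code" api_name are assumptions (Gradio usually derives the route from the function name), so check the Space's "Use via API" panel for the actual values.

# query_space.py -- hypothetical client-side test, not part of this commit
from gradio_client import Client

client = Client("Arko007/my-awesome-code-assistant-v5")  # assumed Space id
result = client.predict(
    "Write a Python function to sort a list of numbers.",  # instruction text
    api_name="/generate_code",  # assumed route; confirm under "Use via API"
)
print(result)  # the string returned by generate_code()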