RedNinja6440 committed on
Commit
87297ac
·
verified ·
1 Parent(s): b3b5614

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -149
app.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- Gradio App for Hugging Face Spaces - Qwen2.5-VL Verilog Assistant
3
- Clean version without unnecessary imports
4
  """
5
 
6
  import gradio as gr
7
  import torch
8
- from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer
9
  from peft import PeftModel
10
  import warnings
11
 
@@ -19,15 +19,23 @@ def load_model():
19
  try:
20
  base_model = "Qwen/Qwen2.5-VL-7B-Instruct"
21
 
22
- print("Loading base model...")
 
 
 
 
 
 
 
23
  model = Qwen2VLForConditionalGeneration.from_pretrained(
24
  base_model,
25
- torch_dtype=torch.float16,
26
  device_map="auto",
27
- trust_remote_code=True
 
28
  )
29
 
30
- print("Loading your adapter...")
31
  model = PeftModel.from_pretrained(
32
  model,
33
  "Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon",
@@ -35,192 +43,81 @@ def load_model():
35
  )
36
 
37
  print("Loading tokenizer...")
38
- tokenizer = AutoTokenizer.from_pretrained(
39
- base_model,
40
- trust_remote_code=True
41
- )
42
 
43
- return "✅ Model loaded successfully! Ready to generate Verilog code."
44
 
45
  except Exception as e:
46
  import traceback
47
- error_details = traceback.format_exc()
48
- return f"❌ Error loading model:\n{str(e)}\n\nFull traceback:\n{error_details}"
49
 
50
  def generate(prompt, max_tokens, temperature):
51
- if model is None or tokenizer is None:
52
- return "❌ Please load the model first by clicking 'Load Model' button!"
53
 
54
  if not prompt.strip():
55
- return "❌ Please enter a prompt!"
56
 
57
  try:
58
- # Create chat messages
59
  messages = [
60
- {"role": "system", "content": "You are a helpful AI assistant specialized in Verilog hardware description language."},
61
  {"role": "user", "content": prompt}
62
  ]
63
 
64
- # Apply chat template
65
- text = tokenizer.apply_chat_template(
66
- messages,
67
- tokenize=False,
68
- add_generation_prompt=True
69
- )
70
-
71
- # Tokenize
72
- inputs = tokenizer(
73
- [text],
74
- return_tensors="pt",
75
- padding=True
76
- ).to(model.device)
77
 
78
- # Generate
79
  with torch.no_grad():
80
  output_ids = model.generate(
81
  **inputs,
82
  max_new_tokens=int(max_tokens),
83
  temperature=float(temperature) if temperature > 0 else 1e-6,
84
  do_sample=True if temperature > 0 else False,
85
- top_p=0.9,
86
- pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id,
87
- eos_token_id=tokenizer.eos_token_id,
88
  )
89
 
90
- # Decode only the generated part
91
  generated_ids = output_ids[:, inputs['input_ids'].shape[1]:]
92
- response = tokenizer.batch_decode(
93
- generated_ids,
94
- skip_special_tokens=True,
95
- clean_up_tokenization_spaces=True
96
- )[0]
97
-
98
- return response
99
 
100
  except Exception as e:
101
- import traceback
102
- error_details = traceback.format_exc()
103
- return f"❌ Generation error:\n{str(e)}\n\nTraceback:\n{error_details}"
104
 
105
- # Create Gradio Interface
106
- with gr.Blocks(title="Verilog AI Assistant") as demo:
107
-
108
  gr.Markdown("""
109
- # 🔧 Qwen2.5-VL-7B Verilog Assistant
110
-
111
- Fine-tuned model specialized for **Verilog Hardware Description Language**
112
-
113
- **What I can do:**
114
- - ⚡ Generate Verilog modules for digital circuits
115
- - 📚 Explain Verilog concepts and syntax
116
- - 🧪 Create testbenches and test cases
117
- - 💡 Answer hardware design questions
118
- - 🔍 Debug and improve Verilog code
119
  """)
120
 
121
  with gr.Row():
122
- with gr.Column(scale=2):
123
- load_btn = gr.Button(
124
- "🚀 Load Model",
125
- variant="primary",
126
- size="lg"
127
- )
128
- with gr.Column(scale=3):
129
- status = gr.Textbox(
130
- label="Model Status",
131
- value="⏳ Click 'Load Model' to initialize the AI assistant",
132
- interactive=False,
133
- lines=2
134
- )
135
 
136
- load_btn.click(fn=load_model, outputs=status)
137
 
138
  gr.Markdown("---")
139
 
140
  with gr.Row():
141
  with gr.Column():
142
- prompt = gr.Textbox(
143
- label="💬 Your Prompt",
144
- placeholder="Example: Write a Verilog module for a 4-bit adder with carry...",
145
- lines=7
146
- )
147
 
148
- with gr.Accordion("⚙️ Generation Settings", open=False):
149
- max_tokens = gr.Slider(
150
- minimum=128,
151
- maximum=1024,
152
- value=512,
153
- step=64,
154
- label="Max Output Tokens",
155
- info="Higher = longer responses"
156
- )
157
- temperature = gr.Slider(
158
- minimum=0.0,
159
- maximum=1.0,
160
- value=0.7,
161
- step=0.1,
162
- label="Temperature",
163
- info="0 = focused, 1 = creative"
164
- )
165
 
166
- generate_btn = gr.Button(
167
- "✨ Generate Response",
168
- variant="primary",
169
- size="lg"
170
- )
171
 
172
- gr.Markdown("### 📝 Example Prompts")
173
- gr.Examples(
174
- examples=[
175
- "Write a Verilog module for a 4-bit ripple carry adder with carry in and carry out.",
176
- "Create a D flip-flop with asynchronous reset in Verilog.",
177
- "Explain the difference between blocking (=) and non-blocking (<=) assignments in Verilog.",
178
- "Write a testbench for a 2-to-1 multiplexer with all test cases.",
179
- "Design a 4-bit binary counter with enable signal and synchronous reset in Verilog.",
180
- "Create a finite state machine for a traffic light controller in Verilog.",
181
- ],
182
- inputs=prompt,
183
- label=None
184
- )
185
 
186
  with gr.Column():
187
- output = gr.Textbox(
188
- label="📤 Generated Output",
189
- lines=25,
190
- placeholder="Your generated Verilog code and explanations will appear here..."
191
- )
192
 
193
- generate_btn.click(
194
- fn=generate,
195
- inputs=[prompt, max_tokens, temperature],
196
- outputs=output
197
- )
198
 
199
- gr.Markdown("""
200
- ---
201
- ### 💡 Usage Tips
202
-
203
- | Setting | Low (0.1-0.3) | Medium (0.5-0.7) | High (0.8-1.0) |
204
- |---------|---------------|------------------|----------------|
205
- | **Temperature** | Focused, deterministic | Balanced | Creative, varied |
206
- | **Best for** | Code generation | Explanations | Design exploration |
207
-
208
- - **Be specific** in your prompts for best results
209
- - **Include details** like bit widths, signal names, and functionality
210
- - **Ask follow-up questions** to refine the output
211
-
212
- ### 📊 Model Information
213
- - **Base Model**: Qwen/Qwen2.5-VL-7B-Instruct (7 billion parameters)
214
- - **Fine-tuned Adapter**: Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon
215
- - **Type**: LoRA Fine-tuned (176MB adapter)
216
- - **Specialization**: Verilog Hardware Description Language
217
- - **License**: MIT
218
-
219
- ### 🔗 Links
220
- - [Model on HuggingFace](https://huggingface.co/Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon)
221
- - [Base Model](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)
222
- """)
223
 
224
- # Launch the app
225
  if __name__ == "__main__":
226
  demo.launch()
 
1
  """
2
+ Memory-Optimized Gradio App for CPU - Uses 8-bit quantization
3
+ For HuggingFace Spaces free tier
4
  """
5
 
6
  import gradio as gr
7
  import torch
8
+ from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, BitsAndBytesConfig
9
  from peft import PeftModel
10
  import warnings
11
 
 
19
  try:
20
  base_model = "Qwen/Qwen2.5-VL-7B-Instruct"
21
 
22
+ print("Loading base model with 8-bit quantization...")
23
+
24
+ # 8-bit quantization to save memory
25
+ quantization_config = BitsAndBytesConfig(
26
+ load_in_8bit=True,
27
+ llm_int8_threshold=6.0
28
+ )
29
+
30
  model = Qwen2VLForConditionalGeneration.from_pretrained(
31
  base_model,
32
+ quantization_config=quantization_config,
33
  device_map="auto",
34
+ trust_remote_code=True,
35
+ low_cpu_mem_usage=True
36
  )
37
 
38
+ print("Loading adapter...")
39
  model = PeftModel.from_pretrained(
40
  model,
41
  "Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon",
 
43
  )
44
 
45
  print("Loading tokenizer...")
46
+ tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 
 
 
47
 
48
+ return "✅ Model loaded (8-bit mode for memory efficiency)"
49
 
50
  except Exception as e:
51
  import traceback
52
+ return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"
 
53
 
54
def generate(prompt, max_tokens, temperature):
    """Generate a Verilog-focused response for *prompt* with the loaded model.

    Args:
        prompt: User request text (e.g. "Write a 4-bit adder in Verilog").
        max_tokens: Maximum number of new tokens to generate (slider value;
            coerced to ``int`` before use).
        temperature: Sampling temperature in [0, 1]; 0 means greedy decoding.

    Returns:
        The decoded model response, or a human-readable error message
        prefixed with "❌" (the UI shows either in the same textbox).
    """
    # Both globals are populated by load_model(); check BOTH so a partially
    # failed load (model set, tokenizer not) yields the friendly message
    # instead of crashing below. The previous revision checked both as well.
    if model is None or tokenizer is None:
        return "❌ Load model first!"

    if not prompt.strip():
        return "❌ Enter a prompt!"

    try:
        messages = [
            {"role": "system", "content": "You are a Verilog expert."},
            {"role": "user", "content": prompt}
        ]

        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer([text], return_tensors="pt").to(model.device)

        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                # HF generate requires temperature > 0; fall back to a tiny
                # epsilon when the slider is at 0 (greedy, do_sample=False).
                temperature=float(temperature) if temperature > 0 else 1e-6,
                do_sample=temperature > 0,
                # Some tokenizers define no pad token; reuse EOS for padding.
                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id
            )

        # Slice off the prompt tokens; decode only the newly generated tail.
        generated_ids = output_ids[:, inputs['input_ids'].shape[1]:]
        return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    except Exception as e:
        return f"❌ Error: {str(e)}"
 
 
84
 
85
# ---- Gradio UI ------------------------------------------------------------
# Two-step flow: the user explicitly loads the (quantized) model first, then
# submits prompts. Keeping load out of import time avoids Spaces startup
# timeouts on the free CPU tier.
with gr.Blocks(title="Verilog Assistant") as demo:
    gr.Markdown("""
    # 🔧 Qwen2.5-VL Verilog Assistant
    Fine-tuned for Verilog HDL (Running in 8-bit mode)
    """)

    # Model-loading controls: button on the left, status readout on the right.
    with gr.Row():
        load_button = gr.Button("🚀 Load Model", variant="primary", scale=1)
        status_box = gr.Textbox(
            label="Status",
            value="⏳ Click Load Model",
            scale=2,
            interactive=False,
        )

    load_button.click(load_model, outputs=status_box)

    gr.Markdown("---")

    with gr.Row():
        # Left column: prompt entry, generation settings, examples.
        with gr.Column():
            prompt_box = gr.Textbox(
                label="Prompt",
                lines=7,
                placeholder="Write a Verilog module for...",
            )

            with gr.Accordion("Settings", open=False):
                token_slider = gr.Slider(
                    minimum=128,
                    maximum=512,
                    value=256,
                    label="Max Tokens",
                    info="Reduced for CPU",
                )
                temp_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.7,
                    label="Temperature",
                )

            run_button = gr.Button("✨ Generate", variant="primary")

            sample_prompts = [
                "Write a 4-bit adder in Verilog",
                "Create a D flip-flop",
                "Explain wire vs reg",
            ]
            gr.Examples(sample_prompts, inputs=prompt_box)

        # Right column: generated Verilog / explanation output.
        with gr.Column():
            result_box = gr.Textbox(label="Output", lines=20)

    run_button.click(
        generate,
        inputs=[prompt_box, token_slider, temp_slider],
        outputs=result_box,
    )

    gr.Markdown("⚠️ Running in 8-bit quantized mode on CPU - generation may be slow")


if __name__ == "__main__":
    # Start the Gradio server only when executed as a script.
    demo.launch()