Spaces:

RedNinja6440
/

Verilog_AI_assistance

Sleeping

App Files Community

RedNinja6440 committed on Feb 15

Commit

87297ac

verified ·

1 Parent(s): b3b5614

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -149

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 """
-Gradio App for Hugging Face Spaces - Qwen2.5-VL Verilog Assistant
-Clean version without unnecessary imports
 """
 import gradio as gr
 import torch
-from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer
 from peft import PeftModel
 import warnings
@@ -19,15 +19,23 @@ def load_model():
     try:
         base_model = "Qwen/Qwen2.5-VL-7B-Instruct"
-        print("Loading base model...")
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             base_model,
-            torch_dtype=torch.float16,
             device_map="auto",
-            trust_remote_code=True
         )
-        print("Loading your adapter...")
         model = PeftModel.from_pretrained(
             model,
             "Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon",
@@ -35,192 +43,81 @@ def load_model():
         )
         print("Loading tokenizer...")
-        tokenizer = AutoTokenizer.from_pretrained(
-            base_model,
-            trust_remote_code=True
-        )
-        return "✅ Model loaded successfully! Ready to generate Verilog code."
     except Exception as e:
         import traceback
-        error_details = traceback.format_exc()
-        return f"❌ Error loading model:\n{str(e)}\n\nFull traceback:\n{error_details}"
 def generate(prompt, max_tokens, temperature):
-    if model is None or tokenizer is None:
-        return "❌ Please load the model first by clicking 'Load Model' button!"
     if not prompt.strip():
-        return "❌ Please enter a prompt!"
     try:
-        # Create chat messages
         messages = [
-            {"role": "system", "content": "You are a helpful AI assistant specialized in Verilog hardware description language."},
             {"role": "user", "content": prompt}
         ]
-        # Apply chat template
-        text = tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
-        # Tokenize
-        inputs = tokenizer(
-            [text],
-            return_tensors="pt",
-            padding=True
-        ).to(model.device)
-        # Generate
         with torch.no_grad():
             output_ids = model.generate(
                 **inputs,
                 max_new_tokens=int(max_tokens),
                 temperature=float(temperature) if temperature > 0 else 1e-6,
                 do_sample=True if temperature > 0 else False,
-                top_p=0.9,
-                pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id,
-                eos_token_id=tokenizer.eos_token_id,
             )
-        # Decode only the generated part
         generated_ids = output_ids[:, inputs['input_ids'].shape[1]:]
-        response = tokenizer.batch_decode(
-            generated_ids,
-            skip_special_tokens=True,
-            clean_up_tokenization_spaces=True
-        )[0]
-        return response
     except Exception as e:
-        import traceback
-        error_details = traceback.format_exc()
-        return f"❌ Generation error:\n{str(e)}\n\nTraceback:\n{error_details}"
-# Create Gradio Interface
-with gr.Blocks(title="Verilog AI Assistant") as demo:
     gr.Markdown("""
-    # 🔧 Qwen2.5-VL-7B Verilog Assistant
-    Fine-tuned model specialized for **Verilog Hardware Description Language**
-    **What I can do:**
-    - ⚡ Generate Verilog modules for digital circuits
-    - 📚 Explain Verilog concepts and syntax
-    - 🧪 Create testbenches and test cases
-    - 💡 Answer hardware design questions
-    - 🔍 Debug and improve Verilog code
     """)
     with gr.Row():
-        with gr.Column(scale=2):
-            load_btn = gr.Button(
-                "🚀 Load Model",
-                variant="primary",
-                size="lg"
-            )
-        with gr.Column(scale=3):
-            status = gr.Textbox(
-                label="Model Status",
-                value="⏳ Click 'Load Model' to initialize the AI assistant",
-                interactive=False,
-                lines=2
-            )
-    load_btn.click(fn=load_model, outputs=status)
     gr.Markdown("---")
     with gr.Row():
         with gr.Column():
-            prompt = gr.Textbox(
-                label="💬 Your Prompt",
-                placeholder="Example: Write a Verilog module for a 4-bit adder with carry...",
-                lines=7
-            )
-            with gr.Accordion("⚙️ Generation Settings", open=False):
-                max_tokens = gr.Slider(
-                    minimum=128,
-                    maximum=1024,
-                    value=512,
-                    step=64,
-                    label="Max Output Tokens",
-                    info="Higher = longer responses"
-                )
-                temperature = gr.Slider(
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.1,
-                    label="Temperature",
-                    info="0 = focused, 1 = creative"
-                )
-            generate_btn = gr.Button(
-                "✨ Generate Response",
-                variant="primary",
-                size="lg"
-            )
-            gr.Markdown("### 📝 Example Prompts")
-            gr.Examples(
-                examples=[
-                    "Write a Verilog module for a 4-bit ripple carry adder with carry in and carry out.",
-                    "Create a D flip-flop with asynchronous reset in Verilog.",
-                    "Explain the difference between blocking (=) and non-blocking (<=) assignments in Verilog.",
-                    "Write a testbench for a 2-to-1 multiplexer with all test cases.",
-                    "Design a 4-bit binary counter with enable signal and synchronous reset in Verilog.",
-                    "Create a finite state machine for a traffic light controller in Verilog.",
-                ],
-                inputs=prompt,
-                label=None
-            )
         with gr.Column():
-            output = gr.Textbox(
-                label="📤 Generated Output",
-                lines=25,
-                placeholder="Your generated Verilog code and explanations will appear here..."
-            )
-    generate_btn.click(
-        fn=generate,
-        inputs=[prompt, max_tokens, temperature],
-        outputs=output
-    )
-    gr.Markdown("""
-    ---
-    ### 💡 Usage Tips
-    | Setting | Low (0.1-0.3) | Medium (0.5-0.7) | High (0.8-1.0) |
-    |---------|---------------|------------------|----------------|
-    | **Temperature** | Focused, deterministic | Balanced | Creative, varied |
-    | **Best for** | Code generation | Explanations | Design exploration |
-    - **Be specific** in your prompts for best results
-    - **Include details** like bit widths, signal names, and functionality
-    - **Ask follow-up questions** to refine the output
-    ### 📊 Model Information
-    - **Base Model**: Qwen/Qwen2.5-VL-7B-Instruct (7 billion parameters)
-    - **Fine-tuned Adapter**: Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon
-    - **Type**: LoRA Fine-tuned (176MB adapter)
-    - **Specialization**: Verilog Hardware Description Language
-    - **License**: MIT
-    ### 🔗 Links
-    - [Model on HuggingFace](https://huggingface.co/Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon)
-    - [Base Model](https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct)
-    """)
-# Launch the app
 if __name__ == "__main__":
     demo.launch()

 """
+Memory-Optimized Gradio App for CPU - Uses 8-bit quantization
+For HuggingFace Spaces free tier
 """
 import gradio as gr
 import torch
+from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, BitsAndBytesConfig
 from peft import PeftModel
 import warnings
     try:
         base_model = "Qwen/Qwen2.5-VL-7B-Instruct"
+        print("Loading base model with 8-bit quantization...")
+        # 8-bit quantization to save memory
+        quantization_config = BitsAndBytesConfig(
+            load_in_8bit=True,
+            llm_int8_threshold=6.0
+        )
         model = Qwen2VLForConditionalGeneration.from_pretrained(
             base_model,
+            quantization_config=quantization_config,
             device_map="auto",
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
         )
+        print("Loading adapter...")
         model = PeftModel.from_pretrained(
             model,
             "Shrestha2007/Qwen2.5-VL-7B-Verilog-Hackathon",
         )
         print("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+        return "✅ Model loaded (8-bit mode for memory efficiency)"
     except Exception as e:
         import traceback
+        return f"❌ Error: {str(e)}\n\n{traceback.format_exc()}"
 def generate(prompt, max_tokens, temperature):
+    if model is None:
+        return "❌ Load model first!"
     if not prompt.strip():
+        return "❌ Enter a prompt!"
     try:
         messages = [
+            {"role": "system", "content": "You are a Verilog expert."},
             {"role": "user", "content": prompt}
         ]
+        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        inputs = tokenizer([text], return_tensors="pt").to(model.device)
         with torch.no_grad():
             output_ids = model.generate(
                 **inputs,
                 max_new_tokens=int(max_tokens),
                 temperature=float(temperature) if temperature > 0 else 1e-6,
                 do_sample=True if temperature > 0 else False,
+                pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id
             )
         generated_ids = output_ids[:, inputs['input_ids'].shape[1]:]
+        return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
     except Exception as e:
+        return f"❌ Error: {str(e)}"
+with gr.Blocks(title="Verilog Assistant") as demo:
     gr.Markdown("""
+    # 🔧 Qwen2.5-VL Verilog Assistant
+    Fine-tuned for Verilog HDL (Running in 8-bit mode)
     """)
     with gr.Row():
+        load_btn = gr.Button("🚀 Load Model", variant="primary", scale=1)
+        status = gr.Textbox(label="Status", value="⏳ Click Load Model", scale=2, interactive=False)
+    load_btn.click(load_model, outputs=status)
     gr.Markdown("---")
     with gr.Row():
         with gr.Column():
+            prompt = gr.Textbox(label="Prompt", lines=7, placeholder="Write a Verilog module for...")
+            with gr.Accordion("Settings", open=False):
+                max_tokens = gr.Slider(128, 512, 256, label="Max Tokens", info="Reduced for CPU")
+                temperature = gr.Slider(0.0, 1.0, 0.7, label="Temperature")
+            generate_btn = gr.Button("✨ Generate", variant="primary")
+            gr.Examples([
+                "Write a 4-bit adder in Verilog",
+                "Create a D flip-flop",
+                "Explain wire vs reg"
+            ], inputs=prompt)
         with gr.Column():
+            output = gr.Textbox(label="Output", lines=20)
+    generate_btn.click(generate, inputs=[prompt, max_tokens, temperature], outputs=output)
+    gr.Markdown("⚠️ Running in 8-bit quantized mode on CPU - generation may be slow")
 if __name__ == "__main__":
     demo.launch()