Spaces:

my-ai-stack
/

stack-2-9-demo

Sleeping

Walid committed on Apr 8

Commit

267c212

1 Parent(s): e26fedd

Use fine-tuned Stack-2-9 model instead of base Qwen2.5-Coder

- Change MODEL_NAME from Qwen/Qwen2.5-Coder-1.5B to my-ai-stack/Stack-2-9-finetuned
- Update README to reflect fine-tuned model
- Improve UI messaging to indicate fine-tuned model is running

Files changed (2) hide show

README.md +17 -16
app.py +16 -17

README.md CHANGED Viewed

@@ -10,16 +10,26 @@ tags:
 - python
 - qwen
 - coding-assistant
 ---
-# Stack 2.9 - Code Assistant
-A coding assistant powered by Qwen2.5-Coder-1.5B, fine-tuned on Stack Overflow data.
 ## Features
-- **Code Generation** - Write Python, SQL, JavaScript, and more
-- **Code Debugging** - Find and fix bugs in your code
 - **Programming Help** - Get explanations and refactoring suggestions
 - **Chat Interface** - Easy-to-use Gradio UI
@@ -29,17 +39,8 @@ A coding assistant powered by Qwen2.5-Coder-1.5B, fine-tuned on Stack Overflow d
 2. Adjust settings (max tokens, temperature)
 3. Click "Generate" to get your response
-## Model
-- **Base Model:** Qwen/Qwen2.5-Coder-1.5B
-- **Context Length:** 32K tokens
-- **Fine-tuned on:** Stack Overflow Q&A data
-## Note
-This demo uses the base Qwen2.5-Coder-1.5B model. The full fine-tuned model (5.75GB) is available at:
-https://huggingface.co/my-ai-stack/Stack-2-9-finetuned
-## License
-Apache 2.0

 - python
 - qwen
 - coding-assistant
+- fine-tuned
 ---
+# 💻 Stack 2.9 - Fine-tuned Code Assistant
+A **fine-tuned** coding assistant powered by Qwen2.5-Coder-1.5B, trained on Stack Overflow Q&A data.
+## Model
+- **Base Model:** Qwen/Qwen2.5-Coder-1.5B
+- **Fine-tuned on:** Stack Overflow Q&A (Python-heavy)
+- **Context Length:** 32K tokens
+- **Parameters:** 1.5B
+- **License:** Apache 2.0
+- **Hub:** [my-ai-stack/Stack-2-9-finetuned](https://huggingface.co/my-ai-stack/Stack-2-9-finetuned)
 ## Features
+- **Code Generation** - Write Python, SQL, JavaScript, TypeScript, and more
+- **Code Debugging** - Find and fix bugs in your code
 - **Programming Help** - Get explanations and refactoring suggestions
 - **Chat Interface** - Easy-to-use Gradio UI
 2. Adjust settings (max tokens, temperature)
 3. Click "Generate" to get your response
+This demo runs the **actual fine-tuned model**, not the base Qwen2.5-Coder.
+## Hardware
+The 1.5B model fits on a free T4 GPU on HuggingFace Spaces (~4GB VRAM in FP16).

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
 """
 Stack 2.9 - HuggingFace Space
-Code Assistant using Qwen2.5-Coder
 """
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-# Load base model - uses 1.5B model which fits in free tier
-MODEL_NAME = "Qwen/Qwen2.5-Coder-1.5B"
 print(f"Loading {MODEL_NAME}...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
@@ -17,10 +17,10 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     trust_remote_code=True
 )
-print("Model loaded!")
 def generate(prompt, system_prompt="You are a helpful coding assistant.", max_tokens=512, temperature=0.7):
-    """Generate response from the model"""
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": prompt}
@@ -37,24 +37,22 @@ def generate(prompt, system_prompt="You are a helpful coding assistant.", max_to
         pad_token_id=tokenizer.pad_token_id
     )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Remove the input prompt from response
-    return response[len(text):].strip()
-# Build Gradio UI
-with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
     gr.Markdown("""
-    # Stack 2.9 - Code Assistant
-    **Powered by Qwen2.5-Coder-1.5B** fine-tuned on Stack Overflow data
-    Write code, debug, or ask programming questions!
     """)
     with gr.Row():
         with gr.Column(scale=1):
             system_prompt = gr.Textbox(
                 label="System Prompt",
-                value="You are a helpful coding assistant specialized in programming.",
                 lines=3
             )
             prompt = gr.Textbox(
@@ -65,7 +63,7 @@ with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
             with gr.Row():
                 max_tokens = gr.Slider(32, 1024, value=512, step=32, label="Max Tokens")
                 temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
-            submit = gr.Button("Generate", variant="primary")
         with gr.Column(scale=2):
             output = gr.Textbox(label="Response", lines=15)
@@ -75,6 +73,8 @@ with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
         ["Explain what this code does: def foo(x): return x * 2"],
         ["Debug this code: for i in range(10): print(i)"],
         ["Write a SQL query to find duplicate emails"],
     ]
     gr.Examples(examples=examples, inputs=[prompt])
@@ -84,7 +84,6 @@ with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
         inputs=[prompt, system_prompt, max_tokens, temperature],
         outputs=output
     )
     prompt.submit(
         fn=generate,
         inputs=[prompt, system_prompt, max_tokens, temperature],

 """
 Stack 2.9 - HuggingFace Space
+Fine-tuned code assistant powered by Qwen2.5-Coder-1.5B
 """
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
+# Load FINE-TUNED model
+MODEL_NAME = "my-ai-stack/Stack-2-9-finetuned"
 print(f"Loading {MODEL_NAME}...")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
     device_map="auto",
     trust_remote_code=True
 )
+print("Fine-tuned model loaded!")
 def generate(prompt, system_prompt="You are a helpful coding assistant.", max_tokens=512, temperature=0.7):
+    """Generate response from the fine-tuned model"""
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": prompt}
         pad_token_id=tokenizer.pad_token_id
     )
+    response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
+    return response.strip()
+with gr.Blocks(title="Stack 2.9 - Fine-tuned Code Assistant") as demo:
     gr.Markdown("""
+    # 💻 Stack 2.9 - Fine-tuned Code Assistant
+    **Fine-tuned on Stack Overflow data** · 1.5B parameters · Qwen2.5-Coder base
+    *This demo runs the actual fine-tuned model, not the base.*
     """)
     with gr.Row():
         with gr.Column(scale=1):
             system_prompt = gr.Textbox(
                 label="System Prompt",
+                value="You are Stack 2.9, a helpful coding assistant specialized in programming.",
                 lines=3
             )
             prompt = gr.Textbox(
             with gr.Row():
                 max_tokens = gr.Slider(32, 1024, value=512, step=32, label="Max Tokens")
                 temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
+            submit = gr.Button("Generate 💻", variant="primary")
         with gr.Column(scale=2):
             output = gr.Textbox(label="Response", lines=15)
         ["Explain what this code does: def foo(x): return x * 2"],
         ["Debug this code: for i in range(10): print(i)"],
         ["Write a SQL query to find duplicate emails"],
+        ["Write a function to reverse a string in Python"],
+        ["How do I handle exceptions in Python?"],
     ]
     gr.Examples(examples=examples, inputs=[prompt])
         inputs=[prompt, system_prompt, max_tokens, temperature],
         outputs=output
     )
     prompt.submit(
         fn=generate,
         inputs=[prompt, system_prompt, max_tokens, temperature],