Walid committed on
Commit
267c212
·
1 Parent(s): e26fedd

Use fine-tuned Stack-2-9 model instead of base Qwen2.5-Coder

Browse files

- Change MODEL_NAME from Qwen/Qwen2.5-Coder-1.5B to my-ai-stack/Stack-2-9-finetuned
- Update README to reflect fine-tuned model
- Improve UI messaging to indicate fine-tuned model is running

Files changed (2) hide show
  1. README.md +17 -16
  2. app.py +16 -17
README.md CHANGED
@@ -10,16 +10,26 @@ tags:
10
  - python
11
  - qwen
12
  - coding-assistant
 
13
  ---
14
 
15
- # Stack 2.9 - Code Assistant
16
 
17
- A coding assistant powered by Qwen2.5-Coder-1.5B, fine-tuned on Stack Overflow data.
 
 
 
 
 
 
 
 
 
18
 
19
  ## Features
20
 
21
- - **Code Generation** - Write Python, SQL, JavaScript, and more
22
- - **Code Debugging** - Find and fix bugs in your code
23
  - **Programming Help** - Get explanations and refactoring suggestions
24
  - **Chat Interface** - Easy-to-use Gradio UI
25
 
@@ -29,17 +39,8 @@ A coding assistant powered by Qwen2.5-Coder-1.5B, fine-tuned on Stack Overflow d
29
  2. Adjust settings (max tokens, temperature)
30
  3. Click "Generate" to get your response
31
 
32
- ## Model
33
-
34
- - **Base Model:** Qwen/Qwen2.5-Coder-1.5B
35
- - **Context Length:** 32K tokens
36
- - **Fine-tuned on:** Stack Overflow Q&A data
37
-
38
- ## Note
39
-
40
- This demo uses the base Qwen2.5-Coder-1.5B model. The full fine-tuned model (5.75GB) is available at:
41
- https://huggingface.co/my-ai-stack/Stack-2-9-finetuned
42
 
43
- ## License
44
 
45
- Apache 2.0
 
10
  - python
11
  - qwen
12
  - coding-assistant
13
+ - fine-tuned
14
  ---
15
 
16
+ # 💻 Stack 2.9 - Fine-tuned Code Assistant
17
 
18
+ A **fine-tuned** coding assistant powered by Qwen2.5-Coder-1.5B, trained on Stack Overflow Q&A data.
19
+
20
+ ## Model
21
+
22
+ - **Base Model:** Qwen/Qwen2.5-Coder-1.5B
23
+ - **Fine-tuned on:** Stack Overflow Q&A (Python-heavy)
24
+ - **Context Length:** 32K tokens
25
+ - **Parameters:** 1.5B
26
+ - **License:** Apache 2.0
27
+ - **Hub:** [my-ai-stack/Stack-2-9-finetuned](https://huggingface.co/my-ai-stack/Stack-2-9-finetuned)
28
 
29
  ## Features
30
 
31
+ - **Code Generation** - Write Python, SQL, JavaScript, TypeScript, and more
32
+ - **Code Debugging** - Find and fix bugs in your code
33
  - **Programming Help** - Get explanations and refactoring suggestions
34
  - **Chat Interface** - Easy-to-use Gradio UI
35
 
 
39
  2. Adjust settings (max tokens, temperature)
40
  3. Click "Generate" to get your response
41
 
42
+ This demo runs the **actual fine-tuned model**, not the base Qwen2.5-Coder.
 
 
 
 
 
 
 
 
 
43
 
44
+ ## Hardware
45
 
46
+ The 1.5B model fits on a free T4 GPU on Hugging Face Spaces (~4GB VRAM in FP16).
app.py CHANGED
@@ -1,13 +1,13 @@
1
  """
2
  Stack 2.9 - HuggingFace Space
3
- Code Assistant using Qwen2.5-Coder
4
  """
5
  import gradio as gr
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  import torch
8
 
9
- # Load base model - uses 1.5B model which fits in free tier
10
- MODEL_NAME = "Qwen/Qwen2.5-Coder-1.5B"
11
 
12
  print(f"Loading {MODEL_NAME}...")
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
@@ -17,10 +17,10 @@ model = AutoModelForCausalLM.from_pretrained(
17
  device_map="auto",
18
  trust_remote_code=True
19
  )
20
- print("Model loaded!")
21
 
22
  def generate(prompt, system_prompt="You are a helpful coding assistant.", max_tokens=512, temperature=0.7):
23
- """Generate response from the model"""
24
  messages = [
25
  {"role": "system", "content": system_prompt},
26
  {"role": "user", "content": prompt}
@@ -37,24 +37,22 @@ def generate(prompt, system_prompt="You are a helpful coding assistant.", max_to
37
  pad_token_id=tokenizer.pad_token_id
38
  )
39
 
40
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
41
- # Remove the input prompt from response
42
- return response[len(text):].strip()
43
 
44
- # Build Gradio UI
45
- with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
46
  gr.Markdown("""
47
- # Stack 2.9 - Code Assistant
48
- **Powered by Qwen2.5-Coder-1.5B** fine-tuned on Stack Overflow data
49
-
50
- Write code, debug, or ask programming questions!
51
  """)
52
 
53
  with gr.Row():
54
  with gr.Column(scale=1):
55
  system_prompt = gr.Textbox(
56
  label="System Prompt",
57
- value="You are a helpful coding assistant specialized in programming.",
58
  lines=3
59
  )
60
  prompt = gr.Textbox(
@@ -65,7 +63,7 @@ with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
65
  with gr.Row():
66
  max_tokens = gr.Slider(32, 1024, value=512, step=32, label="Max Tokens")
67
  temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
68
- submit = gr.Button("Generate", variant="primary")
69
 
70
  with gr.Column(scale=2):
71
  output = gr.Textbox(label="Response", lines=15)
@@ -75,6 +73,8 @@ with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
75
  ["Explain what this code does: def foo(x): return x * 2"],
76
  ["Debug this code: for i in range(10): print(i)"],
77
  ["Write a SQL query to find duplicate emails"],
 
 
78
  ]
79
 
80
  gr.Examples(examples=examples, inputs=[prompt])
@@ -84,7 +84,6 @@ with gr.Blocks(title="Stack 2.9 - Code Assistant") as demo:
84
  inputs=[prompt, system_prompt, max_tokens, temperature],
85
  outputs=output
86
  )
87
-
88
  prompt.submit(
89
  fn=generate,
90
  inputs=[prompt, system_prompt, max_tokens, temperature],
 
1
  """
2
  Stack 2.9 - HuggingFace Space
3
+ Fine-tuned code assistant powered by Qwen2.5-Coder-1.5B
4
  """
5
  import gradio as gr
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  import torch
8
 
9
+ # Load FINE-TUNED model
10
+ MODEL_NAME = "my-ai-stack/Stack-2-9-finetuned"
11
 
12
  print(f"Loading {MODEL_NAME}...")
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
 
17
  device_map="auto",
18
  trust_remote_code=True
19
  )
20
+ print("Fine-tuned model loaded!")
21
 
22
  def generate(prompt, system_prompt="You are a helpful coding assistant.", max_tokens=512, temperature=0.7):
23
+ """Generate response from the fine-tuned model"""
24
  messages = [
25
  {"role": "system", "content": system_prompt},
26
  {"role": "user", "content": prompt}
 
37
  pad_token_id=tokenizer.pad_token_id
38
  )
39
 
40
+ response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
41
+ return response.strip()
 
42
 
43
+ with gr.Blocks(title="Stack 2.9 - Fine-tuned Code Assistant") as demo:
 
44
  gr.Markdown("""
45
+ # 💻 Stack 2.9 - Fine-tuned Code Assistant
46
+ **Fine-tuned on Stack Overflow data** · 1.5B parameters · Qwen2.5-Coder base
47
+
48
+ *This demo runs the actual fine-tuned model, not the base.*
49
  """)
50
 
51
  with gr.Row():
52
  with gr.Column(scale=1):
53
  system_prompt = gr.Textbox(
54
  label="System Prompt",
55
+ value="You are Stack 2.9, a helpful coding assistant specialized in programming.",
56
  lines=3
57
  )
58
  prompt = gr.Textbox(
 
63
  with gr.Row():
64
  max_tokens = gr.Slider(32, 1024, value=512, step=32, label="Max Tokens")
65
  temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
66
+ submit = gr.Button("Generate 💻", variant="primary")
67
 
68
  with gr.Column(scale=2):
69
  output = gr.Textbox(label="Response", lines=15)
 
73
  ["Explain what this code does: def foo(x): return x * 2"],
74
  ["Debug this code: for i in range(10): print(i)"],
75
  ["Write a SQL query to find duplicate emails"],
76
+ ["Write a function to reverse a string in Python"],
77
+ ["How do I handle exceptions in Python?"],
78
  ]
79
 
80
  gr.Examples(examples=examples, inputs=[prompt])
 
84
  inputs=[prompt, system_prompt, max_tokens, temperature],
85
  outputs=output
86
  )
 
87
  prompt.submit(
88
  fn=generate,
89
  inputs=[prompt, system_prompt, max_tokens, temperature],