walidsobhie-code committed on
Commit
107429a
1 Parent(s): b58b524

Fix: Pin Gradio 4.12.0, simplify app, CPU mode

Browse files
Files changed (2) hide show
  1. app.py +59 -65
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,94 +1,88 @@
1
  """
2
  Stack 2.9 - HuggingFace Space
3
- Fine-tuned code assistant powered by Qwen2.5-Coder-1.5B
 
4
  """
5
  import gradio as gr
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
  import torch
8
 
9
- # Load FINE-TUNED model
10
- MODEL_NAME = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
11
 
12
- print(f"Loading {MODEL_NAME}...")
 
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
14
  model = AutoModelForCausalLM.from_pretrained(
15
  MODEL_NAME,
16
- torch_dtype=torch.float16,
17
- device_map="auto",
18
- trust_remote_code=True
 
19
  )
20
- print("Fine-tuned model loaded!")
21
 
22
- def generate(prompt, system_prompt="You are a helpful coding assistant.", max_tokens=512, temperature=0.7):
23
- """Generate response from the fine-tuned model"""
24
  messages = [
25
- {"role": "system", "content": system_prompt},
26
  {"role": "user", "content": prompt}
27
  ]
28
-
 
 
29
  text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
30
- inputs = tokenizer([text], return_tensors="pt").to(model.device)
31
-
32
- outputs = model.generate(
33
- **inputs,
34
- max_new_tokens=max_tokens,
35
- temperature=temperature,
36
- do_sample=True,
37
- pad_token_id=tokenizer.pad_token_id
38
- )
39
-
 
40
  response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
41
  return response.strip()
42
 
43
- with gr.Blocks(title="Stack 2.9 - Fine-tuned Code Assistant") as demo:
44
- gr.Markdown("""
45
- # 💻 Stack 2.9 - Fine-tuned Code Assistant
46
- **Fine-tuned on Stack Overflow data** · 1.5B parameters · Qwen2.5-Coder base
47
 
48
- *This demo runs the actual fine-tuned model, not the base.*
49
- """)
50
-
51
  with gr.Row():
52
- with gr.Column(scale=1):
53
- system_prompt = gr.Textbox(
54
- label="System Prompt",
55
- value="You are Stack 2.9, a helpful coding assistant specialized in programming.",
56
- lines=3
57
- )
58
  prompt = gr.Textbox(
59
- label="Your Message",
60
- placeholder="Write a Python function to calculate fibonacci numbers...",
61
  lines=6
62
  )
 
 
 
 
 
63
  with gr.Row():
64
- max_tokens = gr.Slider(32, 1024, value=512, step=32, label="Max Tokens")
65
- temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
66
- submit = gr.Button("Generate 💻", variant="primary")
67
-
68
- with gr.Column(scale=2):
69
- output = gr.Textbox(label="Response", lines=15)
70
-
71
- examples = [
72
- ["Write a Python function to calculate fibonacci numbers"],
73
- ["Explain what this code does: def foo(x): return x * 2"],
74
- ["Debug this code: for i in range(10): print(i)"],
75
- ["Write a SQL query to find duplicate emails"],
76
- ["Write a function to reverse a string in Python"],
77
- ["How do I handle exceptions in Python?"],
78
- ]
79
-
80
- gr.Examples(examples=examples, inputs=[prompt])
81
-
82
- submit.click(
83
- fn=generate,
84
- inputs=[prompt, system_prompt, max_tokens, temperature],
85
- outputs=output
86
- )
87
- prompt.submit(
88
- fn=generate,
89
- inputs=[prompt, system_prompt, max_tokens, temperature],
90
- outputs=output
91
  )
 
 
 
 
92
 
93
  if __name__ == "__main__":
94
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  """
2
  Stack 2.9 - HuggingFace Space
3
+ Minimal Gradio UI for code generation
4
+ Pinned to Gradio 4.12.0 for compatibility
5
  """
6
  import gradio as gr
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
  import torch
9
 
10
+ print("Loading model...")
 
11
 
12
+ # Load model on CPU to fit free tier
13
+ MODEL_NAME = "Qwen/Qwen2.5-Coder-1.5B-Instruct"
14
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
15
  model = AutoModelForCausalLM.from_pretrained(
16
  MODEL_NAME,
17
+ torch_dtype=torch.float32, # CPU-friendly
18
+ device_map="cpu",
19
+ trust_remote_code=True,
20
+ low_cpu_mem_usage=True
21
  )
22
+ print("Model loaded!")
23
 
24
+ def generate_code(prompt, system_prompt="You are a helpful coding assistant.", max_tokens=256, temperature=0.7):
25
+ """Generate code from prompt"""
26
  messages = [
 
27
  {"role": "user", "content": prompt}
28
  ]
29
+ if system_prompt:
30
+ messages.insert(0, {"role": "system", "content": system_prompt})
31
+
32
  text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
33
+ inputs = tokenizer([text], return_tensors="pt")
34
+
35
+ with torch.no_grad():
36
+ outputs = model.generate(
37
+ **inputs,
38
+ max_new_tokens=max_tokens,
39
+ temperature=temperature,
40
+ do_sample=True,
41
+ pad_token_id=tokenizer.pad_token_id or tokenizer.eos_token_id
42
+ )
43
+
44
  response = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
45
  return response.strip()
46
 
47
+ # Simple UI
48
+ with gr.Blocks(title="Stack 2.9", theme=gr.themes.Soft()) as demo:
49
+ gr.Markdown("# 💻 Stack 2.9 Code Assistant")
50
+ gr.Markdown(f"Running: `{MODEL_NAME}` · 1.5B parameters · CPU")
51
 
 
 
 
52
  with gr.Row():
53
+ with gr.Column():
 
 
 
 
 
54
  prompt = gr.Textbox(
55
+ label="Your prompt",
56
+ placeholder="Write a Python function to calculate fibonacci...",
57
  lines=6
58
  )
59
+ system = gr.Textbox(
60
+ label="System prompt",
61
+ value="You are a helpful coding assistant.",
62
+ lines=2
63
+ )
64
  with gr.Row():
65
+ max_tokens = gr.Slider(64, 512, value=256, label="Max tokens")
66
+ temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
67
+ btn = gr.Button("Generate", variant="primary")
68
+
69
+ with gr.Column():
70
+ output = gr.Code(label="Generated code", language="python", lines=20)
71
+ clear = gr.Button("Clear")
72
+
73
+ gr.Examples(
74
+ examples=[
75
+ ["Write a Python function to calculate fibonacci numbers"],
76
+ ["Explain what this code does: def foo(x): return x * 2"],
77
+ ["Write a SQL query to find duplicate emails"],
78
+ ["How do I handle exceptions in Python?"],
79
+ ],
80
+ inputs=prompt
 
 
 
 
 
 
 
 
 
 
 
81
  )
82
+
83
+ btn.click(generate_code, [prompt, system, max_tokens, temp], output)
84
+ prompt.submit(generate_code, [prompt, system, max_tokens, temp], output)
85
+ clear.click(lambda: "", None, prompt)
86
 
87
  if __name__ == "__main__":
88
+ demo.queue().launch(server_name="0.0.0.0", server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio==4.12.0
2
+ transformers==4.40.0
3
+ torch==2.1.2
4
+ accelerate==0.25.0
5
+ sentencepiece==0.2.0