teamaMohamed115 committed on
Commit
e0a5676
·
verified ·
1 Parent(s): f0b1102

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -35
app.py CHANGED
@@ -3,15 +3,12 @@ import os
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
 
6
-
7
  MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
8
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
9
 
10
-
11
  # Safe loader: try with device_map for HF inference if possible
12
  print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")
13
 
14
-
15
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
16
 
17
  # Safe loader
@@ -21,58 +18,79 @@ except Exception:
21
  model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
22
 
23
 
24
-
25
  model.to(DEVICE)
26
  model.eval()
27
 
28
-
29
  # Generation helper
30
  GEN_KWARGS = dict(
31
- max_new_tokens=256,
32
- do_sample=True,
33
- temperature=0.2,
34
- top_p=0.95,
35
- top_k=50,
36
- num_return_sequences=1,
37
  )
38
 
39
-
40
  PROMPT_TEMPLATE = (
41
- "# Instruction:\n{instruction}\n\n# Response (provide a Python module with multiple functions):\n"
42
  )
43
 
44
 
 
 
 
45
 
 
 
 
 
 
 
46
 
47
- def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.2, top_p: float = 0.95):
48
- if not instruction.strip():
49
- return "Please provide an instruction or problem statement."
 
 
 
 
 
 
 
 
50
 
 
 
 
51
 
52
- prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
53
- inputs = tokenizer(prompt, return_tensors="pt")
54
- input_ids = inputs["input_ids"].to(DEVICE)
55
- attention_mask = inputs.get("attention_mask")
56
- if attention_mask is not None:
57
- attention_mask = attention_mask.to(DEVICE)
58
 
59
 
60
- gen_kwargs = GEN_KWARGS.copy()
61
- gen_kwargs.update({
62
- "max_new_tokens": int(max_tokens),
63
- "temperature": float(temperature),
64
- "top_p": float(top_p),
65
- })
66
 
 
 
 
 
 
 
 
67
 
68
- with torch.no_grad():
69
- outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)
70
 
 
 
 
 
 
71
 
72
- decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
73
 
 
 
 
 
74
 
75
- # Strip the prompt prefix from the decoded text if present
76
- if decoded.startswith(prompt):
77
- decoded = decoded[len(prompt):]
78
- demo.launch()
 
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
 
 
6
  MODEL_ID = os.environ.get("HF_MODEL_ID", "teamaMohamed115/smollm-360m-code-lora")
7
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
8
 
 
9
  # Safe loader: try with device_map for HF inference if possible
10
  print(f"Loading tokenizer and model from {MODEL_ID} on {DEVICE}")
11
 
 
12
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
13
 
14
  # Safe loader
 
18
  model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
19
 
20
 
 
21
  model.to(DEVICE)
22
  model.eval()
23
 
 
24
  # Generation helper
25
  GEN_KWARGS = dict(
26
+ max_new_tokens=256,
27
+ do_sample=True,
28
+ temperature=0.2,
29
+ top_p=0.95,
30
+ top_k=50,
31
+ num_return_sequences=1,
32
  )
33
 
 
34
  PROMPT_TEMPLATE = (
35
+ "# Instruction:\n{instruction}\n\n# Response (provide a Python module with multiple functions):\n"
36
  )
37
 
38
 
39
def generate_code(instruction: str, max_tokens: int = 256, temperature: float = 0.2, top_p: float = 0.95):
    """Generate a Python module from a natural-language instruction.

    Args:
        instruction: Task description; blank/whitespace input short-circuits
            with a hint message instead of invoking the model.
        max_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature; values <= 0 fall back to greedy
            decoding (sampling with temperature 0 is rejected by transformers).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded completion as a string, with the prompt prefix stripped.
    """
    if not instruction.strip():
        return "Please provide an instruction or problem statement."

    prompt = PROMPT_TEMPLATE.format(instruction=instruction.strip())
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(DEVICE)

    gen_kwargs = GEN_KWARGS.copy()
    gen_kwargs.update({
        "max_new_tokens": int(max_tokens),
        "temperature": float(temperature),
        "top_p": float(top_p),
    })
    # The UI temperature slider allows 0.0, but transformers raises when
    # do_sample=True and temperature == 0 — switch to greedy decoding instead.
    if gen_kwargs["temperature"] <= 0.0:
        gen_kwargs["do_sample"] = False
        gen_kwargs.pop("temperature", None)
        gen_kwargs.pop("top_p", None)
        gen_kwargs.pop("top_k", None)
    # Avoid the "no pad token" warning/failure for models that define only EOS.
    if tokenizer.pad_token_id is None:
        gen_kwargs.setdefault("pad_token_id", tokenizer.eos_token_id)

    with torch.no_grad():
        outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, **gen_kwargs)

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Strip the prompt prefix so only the model's completion remains.
    if decoded.startswith(prompt):
        decoded = decoded[len(prompt):]

    return decoded.strip()
 
 
 
 
 
67
 
68
 
69
# Gradio UI: instruction box plus sampling controls, wired to generate_code().
with gr.Blocks(title="SmolLM Python Code Assistant") as demo:
    gr.Markdown("# SmolLM — Python Code Generation\nEnter an instruction and get a multi-function Python module.")

    with gr.Row():
        instruction_input = gr.Textbox(lines=6, placeholder="Describe the Python module you want...", label="Instruction")
        with gr.Column(scale=1):
            max_tokens_slider = gr.Slider(minimum=32, maximum=1024, value=256, step=32, label="Max new tokens")
            temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Temperature")
            top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.01, label="Top-p")
            generate_btn = gr.Button("Generate")

    code_output = gr.Code(label="Generated Python module", language="python")

    def _on_generate(instruction, max_tokens, temperature, top_p):
        # Surface generation failures in the output pane instead of crashing the app.
        try:
            return generate_code(instruction, max_tokens, temperature, top_p)
        except Exception as exc:
            return f"Error during generation: {exc}"

    generate_btn.click(
        _on_generate,
        inputs=[instruction_input, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=[code_output],
    )

    gr.Examples(examples=[
        "Implement a Python module that includes: a function to compute Fibonacci sequence, a function to check primality, and a function to compute factorial, all with type hints and docstrings.",
        "Create a Python module for basic matrix operations (add, multiply, transpose) with appropriate error handling and tests.",
    ], inputs=instruction_input)
94
 
95
# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()