ranggafermata commited on
Commit
5c24f7c
·
verified ·
1 Parent(s): fe03a00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -17
app.py CHANGED
@@ -10,17 +10,9 @@ login(token=os.getenv("HF_TOKEN"))
10
 
11
  # Load the model and tokenizer
12
  model_name = "ranggafermata/Fermata-v1.2-lightcoder"
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
- dtype = torch.float16 if device == "cuda" else torch.float32
15
-
16
- # Load tokenizer and model with eager attention
17
- tokenizer = AutoTokenizer.from_pretrained(model_name)
18
- model = AutoModelForCausalLM.from_pretrained(
19
- model_id,
20
- torch_dtype=dtype,
21
- attn_implementation="eager",
22
- trust_remote_code=True
23
- ).to(device)
24
 
25
  def generate_code(prompt, max_tokens, temperature, top_p):
26
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -35,18 +27,17 @@ def generate_code(prompt, max_tokens, temperature, top_p):
35
  )
36
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
37
 
38
- # Gradio Interface
39
  iface = gr.Interface(
40
  fn=generate_code,
41
  inputs=[
42
- gr.Textbox(lines=5, label="Prompt", placeholder="Write your prompt here..."),
43
- gr.Slider(minimum=10, maximum=512, value=128, step=8, label="Max Tokens"),
44
- gr.Slider(minimum=0.1, maximum=1.5, value=0.8, step=0.1, label="Temperature"),
45
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
46
  ],
47
  outputs=gr.Textbox(lines=20, label="Generated Code"),
48
  title="Fermata v1.2 LightCoder",
49
- description="Code generator powered by TinyLlama fine-tuned on math/code tasks."
50
  )
51
 
52
  iface.launch(mcp_server=True)
 
10
 
11
  # Load the model and tokenizer
12
  model_name = "ranggafermata/Fermata-v1.2-lightcoder"
13
+ tokenizer = LlamaTokenizerFast.from_pretrained(model_name)
14
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
15
+ model.eval()
 
 
 
 
 
 
 
 
16
 
17
  def generate_code(prompt, max_tokens, temperature, top_p):
18
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
27
  )
28
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
29
 
 
30
  iface = gr.Interface(
31
  fn=generate_code,
32
  inputs=[
33
+ gr.Textbox(lines=5, label="Prompt"),
34
+ gr.Slider(10, 512, value=128, label="Max Tokens"),
35
+ gr.Slider(0.1, 1.5, value=0.8, label="Temperature"),
36
+ gr.Slider(0.1, 1.0, value=0.95, label="Top-p")
37
  ],
38
  outputs=gr.Textbox(lines=20, label="Generated Code"),
39
  title="Fermata v1.2 LightCoder",
40
+ description="A fine-tuned code model based on TinyLlama."
41
  )
42
 
43
  iface.launch(mcp_server=True)