ejschwartz committed
Commit f944b14 · 1 Parent(s): e73ac0b
Files changed (1):
  1. app.py +40 -18
app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+
 os.environ["TOKENIZERS_PARALLELISM"] = "true"

 import frontmatter
@@ -12,51 +13,72 @@ from modeling_nova import NovaTokenizer, NovaForCausalLM

 print("Downloading model")

-tokenizer = AutoTokenizer.from_pretrained('lt-asset/nova-6.7b-bcr', trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(
+    "lt-asset/nova-6.7b-bcr", trust_remote_code=True
+)
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.pad_token_id = tokenizer.eos_token_id
 nova_tokenizer = NovaTokenizer(tokenizer)

-model = NovaForCausalLM.from_pretrained('lt-asset/nova-6.7b-bcr', torch_dtype=torch.bfloat16, device_map="auto").eval()
+model = NovaForCausalLM.from_pretrained(
+    "lt-asset/nova-6.7b-bcr", torch_dtype=torch.bfloat16, device_map="auto"
+).eval()

 examples = json.load(open("humaneval_decompile_nova_6.7b.json", "r"))

+
 @spaces.GPU
 def predict(type, normalized_asm):

-    prompt_before = f'# This is the assembly code with {type} optimization:\n<func0>:'
+    prompt_before = f"# This is the assembly code with {type} optimization:\n<func0>:"
     asm = normalized_asm.strip()
-    assert asm.startswith('<func0>:')
-    asm = asm[len('<func0>:'): ]
-    prompt_after = '\nWhat is the source code?\n'
-
+    assert asm.startswith("<func0>:")
+    asm = asm[len("<func0>:") :]
+    prompt_after = "\nWhat is the source code?\n"
+
     inputs = prompt_before + asm + prompt_after
     print("Inputs:", inputs)

     # 0 for non-assembly code characters and 1 for assembly characters, required by nova tokenizer
-    char_types = '0' * len(prompt_before) + '1' * len(asm) + '0' * len(prompt_after)
-
-    tokenizer_output = nova_tokenizer.encode(inputs, '', char_types)
-    input_ids = torch.LongTensor(tokenizer_output['input_ids'].tolist()).unsqueeze(0)
+    char_types = "0" * len(prompt_before) + "1" * len(asm) + "0" * len(prompt_after)
+
+    tokenizer_output = nova_tokenizer.encode(inputs, "", char_types)
+    input_ids = torch.LongTensor(tokenizer_output["input_ids"].tolist()).unsqueeze(0)
     print("Input IDs:", input_ids.shape)
-    nova_attention_mask = torch.LongTensor(tokenizer_output['nova_attention_mask']).unsqueeze(0)
+    nova_attention_mask = torch.LongTensor(
+        tokenizer_output["nova_attention_mask"]
+    ).unsqueeze(0)

     output = model.generate(
-        inputs=input_ids.cuda(), max_new_tokens=512, temperature=0.2, top_p=0.95,
-        num_return_sequences=1, do_sample=True, nova_attention_mask=nova_attention_mask.cuda(),
-        no_mask_idx=torch.LongTensor([tokenizer_output['no_mask_idx']]).cuda(),
-        pad_token_id=tokenizer.pad_token_id, eos_token_id=tokenizer.eos_token_id
+        inputs=input_ids.cuda(),
+        max_new_tokens=512,
+        temperature=0.2,
+        top_p=0.95,
+        num_return_sequences=1,
+        do_sample=True,
+        nova_attention_mask=nova_attention_mask.cuda(),
+        no_mask_idx=torch.LongTensor([tokenizer_output["no_mask_idx"]]).cuda(),
+        pad_token_id=tokenizer.pad_token_id,
+        eos_token_id=tokenizer.eos_token_id,
     )
     print("Output 1:", output)

-    output = tokenizer.decode(output[0][input_ids.size(1): ], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+    output = tokenizer.decode(
+        output[0][input_ids.size(1) :],
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=True,
+    )
     print("Output 2:", output)

     return output

+
 demo = gr.Interface(
     fn=predict,
-    inputs=[gr.Text(label="Optimization Type", value="O0"), gr.Text(label="Normalized Assembly Code")],
+    inputs=[
+        gr.Text(label="Optimization Type", value="O0"),
+        gr.Text(label="Normalized Assembly Code"),
+    ],
     outputs=gr.Text(label="Raw Nova Output"),
     description=frontmatter.load("README.md").content,
     examples=[[ex["type"], ex["normalized_asm"]] for ex in examples],
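Side note on the character-type mask built in predict above: the Nova tokenizer appears to require a per-character type string alongside the prompt, '0' for ordinary prompt text and '1' for assembly characters, constructed exactly as in the diff. A minimal sketch of that alignment, using a made-up assembly snippet rather than one of the Space's bundled examples:

# Sketch only -- the assembly body is a hypothetical placeholder, not output from this Space.
prompt_before = "# This is the assembly code with O0 optimization:\n<func0>:"
asm = "\nendbr64\nret\n"  # placeholder normalized assembly, after "<func0>:" has been stripped
prompt_after = "\nWhat is the source code?\n"

inputs = prompt_before + asm + prompt_after
# '0' marks non-assembly characters, '1' marks assembly characters (per the comment in app.py)
char_types = "0" * len(prompt_before) + "1" * len(asm) + "0" * len(prompt_after)
assert len(char_types) == len(inputs)  # the mask is per character, so lengths must match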