nebulaResearch committed on
Commit
744d3ff
·
verified ·
1 Parent(s): 6a508ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -41,26 +41,26 @@ def compile_and_load_model():
41
  )
42
  temp_model = temp_model.to('cuda')
43
 
44
- # Capture example inputs (dummy prompt for export; adjust if needed for your model)
45
- example_prompt = "Hello world" # Simple example; use a representative prompt for better optimization
46
  example_inputs = tokenizer(example_prompt, return_tensors="pt").to('cuda')
47
 
48
- # Capture the computation graph without executing
49
- with spaces.aoti_capture():
50
- _ = temp_model(**example_inputs)
51
 
52
- # Export the model graph
53
  exported_model = export(
54
- temp_model,
55
- example_inputs['input_ids'],
56
- example_inputs['attention_mask']
57
  )
58
 
59
  # Compile to optimized binary
60
  compiled_model = spaces.aoti_compile(exported_model)
61
 
62
  # Apply the compiled model (patches the forward method)
63
- spaces.aoti_apply(temp_model, compiled_model)
64
 
65
  model = temp_model
66
  print("Model compiled and ready for inference!")
 
41
  )
42
  temp_model = temp_model.to('cuda')
43
 
44
+ # Example inputs for capture (representative prompt)
45
+ example_prompt = "Hello world" # Simple; adjust for better optimization if needed
46
  example_inputs = tokenizer(example_prompt, return_tensors="pt").to('cuda')
47
 
48
+ # Capture the computation graph (on the core transformer module for efficiency)
49
+ with spaces.aoti_capture(temp_model.model) as call:
50
+ _ = temp_model.model(**example_inputs) # Forward pass only (not generate)
51
 
52
+ # Export the captured graph
53
  exported_model = export(
54
+ temp_model.model,
55
+ args=call.args,
56
+ kwargs=call.kwargs
57
  )
58
 
59
  # Compile to optimized binary
60
  compiled_model = spaces.aoti_compile(exported_model)
61
 
62
  # Apply the compiled model (patches the forward method)
63
+ spaces.aoti_apply(compiled_model, temp_model.model)
64
 
65
  model = temp_model
66
  print("Model compiled and ready for inference!")