Spaces:

ua-l
/

question-and-answer

No application file

Yehor committed on Mar 12, 2025

Commit

2c8a4e3

verified ·

1 Parent(s): be7f41a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import spaces
 import torch
-import torch._dynamo
 # torch._dynamo.config.suppress_errors = True
-torch._dynamo.disable()
-torch._dynamo.disallow_in_graph()
 import gradio as gr
@@ -20,8 +21,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map='auto',
     torch_dtype='auto',
 )
-# model = torch.compile(model, backend="eager")
 print('Model dtype:', model.dtype)
@@ -36,7 +36,7 @@ def predict(question):
     ### Answer:
 '''], return_tensors = "pt").to("cuda")
-    outputs = model.generate(**inputs, max_new_tokens = 128)
     results = tokenizer.batch_decode(outputs, skip_special_tokens=True)

 import spaces
 import torch
+# import torch._dynamo
 # torch._dynamo.config.suppress_errors = True
+# torch._dynamo.disable()
+# torch._dynamo.disallow_in_graph()
 import gradio as gr
     device_map='auto',
     torch_dtype='auto',
 )
+compiled_model = torch.compile(model, mode="reduce-overhead", fullgraph=True)
 print('Model dtype:', model.dtype)
     ### Answer:
 '''], return_tensors = "pt").to("cuda")
+    outputs = compiled_model.generate(**inputs, max_new_tokens = 128)
     results = tokenizer.batch_decode(outputs, skip_special_tokens=True)