RayBe committed on
Commit
4d4ccb0
·
verified ·
1 Parent(s): dd83544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -9,16 +9,10 @@ model_name = "./t5-finetuned-final"
9
  tokenizer = T5Tokenizer.from_pretrained(model_name)
10
  model = T5ForConditionalGeneration.from_pretrained(model_name)
11
 
12
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
13
  model.to(device)
14
 
15
- if torch.cuda.is_available():
16
- model.half()
17
- try:
18
- model = torch.compile(model)
19
- except:
20
- pass
21
-
22
  def extract_amount(input_text):
23
  """
24
  Extracts the amount from the input text using a robust regex.
@@ -67,10 +61,11 @@ def generate_command(input_command):
67
  prompt = "extract: " + input_command
68
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
69
 
 
70
  output_ids = model.generate(
71
  input_ids,
72
- max_length=64,
73
- num_beams=3, #
74
  early_stopping=True
75
  )
76
 
 
9
  tokenizer = T5Tokenizer.from_pretrained(model_name)
10
  model = T5ForConditionalGeneration.from_pretrained(model_name)
11
 
12
+ # Move model to CPU (explicitly)
13
+ device = torch.device("cpu")
14
  model.to(device)
15
 
 
 
 
 
 
 
 
16
  def extract_amount(input_text):
17
  """
18
  Extracts the amount from the input text using a robust regex.
 
61
  prompt = "extract: " + input_command
62
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
63
 
64
+ # Generate output with reduced max_length and beams for faster inference
65
  output_ids = model.generate(
66
  input_ids,
67
+ max_length=32, # Reduced for faster inference
68
+ num_beams=2, # Reduced for faster inference
69
  early_stopping=True
70
  )
71