RayBe committed on
Commit
4d4ccb0
·
verified ·
1 Parent(s): dd83544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -9,16 +9,10 @@ model_name = "./t5-finetuned-final"
9
  tokenizer = T5Tokenizer.from_pretrained(model_name)
10
  model = T5ForConditionalGeneration.from_pretrained(model_name)
11
 
12
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
13
  model.to(device)
14
 
15
- if torch.cuda.is_available():
16
- model.half()
17
- try:
18
- model = torch.compile(model)
19
- except:
20
- pass
21
-
22
  def extract_amount(input_text):
23
  """
24
  Extracts the amount from the input text using a robust regex.
@@ -67,10 +61,11 @@ def generate_command(input_command):
67
  prompt = "extract: " + input_command
68
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
69
 
 
70
  output_ids = model.generate(
71
  input_ids,
72
- max_length=64,
73
- num_beams=3, #
74
  early_stopping=True
75
  )
76
 
 
9
  tokenizer = T5Tokenizer.from_pretrained(model_name)
10
  model = T5ForConditionalGeneration.from_pretrained(model_name)
11
 
12
+ # Move model to CPU (explicitly)
13
+ device = torch.device("cpu")
14
  model.to(device)
15
 
 
 
 
 
 
 
 
16
  def extract_amount(input_text):
17
  """
18
  Extracts the amount from the input text using a robust regex.
 
61
  prompt = "extract: " + input_command
62
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
63
 
64
+ # Generate output with reduced max_length and beams for faster inference
65
  output_ids = model.generate(
66
  input_ids,
67
+ max_length=32, # Reduced for faster inference
68
+ num_beams=2, # Reduced for faster inference
69
  early_stopping=True
70
  )
71