RayBe committed on
Commit
e89ec39
·
verified ·
1 Parent(s): 7216ccc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -28
app.py CHANGED
@@ -3,39 +3,37 @@ import torch
3
  import gradio as gr
4
  from transformers import T5Tokenizer, T5ForConditionalGeneration
5
 
6
- # Load the fine-tuned model from the local folder
7
  model_name = "./t5-finetuned-final"
8
  tokenizer = T5Tokenizer.from_pretrained(model_name)
9
  model = T5ForConditionalGeneration.from_pretrained(model_name)
10
 
11
- # Move model to GPU if available
12
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
  model.to(device)
14
 
15
- # Optimize for GPU: half precision and compilation (if supported)
16
  if torch.cuda.is_available():
17
- model.half() # Use half-precision for faster computation
18
  try:
19
- model = torch.compile(model) # Optimize with torch.compile() (PyTorch 2.0+)
20
  except Exception:
21
- pass # Continue if torch.compile() isn't available
22
 
23
  def fix_amount_in_output(input_command, output_str):
24
  """
25
- This function extracts the first decimal number found in the input_command
26
- and then replaces the "amount" field in the model output with that number.
27
  """
28
- # Extract the first number that has a decimal point (or comma) from the input.
29
  match = re.search(r'(\d+(?:[.,]\d+))', input_command)
30
  if match:
31
- # Normalize to use a period as the decimal separator.
32
  correct_amount_str = match.group(1).replace(',', '.')
33
  else:
34
- # If nothing is found, return the output unchanged.
35
  return output_str
36
 
37
- # Replace the amount value in the output.
38
- # This expects the output to contain a pattern like: "amount": some_number
39
  fixed_output = re.sub(
40
  r'("amount"\s*:\s*)(\d+(?:\.\d+)?)',
41
  r'\1' + correct_amount_str,
@@ -45,26 +43,29 @@ def fix_amount_in_output(input_command, output_str):
45
 
46
  def generate_command(input_command):
47
  prompt = "extract: " + input_command
48
-
49
- # Tokenize input and send to the correct device.
50
  input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
51
-
52
- # Generate output using optimized parameters.
53
- output_ids = model.generate(
54
- input_ids,
55
- max_length=64, # Reduced length for faster generation.
56
- num_beams=3, # Fewer beams for faster inference.
57
- early_stopping=True
58
- )
59
-
60
- # Decode the generated tokens.
61
- result = tokenizer.decode(output_ids[0], skip_special_tokens=True)
62
 
63
- # Fix the "amount" field in the output using the input value.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  result_fixed = fix_amount_in_output(input_command, result)
65
  return result_fixed
66
 
67
- # Define a Gradio interface.
68
  iface = gr.Interface(
69
  fn=generate_command,
70
  inputs=gr.Textbox(lines=2, placeholder="Enter a command..."),
@@ -77,3 +78,4 @@ if __name__ == "__main__":
77
  iface.launch()
78
 
79
 
 
 
3
  import gradio as gr
4
  from transformers import T5Tokenizer, T5ForConditionalGeneration
5
 
6
# Location of the fine-tuned T5 checkpoint on local disk.
model_name = "./t5-finetuned-final"

# Load the tokenizer and the seq2seq model from the local folder.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Prefer GPU when present; otherwise everything runs on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# GPU-only optimizations: half precision, then torch.compile when available.
if torch.cuda.is_available():
    model.half()  # fp16 halves memory and speeds up GPU inference
    try:
        model = torch.compile(model)
    except Exception:
        # torch.compile requires PyTorch 2.0+; silently keep the eager model.
        pass
22
 
23
  def fix_amount_in_output(input_command, output_str):
24
  """
25
+ Extracts the first decimal number from the input and replaces the "amount" value
26
+ in the output with that exact value.
27
  """
28
+ # Look for a number with optional decimal separator in the input command.
29
  match = re.search(r'(\d+(?:[.,]\d+))', input_command)
30
  if match:
31
+ # Normalize any commas to a period.
32
  correct_amount_str = match.group(1).replace(',', '.')
33
  else:
 
34
  return output_str
35
 
36
+ # Replace the "amount" value in the output with the extracted amount.
 
37
  fixed_output = re.sub(
38
  r'("amount"\s*:\s*)(\d+(?:\.\d+)?)',
39
  r'\1' + correct_amount_str,
 
43
 
44
def generate_command(input_command):
    """Run the fine-tuned T5 model on *input_command* and return the
    generated command string with its "amount" field corrected.

    The prompt is prefixed with "extract: " to match the fine-tuning task.
    Greedy decoding (num_beams=1) is used on CPU for speed; beam search
    (num_beams=3) is used on GPU for potentially higher quality.
    """
    prompt = "extract: " + input_command

    # Tokenize and move the input ids onto the same device as the model.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # Inference only: disable autograd bookkeeping for speed and memory.
    with torch.no_grad():
        if device.type == "cpu":
            # Greedy decoding for faster output on CPU.
            # NOTE: early_stopping is NOT passed here — it only applies to
            # beam search and triggers a transformers warning with num_beams=1.
            output_ids = model.generate(
                input_ids,
                max_length=64,
                num_beams=1,
            )
        else:
            # Beam search for potentially higher quality on GPU.
            output_ids = model.generate(
                input_ids,
                max_length=64,
                num_beams=3,
                early_stopping=True,
            )

    # Decode, then copy the exact amount from the input into the output so
    # the model cannot hallucinate a different number.
    result = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    result_fixed = fix_amount_in_output(input_command, result)
    return result_fixed
67
 
68
+ # Create a Gradio interface.
69
  iface = gr.Interface(
70
  fn=generate_command,
71
  inputs=gr.Textbox(lines=2, placeholder="Enter a command..."),
 
78
  iface.launch()
79
 
80
 
81
+