Spaces:

RayBe
/

Intent-Recognition

Sleeping

App Files Files Community

RayBe committed on Feb 11, 2025

Commit

56991a9

verified ·

1 Parent(s): 727d394

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -18

app.py CHANGED Viewed

@@ -16,13 +16,23 @@ model.to(device)
 def extract_amount(input_text):
     """
     Extracts the amount from the input text using a robust regex.
     """
-    # Improved regex to match amounts preceded by keywords or currency symbols
-    amount_match = re.search(r'(?:send|loan|pay|transfer)\s*(\d+(?:\.\d+)?)', input_text, re.IGNORECASE)
     if not amount_match:
-        amount_match = re.search(r'\b(\d+(?:\.\d+)?)\s*(?:AUD|USD|USDT|ETH|BTC)\b', input_text, re.IGNORECASE)
     if amount_match:
-        return amount_match.group(1)
     return None
 def fix_json_output(output):
@@ -43,21 +53,26 @@ def merge_json_with_amount(model_output, amount):
     leaving all other fields as produced by the model.
     """
     try:
-        # Attempt to load the model output directly as JSON.
         data = json.loads(model_output)
     except json.JSONDecodeError:
-        # If parsing fails, just return the model output unmodified.
-        # (You might choose to log an error here.)
-        return model_output
-    # Replace (or add) the "amount" field using the extracted amount.
     if amount:
-        data["amount"] = float(amount) if '.' in amount else int(amount)
-    # Dump back to JSON without altering other keys.
     return json.dumps(data, ensure_ascii=False)
 def generate_command(input_command):
     # Extract the amount from the input
     amount = extract_amount(input_command)
@@ -66,18 +81,17 @@ def generate_command(input_command):
     prompt = "extract: " + input_command
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-    # Generate output with increased max_length for complete JSON
     output_ids = model.generate(
         input_ids,
-        max_length=128,  # Increased to allow complete JSON output
-        num_beams=2,     # Reduced for faster inference
         early_stopping=True
     )
-    # Decode the model's output
     model_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # Merge the model's output with the extracted amount
     if amount:
         result = merge_json_with_amount(model_output, amount)
     else:
@@ -94,4 +108,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 def extract_amount(input_text):
     """
     Extracts the amount from the input text using a robust regex.
+    The negative lookahead (?!\S) requires the number to be followed by whitespace or the end of the input, so a number glued to other characters (e.g. "100USD") is not matched by the keyword pattern.
     """
+    # First try: match when the amount follows keywords like send, loan, pay, or transfer.
+    amount_match = re.search(
+        r'(?:send|loan|pay|transfer)\s*(\d+(?:\.\d+)?)(?!\S)',
+        input_text,
+        re.IGNORECASE
+    )
     if not amount_match:
+        # Fallback: match a number that is immediately followed by a currency symbol/abbreviation.
+        amount_match = re.search(
+            r'\b(\d+(?:\.\d+)?)\s*(?:AUD|USD|USDT|ETH|BTC|EUR)\b',
+            input_text,
+            re.IGNORECASE
+        )
     if amount_match:
+        return amount_match.group(1).strip()
     return None
 def fix_json_output(output):
     leaving all other fields as produced by the model.
     """
     try:
         data = json.loads(model_output)
     except json.JSONDecodeError:
+        # If JSON parsing fails, attempt to fix common formatting issues.
+        fixed_output = fix_json_output(model_output)
+        try:
+            data = json.loads(fixed_output)
+        except json.JSONDecodeError:
+            # If it still fails, return the model output unmodified.
+            return model_output
     if amount:
+        try:
+            # Convert the cleaned string to a float
+            data["amount"] = float(amount.strip())
+        except ValueError:
+            # In case conversion fails, keep the original string.
+            data["amount"] = amount
     return json.dumps(data, ensure_ascii=False)
 def generate_command(input_command):
     # Extract the amount from the input
     amount = extract_amount(input_command)
     prompt = "extract: " + input_command
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output_ids = model.generate(
         input_ids,
+        max_length=128,  # Increased max_length to allow for complete JSON output
+        num_beams=2,     # Using beam search for better output quality
         early_stopping=True
     )
     model_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # Merge the model's output with the extracted amount.
+    # The merge function only replaces the "amount" field, leaving all other keys intact.
     if amount:
         result = merge_json_with_amount(model_output, amount)
     else:
 )
 if __name__ == "__main__":
+    iface.launch()