Spaces:

RayBe
/

Intent-Recognition

Sleeping

App Files Files Community

RayBe committed on Feb 11, 2025

Commit

aa20016

verified ·

1 Parent(s): db6252e

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -65

app.py CHANGED Viewed

@@ -9,102 +9,58 @@ model_name = "./t5-finetuned-final"
 tokenizer = T5Tokenizer.from_pretrained(model_name)
 model = T5ForConditionalGeneration.from_pretrained(model_name)
-# Move model to CPU (explicitly)
 device = torch.device("cpu")
 model.to(device)
 def extract_amount(input_text):
     """
-    Extracts the amount from the input text using a robust regex.
-    The negative lookahead (?!\S) ensures we stop capturing as soon as a non-space character appears.
     """
-    # First try: match when the amount follows keywords like send, loan, pay, or transfer.
-    amount_match = re.search(
-        r'(?:send|loan|pay|transfer)\s*(\d+(?:\.\d+)?)(?!\S)',
-        input_text,
-        re.IGNORECASE
-    )
-    if not amount_match:
-        # Fallback: match a number that is immediately followed by a currency symbol/abbreviation.
-        amount_match = re.search(
-            r'\b(\d+(?:\.\d+)?)\s*(?:AUD|USD|USDT|ETH|BTC|EUR)\b',
-            input_text,
-            re.IGNORECASE
-        )
-    if amount_match:
-        return amount_match.group(1).strip()
     return None
-def fix_json_output(output):
-    """
-    Fixes common JSON formatting issues in the model's output .
-    """
-    # Remove trailing commas before closing braces/brackets
-    output = re.sub(r',\s*([}\]])', r'\1', output)
-    # Fix missing or extra quotes around keys
-    output = re.sub(r'([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', output)
-    # Fix missing or extra quotes around string values
-    output = re.sub(r':\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*([,}])', r':"\1"\2', output)
-    return output
 def merge_json_with_amount(model_output, amount):
     """
-    Updates only the 'amount' field in the model's JSON output,
-    leaving all other fields as produced by the model.
     """
     try:
         data = json.loads(model_output)
-    except json.JSONDecodeError:
-        # If JSON parsing fails, attempt to fix common formatting issues.
-        fixed_output = fix_json_output(model_output)
-        try:
-            data = json.loads(fixed_output)
-        except json.JSONDecodeError:
-            # If it still fails, return the model output unmodified.
-            return model_output
     if amount:
         try:
-            # Convert the cleaned string to a float
-            data["amount"] = float(amount.strip())
-        except ValueError:
-            # In case conversion fails, keep the original string.
             data["amount"] = amount
     return json.dumps(data, ensure_ascii=False)
 def generate_command(input_command):
-    # Extract the amount from the input
     amount = extract_amount(input_command)
-    # Generate the JSON output using the model
     prompt = "extract: " + input_command
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-    output_ids = model.generate(
-        input_ids,
-        max_length=128,  # Increased max_length to allow for complete JSON output
-        num_beams=2,     # Using beam search for better output quality
-        early_stopping=True
-    )
     model_output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # Merge the model's output with the extracted amount.
-    # The merge function only replaces the "amount" field, leaving all other keys intact.
-    if amount:
-        result = merge_json_with_amount(model_output, amount)
-    else:
-        result = model_output  # Use the model's output as-is if no amount is found
     return result
 iface = gr.Interface(
     fn=generate_command,
     inputs=gr.Textbox(lines=2, placeholder="Enter a command..."),
     outputs=gr.Textbox(label="Extracted JSON Output"),
-    title="T5 Fine-Tuned Command Extractor",
-    description="Extracts details in JSON format and replaces the amount with the exact value from the input.",
 )
 if __name__ == "__main__":

 # Inference is pinned to the CPU; the device is chosen first so the model
 # can be moved onto it as part of loading.
 device = torch.device("cpu")
 tokenizer = T5Tokenizer.from_pretrained(model_name)
 # .to() returns the module itself, so `model` is the loaded network on `device`.
 model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
 def extract_amount(input_text):
     """
     Return the first number found in ``input_text`` as a string, or None.
     Both plain numbers ("2659.53464") and numbers written with comma
     thousands separators ("1,234,567.89") are recognised; separators are
     stripped so the result can be passed straight to ``float()``.
     For example, in:
       "Should I send 2659.53464 EUR to my wife today?"
     it returns the string "2659.53464", and in "send 1,000 USD" it
     returns "1000" (not "1").
     Returns None when the input is empty, not a string, or has no number.
     """
     # Guard against None / non-text input instead of letting re.search raise.
     if not isinstance(input_text, str) or not input_text:
         return None
     # First alternative: digit groups separated by commas (each group after
     # the first must be exactly three digits). Second alternative: a plain
     # integer or decimal. The trailing \b rejects malformed groupings such
     # as "1,0000", which then fall back to the plain form ("1").
     match = re.search(
         r'\b(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)\b',
         input_text,
     )
     if match:
         return match.group(1).replace(",", "")
     return None
 def merge_json_with_amount(model_output, amount):
     """
     Parses the model's JSON output and overrides the "amount" key
     with the manually extracted value.
     ``model_output`` is expected to be a JSON object. If it cannot be
     parsed, or parses to something other than an object (list, string,
     number, ...), an empty object is used as the base instead.
     ``amount`` is stored as a float when it converts cleanly, otherwise
     as given; a falsy ``amount`` (None, "") leaves the object untouched.
     Returns the resulting object serialised as a JSON string.
     """
     try:
         data = json.loads(model_output)
     except (TypeError, ValueError, RecursionError):
         # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
         data = {}
     # Valid JSON that is not an object cannot take an "amount" key; without
     # this check the item assignment below would raise TypeError.
     if not isinstance(data, dict):
         data = {}
     if amount:
         try:
             data["amount"] = float(amount)
         except (TypeError, ValueError, OverflowError):
             # Not numeric: keep the raw extracted value.
             data["amount"] = amount
     return json.dumps(data, ensure_ascii=False)
 def generate_command(input_command):
     """
     Turn a free-text command into the JSON string shown to the user.
     The text is prefixed with "extract: " and decoded by the model using
     beam search (2 beams, at most 128 tokens). The number pulled out of the
     raw input by ``extract_amount`` then replaces the "amount" field via
     ``merge_json_with_amount``.
     """
     # Take the amount from the raw text rather than from the model's output.
     extracted = extract_amount(input_command)
     encoded = tokenizer("extract: " + input_command, return_tensors="pt")
     generated = model.generate(
         encoded.input_ids.to(device),
         max_length=128,
         num_beams=2,
         early_stopping=True,
     )
     decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
     return merge_json_with_amount(decoded, extracted)
 # Gradio front end: one two-line text box in, one text box out, wired to
 # generate_command.
 _command_box = gr.Textbox(lines=2, placeholder="Enter a command...")
 _json_box = gr.Textbox(label="Extracted JSON Output")
 iface = gr.Interface(
     fn=generate_command,
     inputs=_command_box,
     outputs=_json_box,
     title="T5 Command Extractor",
     description="The model provides action, currency, and recipient. The amount is manually extracted from the input.",
 )
 if __name__ == "__main__":