Spaces:

RayBe
/

Intent-Recognition

Sleeping

App Files Files Community

RayBe committed on Feb 11, 2025

Commit

5ddb235

verified ·

1 Parent(s): a09d2a8

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -36

app.py CHANGED Viewed

@@ -18,55 +18,56 @@ if torch.cuda.is_available():
     model.half()  # Use half-precision for faster computation
     try:
         model = torch.compile(model)  # PyTorch 2.0+ optimization
-    except:
         pass  # Ignore if torch.compile is not available
-def sanitize_amount(output):
     """
-    Sanitizes the amount field to ensure it is correctly formatted.
     """
-    # Fix malformed amounts like "46307.0" -> "4630.07" or "4630327.0" -> "463032.07"
-    def fix_malformed_amount(match):
-        full_match = match.group(0)
-        integer_part = match.group(1)
-        decimal_part = match.group(2)
-        return f"{integer_part}.{decimal_part}"  # Reconstruct the correct format
-    # Match numbers with misplaced decimal points
-    output = re.sub(r'(\d+)(\d{2})\.0', fix_malformed_amount, output)
-    return output
 def generate_command(input_command):
-    """
-    Generates the command and ensures the exact amount is displayed without changes.
-    """
     prompt = "extract: " + input_command
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
-    # Generate output from the model
     output_ids = model.generate(
         input_ids,
         max_length=64,  # Reduced for speed
         num_beams=3,    # Lowered from 5 to 3 for faster output
         early_stopping=True
     )
     result = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    # Sanitize the output to fix malformed amounts
-    sanitized_result = sanitize_amount(result)
-    try:
-        # Attempt to parse the sanitized result as JSON
-        data = json.loads(sanitized_result)
-        # Convert numeric amounts to strings to preserve exact formatting
-        if isinstance(data.get("amount"), (int, float)):
-            data["amount"] = str(data["amount"])
-        return json.dumps(data, ensure_ascii=False)  # Return as JSON string
-    except json.JSONDecodeError:
-        # If not valid JSON, return the raw sanitized output
-        return sanitized_result
 # Create a Gradio interface
 iface = gr.Interface(
@@ -77,6 +78,5 @@ iface = gr.Interface(
     description="Enter a command, and the fine-tuned T5 model will extract relevant details in JSON format.",
 )
-# Launch the app
 if __name__ == "__main__":
-    iface.launch()

     model.half()  # Use half-precision for faster computation
     try:
         model = torch.compile(model)  # PyTorch 2.0+ optimization
+    except Exception:
         pass  # Ignore if torch.compile is not available
def correct_amount_format(output):
    """Normalize the numeric formatting of the amount in generated output.

    When ``output`` parses as a JSON object whose ``"amount"`` value is a
    string, a comma followed by exactly two digits is treated as a decimal
    separator ("10,50" -> "10.50") and the value is rewritten with exactly
    two decimal places, rounding half-up. Amounts that cannot be read as a
    finite decimal number are left unchanged. Any other JSON value (list,
    number, string, null, or an object without a string amount) is
    re-serialized without modification.

    When ``output`` is not valid JSON, the same comma fix is applied to the
    raw text and any digits beyond the second decimal place are dropped.

    Args:
        output: Text produced by the model, ideally a JSON object.

    Returns:
        The corrected text: a JSON string when ``output`` was valid JSON,
        otherwise the patched raw text.
    """
    # Imported here so the function does not rely on the module's import block.
    from decimal import ROUND_HALF_UP, Decimal, InvalidOperation

    try:
        data = json.loads(output)
    except json.JSONDecodeError:
        # Not JSON: patch the raw text instead of a parsed field.
        patched = re.sub(r'(\d+),(\d{2})\b', r'\1.\2', output)
        # This path truncates extra decimals; it does not round.
        return re.sub(r'(\d+\.\d{2})\d+', r'\1', patched)

    # Valid JSON is not necessarily an object: a bare number or string parses
    # fine but has no fields, so only dicts are inspected for an amount.
    amount = data.get("amount") if isinstance(data, dict) else None
    if isinstance(amount, str):
        candidate = re.sub(r'(\d+),(\d{2})\b', r'\1.\2', amount)
        try:
            value = Decimal(candidate)
            # "Infinity" and "NaN" parse as numbers but are not amounts.
            if value.is_finite():
                cents = value.quantize(Decimal("0.01"), rounding=ROUND_HALF_UP)
                # The "f" format keeps plain notation even for inputs like "1e3".
                data["amount"] = format(cents, "f")
        except InvalidOperation:
            # Not a number, or too many digits to quantize: keep the original.
            pass
    return json.dumps(data, ensure_ascii=False)
 def generate_command(input_command):
     prompt = "extract: " + input_command
     input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
     output_ids = model.generate(
         input_ids,
         max_length=64,  # Reduced for speed
         num_beams=3,    # Lowered from 5 to 3 for faster output
         early_stopping=True
     )
     result = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # Apply the updated post-processing to correct the amount formatting
+    result = correct_amount_format(result)
+    return result
 # Create a Gradio interface
 iface = gr.Interface(
     description="Enter a command, and the fine-tuned T5 model will extract relevant details in JSON format.",
 )
 if __name__ == "__main__":
+    iface.launch()