# Spaces: Running
# File size: 3,366 Bytes
# ca3ccd1
"""
fintext-extractor: Transaction Extraction from Bank SMS
Gradio demo for the two-stage NER pipeline that extracts structured
transaction data from bank SMS/notifications using ONNX Runtime.
"""
import json
import time
import gradio as gr
from fintext import FintextExtractor
# Build the extractor once, at import time; the first run downloads ~1.8GB
# of model files.
print("Loading fintext-extractor model...")
extractor = FintextExtractor.from_pretrained(
    "Sowrabhm/fintext-extractor",
    precision="fp16",
)
print("Model loaded!")
def extract_transaction(sms_text: str) -> str:
    """Run the two-stage extraction on one SMS and return formatted JSON.

    Args:
        sms_text: Raw SMS / notification text entered by the user.

    Returns:
        A JSON string indented by two spaces. For empty or whitespace-only
        input it is ``{"error": "Please enter SMS text"}``. Otherwise it
        contains ``is_transaction``, ``confidence`` (rounded to 3 decimals)
        and ``inference_time_ms`` (rounded to 1 decimal); the
        ``transaction_*`` and ``masked_account_digits`` fields are included
        only when ``is_transaction`` is truthy.
    """
    # Guard clause: nothing to analyse, so skip the model entirely.
    if not sms_text or not sms_text.strip():
        return json.dumps({"error": "Please enter SMS text"}, indent=2)

    # perf_counter() is monotonic, so the reported latency cannot be skewed
    # (or go negative) if the system clock is adjusted mid-request.
    start = time.perf_counter()

    # Stage 1: Classification -- its confidence score is the one reported.
    cls_result = extractor.classify(sms_text)

    # Stage 2: Full extraction
    result = extractor.extract(sms_text)

    elapsed_ms = (time.perf_counter() - start) * 1000

    # Build rich output
    is_transaction = result["is_transaction"]
    output = {
        "is_transaction": is_transaction,
        "confidence": round(cls_result["confidence"], 3),
    }

    if is_transaction:
        # Copy the extracted fields in a fixed order so the JSON layout is
        # stable; a missing key still raises KeyError, as before.
        for field in (
            "transaction_amount",
            "transaction_type",
            "transaction_date",
            "transaction_description",
            "masked_account_digits",
        ):
            output[field] = result[field]

    output["inference_time_ms"] = round(elapsed_ms, 1)

    # default=str stringifies any value json cannot encode natively.
    return json.dumps(output, indent=2, default=str)
# Sample inputs offered in the demo (all synthetic). Each message is wrapped
# in its own single-item list: one value for the single textbox input.
examples = [
    [message]
    for message in (
        "Rs.5,000 debited from a/c XX1234 for Amazon Pay on 08-Mar-26",
        "Credit Alert: INR 25,000 credited to a/c XX5678 on 15-Jan-2026",
        "INR 3,499.00 paid to Netflix via card ending 9876 on 01-Feb-2026",
        "Dear Customer, Rs.850.50 has been credited to your a/c XX2468 on 05-Mar-2026. UPI Ref: 678912345",
        "Transaction of Rs.15,750 at Flipkart on 28-Feb-2026 from card XX3579. Avl bal: Rs.42,300",
        "OTP 483921 for transaction of Rs.1,200. Do not share with anyone.",
        "Your FD of Rs.50,000 matures on 20-Apr-2026. Visit branch.",
        "Reminder: EMI of Rs.12,500 due on 10-Mar-2026 for loan XX4321",
    )
]
# Assemble the Gradio UI: one textbox in, one JSON code pane out.
_sms_input = gr.Textbox(
    label="SMS / Notification Text",
    placeholder="Paste a bank SMS here...",
    lines=3,
)
_json_output = gr.Code(
    label="Extracted Transaction Data",
    language="json",
)

# Text shown above the input (description) and below the UI (article).
_description = (
    "Extract structured transaction data from bank SMS using on-device NER. "
    "Two-stage pipeline: DeBERTa classifier filters non-transactions, "
    "then GLiNER2 extracts amount, date, type, description, and account digits.\n\n"
    "**Try the examples below or paste your own SMS text.**"
)
_article = (
    "**Links:** "
    "[Model](https://huggingface.co/Sowrabhm/fintext-extractor) | "
    "[GitHub](https://github.com/sowrabhmv/fintext-extractor) | "
    "License: CC-BY-4.0"
)

demo = gr.Interface(
    fn=extract_transaction,
    inputs=_sms_input,
    outputs=_json_output,
    title="fintext-extractor",
    description=_description,
    article=_article,
    examples=examples,
    cache_examples=False,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|