File size: 3,366 Bytes
ca3ccd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""

fintext-extractor: Transaction Extraction from Bank SMS



Gradio demo for the two-stage NER pipeline that extracts structured

transaction data from bank SMS/notifications using ONNX Runtime.

"""

import json
import time
import gradio as gr
from fintext import FintextExtractor

# Build the extractor once at import time so every request handled by
# extract_transaction() shares it (downloads ~1.8GB on first run).
print("Loading fintext-extractor model...")
extractor = FintextExtractor.from_pretrained(
    "Sowrabhm/fintext-extractor",
    precision="fp16",
)
print("Model loaded!")


def extract_transaction(sms_text: str) -> str:
    """Run the two-stage extraction on one SMS and return pretty-printed JSON.

    Args:
        sms_text: Raw SMS / notification text. ``None``, empty, or
            whitespace-only input is rejected without calling the model.

    Returns:
        A JSON string with 2-space indentation. For blank input it contains
        a single ``"error"`` key. Otherwise it contains ``is_transaction``,
        the classifier ``confidence`` rounded to 3 decimals, the five
        extracted transaction fields (only when ``is_transaction`` is
        truthy), and ``inference_time_ms`` rounded to 1 decimal.
    """
    if not sms_text or not sms_text.strip():
        return json.dumps({"error": "Please enter SMS text"}, indent=2)

    # perf_counter() is monotonic and high-resolution. time.time() follows
    # the wall clock, which can be adjusted mid-request and produce wrong
    # (even negative) durations.
    start = time.perf_counter()

    # Stage 1: classification -- only its confidence score is used below.
    cls_result = extractor.classify(sms_text)

    # Stage 2: full extraction.
    # NOTE(review): extract() returns its own "is_transaction", so it
    # presumably classifies again internally -- confirm whether the separate
    # classify() call can be avoided.
    result = extractor.extract(sms_text)

    elapsed_ms = (time.perf_counter() - start) * 1000

    # Build rich output
    output = {
        "is_transaction": result["is_transaction"],
        "confidence": round(cls_result["confidence"], 3),
    }

    if result["is_transaction"]:
        # Copy the extracted fields in a fixed order so the JSON layout
        # stays stable from call to call.
        for field in (
            "transaction_amount",
            "transaction_type",
            "transaction_date",
            "transaction_description",
            "masked_account_digits",
        ):
            output[field] = result[field]

    output["inference_time_ms"] = round(elapsed_ms, 1)

    # default=str keeps the demo from failing on values json cannot encode
    # natively; such values are emitted as their str() form.
    return json.dumps(output, indent=2, default=str)


# Sample inputs offered in the UI (all synthetic). Each entry is wrapped in
# its own one-element list, giving one row per message.
examples = [
    [message]
    for message in (
        "Rs.5,000 debited from a/c XX1234 for Amazon Pay on 08-Mar-26",
        "Credit Alert: INR 25,000 credited to a/c XX5678 on 15-Jan-2026",
        "INR 3,499.00 paid to Netflix via card ending 9876 on 01-Feb-2026",
        "Dear Customer, Rs.850.50 has been credited to your a/c XX2468 on 05-Mar-2026. UPI Ref: 678912345",
        "Transaction of Rs.15,750 at Flipkart on 28-Feb-2026 from card XX3579. Avl bal: Rs.42,300",
        "OTP 483921 for transaction of Rs.1,200. Do not share with anyone.",
        "Your FD of Rs.50,000 matures on 20-Apr-2026. Visit branch.",
        "Reminder: EMI of Rs.12,500 due on 10-Mar-2026 for loan XX4321",
    )
]

# Assemble the Gradio UI: a three-line text box feeds extract_transaction(),
# and its JSON string result is rendered in a code viewer.
_sms_input = gr.Textbox(
    label="SMS / Notification Text",
    placeholder="Paste a bank SMS here...",
    lines=3,
)
_json_output = gr.Code(
    label="Extracted Transaction Data",
    language="json",
)

# Text passed as the interface description (Markdown-style emphasis included).
_description = (
    "Extract structured transaction data from bank SMS using on-device NER. "
    "Two-stage pipeline: DeBERTa classifier filters non-transactions, "
    "then GLiNER2 extracts amount, date, type, description, and account digits.\n\n"
    "**Try the examples below or paste your own SMS text.**"
)

# Text passed as the interface article: project links and license.
_article = (
    "**Links:** "
    "[Model](https://huggingface.co/Sowrabhm/fintext-extractor) | "
    "[GitHub](https://github.com/sowrabhmv/fintext-extractor) | "
    "License: CC-BY-4.0"
)

demo = gr.Interface(
    fn=extract_transaction,
    inputs=_sms_input,
    outputs=_json_output,
    title="fintext-extractor",
    description=_description,
    article=_article,
    examples=examples,
    cache_examples=False,
)

# Start the Gradio app only when this file is executed as a script;
# importing the module does not call launch().
if __name__ == "__main__":
    demo.launch()