import json
import logging
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


# Identifier handed to `from_pretrained` for both the tokenizer and the model.
MODEL_ID = "rawsun00001/banking-sms-json-parser-v6-merged"

print("🔄 Loading banking‑SMS JSON parser model...")

# With CUDA: float16 weights and automatic device placement.
# Without CUDA: float32 weights and no device map.
if torch.cuda.is_available():
    _load_kwargs = {"torch_dtype": torch.float16, "device_map": "auto"}
else:
    _load_kwargs = {"torch_dtype": torch.float32, "device_map": None}

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **_load_kwargs)

# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("✅ Model loaded successfully!")


def parse_banking_sms(raw_text: str) -> dict:
    """Run the parser model on *raw_text* and return the extracted fields.

    The text is whitespace-normalised, suffixed with ``"|"`` to form the
    prompt, and passed to ``model.generate`` with sampling disabled. The
    first flat ``{...}`` span of the generated continuation that parses as
    JSON supplies the result.

    Args:
        raw_text: The SMS or email body. ``None`` or whitespace-only input is
            answered with the fallback result without invoking the model.

    Returns:
        A dict with exactly the keys ``date``, ``type``, ``amount``,
        ``category``, ``last4`` and ``is_transaction``. Fields absent from
        the model output are ``None`` (``is_transaction`` defaults to
        ``False``). When no parseable JSON object is generated, every field
        is ``None`` and ``is_transaction`` is ``False``.
    """
    fallback = {
        "date": None, "type": None,
        "amount": None, "category": None,
        "last4": None, "is_transaction": False,
    }

    # Collapse all whitespace runs; `or ""` tolerates a None input.
    sms_text = " ".join((raw_text or "").split())
    if not sms_text:
        return fallback

    prompt = sms_text + "|"

    # Put the inputs on the model's device; this covers both the CPU and the
    # CUDA case without a separate branch.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            do_sample=False,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the ids that follow the prompt. Cutting the decoded string
    # at len(prompt) would assume decoding reproduces the prompt
    # character-for-character, which is not guaranteed.
    completion = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Try each flat {...} span in order instead of giving up on the first.
    for candidate in re.finditer(r"\{[^{}]+\}", completion):
        try:
            parsed = json.loads(candidate.group())
        except json.JSONDecodeError:
            continue
        return {
            "date": parsed.get("date"),
            "type": parsed.get("type"),
            "amount": parsed.get("amount"),
            "category": parsed.get("category"),
            "last4": parsed.get("last4"),
            "is_transaction": parsed.get("is_transaction", False),
        }

    # Record the failure without logging the (potentially sensitive) content.
    logging.getLogger(__name__).warning(
        "No parseable JSON object in model output (%d chars)", len(completion)
    )
    return fallback


def predict(raw_text: str) -> str:
    """Parse *raw_text* and render the result as Markdown.

    Args:
        raw_text: The SMS or email body, forwarded to ``parse_banking_sms``.

    Returns:
        A Markdown string. When the parsed ``is_transaction`` value is
        truthy, it lists date, type, amount, category and last four digits
        followed by the full JSON; otherwise it shows a non-transaction
        notice followed by the JSON. Fields whose value is ``None`` are
        shown as ``N/A``.
    """
    parsed = parse_banking_sms(raw_text)
    payload = json.dumps(parsed, indent=2)

    if not parsed["is_transaction"]:
        return (
            "ℹ️ Non‑transactional SMS / Promotional / Info message.\n\n"
            "**Parsed Classification JSON:**\n```json\n"
            f"{payload}\n```"
        )

    def shown(field: str) -> str:
        # The parsed dict always carries every key, so a `.get` default never
        # applies; test the value itself. `is None` keeps 0 and "" visible.
        value = parsed.get(field)
        return "N/A" if value is None else str(value)

    # `type` may be None or a non-string; only title-case a real value.
    kind = parsed.get("type")
    type_label = "N/A" if kind is None else str(kind).title()

    return (
        "✅ Transaction Detected!\n\n"
        f"- 📅 Date: **{shown('date')}**\n"
        f"- 💳 Type: **{type_label}**\n"
        f"- 💰 Amount: **{shown('amount')}**\n"
        f"- 🏪 Category: **{shown('category')}**\n"
        f"- 🔢 Last 4 Digits: **{shown('last4')}**\n\n"
        "**Full Parsed JSON:**\n```json\n"
        f"{payload}\n```"
    )


# Input widget: a three-line text box for the pasted message.
_sms_box = gr.Textbox(
    label="Input SMS / Email",
    placeholder="Paste your banking SMS or email here…",
    lines=3,
)

# Output widget: the Markdown summary returned by `predict`.
_result_view = gr.Markdown(label="Parsed Output")

# NOTE(review): newer Gradio releases appear to rename `allow_flagging` to
# `flagging_mode` — confirm against the installed version before changing it.
iface = gr.Interface(
    fn=predict,
    inputs=_sms_box,
    outputs=_result_view,
    title="🏦 Banking SMS JSON Parser",
    description=(
        "Paste any banking SMS (or email) below — "
        "the app will detect transaction vs non-transaction, "
        "and display structured JSON output."
    ),
    allow_flagging="never",
    analytics_enabled=False,
)


if __name__ == "__main__":
    # Start the Gradio server on host 0.0.0.0, port 7860, only when this
    # file is run as a script.
    iface.launch(server_port=7860, server_name="0.0.0.0")