|
|
""" |
|
|
Generate ICICI-Specific Training Data.

Creates 100 ICICI Bank email samples to improve model accuracy.

Author: Ranjit Behera
|
|
""" |
|
|
|
|
|
import json |
|
|
import random |
|
|
from pathlib import Path |
|
|
from datetime import datetime, timedelta |
|
|
|
|
|
# Destination of the generated JSONL samples (a relative path).
OUTPUT_FILE = Path("data") / "synthetic" / "icici_samples.jsonl"
|
|
|
|
|
|
|
|
# Message templates filled in with str.format(). Placeholders used across the
# set: amount, account, reference, date, merchant, suffix, sender, balance.
# Not every template uses every placeholder.
ICICI_TEMPLATES = [
    # UPI debit to a VPA
    (
        "Dear Customer, Rs.{amount} is debited from A/c XX{account} "
        "for UPI txn to VPA-{merchant}@{suffix} on {date}. Ref:{reference}"
    ),
    # UPI credit from a named sender
    (
        "INR {amount} credited to ICICI Bank A/c {account} on {date} "
        "from {sender}. UPI Ref: {reference}"
    ),
    # IMPS credit
    (
        "ICICI Bank Acct XX{account} credited with INR {amount} on {date}. "
        "IMPS Ref {reference}."
    ),
    # UPI debit to a merchant
    (
        "INR {amount} debited from ICICI Bank A/c {account} on {date} "
        "to {merchant} via UPI. Ref: {reference}"
    ),
    # Debit alert with helpline number (no merchant in the text)
    (
        "Alert: Rs {amount} debited from your ICICI Bank account ending {account} "
        "on {date}. UPI Ref No. {reference}. Not you? Call 1800-200-3344"
    ),
    # Generic credit
    (
        "Dear Customer, your ICICI Bank A/c XX{account} has been credited "
        "with Rs.{amount} on {date}. Ref: {reference}"
    ),
    # UPI payment debit
    (
        "ICICI Bank: Rs.{amount} has been debited from your account XX{account} "
        "on {date} for UPI payment to {merchant}. Ref:{reference}"
    ),
    # Debit with available balance (no reference in the text)
    (
        "Dear Customer, INR {amount} debited from ICICI A/c XX{account} on {date}. "
        "Info: UPI-{merchant}. Avl Bal: Rs.{balance}"
    ),
    # NEFT credit
    (
        "Rs {amount} has been credited to your ICICI Bank account {account} "
        "on {date} via NEFT. Reference: {reference}"
    ),
    # Fund transfer to a merchant
    (
        "ICICI Bank: Fund Transfer of Rs.{amount} to {merchant} successful. "
        "A/c: XX{account}. Date: {date}. Ref: {reference}"
    ),
]
|
|
|
|
|
# Handles placed after the "@" when a template renders a UPI VPA.
VPA_SUFFIXES = "ybl paytm okicici okhdfcbank axl sbi icici".split()
|
|
|
|
|
# (merchant name, spending category) pairs; order is kept as-is because it
# determines what random.choice() returns for a given seed.
MERCHANTS = [
    ("swiggy", "food"),
    ("zomato", "food"),
    ("amazon", "shopping"),
    ("flipkart", "shopping"),
    ("uber", "transport"),
    ("ola", "transport"),
    ("rapido", "transport"),
    ("bigbasket", "grocery"),
    ("blinkit", "grocery"),
    ("zepto", "grocery"),
    ("dmart", "grocery"),
    ("jio", "bills"),
    ("airtel", "bills"),
    ("electricity", "bills"),
    ("water", "bills"),
    ("myntra", "shopping"),
    ("ajio", "shopping"),
    ("bookmyshow", "entertainment"),
    ("netflix", "entertainment"),
    ("hotstar", "entertainment"),
]
|
|
|
|
|
# Values substituted for {sender} in credit templates: person names plus a
# few non-person credit sources.
SENDERS = [
    "Amit Kumar",
    "Priya Singh",
    "Rahul Sharma",
    "Neha Gupta",
    "Suresh Patel",
    "Anita Verma",
    "Cashback",
    "Refund - Amazon",
    "Interest Credit",
    "Dividend - Mutual Fund",
    "Salary",
]
|
|
|
|
|
# strftime patterns a generated date may be rendered with. Month names
# produced by %b / %B depend on the active locale.
DATE_FORMATS = [
    "%d-%m-%Y",  # e.g. 05-01-2024
    "%d/%m/%Y",  # e.g. 05/01/2024
    "%d-%m-%y",  # e.g. 05-01-24
    "%d %b %Y",  # e.g. 05 Jan 2024
    "%d%m%Y",    # e.g. 05012024
    "%d %B %Y",  # e.g. 05 January 2024
]
|
|
|
|
|
def generate_random_date():
    """Return a date 1 to 90 days before now, rendered in a random format.

    The format is picked at random from DATE_FORMATS, so the returned string
    has no fixed layout.
    """
    # Draw the offset first, then the format, so the sequence of random
    # calls stays the same for a given seed.
    offset = timedelta(days=random.randint(1, 90))
    moment = datetime.now() - offset
    return moment.strftime(random.choice(DATE_FORMATS))
|
|
|
|
|
def generate_reference():
    """Return a reference number made of 12 random decimal digits.

    Leading zeros are possible, which is why the result is a string.
    """
    return "".join(str(random.randrange(10)) for _ in range(12))
|
|
|
|
|
def generate_account():
    """Return a random 4-digit account number as a string (1000 to 9999)."""
    # The upper bound of randrange is exclusive, so this covers 1000..9999.
    last_four = random.randrange(1000, 10000)
    return str(last_four)
|
|
|
|
|
def generate_amount(is_high=False):
    """Return a random transaction amount as a string.

    Args:
        is_high: When true, return a whole number between 5000 and 50000.
            Otherwise return one of four candidates: three values rounded to
            two decimals and one whole number, all between 50 and 5000.
    """
    if is_high:
        return str(random.randint(5000, 50000))

    # All four candidates are drawn, in this order, before one is picked.
    small = round(random.uniform(50, 500), 2)
    medium = round(random.uniform(100, 2000), 2)
    whole = random.randint(100, 5000)
    large = round(random.uniform(500, 3000), 2)
    return str(random.choice([small, medium, whole, large]))
|
|
|
|
|
def generate_icici_samples(n_samples=100):
    """Generate ICICI-specific training samples and write them to OUTPUT_FILE.

    Each sample fills one randomly chosen entry of ICICI_TEMPLATES with
    random values and pairs the resulting text with the entities to extract.
    An entity is labelled only when its value actually appears in the
    rendered text, so templates without a ``{reference}`` or ``{merchant}``
    placeholder do not get those keys.

    Args:
        n_samples: Number of samples to generate. Zero (or a negative value)
            produces an empty output file.

    Returns:
        A list with one dict per sample, holding the keys ``prompt``,
        ``completion`` (the entities as indented JSON), ``bank`` (always
        ``'icici'``) and ``entities``.

    Side effects:
        Creates the parent directory of OUTPUT_FILE if needed, overwrites
        OUTPUT_FILE with one JSON object per line, and prints a summary.
    """
    samples = []

    for _ in range(n_samples):
        template = random.choice(ICICI_TEMPLATES)
        # 'credit' also matches 'credited'; debit templates contain neither.
        is_credit = 'credit' in template.lower()

        merchant, category = random.choice(MERCHANTS)

        # The order of these draws is kept stable so seeded runs reproduce.
        account = generate_account()
        reference = generate_reference()
        amount = generate_amount(is_high=random.random() < 0.2)
        date = generate_random_date()
        balance = str(random.randint(10000, 500000))
        sender = random.choice(SENDERS)
        suffix = random.choice(VPA_SUFFIXES)

        # str.format ignores keyword arguments a template does not use.
        text = template.format(
            amount=amount,
            account=account,
            reference=reference,
            date=date,
            merchant=merchant,
            category=category,
            balance=balance,
            sender=sender,
            suffix=suffix,
        )

        entities = {
            # Strip thousands separators in case the amount carries any.
            'amount': amount.replace(',', ''),
            'type': 'credit' if is_credit else 'debit',
            'date': date,
            'account': account,
        }

        # Label only values present in the text; otherwise the expected
        # output would contain data the input never mentions.
        if '{reference}' in template:
            entities['reference'] = reference
        if not is_credit and '{merchant}' in template:
            entities['merchant'] = merchant
            entities['category'] = category

        # The requested field list mirrors the keys of the expected output.
        fields = ', '.join(entities)
        prompt = (
            "Extract financial entities from this ICICI Bank email:\n"
            "\n"
            f"{text}\n"
            "\n"
            f"Extract: {fields}\n"
            "Output JSON:"
        )

        samples.append({
            'prompt': prompt,
            'completion': json.dumps(entities, indent=2),
            'bank': 'icici',
            'entities': entities,
        })

    OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform default.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(sample) + '\n' for sample in samples)

    print(f"✅ Generated {len(samples)} ICICI samples")
    print(f" Saved to {OUTPUT_FILE}")

    # Nothing to preview when no samples were requested.
    if samples:
        print("\n📧 Sample:")
        print(samples[0]['prompt'][:300])

    return samples
|
|
|
|
|
|
|
|
# Script entry point: generate the default batch of 100 samples.
if __name__ == "__main__":
    generate_icici_samples(n_samples=100)
|
|
|