Update README.md
Browse files
README.md
CHANGED
|
@@ -35,8 +35,7 @@ tokenizer = AutoTokenizer.from_pretrained("paytm/text2graph1.5b-original")
|
|
| 35 |
model = model.to(device)
|
| 36 |
|
| 37 |
# Prepare your input text
|
| 38 |
-
text =
|
| 39 |
-
''
|
| 40 |
{
|
| 41 |
"task": "Extract structured information from SMS messages to identify financial details, message purpose, and related information",
|
| 42 |
"instructions": [
|
|
@@ -71,16 +70,17 @@ text = """
|
|
| 71 |
"key_themes: Extract only 3-5 main themes actually discussed in the SMS as an array of strings - avoid redundant or fabricated themes",
|
| 72 |
"message_type: Categorize as exactly one of: alert message, delayed payment, repayment reminder, auto-pay reminder, refund information, payment failed information, settlement notification by payment aggregator, loan_emi_reminder, emi_confirmation, loan_offer, new_merchant_acquisition, product_offer",
|
| 73 |
"product_type: Identify as one of: credit card, debit card, home loan, vehicle loan, merchant_qr_kit, soundbox (or null if not mentioned)",
|
| 74 |
-
"aggregator_name: Extract payment aggregator name like paytm, phonepe, gpay , bharatpe, one 97 communications (or null if not mentioned). The namees can be written with differen caps lock , inside urls etc. Pay attention and extract it correctly.",
|
| 75 |
"bank_nbfc_fintech_name: Extract the name of the financial institution the message is from (or null if not present). A bank might have aggregator service but dont confuse the bank with aggregator",
|
| 76 |
"emi_amount: Extract only a clearly identified EMI amount as a float number (or null if not mentioned)",
|
| 77 |
"Total_overdue_amount: Extract only a clearly identified total overdue or bill amount as a float (or null if not mentioned)",
|
| 78 |
-
"available_bank_balance: Extract only a clearly identified available bank balance as a float (or null if not mentioned)",
|
| 79 |
"min_overdue_amount: Extract only a clearly identified minimum amount due as a float (or null if not mentioned)",
|
| 80 |
"overdue_date: Extract only a clearly identified payment due date as string (or null if not mentioned)",
|
| 81 |
"collection_team_lead: Extract only explicit mentions of collection team involvement (or null if not present)",
|
| 82 |
"legal_notice: Extract only explicit mentions of legal notices (or null if not mentioned)",
|
| 83 |
-
"customer_name: Extract only an explicitly identified customer name (or null if not mentioned)"
|
|
|
|
| 84 |
],
|
| 85 |
**example**
|
| 86 |
<input>"Dear Qagent, your XYZ credit card bill of Rs. 24,500 is due on 15-Jul-2023. Min amount due: Rs. 2,450. Account balance on 3-Jul-2023 is Rs 5,000.Pay now to avoid late fees. - XYZ Bank"</input>
|
|
@@ -97,19 +97,19 @@ text = """
|
|
| 97 |
"overdue_date": "2023-07-15",
|
| 98 |
"collection_team_lead": null,
|
| 99 |
"legal_notice": null,
|
| 100 |
-
"customer_name": "Qagent"
|
|
|
|
| 101 |
}</output>
|
| 102 |
-
|
| 103 |
-
<input>"Dear SBI Cardholder, outstanding of Rs. 9491.00, on your credit card ending 2852 is due on 31-JAN-25. Min. Amount Due: Rs. 475.00. Please ignore if already paid. Click here: https://sbicard.com/quickpaynet to make online payment."</input>
|
| 104 |
<output>
|
| 105 |
-
|
| 106 |
|
| 107 |
# Tokenize input and generate output
|
| 108 |
inputs = tokenizer(text, return_tensors="pt")
|
| 109 |
outputs = model.generate(**inputs.to("cuda"), max_new_tokens=185,do_sample=False,top_k=None,temperature=1.0,top_p=None)
|
| 110 |
|
| 111 |
# Decode the output
|
| 112 |
-
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 113 |
print(result)
|
| 114 |
```
|
| 115 |
|
|
|
|
| 35 |
model = model.to(device)
|
| 36 |
|
| 37 |
# Prepare your input text
|
| 38 |
+
text = '''
|
|
|
|
| 39 |
{
|
| 40 |
"task": "Extract structured information from SMS messages to identify financial details, message purpose, and related information",
|
| 41 |
"instructions": [
|
|
|
|
| 70 |
"key_themes: Extract only 3-5 main themes actually discussed in the SMS as an array of strings - avoid redundant or fabricated themes",
|
| 71 |
"message_type: Categorize as exactly one of: alert message, delayed payment, repayment reminder, auto-pay reminder, refund information, payment failed information, settlement notification by payment aggregator, loan_emi_reminder, emi_confirmation, loan_offer, new_merchant_acquisition, product_offer",
|
| 72 |
"product_type: Identify as one of: credit card, debit card, home loan, vehicle loan, merchant_qr_kit, soundbox (or null if not mentioned)",
|
| 73 |
+
"aggregator_name: Extract payment aggregator name like paytm, phonepe, gpay, bharatpe, one 97 communications (or null if not mentioned). The names can be written with different capitalization, inside URLs, etc. Pay attention and extract it correctly. Do not return bank names in aggregators",
|
| 74 |
"bank_nbfc_fintech_name: Extract the name of the financial institution the message is from (or null if not present). A bank might have aggregator service but dont confuse the bank with aggregator",
|
| 75 |
"emi_amount: Extract only a clearly identified EMI amount as a float number (or null if not mentioned)",
|
| 76 |
"Total_overdue_amount: Extract only a clearly identified total overdue or bill amount as a float (or null if not mentioned)",
|
| 77 |
+
"available_bank_balance: Extract only a clearly identified available bank balance as a float (or null if not mentioned). Do not mention available balance when not given",
|
| 78 |
"min_overdue_amount: Extract only a clearly identified minimum amount due as a float (or null if not mentioned)",
|
| 79 |
"overdue_date: Extract only a clearly identified payment due date as string (or null if not mentioned)",
|
| 80 |
"collection_team_lead: Extract only explicit mentions of collection team involvement (or null if not present)",
|
| 81 |
"legal_notice: Extract only explicit mentions of legal notices (or null if not mentioned)",
|
| 82 |
+
"customer_name: Extract only an explicitly identified customer name (or null if not mentioned)",
|
| 83 |
+
"settlement_amount: Amount credited by the aggregator to the bank account of the merchant/customer"
|
| 84 |
],
|
| 85 |
**example**
|
| 86 |
<input>"Dear Qagent, your XYZ credit card bill of Rs. 24,500 is due on 15-Jul-2023. Min amount due: Rs. 2,450. Account balance on 3-Jul-2023 is Rs 5,000.Pay now to avoid late fees. - XYZ Bank"</input>
|
|
|
|
| 97 |
"overdue_date": "2023-07-15",
|
| 98 |
"collection_team_lead": null,
|
| 99 |
"legal_notice": null,
|
| 100 |
+
"customer_name": "Qagent",
|
| 101 |
+
"settlement_amount":null
|
| 102 |
}</output>
|
| 103 |
+
<input>"Dear Customer, INR 9,307.00 credited to your A/c No XX1644 on 12/03/2025 through NEFT with UTR AXNPN07162650312 by PHONEPE PRIVATE LIMITED-PAYMENT AGGREGATOR ESCROW ACCOUNT, INFO: BATCHID:0010 F09 M22YWRZNKJE4SREDEMPTION-SBI"</input>
|
|
|
|
| 104 |
<output>
|
| 105 |
+
'''
|
| 106 |
|
| 107 |
# Tokenize input and generate output
|
| 108 |
inputs = tokenizer(text, return_tensors="pt")
|
| 109 |
outputs = model.generate(**inputs.to("cuda"), max_new_tokens=185,do_sample=False,top_k=None,temperature=1.0,top_p=None)
|
| 110 |
|
| 111 |
# Decode the output
|
| 112 |
+
result = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
|
| 113 |
print(result)
|
| 114 |
```
|
| 115 |
|