# Domain knowledge snippets about credit-bureau reports and GST returns,
# grouped by topic. Every entry is a single "Topic: explanation" string.

# Credit-bureau vocabulary: score, delinquency buckets (DPD), adverse account
# statuses, and loan/exposure terms.
BUREAU_TERMINOLOGY = [
    "Bureau credit score: Numerical representation of creditworthiness ranging from 300 (poor) to 900 (excellent). Higher scores indicate better credit history and lower risk.",
    "DPD (Days Past Due): Number of days a payment is overdue beyond the due date. Common thresholds monitored are 30+, 60+, and 90+ DPD.",
    "30+ DPD: Count of accounts with payments overdue by 30 or more days in the specified monitoring period. Indicates early stage delinquency.",
    "60+ DPD: Count of accounts with payments overdue by 60 or more days in the specified monitoring period. Indicates moderate delinquency.",
    "90+ DPD: Count of accounts with payments overdue by 90 or more days in the specified monitoring period. Indicates serious delinquency.",
    "Settlement: Debt resolved by borrower paying less than the full amount owed, typically after negotiation with creditor. Marked negatively on credit report.",
    "Write-off: Debt declared unrecoverable by lender and removed from active accounts. Severely impacts credit score and indicates non-payment.",
    "NTC (No-Track-Case): Credit applicants with insufficient credit history or no previous credit accounts in bureau database. Also called 'New to Credit'.",
    "Suit Filed: Legal action initiated by creditor for debt recovery through courts. Indicates serious delinquency and unwillingness to pay.",
    "Wilful Default: Deliberate non-payment of debt despite having the financial ability to pay. Considered fraudulent behavior and severely impacts creditworthiness.",
    "Live PL/BL: Active Personal Loan or Business Loan currently being serviced by the borrower with regular payments.",
    "Overdue amount: Total unpaid amount across all accounts that is past the due date. Sum of all overdue balances.",
    "Credit inquiry: Request made by lender to check credit report when applicant applies for credit. Too many inquiries indicate credit hunger.",
    "Active loans: Loans currently being serviced by borrower, not yet closed or settled. Indicates current credit obligations.",
    "Loan exposure: Total outstanding amount across all loans. Also called total debt or credit exposure.",
]
# ---------------------------------------------------------------------------
# GST vocabulary, centred on the GSTR-3B return and its Table 3.1(a)
# "outward taxable supplies" figure.
GST_TERMINOLOGY = [
    "GSTR-3B: Monthly return filing summarizing outward supplies, input tax credit claimed, and net tax liability for the tax period.",
    "Table 3.1(a): Section in GSTR-3B reporting outward taxable supplies (other than zero rated, nil rated and exempted). This is the main sales figure.",
    "Outward supplies: Goods or services provided by the registered GST taxpayer to customers. This is the sales/revenue of the business.",
    "Taxable supplies: Supplies on which GST is levied at applicable rates (5%, 12%, 18%, or 28%). Excludes exempted and nil-rated supplies.",
    "Taxable value: The base value on which GST is calculated, excluding the GST amount itself. This is the pre-tax revenue.",
    "Outward taxable supplies: Sales of goods/services on which GST is applicable. Found in GSTR-3B Table 3.1, row (a).",
    "GSTR-3B structure: Contains multiple tables - Table 3.1 for outward supplies, Table 3.2 for inter-state supplies, Table 4 for input tax credit.",
    "Tax period: The month and year for which the GST return is filed. Format is usually 'Month YYYY' (e.g., January 2025).",
    "GSTIN: GST Identification Number, unique 15-digit alphanumeric code assigned to each registered taxpayer.",
]
# ---------------------------------------------------------------------------
# Sanity rules for extracted values: allowed ranges, ordering constraints
# between DPD buckets, and thresholds that should trigger a manual check.
VALIDATION_RULES = [
    "Valid bureau credit scores: Must be between 300 and 900 inclusive. Scores outside this range are invalid.",
    "Credit score interpretation: 300-579 is Poor, 580-669 is Fair, 670-739 is Good, 740-799 is Very Good, 800-900 is Excellent.",
    "DPD hierarchy rule: 90+ DPD count ≤ 60+ DPD count ≤ 30+ DPD count. If this is violated, data may be incorrect.",
    "GST sales validation: Taxable value should be non-negative numbers. Negative sales indicate data entry error.",
    "Suspicious GST amounts: Values over 10 crore (100,000,000 rupees) should be flagged for verification as potentially incorrect.",
    "Written-off debt amount: Should be non-negative. Negative values indicate error in extraction or data.",
    "Loan counts validation: Max loans and max active loans should be non-negative integers. Cannot have negative loan counts.",
    "Overdue threshold: Maximum allowable overdue amount, typically ranging from 0 to several lakhs. Depends on risk appetite.",
    "Credit inquiry limits: Excessive inquiries (>5 in 6 months) indicate credit hunger and should be flagged.",
    "Zero values interpretation: Zero or null values may indicate either absence of the attribute or that the parameter is not applicable.",
]
# ---------------------------------------------------------------------------
# Hints on where each parameter tends to appear in bureau reports and
# GSTR-3B documents, and which keywords mark it.
EXTRACTION_HINTS = [
    "Bureau credit score location: Typically appears near terms like 'PERFORM', 'CONSUMER', 'Score', 'CIBIL', or in a dedicated score section on first page.",
    "Credit score format: Usually displayed as a 3-digit number between 300-900, sometimes with a gauge or range indicator.",
    "DPD information location: Often found in payment history tables, delinquency sections, or account performance summary.",
    "Settlement and write-off status: Usually marked explicitly in account status columns with keywords 'Settled', 'Written Off', or status codes.",
    "Live loan indicators: Marked with 'Active', 'Current', 'Live', or similar status in account listings.",
    "GSTR-3B sales extraction: Sales figures are in Table 3.1, row labeled '(a) Outward taxable supplies', second column shows taxable value.",
    "GSTR-3B month extraction: Month information appears as 'Period' followed by month name (January, February, etc.).",
    "GSTR-3B year extraction: Year appears in 'Year' field in format 'YYYY-YY' (e.g., 2024-25) or in filename as MMYYYY (e.g., 012025).",
    "Table structure in PDFs: Tables may span multiple pages. Look for continuation rows and merged cells.",
    "Multiple bureau reports: When processing multiple reports, extract parameters separately for each person/entity.",
    "NTC acceptance: Check for explicit mentions of 'No Track Case', 'NTC', 'New to Credit' status in summary or remarks.",
    "Suit filed indicators: Look for keywords 'Suit Filed', 'Legal Action', 'Court Case' in account remarks or status.",
]
# ---------------------------------------------------------------------------
# Notation conventions seen in the documents: date, currency, percentage and
# boolean formats, plus account-type and status-code abbreviations.
COMMON_PATTERNS = [
    "Date formats in bureau reports: DD-MM-YYYY, DD/MM/YYYY, or MMM-YYYY for month-year format.",
    "Currency representation: Indian Rupees shown as '₹', 'Rs.', 'INR', or just numbers with commas (e.g., 1,50,000).",
    "Percentage formats: Shown with '%' symbol or as decimals (0.15 = 15%).",
    "Boolean values: Yes/No, True/False, Y/N, 1/0, or Present/Absent for presence/absence of attributes.",
    "Account types: PL (Personal Loan), BL (Business Loan), CC (Credit Card), HL (Home Loan), AL (Auto Loan).",
    "Status codes in bureau: STD (Standard), SMA (Special Mention Account), SUB (Sub-standard), DBT (Doubtful), LSS (Loss).",
]
# ---------------------------------------------------------------------------
# Every entry from all five categories as one flat list, in the order the
# categories are defined above. List concatenation builds a new list, so
# later in-place changes to a category list do not show up here.
ALL_KNOWLEDGE = (
    BUREAU_TERMINOLOGY
    + GST_TERMINOLOGY
    + VALIDATION_RULES
    + EXTRACTION_HINTS
    + COMMON_PATTERNS
)
# ---------------------------------------------------------------------------
# Lookup from a snake_case category key to its list of entries. The values
# are the module-level list objects themselves, not copies.
KNOWLEDGE_CATEGORIES = {
    "bureau_terminology": BUREAU_TERMINOLOGY,
    "gst_terminology": GST_TERMINOLOGY,
    "validation_rules": VALIDATION_RULES,
    "extraction_hints": EXTRACTION_HINTS,
    "common_patterns": COMMON_PATTERNS,
}