# Hosting-page residue (not part of the module): uploader "datasciencesage", "Upload 8 files", revision 95ff1e1 (verified).
# Credit Bureau Terminology
# Glossary of credit-bureau terms. Every entry is one self-contained
# "Term: definition" string.
BUREAU_TERMINOLOGY = [
    # Score and days-past-due buckets
    "Bureau credit score: Numerical representation of creditworthiness ranging from 300 (poor) to 900 (excellent). Higher scores indicate better credit history and lower risk.",
    "DPD (Days Past Due): Number of days a payment is overdue beyond the due date. Common thresholds monitored are 30+, 60+, and 90+ DPD.",
    "30+ DPD: Count of accounts with payments overdue by 30 or more days in the specified monitoring period. Indicates early stage delinquency.",
    "60+ DPD: Count of accounts with payments overdue by 60 or more days in the specified monitoring period. Indicates moderate delinquency.",
    "90+ DPD: Count of accounts with payments overdue by 90 or more days in the specified monitoring period. Indicates serious delinquency.",

    # Adverse account events and applicant status
    "Settlement: Debt resolved by borrower paying less than the full amount owed, typically after negotiation with creditor. Marked negatively on credit report.",
    "Write-off: Debt declared unrecoverable by lender and removed from active accounts. Severely impacts credit score and indicates non-payment.",
    "NTC (No-Track-Case): Credit applicants with insufficient credit history or no previous credit accounts in bureau database. Also called 'New to Credit'.",
    "Suit Filed: Legal action initiated by creditor for debt recovery through courts. Indicates serious delinquency and unwillingness to pay.",
    "Wilful Default: Deliberate non-payment of debt despite having the financial ability to pay. Considered fraudulent behavior and severely impacts creditworthiness.",

    # Current obligations, overdue balances and inquiries
    "Live PL/BL: Active Personal Loan or Business Loan currently being serviced by the borrower with regular payments.",
    "Overdue amount: Total unpaid amount across all accounts that is past the due date. Sum of all overdue balances.",
    "Credit inquiry: Request made by lender to check credit report when applicant applies for credit. Too many inquiries indicate credit hunger.",
    "Active loans: Loans currently being serviced by borrower, not yet closed or settled. Indicates current credit obligations.",
    "Loan exposure: Total outstanding amount across all loans. Also called total debt or credit exposure.",
]
# GST and GSTR-3B Terminology
# Glossary of GST return vocabulary. Every entry is one self-contained
# "Term: definition" string.
GST_TERMINOLOGY = [
    # The return itself and its headline sales table
    "GSTR-3B: Monthly return filing summarizing outward supplies, input tax credit claimed, and net tax liability for the tax period.",
    "Table 3.1(a): Section in GSTR-3B reporting outward taxable supplies (other than zero rated, nil rated and exempted). This is the main sales figure.",

    # Supply and value concepts
    "Outward supplies: Goods or services provided by the registered GST taxpayer to customers. This is the sales/revenue of the business.",
    "Taxable supplies: Supplies on which GST is levied at applicable rates (5%, 12%, 18%, or 28%). Excludes exempted and nil-rated supplies.",
    "Taxable value: The base value on which GST is calculated, excluding the GST amount itself. This is the pre-tax revenue.",
    "Outward taxable supplies: Sales of goods/services on which GST is applicable. Found in GSTR-3B Table 3.1, row (a).",

    # Document layout and identifiers
    "GSTR-3B structure: Contains multiple tables - Table 3.1 for outward supplies, Table 3.2 for inter-state supplies, Table 4 for input tax credit.",
    "Tax period: The month and year for which the GST return is filed. Format is usually 'Month YYYY' (e.g., January 2025).",
    "GSTIN: GST Identification Number, unique 15-digit alphanumeric code assigned to each registered taxpayer.",
]
# Validation and Business Rules
# Plain-language sanity checks for extracted values. These are descriptive
# strings only; nothing in this list enforces the rules it describes.
VALIDATION_RULES = [
    # Credit score range and banding
    "Valid bureau credit scores: Must be between 300 and 900 inclusive. Scores outside this range are invalid.",
    "Credit score interpretation: 300-579 is Poor, 580-669 is Fair, 670-739 is Good, 740-799 is Very Good, 800-900 is Excellent.",

    # Cross-field consistency
    "DPD hierarchy rule: 90+ DPD count ≤ 60+ DPD count ≤ 30+ DPD count. If this is violated, data may be incorrect.",

    # Monetary and count sanity checks
    "GST sales validation: Taxable value should be non-negative numbers. Negative sales indicate data entry error.",
    "Suspicious GST amounts: Values over 10 crore (100,000,000 rupees) should be flagged for verification as potentially incorrect.",
    "Written-off debt amount: Should be non-negative. Negative values indicate error in extraction or data.",
    "Loan counts validation: Max loans and max active loans should be non-negative integers. Cannot have negative loan counts.",

    # Policy thresholds and missing-value handling
    "Overdue threshold: Maximum allowable overdue amount, typically ranging from 0 to several lakhs. Depends on risk appetite.",
    "Credit inquiry limits: Excessive inquiries (>5 in 6 months) indicate credit hunger and should be flagged.",
    "Zero values interpretation: Zero or null values may indicate either absence of the attribute or that the parameter is not applicable.",
]
# Extraction Hints and Location Guidance
# Pointers on where each parameter tends to appear in the source documents
# and which keywords mark it. One "Topic: guidance" string per entry.
EXTRACTION_HINTS = [
    # Bureau report: score, delinquency and account status
    "Bureau credit score location: Typically appears near terms like 'PERFORM', 'CONSUMER', 'Score', 'CIBIL', or in a dedicated score section on first page.",
    "Credit score format: Usually displayed as a 3-digit number between 300-900, sometimes with a gauge or range indicator.",
    "DPD information location: Often found in payment history tables, delinquency sections, or account performance summary.",
    "Settlement and write-off status: Usually marked explicitly in account status columns with keywords 'Settled', 'Written Off', or status codes.",
    "Live loan indicators: Marked with 'Active', 'Current', 'Live', or similar status in account listings.",

    # GSTR-3B: sales figure and tax period
    "GSTR-3B sales extraction: Sales figures are in Table 3.1, row labeled '(a) Outward taxable supplies', second column shows taxable value.",
    "GSTR-3B month extraction: Month information appears as 'Period' followed by month name (January, February, etc.).",
    "GSTR-3B year extraction: Year appears in 'Year' field in format 'YYYY-YY' (e.g., 2024-25) or in filename as MMYYYY (e.g., 012025).",

    # Document-level handling
    "Table structure in PDFs: Tables may span multiple pages. Look for continuation rows and merged cells.",
    "Multiple bureau reports: When processing multiple reports, extract parameters separately for each person/entity.",

    # Keyword-driven flags
    "NTC acceptance: Check for explicit mentions of 'No Track Case', 'NTC', 'New to Credit' status in summary or remarks.",
    "Suit filed indicators: Look for keywords 'Suit Filed', 'Legal Action', 'Court Case' in account remarks or status.",
]
# Common Patterns and Formats
# Notes on how dates, amounts, flags and codes are typically written in the
# documents. One "Topic: description" string per entry.
COMMON_PATTERNS = [
    # Value formats
    "Date formats in bureau reports: DD-MM-YYYY, DD/MM/YYYY, or MMM-YYYY for month-year format.",
    "Currency representation: Indian Rupees shown as '₹', 'Rs.', 'INR', or just numbers with commas (e.g., 1,50,000).",
    "Percentage formats: Shown with '%' symbol or as decimals (0.15 = 15%).",
    "Boolean values: Yes/No, True/False, Y/N, 1/0, or Present/Absent for presence/absence of attributes.",

    # Abbreviation legends
    "Account types: PL (Personal Loan), BL (Business Loan), CC (Credit Card), HL (Home Loan), AL (Auto Loan).",
    "Status codes in bureau: STD (Standard), SMA (Special Mention Account), SUB (Sub-standard), DBT (Doubtful), LSS (Loss).",
]
# All knowledge combined for easy iteration.
# A new flat list holding every entry from the five category lists, in the
# order bureau terms, GST terms, validation rules, extraction hints, patterns.
# It is built once at import time, so later changes to a category list are
# not reflected here.
ALL_KNOWLEDGE = [
    *BUREAU_TERMINOLOGY,
    *GST_TERMINOLOGY,
    *VALIDATION_RULES,
    *EXTRACTION_HINTS,
    *COMMON_PATTERNS,
]
# Category mapping for retrieval filtering.
# Maps each category name to its list of knowledge strings. The values are
# the module-level list objects themselves, not copies.
KNOWLEDGE_CATEGORIES = {
    "bureau_terminology": BUREAU_TERMINOLOGY,
    "gst_terminology": GST_TERMINOLOGY,
    "validation_rules": VALIDATION_RULES,
    "extraction_hints": EXTRACTION_HINTS,
    "common_patterns": COMMON_PATTERNS,
}