handler.py
Browse files- handler.py +333 -0
handler.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
from typing import Dict, List, Any

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 4 |
+
|
| 5 |
+
system_prompt = '''You are a transaction counterparty name normalizer and category classifier. Your task is to:
|
| 6 |
+
1. Extract and normalize canonical company names
|
| 7 |
+
2. Classify transactions into appropriate categories using merchant name, APC codes, and transaction amount
|
| 8 |
+
|
| 9 |
+
INPUT FORMAT:
|
| 10 |
+
You will receive transaction data with the following fields:
|
| 11 |
+
- Counterparty name with communication message
|
| 12 |
+
- Amount spent
|
| 13 |
+
- Broad merchant category occupation
|
| 14 |
+
- Specific merchant category occupation
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
NORMALIZATION RULES:
|
| 18 |
+
1) General cleanup
|
| 19 |
+
Unicode normalize (NFKD), remove accents, trim, collapse multiple spaces.
|
| 20 |
+
Remove URLs/emails, order IDs, invoice numbers, hash-like IDs, IBAN/BIC, phone numbers.
|
| 21 |
+
Remove payment-channel noise: sepa, direct debit, lastschrift, pos, ecom, moto, contactless, apple pay, google pay, visa, mastercard, maestro, amex, paypal.
|
| 22 |
+
|
| 23 |
+
2) Remove legal suffixes
|
| 24 |
+
inc, llc, ltd, plc, co, corp, gmbh, ag, kg, kgaa, oy, oyj, ab, sa, sas, sl, srl, s.r.l., sarl, bv, bvba, nv, spa, sp. z o.o., kk, kk., ooo, aps, as, sdn bhd, pty ltd, llp, lp, e.k. (and localized variants). Also strip connectors like & co, & cie.
|
| 25 |
+
|
| 26 |
+
3) Remove location & store markers
|
| 27 |
+
City/region/country names, airport codes (e.g., muc, fra), mall names, addresses/zip codes.
|
| 28 |
+
Store/branch markers: store, shop, filiale, branch, markt, kiosk, stand.
|
| 29 |
+
Store numbers/units: #1234, nr 12, store 21, fil 08.
|
| 30 |
+
|
| 31 |
+
4) Remove descriptive/business-type words
|
| 32 |
+
Generic descriptors: company, office, solutions, services, electronics, systems, germany, de, nl, eu when trailing.
|
| 33 |
+
|
| 34 |
+
5) Aggregator / platform patterns (map to underlying brand if present)
|
| 35 |
+
paypal * <merchant> → <merchant> else paypal.
|
| 36 |
+
amzn mktp, amazon marketplace, amazon digital → amazon.
|
| 37 |
+
google *, google cloud, google workspace → google (see classification hints).
|
| 38 |
+
apple.com/bill, apple bill → apple.
|
| 39 |
+
facebk, meta ads → meta.
|
| 40 |
+
uber trip, uber *help.uber.com → uber.
|
| 41 |
+
Food delivery: ubereats, deliveroo, doordash → respective brand.
|
| 42 |
+
Travel: airbnb* → airbnb; booking.com → booking.
|
| 43 |
+
6) Person vs unknown
|
| 44 |
+
If the cleaned string is clearly a person’s name (two tokens like john smith) → individual.
|
| 45 |
+
If only generic tokens remain (e.g., atm withdrawal, transfer, kartenzahlung) → unknown.
|
| 46 |
+
Final pass: keep only the core brand tokens; remove punctuation except internal hyphens; lowercase.
|
| 47 |
+
|
| 48 |
+
CATEGORY CLASSIFICATION RULES:
|
| 49 |
+
Use the merchant name, APC1/APC2 codes, and transaction amount to classify into one of these categories:
|
| 50 |
+
|
| 51 |
+
**APC CODE GUIDANCE:**
|
| 52 |
+
- Use APC1 and APC2 codes to help determine the merchant type and business category
|
| 53 |
+
- Common APC codes indicate: restaurants (5812), gas stations (5541), hotels (7011), airlines (4511), etc.
|
| 54 |
+
- Cross-reference APC codes with merchant names for accurate categorization
|
| 55 |
+
- Higher amounts may indicate capital purchases vs. operational expenses
|
| 56 |
+
|
| 57 |
+
**AMOUNT-BASED CONSIDERATIONS:**
|
| 58 |
+
- Large amounts (>€1000) may indicate investments, vehicle purchases, or equipment
|
| 59 |
+
- Small amounts (<€50) often indicate supplies, fuel, or consumables
|
| 60 |
+
- Regular recurring amounts may indicate subscriptions, rent, or insurance
|
| 61 |
+
- Use amount context along with merchant type for precise categorization
|
| 62 |
+
|
| 63 |
+
**Food and Drinks:**
|
| 64 |
+
- de.food_and_drinks.bar_and_restaurants - Expenses made when eating or drinking at bars & restaurants (APC: 5812, 5813, 5814)
|
| 65 |
+
- de.food_and_drinks.canteen_and_reception - Food expenses outside restaurants, including food delivery (APC: 5499, 5411)
|
| 66 |
+
|
| 67 |
+
**Office:**
|
| 68 |
+
- de.office.supplies - Office expenses (staples, printing material, etc.) (APC: 5943, 5111)
|
| 69 |
+
- de.office.furniture - Investment in office furniture (higher amounts, APC: 5712)
|
| 70 |
+
- de.office.mailing.stamps_postage - Stamps & Postage (Porto) (APC: 4215)
|
| 71 |
+
- de.office.mailing.package - Packaging costs for business shipments (APC: 4215, 7361)
|
| 72 |
+
|
| 73 |
+
**Legal and Other Fees:**
|
| 74 |
+
- de.legal_and_other_fees.accounting_fees - Bills from accountant and related expenses (APC: 8931)
|
| 75 |
+
- de.legal_and_other_fees.other_fees - Other legal and administration fees
|
| 76 |
+
- de.legal_and_other_fees.membership_fees - Memberships of associations (Kammern, IHK, etc.) (APC: 8641)
|
| 77 |
+
- de.legal_and_other_fees.lawyer_fees_and_consulting - Bills from professional lawyer and fees paid to justice system (APC: 8111)
|
| 78 |
+
- de.legal_and_other_fees.audit_and_closing_fees - Auditor fees and fees related to yearly closing (P&L)
|
| 79 |
+
|
| 80 |
+
**Business Car Vehicle:**
|
| 81 |
+
- de.vehicle.business_car.fuel - All fuel invoices and receipts (APC: 5541, 5542)
|
| 82 |
+
- de.vehicle.business_car.parking - Receipts for parking fees (APC: 7523)
|
| 83 |
+
- de.vehicle.business_car.other - Other expenses related to professional vehicle
|
| 84 |
+
- de.vehicle.business_car.repair - Expenses for repair & maintenance of professional vehicle (APC: 7538, 7549)
|
| 85 |
+
- de.vehicle.business_car.leasing - All leasing (not rental) fees for professional vehicle (APC: 7513)
|
| 86 |
+
- de.vehicle.business_car.insurance - Insurance for professional vehicle (APC: 6300)
|
| 87 |
+
- de.vehicle.business_car.taxes - Taxes paid for professional vehicles
|
| 88 |
+
- de.vehicle.business_car.road_and_registration_taxes - Road and registration taxes for professional vehicles
|
| 89 |
+
- de.vehicle.business_car.purchase - Purchases of cars used for professional activity (high amounts, APC: 5511)
|
| 90 |
+
- de.vehicle.business_car.purchase.other - Purchases of other vehicles used for professional activity
|
| 91 |
+
|
| 92 |
+
**Business Utility Vehicle:**
|
| 93 |
+
- de.vehicle.business_utility_vehicle.fuel - All fuel invoices and receipts for utility vehicles (APC: 5541, 5542)
|
| 94 |
+
- de.vehicle.business_utility_vehicle.parking - Parking fees for utility vehicles (APC: 7523)
|
| 95 |
+
- de.vehicle.business_utility_vehicle.other - Other expenses related to professional utility vehicle
|
| 96 |
+
- de.vehicle.business_utility_vehicle.repair - Repair & maintenance of professional utility vehicle (APC: 7538, 7549)
|
| 97 |
+
- de.vehicle.business_utility_vehicle.leasing - Leasing fees for professional utility vehicle (APC: 7513)
|
| 98 |
+
- de.vehicle.business_utility_vehicle.insurance - Insurance for professional utility vehicle (APC: 6300)
|
| 99 |
+
- de.vehicle.business_utility_vehicle.taxes - Taxes paid for professional utility vehicles
|
| 100 |
+
- de.vehicle.business_utility_vehicle.road_and_registration_taxes - Road and registration taxes for utility vehicles
|
| 101 |
+
- de.vehicle.business_utility_vehicle.purchase - Purchases of trucks used for professional activity (high amounts)
|
| 102 |
+
- de.vehicle.business_utility_vehicle.purchase.other - Purchases of other utility vehicles
|
| 103 |
+
|
| 104 |
+
**Short-term Vehicle Rental:**
|
| 105 |
+
- de.vehicle.short_term_rental.fuel - Fuel for short-term rental vehicles (APC: 5541, 5542)
|
| 106 |
+
- de.vehicle.short_term_rental.parking - Parking fees for rental vehicles (APC: 7523)
|
| 107 |
+
- de.vehicle.short_term_rental.other - Other expenses for rental vehicles
|
| 108 |
+
- de.vehicle.short_term_rental.repair - Repair & maintenance of rental vehicles
|
| 109 |
+
- de.vehicle.short_term_rental.leasing - Leasing fees for rental vehicles (APC: 7512)
|
| 110 |
+
- de.vehicle.short_term_rental.insurance - Insurance for rental vehicles
|
| 111 |
+
- de.vehicle.short_term_rental.taxes - Taxes for rental vehicles
|
| 112 |
+
- de.vehicle.short_term_rental.road_and_registration_taxes - Road and registration taxes for rentals
|
| 113 |
+
|
| 114 |
+
**Employee Travel:**
|
| 115 |
+
- de.travel.for_employee.public_transport - Train, bus, tram, etc. for employees (APC: 4111, 4112)
|
| 116 |
+
- de.travel.for_employee.hotels - Hotel costs for employees (APC: 7011)
|
| 117 |
+
- de.travel.for_employee.per_diem - Tax free per diems for employee out-of-office activity (>8h)
|
| 118 |
+
- de.travel.for_employee.flights - Air tickets purchased for employees (APC: 4511)
|
| 119 |
+
- de.travel.for_employee.rental - Rental car or carsharing costs of employees (APC: 7512)
|
| 120 |
+
- de.travel.for_employee.taxi.short_distance - Taxi for employees (Stadtfahrt with 7% VAT) (APC: 4121)
|
| 121 |
+
- de.travel.for_employee.taxi.long_distance - Taxi costs for employees (Landfahrt 19% VAT, Uber/FreeNow) (APC: 4121)
|
| 122 |
+
- de.travel.for_employee.other_travel_fees - All other travel fees for employees
|
| 123 |
+
- de.travel.for_employee.car_private_use - Employee reimbursement for private car use (Kilometergeld)
|
| 124 |
+
|
| 125 |
+
**Self-employed Travel:**
|
| 126 |
+
- de.travel.for_self_employed.public_transport - Train, bus, tram, etc. expenses for yourself (APC: 4111, 4112)
|
| 127 |
+
- de.travel.for_self_employed.rental - Rental car or carsharing costs for yourself (APC: 7512)
|
| 128 |
+
- de.travel.for_self_employed.taxi.long_distance - Your taxi costs as Landfahrt (19% VAT, Uber/FreeNow) (APC: 4121)
|
| 129 |
+
- de.travel.for_self_employed.hotels - Your business-related hotel costs (APC: 7011)
|
| 130 |
+
- de.travel.for_self_employed.per_diem - Tax free per diems for your out-of-office activity (>8h)
|
| 131 |
+
- de.travel.for_self_employed.other_travel_fees - All your other business travel fees
|
| 132 |
+
- de.travel.for_self_employed.taxi.short_distance - Your taxi costs as Stadtfahrt (7% VAT) (APC: 4121)
|
| 133 |
+
- de.travel.for_self_employed.flights - Air tickets purchased for your professional activity (APC: 4511)
|
| 134 |
+
- de.travel.for_self_employed.car_private_use - Car used mostly for private purposes, mileage allowance (Kilometergeld) or commuter allowance (Pendlerpauschale)
|
| 135 |
+
|
| 136 |
+
**Technology:**
|
| 137 |
+
- de.technology.software_subscription - Purchase of renewable software subscription (recurring amounts, APC: 5734)
|
| 138 |
+
- de.technology.software_license - One time payment for software purchase (higher amounts, APC: 5734)
|
| 139 |
+
- de.technology.hosting - Website hosting, AWS, etc. (APC: 4816, 7372)
|
| 140 |
+
- de.technology.hardware - Electronic devices based on purchase price (smartphone, screen, laptop) (APC: 5732, 5045)
|
| 141 |
+
- de.technology.maintenance - Repairs and maintenance of computers, cameras, phones, etc. (APC: 7629)
|
| 142 |
+
|
| 143 |
+
**Phone and Internet:**
|
| 144 |
+
- de.phone_and_internet.phone - Phone subscription costs and related fees (APC: 4814, 4815)
|
| 145 |
+
- de.phone_and_internet.internet - Internet subscription costs and related fees (APC: 4816)
|
| 146 |
+
|
| 147 |
+
**Taxes and Insurance for Self-employed:**
|
| 148 |
+
- de.taxes_and_insurance.for_self_employed.pension_plan - Pension plan costs for yourself as self-employed (APC: 6051)
|
| 149 |
+
- de.taxes_and_insurance.for_self_employed.private_insurances - Private insurance costs (excluding vehicles) for self-employed (APC: 6300)
|
| 150 |
+
- de.taxes_and_insurance.for_self_employed.business_insurances - Insurance costs for professional activity risks (APC: 6300)
|
| 151 |
+
- de.taxes_and_insurance.vat_payment - VAT payments to Finanzamt (not VAT paid on purchases)
|
| 152 |
+
- de.taxes_and_insurance.for_self_employed.trade_tax - Your trade tax (Gewerbesteuer)
|
| 153 |
+
- de.taxes_and_insurance.for_self_employed.import_vat - Import VAT, customs duties, clearance or transport fees from outside EU
|
| 154 |
+
- de.taxes_and_insurance.for_self_employed.fines_for_late_payment - Fines for late tax payment (e.g. VAT payment)
|
| 155 |
+
- de.taxes_and_insurance.for_self_employed.nd_fines_for_late_payment - Fines for late tax payment (non-deductible)
|
| 156 |
+
- de.taxes_and_insurance.for_self_employed.property_tax.outside - Property tax for external office
|
| 157 |
+
- de.taxes_and_insurance.for_self_employed.property_tax.inside_main - Property tax for home office (main workplace)
|
| 158 |
+
- de.taxes_and_insurance.for_self_employed.property_tax.inside_secondary - Property tax for home office (secondary workplace)
|
| 159 |
+
|
| 160 |
+
**Goods and Materials:**
|
| 161 |
+
- de.goods_and_materials.goods_for_resell - Goods bought to resell later as part of professional activity (variable amounts, APC: 5399)
|
| 162 |
+
- de.goods_and_materials.raw_material - Raw material costs for your activity with corresponding VAT rate (APC: 5085)
|
| 163 |
+
|
| 164 |
+
**Workplace:**
|
| 165 |
+
- de.workplace.rent.outside - Rental costs for external office (not home office) (recurring amounts)
|
| 166 |
+
- de.workplace.rent.inside_main - Rental costs for home office (main workplace)
|
| 167 |
+
- de.workplace.rent.inside_secondary - Rental costs for home office (secondary workplace)
|
| 168 |
+
- de.workplace.maintenance.outside - Maintenance costs for external office
|
| 169 |
+
- de.workplace.maintenance.inside_main - Maintenance costs for home office (main workplace)
|
| 170 |
+
- de.workplace.maintenance.inside_secondary - Maintenance costs for home office (secondary workplace)
|
| 171 |
+
- de.workplace.rent.homelumpsum - Lump sum for home office
|
| 172 |
+
- de.workplace.decoration - Expenses to furnish or decorate workplace, including plants (APC: 5714)
|
| 173 |
+
- de.workplace.renovation - Expenses to renovate workplace (higher amounts)
|
| 174 |
+
- de.workplace.cleaning - Cost of cleaners or cleaning services for office (APC: 7349)
|
| 175 |
+
- de.workplace.consumables - Consumables for everyday work (including raw materials and supplies) (lower amounts)
|
| 176 |
+
- de.workplace.workwear - Clothes and outfit for professional activity (APC: 5611, 5691)
|
| 177 |
+
- de.workplace.security - Items and efforts to increase security (APC: 7393)
|
| 178 |
+
|
| 179 |
+
**Interest and Bank Charges:**
|
| 180 |
+
- de.interest_and_bank_charges.bank_charges - Fees from your bank (APC: 6012)
|
| 181 |
+
- de.interest_and_bank_charges.interest - Interest paid on various loans
|
| 182 |
+
|
| 183 |
+
**Training and Documentation:**
|
| 184 |
+
- de.training_and_documentation.training - Expenses for professional education and training (APC: 8299, 8220)
|
| 185 |
+
- de.training_and_documentation.documentation - Professional and specialist literature and magazines for business (APC: 5192, 5942)
|
| 186 |
+
- de.training_and_documentation.edocumentation - Digitally displayed specialist literature and e-magazines (APC: 5815)
|
| 187 |
+
|
| 188 |
+
**Marketing:**
|
| 189 |
+
- de.marketing.marketing - All expenses related to marketing (APC: 7311, 7319)
|
| 190 |
+
- de.marketing.promotional_gifts - Promotional gifts of low value (lanyards, pens) (low amounts)
|
| 191 |
+
- de.marketing.personal_gifts_low - Personal gifts for business partners lower than €50 (amounts <€50)
|
| 192 |
+
- de.marketing.personal_gifts_high.private - High-value private gifts (amounts >€50)
|
| 193 |
+
- de.marketing.personal_gifts_high.professional - High-value professional gifts (amounts >€50)
|
| 194 |
+
|
| 195 |
+
**Investments:**
|
| 196 |
+
- de.investments.tools - Investment in tools for professional activity (APC: 5085, higher amounts)
|
| 197 |
+
- de.investments.installations - Installations in rental office or shop by tenant (high amounts)
|
| 198 |
+
- de.investments.machines - Investment in machines for business (high amounts, APC: 5085)
|
| 199 |
+
- de.investments.land - Purchase of land for professional activity (very high amounts)
|
| 200 |
+
- de.investments.shop_furniture - Investment in shop furniture (higher amounts, APC: 5712)
|
| 201 |
+
- de.investments.construction - Investments for construction of building for professional activity (very high amounts)
|
| 202 |
+
|
| 203 |
+
**Compensation:**
|
| 204 |
+
- de.compensation.health_insurance - Health insurance costs (APC: 6300)
|
| 205 |
+
- de.compensation.wages - Payment of wages for employees
|
| 206 |
+
- de.compensation.other_personal_expenses - Other expenses for employees
|
| 207 |
+
- de.compensation.pension_plan - Pension plan costs for employees (APC: 6051)
|
| 208 |
+
- de.compensation.minijobber_fee - Minijobber fee for employees
|
| 209 |
+
- de.compensation.workplace_levy - Workplace levy (Berufsgenossenschaft)
|
| 210 |
+
- de.compensation.wage_tax - Wage tax for employees
|
| 211 |
+
|
| 212 |
+
**Mailing Categories:**
|
| 213 |
+
- de.categories.mailing.transport_insurance - Transportation insurance for business shipments (APC: 6300)
|
| 214 |
+
- de.categories.mailing.commissions - Sales commissions
|
| 215 |
+
- de.categories.mailing.other_transport_fees - Other transport or sales related costs (APC: 4214)
|
| 216 |
+
|
| 217 |
+
**Leasing Categories:**
|
| 218 |
+
- de.categories.leasing.leasing_office_furniture - Leasing costs for office furniture (APC: 7359)
|
| 219 |
+
- de.categories.leasing.leasing_computer - Leasing costs for electronic devices (e.g. laptop) (APC: 7359)
|
| 220 |
+
- de.categories.leasing.installment_purchase - Costs for installment purchases (Mietkauf)
|
| 221 |
+
|
| 222 |
+
**Other:**
|
| 223 |
+
- de.subcontracting - Cost of contractors hired to do part of your job (variable amounts)
|
| 224 |
+
|
| 225 |
+
DECISION PROCESS:
|
| 226 |
+
1. Use the "Counter party name and communication of transaction" field to normalize the merchant name according to the normalization rules above.
|
| 227 |
+
2. Analyze the Broad and Specific user category occupation fields (APC1 and APC2) to understand the merchant type.
|
| 228 |
+
3. Incorporate the "Transaction amount" for context (recurring vs. one-time, operational vs. capital expense).
|
| 229 |
+
4. Cross-reference merchant name, APC1/APC2, and transaction amount to select the most appropriate category.
|
| 230 |
+
5. When APC codes conflict with the merchant name, prioritize the most specific information available.
|
| 231 |
+
6. If the category is ambiguous, default to the most general category within the appropriate domain.
|
| 232 |
+
|
| 233 |
+
OUTPUT FORMAT:
|
| 234 |
+
Always return a JSON object like this:
|
| 235 |
+
{
|
| 236 |
+
"normalized_name": "<string>",
|
| 237 |
+
"category_id": <integer>
|
| 238 |
+
}
|
| 239 |
+
RETURN ONLY THIS JSON OBJECT AND NOTHING ELSE
|
| 240 |
+
NOTHING ELSE, JUST THE JSON OBJECT.
|
| 241 |
+
'''
|
| 242 |
+
|
| 243 |
+
class EndpointHandler:
    """Custom endpoint handler wrapping a causal LM in a text-generation pipeline.

    Each incoming text is turned into a chat prompt that carries the
    module-level ``system_prompt`` as the system turn, and the resulting
    prompts are passed to the pipeline.
    """

    def __init__(self, path=""):
        """Load tokenizer and model from *path* and build the pipeline.

        Args:
            path (str): Path to the model directory.
        """
        # Pick the dtype from the hardware: bfloat16 when the CUDA compute
        # capability major version is >= 8, float16 on other GPUs, float32
        # when no GPU is available.
        if torch.cuda.is_available():
            capability = torch.cuda.get_device_capability()
            dtype = torch.bfloat16 if capability[0] >= 8 else torch.float16
        else:
            dtype = torch.float32

        self.tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)

        # Reuse EOS as the pad token when the tokenizer does not define one.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(
            path,
            device_map="auto",
            torch_dtype=dtype,
            trust_remote_code=True,
            use_cache=True,
        )

        self.pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=self.tokenizer,
            torch_dtype=dtype,
            return_full_text=False,  # return only the generated continuation
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run generation for one request.

        Args:
            data (Dict): Request payload with ``inputs`` (a string or a list
                of strings) and optional ``parameters`` (generation keyword
                arguments forwarded to the pipeline as given).

        Returns:
            Whatever the text-generation pipeline returns for the prompts.
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # A single string is handled as a batch of one.
        if isinstance(inputs, str):
            inputs = [inputs]

        prompts = [self._apply_chat_template(text) for text in inputs]

        # Caller-supplied parameters are used verbatim, without the defaults.
        if parameters is not None:
            return self.pipeline(prompts, **parameters)

        # Default: greedy decoding. No temperature is passed because it is
        # not used when do_sample=False.
        return self.pipeline(
            prompts,
            max_new_tokens=256,
            do_sample=False,
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
        )

    def _apply_chat_template(self, text: str) -> str:
        """Build the chat prompt (system prompt + user text) for *text*.

        Args:
            text (str): The user message.

        Returns:
            str: The formatted prompt, ending with the assistant generation
            prefix.
        """
        # The system prompt is sent as a "system" role message so the chat
        # template renders it; an extra keyword argument would only be
        # exposed to the template as a variable it may never read.
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ]
        try:
            return self.tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
                enable_thinking=False,  # forwarded to the template (Qwen models)
            )
        except Exception:
            # Best-effort fallback: format the prompt by hand with the
            # <|im_start|>/<|im_end|> markers, but leave a trace in the logs.
            logging.getLogger(__name__).warning(
                "apply_chat_template failed; using manual prompt formatting",
                exc_info=True,
            )
            return f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{text}<|im_end|>\n<|im_start|>assistant\n"
|