finance-entity-extractor / src /finee /prompt.py

Ranjit Behera

FinEE v1.0 - Finance Entity Extractor

dcc24f8 about 1 month ago

5.97 kB

	"""
	FinEE Prompt - LLM prompt templates for targeted extraction.

	Uses field-specific prompts instead of generic extraction for better accuracy.
	"""

	from typing import List, Optional, Dict


	# Base system prompt.
	# Frames the model as an extractor for Indian banking transaction messages
	# and asks for JSON output. Reused as the "system" entry of
	# CHAT_EXTRACTION_TEMPLATE.
	SYSTEM_PROMPT = """You are a financial data extraction expert. Extract structured information from Indian banking transaction messages accurately. Always output valid JSON."""


	# Field-specific targeted prompts (Tier 3)
	#
	# Maps a field name to a single-field prompt template. Every template holds
	# exactly one `{text}` placeholder, which get_targeted_prompt() fills in via
	# str.format(); any literal brace added to a template would therefore have
	# to be doubled (`{{` / `}}`).
	#
	# The keys of this dict are the only field names get_targeted_prompt()
	# accepts; anything else raises ValueError there.
	#
	# Most templates tell the model to reply "unknown" when the value is absent
	# (parse_targeted_response() maps that reply to None). The exceptions are
	# 'category', which offers "other", and 'type', which only allows
	# "debit" or "credit".
	TARGETED_PROMPTS = {
	# Merchant / vendor name, free text.
	'merchant': """Extract ONLY the merchant/vendor name from this transaction.
	Reply with just the name, nothing else. If you cannot determine the merchant, reply "unknown".

	Transaction: {text}

	Merchant name:""",

	# One word from a closed category list (same list as FULL_EXTRACTION_PROMPT).
	'category': """What category does this transaction belong to?
	Reply with ONE word from: food, shopping, transport, utilities, entertainment, transfer, salary, investment, healthcare, education, other

	Transaction: {text}

	Category:""",

	# Transaction date, requested in DD-MM-YYYY form.
	'date': """Extract ONLY the transaction date from this text.
	Reply in DD-MM-YYYY format. If no date found, reply "unknown".

	Transaction: {text}

	Date:""",

	# Transaction reference / UTR number.
	'reference': """Extract ONLY the transaction reference/UTR number from this text.
	Reply with just the reference number (typically 12-16 digits). If not found, reply "unknown".

	Transaction: {text}

	Reference:""",

	# Transaction amount as a bare number.
	'amount': """Extract ONLY the transaction amount from this text.
	Reply with just the number (e.g., 2500.00). If not found, reply "unknown".

	Transaction: {text}

	Amount:""",

	# Direction of the transaction; no "unknown" option is offered here.
	'type': """Is this transaction a DEBIT (money going out) or CREDIT (money coming in)?
	Reply with just "debit" or "credit".

	Transaction: {text}

	Type:""",

	# Account number, or just its last 4 digits.
	'account': """Extract ONLY the account number (or last 4 digits) from this text.
	Reply with just the number. If not found, reply "unknown".

	Transaction: {text}

	Account:""",

	# UPI Virtual Payment Address of the form username@bankcode.
	'vpa': """Extract ONLY the UPI VPA (Virtual Payment Address) from this text.
	A VPA looks like "username@bankcode" (e.g., swiggy@ybl).
	Reply with just the VPA. If not found, reply "unknown".

	Transaction: {text}

	VPA:""",
	}


	# Full extraction prompt (fallback)
	#
	# Asks for all eight fields at once as a single JSON object, with null for
	# anything missing. Rendered by get_full_extraction_prompt(), and also used
	# by get_multi_field_prompt() when it is given an empty field list.
	#
	# The template is filled via str.format(text=...), so `{text}` is the only
	# placeholder; a literal brace added here (for example a JSON sample) would
	# have to be doubled (`{{` / `}}`).
	FULL_EXTRACTION_PROMPT = """Extract all financial entities from this Indian banking transaction message.
	Return a JSON object with these fields (use null if not found):
	- amount: transaction amount as a number
	- type: "debit" or "credit"
	- date: in DD-MM-YYYY format
	- account: account number (or last 4 digits)
	- reference: UPI/transaction reference number
	- vpa: UPI Virtual Payment Address (e.g., swiggy@ybl)
	- merchant: merchant/vendor name
	- category: one of [food, shopping, transport, utilities, entertainment, transfer, salary, investment, healthcare, education, other]

	Transaction:
	{text}

	JSON output:"""


	# Chat-style prompt (for models that support chat format)
	#
	# Two entries, both read by get_chat_messages():
	#   "system" - the shared SYSTEM_PROMPT, used verbatim
	#   "user"   - a str.format template whose `{text}` placeholder receives the
	#              transaction text
	CHAT_EXTRACTION_TEMPLATE = {
	"system": SYSTEM_PROMPT,
	"user": """Extract financial entities from this transaction:

	{text}

	Return JSON with: amount, type, date, account, reference, vpa, merchant, category"""
	}


	def get_targeted_prompt(field: str, text: str) -> str:
	    """
	    Render the single-field prompt for one transaction.

	    Args:
	        field: Name of the field to extract; must be a key of TARGETED_PROMPTS
	        text: Transaction text substituted into the template

	    Returns:
	        Formatted prompt string

	    Raises:
	        ValueError: If ``field`` has no targeted prompt
	    """
	    template = TARGETED_PROMPTS.get(field)

	    # Templates are always non-empty strings, so None can only mean "no such key".
	    if template is None:
	        available = list(TARGETED_PROMPTS)
	        raise ValueError(f"Unknown field: {field}. Available: {available}")

	    return template.format(text=text)


	def get_multi_field_prompt(fields: List[str], text: str) -> str:
	    """
	    Build a prompt that asks for a chosen subset of fields.

	    An empty ``fields`` list delegates to the full extraction prompt.
	    Field names without a known description are inserted verbatim.

	    Args:
	        fields: List of field names to extract
	        text: Transaction text

	    Returns:
	        Formatted prompt string
	    """
	    # Nothing specific requested: fall back to asking for everything.
	    if not fields:
	        return get_full_extraction_prompt(text)

	    # Short hint shown to the model next to each known field name.
	    hints = {
	        'amount': 'amount (as a number)',
	        'type': 'type ("debit" or "credit")',
	        'date': 'date (DD-MM-YYYY format)',
	        'account': 'account (number or last 4 digits)',
	        'reference': 'reference (UPI/transaction ID)',
	        'vpa': 'vpa (UPI address like user@bank)',
	        'merchant': 'merchant (vendor name)',
	        'category': 'category (food/shopping/transport/etc)',
	    }

	    # Unknown names pass through unchanged rather than raising.
	    described = [hints.get(name, name) for name in fields]
	    wanted = ', '.join(described)

	    return f"""Extract ONLY these fields from the transaction: {wanted}

	Return a JSON object with only these fields. Use null if not found.

	Transaction:
	{text}

	JSON output:"""


	def get_full_extraction_prompt(text: str) -> str:
	    """
	    Render the all-fields extraction prompt for one transaction.

	    Args:
	        text: Transaction text substituted into FULL_EXTRACTION_PROMPT

	    Returns:
	        Formatted prompt string
	    """
	    template = FULL_EXTRACTION_PROMPT
	    return template.format(text=text)


	def get_chat_messages(text: str) -> List[Dict[str, str]]:
	    """
	    Build the system + user message pair for chat-format models.

	    Args:
	        text: Transaction text inserted into the user message

	    Returns:
	        List of message dictionaries, system message first
	    """
	    system_message = {
	        "role": "system",
	        "content": CHAT_EXTRACTION_TEMPLATE["system"],
	    }

	    user_content = CHAT_EXTRACTION_TEMPLATE["user"].format(text=text)
	    user_message = {"role": "user", "content": user_content}

	    return [system_message, user_message]


	def parse_targeted_response(field: str, response: str) -> Optional[str]:
	    """
	    Parse the response from a targeted prompt.

	    Surrounding whitespace and wrapping quotes are removed, then "not found"
	    sentinels (unknown / null / none / n/a / empty) are mapped to None.

	    The sentinel check runs AFTER unquoting: the targeted prompts literally
	    ask the model to reply "unknown" (in quotes), so a quoted sentinel must
	    be treated as missing rather than returned as the value ``unknown``.

	    Args:
	        field: Field name that was extracted (not used for cleaning; kept
	            so the signature stays stable for callers)
	        response: Raw LLM response

	    Returns:
	        Cleaned field value or None
	    """
	    if not response:
	        return None

	    cleaned = response.strip()

	    # Peel wrapping quotes: double quotes first, then single quotes.
	    # Re-strip so padding inside the quotes does not survive.
	    for quote in ('"', "'"):
	        if cleaned.startswith(quote) and cleaned.endswith(quote):
	            cleaned = cleaned[1:-1].strip()

	    # Handle "unknown"-style responses, including ones that were quoted.
	    if cleaned.lower() in ('unknown', 'null', 'none', 'n/a', ''):
	        return None

	    return cleaned