# Bank-Scrubber / src/utils/api_clients.py
# Author: Aryan Jain
# Commit: "bank scrubber streamlit application" (4e71548)
import asyncio
import json
from typing import Dict, Any, Optional
from openai import AsyncOpenAI
from huggingface_hub import AsyncInferenceClient
from src.config.config import settings
class GroqClient:
    """Async client for the Groq API, reached through the OpenAI-compatible SDK.

    Use it as an async context manager so the underlying ``AsyncOpenAI``
    client is closed when the block exits::

        async with GroqClient() as groq:
            raw_json = await groq.extract_account_details(statement_text)
    """

    # System prompt sent with every extraction request. The text is runtime
    # behaviour (the model reads it verbatim), so it is kept exactly as-is and
    # deliberately not re-indented.
    _SYSTEM_PROMPT = """
You are a financial document parser that extracts structured data from bank statements.
Your task is to extract the following fields and return only valid JSON:
- Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
- Ending balance can also be referred with "Balance this statement" in pdfs.
{
"bank_name": "string",
"account_holder": "string",
"accounts": [{
"account_name": "string",
"account_number": "string",
"starting_balance": float,
"ending_balance": float,
"statement_start_date": "YYYY-MM-DD",
"statement_end_date": "YYYY-MM-DD"
}]
}
Guidelines:
- Return strictly valid JSON (no markdown, comments, or extra explanation).
- `starting_balance` and `ending_balance` must be `float` (no currency symbol).
- Dates must follow the format `"YYYY-MM-DD"`.
- Do not respond with anything other than the JSON object.
- If multiple account are there then include all the account list in a list.
"""

    def __init__(self):
        """Build the SDK client from ``settings.groq_base_url`` and ``settings.groq_api_key``."""
        self.client = AsyncOpenAI(
            base_url=settings.groq_base_url,
            api_key=settings.groq_api_key,
        )

    async def __aenter__(self):
        """Return this instance for use inside ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close the underlying SDK client; exceptions from the body are not suppressed."""
        await self.client.close()

    async def extract_account_details(self, text: str) -> str:
        """Ask the configured LLM to extract account details from statement text.

        Args:
            text: Raw text of the bank statement, sent as the user message.

        Returns:
            The content of the first completion choice, expected (per the
            system prompt) to be a JSON document. The string is returned
            unparsed and unvalidated. An empty string is returned when the
            model produced no content.
        """
        response = await self.client.chat.completions.create(
            model=settings.llm_model,
            messages=[
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": text},
            ],
        )
        # The SDK types ``message.content`` as Optional[str]; normalise a
        # missing content to "" so the declared ``-> str`` contract holds and
        # callers never receive ``None``.
        return response.choices[0].message.content or ""
class HuggingFaceClient:
    """Async client wrapper for the HuggingFace Inference API.

    NOTE: extraction is not implemented yet. ``extract_account_details``
    makes no model call and always returns a fixed placeholder JSON string.
    """

    # Prompt reserved for the eventual model call; it mirrors the prompt used
    # by GroqClient. Kept as a class constant instead of an unused local so the
    # text is preserved for the future implementation without dead code in the
    # method body.
    _SYSTEM_PROMPT = """
You are a financial document parser that extracts structured data from bank statements.
Your task is to extract the following fields and return only valid JSON:
- Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
- Ending balance can also be referred with "Balance this statement" in pdfs.
{
"bank_name": "string",
"account_holder": "string",
"accounts": [{
"account_name": "string",
"account_number": "string",
"starting_balance": float,
"ending_balance": float,
"statement_start_date": "YYYY-MM-DD",
"statement_end_date": "YYYY-MM-DD"
}]
}
Guidelines:
- Return strictly valid JSON (no markdown, comments, or extra explanation).
- `starting_balance` and `ending_balance` must be `float` (no currency symbol).
- Dates must follow the format `"YYYY-MM-DD"`.
- Do not respond with anything other than the JSON object.
- If multiple account are there then include all the account list in a list.
"""

    # Fixed response returned until a real model call is implemented.
    _PLACEHOLDER_RESPONSE = (
        '{"bank_name": "Unknown", "account_holder": "Unknown", "accounts": []}'
    )

    def __init__(self):
        """Build the SDK client from ``settings.huggingface_provider`` and ``settings.huggingface_api_key``."""
        self.client = AsyncInferenceClient(
            provider=settings.huggingface_provider,
            api_key=settings.huggingface_api_key,
        )

    async def __aenter__(self):
        """Return this instance for use inside ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Exit the context without performing any cleanup; exceptions are not suppressed."""
        pass

    async def extract_account_details(self, text: str) -> str:
        """Return placeholder account details (HuggingFace extraction is not implemented).

        Args:
            text: Raw bank statement text. Currently ignored.

        Returns:
            A fixed JSON string with ``"Unknown"`` bank name and account
            holder and an empty ``accounts`` list.
        """
        # TODO: call the HuggingFace model with ``_SYSTEM_PROMPT`` and ``text``
        # once the model-specific request logic is decided.
        return self._PLACEHOLDER_RESPONSE