# Page-scrape header (not part of the module): Hugging Face Spaces file view.
# Space status: Runtime error. File size: 4,297 bytes. Commit: 4e71548.
import asyncio
import json
from typing import Dict, Any, Optional
from openai import AsyncOpenAI
from huggingface_hub import AsyncInferenceClient
from src.config.config import settings
class GroqClient:
    """Async client for the Groq API, reached through the OpenAI-compatible SDK.

    The instance is an async context manager: leaving the ``async with``
    block closes the underlying ``AsyncOpenAI`` client.
    """

    # System message sent with every extraction request. The text is kept
    # flush-left on purpose so that no source indentation leaks into the
    # prompt that the model receives.
    _SYSTEM_PROMPT = """
You are a financial document parser that extracts structured data from bank statements.
Your task is to extract the following fields and return only valid JSON:
- Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
- Ending balance can also be referred with "Balance this statement" in pdfs.
{
"bank_name": "string",
"account_holder": "string",
"accounts": [{
"account_name": "string",
"account_number": "string",
"starting_balance": float,
"ending_balance": float,
"statement_start_date": "YYYY-MM-DD",
"statement_end_date": "YYYY-MM-DD"
}]
}
Guidelines:
- Return strictly valid JSON (no markdown, comments, or extra explanation).
- `starting_balance` and `ending_balance` must be `float` (no currency symbol).
- Dates must follow the format `"YYYY-MM-DD"`.
- Do not respond with anything other than the JSON object.
- If multiple account are there then include all the account list in a list.
"""

    def __init__(self):
        """Build the SDK client from ``settings.groq_base_url`` and ``settings.groq_api_key``."""
        self.client = AsyncOpenAI(
            base_url=settings.groq_base_url,
            api_key=settings.groq_api_key,
        )

    async def __aenter__(self):
        """Return this client; nothing is set up on entry."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close the underlying SDK client; exceptions are not suppressed."""
        await self.client.close()

    @staticmethod
    def _message_text(response) -> str:
        """Return the first choice's message content as a string.

        The SDK types ``message.content`` as optional, so ``None`` is mapped
        to ``""`` to honour the ``-> str`` contract of the public method.
        """
        content = response.choices[0].message.content
        return "" if content is None else content

    async def extract_account_details(self, text: str) -> str:
        """Ask the LLM to extract account details from statement text.

        Args:
            text: Text of the bank statement, sent as the user message.

        Returns:
            The raw message content of the first completion choice, or an
            empty string when the model returned no content. The value is
            not parsed or validated here.
        """
        response = await self.client.chat.completions.create(
            model=settings.llm_model,
            messages=[
                {"role": "system", "content": self._SYSTEM_PROMPT},
                {"role": "user", "content": text},
            ],
        )
        return self._message_text(response)
class HuggingFaceClient:
    """Async client for the HuggingFace Inference API.

    Extraction is currently a placeholder: ``extract_account_details`` sends
    no request and always returns a fixed JSON string. The instance is an
    async context manager that releases the underlying client on exit.
    """

    # Prompt reserved for the real implementation; it is the same text that
    # GroqClient sends. Kept flush-left so no source indentation ends up in
    # the prompt once it is used.
    _SYSTEM_PROMPT = """
You are a financial document parser that extracts structured data from bank statements.
Your task is to extract the following fields and return only valid JSON:
- Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
- Ending balance can also be referred with "Balance this statement" in pdfs.
{
"bank_name": "string",
"account_holder": "string",
"accounts": [{
"account_name": "string",
"account_number": "string",
"starting_balance": float,
"ending_balance": float,
"statement_start_date": "YYYY-MM-DD",
"statement_end_date": "YYYY-MM-DD"
}]
}
Guidelines:
- Return strictly valid JSON (no markdown, comments, or extra explanation).
- `starting_balance` and `ending_balance` must be `float` (no currency symbol).
- Dates must follow the format `"YYYY-MM-DD"`.
- Do not respond with anything other than the JSON object.
- If multiple account are there then include all the account list in a list.
"""

    # Fixed response returned until a model-specific implementation exists.
    _PLACEHOLDER_RESPONSE = '{"bank_name": "Unknown", "account_holder": "Unknown", "accounts": []}'

    def __init__(self):
        """Build the client from ``settings.huggingface_provider`` and ``settings.huggingface_api_key``."""
        self.client = AsyncInferenceClient(
            provider=settings.huggingface_provider,
            api_key=settings.huggingface_api_key,
        )

    async def __aenter__(self):
        """Return this client; nothing is set up on entry."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Release the underlying client if it can be closed.

        Mirrors ``GroqClient.__aexit__``. The ``getattr`` guard keeps this a
        no-op on client versions that expose no ``close`` method. Exceptions
        from the ``async with`` body are not suppressed.
        """
        close = getattr(self.client, "close", None)
        if close is not None:
            await close()

    async def extract_account_details(self, text: str) -> str:
        """Return placeholder account details as a JSON string.

        Args:
            text: Text of the bank statement. Currently unused.

        Returns:
            A fixed JSON document with "Unknown" bank name and account
            holder and an empty ``accounts`` list.
        """
        # TODO: implement for the specific HuggingFace model, sending
        # _SYSTEM_PROMPT as the system message and `text` as the user message.
        return self._PLACEHOLDER_RESPONSE