import asyncio
import json
from typing import Dict, Any, Optional

from openai import AsyncOpenAI
from huggingface_hub import AsyncInferenceClient

from src.config.config import settings

# Single source of truth for the extraction instructions. Both clients are
# meant to send the same prompt, so it lives here instead of being copied
# into each method (the two copies were previously identical).
_ACCOUNT_EXTRACTION_PROMPT = """
You are a financial document parser that extracts structured data from bank statements.

Your task is to extract the following fields and return only valid JSON:
- Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
- Ending balance can also be referred with "Balance this statement" in pdfs.

{
  "bank_name": "string",
  "account_holder": "string",
  "accounts": [{
    "account_name": "string",
    "account_number": "string",
    "starting_balance": float,
    "ending_balance": float,
    "statement_start_date": "YYYY-MM-DD",
    "statement_end_date": "YYYY-MM-DD"
  }]
}

Guidelines:
- Return strictly valid JSON (no markdown, comments, or extra explanation).
- `starting_balance` and `ending_balance` must be `float` (no currency symbol).
- Dates must follow the format `"YYYY-MM-DD"`.
- Do not respond with anything other than the JSON object.
- If multiple account are there then include all the account list in a list.
"""

# Returned by the HuggingFace client until a real model call is implemented.
_PLACEHOLDER_ACCOUNT_DETAILS = (
    '{"bank_name": "Unknown", "account_holder": "Unknown", "accounts": []}'
)


class GroqClient:
    """Async client for the Groq API, accessed through the OpenAI SDK.

    Intended to be used as an async context manager so the underlying
    HTTP client is closed when the ``async with`` block exits.
    """

    def __init__(self):
        # Groq is reached via ``AsyncOpenAI`` pointed at the configured
        # base URL and API key.
        self.client = AsyncOpenAI(
            base_url=settings.groq_base_url,
            api_key=settings.groq_api_key,
        )

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        # Release the SDK client's resources; exceptions raised inside the
        # ``async with`` body are not suppressed (implicit ``None`` return).
        await self.client.close()

    async def extract_account_details(self, text: str) -> str:
        """Extract account details from statement text using the LLM.

        Args:
            text: Text of the bank statement, sent as the user message.

        Returns:
            The content of the first completion choice. The system prompt
            asks the model for a bare JSON object, but the value is
            returned as-is without parsing or validation.
            NOTE(review): the SDK may type ``content`` as optional; callers
            should confirm how an empty reply is handled.
        """
        response = await self.client.chat.completions.create(
            model=settings.llm_model,
            messages=[
                {"role": "system", "content": _ACCOUNT_EXTRACTION_PROMPT},
                {"role": "user", "content": text},
            ],
        )
        return response.choices[0].message.content


class HuggingFaceClient:
    """Async client for the HuggingFace Inference API.

    Mirrors the ``GroqClient`` interface. The extraction method is still a
    placeholder and does not call any model yet.
    """

    def __init__(self):
        self.client = AsyncInferenceClient(
            provider=settings.huggingface_provider,
            api_key=settings.huggingface_api_key,
        )

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        # Close the inference client on exit, consistent with GroqClient,
        # instead of leaving it open.
        await self.client.close()

    async def extract_account_details(self, text: str) -> str:
        """Return placeholder account details (no model call is made yet).

        Args:
            text: Text of the bank statement. Currently unused.

        Returns:
            A fixed JSON string with "Unknown" bank name and account holder
            and an empty ``accounts`` list.
        """
        # TODO: implement the HuggingFace-specific request. When doing so,
        # send ``_ACCOUNT_EXTRACTION_PROMPT`` as the system message so both
        # clients share the same instructions.
        return _PLACEHOLDER_ACCOUNT_DETAILS