Spaces:
Runtime error
Runtime error
import asyncio
import json
from typing import Dict, Any, Optional

from huggingface_hub import AsyncInferenceClient
from openai import AsyncOpenAI

from src.config.config import settings
class GroqClient:
    """Async client for the Groq API.

    Wraps an ``AsyncOpenAI`` client configured with ``settings.groq_base_url``
    and ``settings.groq_api_key``. Usable as an async context manager; leaving
    the ``async with`` block closes the underlying client.
    """

    # System message sent with every extraction request. It tells the model
    # which fields to extract and to answer with a bare JSON object.
    SYSTEM_PROMPT = """
    You are a financial document parser that extracts structured data from bank statements.
    Your task is to extract the following fields and return only valid JSON:
    - Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
    - Ending balance can also be referred with "Balance this statement" in pdfs.
    {
    "bank_name": "string",
    "account_holder": "string",
    "accounts": [{
    "account_name": "string",
    "account_number": "string",
    "starting_balance": float,
    "ending_balance": float,
    "statement_start_date": "YYYY-MM-DD",
    "statement_end_date": "YYYY-MM-DD"
    }]
    }
    Guidelines:
    - Return strictly valid JSON (no markdown, comments, or extra explanation).
    - `starting_balance` and `ending_balance` must be `float` (no currency symbol).
    - Dates must follow the format `"YYYY-MM-DD"`.
    - Do not respond with anything other than the JSON object.
    - If multiple account are there then include all the account list in a list.
    """

    def __init__(self):
        """Build the underlying ``AsyncOpenAI`` client from the Groq settings."""
        self.client = AsyncOpenAI(
            base_url=settings.groq_base_url,
            api_key=settings.groq_api_key,
        )

    async def __aenter__(self):
        """Return this instance for use inside ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Close the underlying client; exceptions are not suppressed."""
        await self.client.close()

    async def extract_account_details(self, text: str) -> str:
        """Ask the model to extract account details from statement text.

        Sends ``SYSTEM_PROMPT`` as the system message and ``text`` as the user
        message to the model named by ``settings.llm_model``.

        Args:
            text: Text of the bank statement to analyse.

        Returns:
            The content of the first choice in the response, exactly as the
            model produced it (it is not parsed or validated here). An empty
            string is returned when the response carries no content, so the
            result is always a ``str``.
        """
        response = await self.client.chat.completions.create(
            model=settings.llm_model,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": text},
            ],
        )
        content = response.choices[0].message.content
        # The message content may be absent; normalise that to "" so the
        # declared ``str`` return type holds for every caller.
        return content if content is not None else ""
class HuggingFaceClient:
    """Async client for the HuggingFace Inference API.

    Wraps an ``AsyncInferenceClient`` configured with
    ``settings.huggingface_provider`` and ``settings.huggingface_api_key``.
    Extraction is currently a placeholder: no request is sent to a model.
    """

    # Extraction prompt (same text the Groq client uses). It is not sent
    # anywhere yet; it is kept here for the HuggingFace-specific
    # implementation instead of being rebuilt as an unused local on each call.
    SYSTEM_PROMPT = """
    You are a financial document parser that extracts structured data from bank statements.
    Your task is to extract the following fields and return only valid JSON:
    - Starting balance can also be referred with "Balance last statement" or "Balance previous statement" in pdfs.
    - Ending balance can also be referred with "Balance this statement" in pdfs.
    {
    "bank_name": "string",
    "account_holder": "string",
    "accounts": [{
    "account_name": "string",
    "account_number": "string",
    "starting_balance": float,
    "ending_balance": float,
    "statement_start_date": "YYYY-MM-DD",
    "statement_end_date": "YYYY-MM-DD"
    }]
    }
    Guidelines:
    - Return strictly valid JSON (no markdown, comments, or extra explanation).
    - `starting_balance` and `ending_balance` must be `float` (no currency symbol).
    - Dates must follow the format `"YYYY-MM-DD"`.
    - Do not respond with anything other than the JSON object.
    - If multiple account are there then include all the account list in a list.
    """

    def __init__(self):
        """Build the underlying ``AsyncInferenceClient`` from the settings."""
        self.client = AsyncInferenceClient(
            provider=settings.huggingface_provider,
            api_key=settings.huggingface_api_key,
        )

    async def __aenter__(self):
        """Return this instance for use inside ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Do nothing on exit; exceptions are not suppressed."""
        # NOTE(review): the underlying client is not closed here - confirm
        # whether AsyncInferenceClient needs explicit cleanup.
        return None

    async def extract_account_details(self, text: str) -> str:
        """Return placeholder account details as a JSON string.

        This is a stub until HuggingFace-specific logic is implemented: no
        model is called and ``text`` is ignored.

        Args:
            text: Text of the bank statement (currently unused).

        Returns:
            A fixed JSON document with "Unknown" bank name and account holder
            and an empty ``accounts`` list.
        """
        # TODO: call the HuggingFace model with SYSTEM_PROMPT and ``text``.
        return '{"bank_name": "Unknown", "account_holder": "Unknown", "accounts": []}'