Ranjit0034 committed on
Commit
e729c58
·
verified ·
1 Parent(s): 123b3e7

Upload src/finee/api.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/finee/api.py +572 -0
src/finee/api.py ADDED
@@ -0,0 +1,572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FinEE API - FastAPI Backend
3
+ ============================
4
+
5
+ RESTful API for financial entity extraction with:
6
+ - Single/batch extraction endpoints
7
+ - RAG-enhanced extraction
8
+ - PDF/Image processing
9
+ - Multi-turn chat
10
+ - Analytics
11
+
12
+ Author: Ranjit Behera
13
+ """
14
+
15
+ import os
16
+ import json
17
+ import logging
18
+ from datetime import datetime
19
+ from typing import List, Dict, Optional, Any
20
+ from pathlib import Path
21
+
22
+ from fastapi import FastAPI, HTTPException, UploadFile, File, BackgroundTasks
23
+ from fastapi.middleware.cors import CORSMiddleware
24
+ from fastapi.responses import JSONResponse
25
+ from pydantic import BaseModel, Field
26
+ import uvicorn
27
+
28
+ # Configure logging
29
+ logging.basicConfig(level=logging.INFO)
30
+ logger = logging.getLogger(__name__)
31
+
32
+
33
+ # ============================================================================
34
+ # PYDANTIC MODELS
35
+ # ============================================================================
36
+
37
class ExtractionRequest(BaseModel):
    """Request body for extracting entities from one message."""

    # Required: the raw text to run extraction on.
    message: str = Field(..., description="Bank SMS or email to extract from")
    # Optional switches; RAG is on by default, LLM is off by default.
    use_rag: bool = Field(default=True, description="Use RAG for context-aware extraction")
    use_llm: bool = Field(default=False, description="Use LLM for complex cases")
42
+
43
+
44
class BatchExtractionRequest(BaseModel):
    """Request body carrying several messages to extract in one call."""

    # Required: one entry per message to process.
    messages: List[str] = Field(..., description="List of messages to extract")
    # Switches applied to every message in the batch.
    use_rag: bool = Field(default=True)
    use_llm: bool = Field(default=False)
49
+
50
+
51
class ExtractionResult(BaseModel):
    """Entities extracted from one message; every entity field is optional."""

    # Core transaction facts.
    amount: Optional[float] = Field(default=None)
    type: Optional[str] = Field(default=None)
    account: Optional[str] = Field(default=None)
    bank: Optional[str] = Field(default=None)
    date: Optional[str] = Field(default=None)
    time: Optional[str] = Field(default=None)
    reference: Optional[str] = Field(default=None)
    # Counterparty details.
    merchant: Optional[str] = Field(default=None)
    beneficiary: Optional[str] = Field(default=None)
    vpa: Optional[str] = Field(default=None)
    category: Optional[str] = Field(default=None)
    is_p2m: Optional[bool] = Field(default=None)
    # Remaining fields.
    balance: Optional[float] = Field(default=None)
    status: Optional[str] = Field(default=None)
    # Defaults to 0.0 when the extractor supplies no confidence value.
    confidence: float = Field(default=0.0)
68
+
69
+
70
class ExtractionResponse(BaseModel):
    """Envelope returned by the extraction endpoint."""

    # Required flag; the remaining fields are filled in as applicable.
    success: bool
    data: Optional[ExtractionResult] = Field(default=None)
    raw_text: Optional[str] = Field(default=None)
    rag_context: Optional[Dict] = Field(default=None)
    processing_time_ms: float = Field(default=0)
    error: Optional[str] = Field(default=None)
78
+
79
+
80
class ChatMessage(BaseModel):
    """One turn of a chat conversation."""

    # Speaker of the turn; the field description names 'user' and 'assistant'.
    role: str = Field(default=..., description="'user' or 'assistant'")
    # Text of the turn.
    content: str
84
+
85
+
86
class ChatRequest(BaseModel):
    """Request body for the multi-turn chat endpoint."""

    # Conversation history.
    messages: List[ChatMessage]
    # Optional free-form context dictionary.
    context: Optional[Dict] = Field(default=None)
90
+
91
+
92
class AnalyticsRequest(BaseModel):
    """Request body for the spending analytics endpoint."""

    # Transactions to aggregate, each given as a plain dictionary.
    transactions: List[Dict]
    # Reporting period label; defaults to "month".
    period: Optional[str] = Field(default="month")
    # Transaction key to group by; defaults to "category".
    group_by: Optional[str] = Field(default="category")
96
+ group_by: Optional[str] = "category"
97
+
98
+
99
+ # ============================================================================
100
+ # FASTAPI APP
101
+ # ============================================================================
102
+
103
app = FastAPI(
    title="FinEE API",
    description="Financial Entity Extraction API for Indian Banking",
    version="2.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
)

# CORS
# Allowed origins come from the comma-separated FINEE_CORS_ORIGINS environment
# variable; when it is unset or empty, every origin ("*") is allowed.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("FINEE_CORS_ORIGINS", "").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Credentialed requests are only enabled for an explicit origin list:
    # a wildcard origin combined with credentials would let any site make
    # credentialed cross-origin calls.
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global state
# Module-level singletons, both empty until first use.
_extractor = None
_rag_engine = None
123
+
124
+
125
def get_extractor():
    """Return the shared extractor, building it on the first call.

    Falls back to ``MockExtractor`` when importing or constructing
    ``FinancialExtractor`` raises ``ImportError``.
    """
    global _extractor
    if _extractor is not None:
        return _extractor

    try:
        from finee import FinancialExtractor
        _extractor = FinancialExtractor(use_llm=False)
        logger.info("Extractor initialized")
    except ImportError:
        logger.warning("FinEE not installed, using mock extractor")
        _extractor = MockExtractor()

    return _extractor
137
+
138
+
139
def get_rag_engine():
    """Return the shared RAG engine, or ``None`` when it cannot be imported.

    The engine is created on the first successful call. While it is still
    ``None`` (import failed), each call attempts the import again.
    """
    global _rag_engine
    if _rag_engine is not None:
        return _rag_engine

    try:
        from finee.rag import RAGEngine
        _rag_engine = RAGEngine()
        logger.info("RAG engine initialized")
    except ImportError:
        logger.warning("RAG not available")
        _rag_engine = None

    return _rag_engine
151
+
152
+
153
class MockExtractor:
    """Regex/keyword stand-in extractor that recognises only amount and type."""

    def extract(self, text: str) -> Dict:
        """Extract a rupee amount and transaction direction from ``text``.

        Args:
            text: Message text to scan.

        Returns:
            A dict that may contain ``amount`` (float) and ``type``
            (``"debit"`` or ``"credit"``); a key is omitted when nothing
            matches for it.
        """
        import re

        result = {}

        # Amount: the captured group must contain at least one digit, so a
        # stray comma after "Rs" (e.g. "Rs , ...") can no longer reach
        # float() as an empty string.
        amount_match = re.search(r'Rs\.?\s*([\d,]*\d[\d,]*(?:\.\d{2})?)', text)
        if amount_match:
            result['amount'] = float(amount_match.group(1).replace(',', ''))

        # Type: debit keywords are checked first and win over credit keywords.
        lowered = text.lower()
        if any(w in lowered for w in ('debit', 'debited', 'paid', 'spent')):
            result['type'] = 'debit'
        elif any(w in lowered for w in ('credit', 'credited', 'received')):
            result['type'] = 'credit'

        return result
171
+
172
+
173
+ # ============================================================================
174
+ # ENDPOINTS
175
+ # ============================================================================
176
+
177
@app.get("/")
async def root():
    """Health check."""
    # Naive UTC timestamp rendered in ISO-8601 form.
    now_iso = datetime.utcnow().isoformat()
    payload = {"status": "healthy", "service": "FinEE API", "version": "2.0.0"}
    payload["timestamp"] = now_iso
    return payload
186
+
187
+
188
@app.get("/health")
async def health():
    """Detailed health check."""
    # A component reports True only once its lazy singleton has been created.
    components = {
        "extractor": _extractor is not None,
        "rag": _rag_engine is not None,
    }
    return {"status": "healthy", "components": components}
198
+
199
+
200
@app.post("/extract", response_model=ExtractionResponse)
async def extract(request: ExtractionRequest):
    """
    Extract financial entities from a single message.

    - **message**: The bank SMS, email, or notification text
    - **use_rag**: Enable RAG for context-aware extraction
    - **use_llm**: Use LLM for complex cases (slower but more accurate)
    """
    import time
    # perf_counter is monotonic, so the measured duration is unaffected by
    # wall-clock adjustments.
    start = time.perf_counter()

    # NOTE(review): request.use_llm is not read anywhere in this handler.
    try:
        extractor = get_extractor()
        rag = get_rag_engine() if request.use_rag else None

        # RAG context
        rag_context = None
        if rag:
            context = rag.retrieve(request.message)
            rag_context = {
                "merchant_info": context.merchant_info,
                "similar_transactions": context.similar_transactions,
                "category_hierarchy": context.category_hierarchy,
            }

        # Extract. Work on a copy so filling in RAG values below never
        # mutates an object the extractor may still hold.
        result = dict(extractor.extract(request.message))

        # Enhance with RAG: only fill fields the extractor left empty.
        if rag_context and rag_context.get("merchant_info"):
            merchant_info = rag_context["merchant_info"]
            if not result.get("merchant"):
                result["merchant"] = merchant_info["name"]
            if not result.get("category"):
                result["category"] = merchant_info["category"]
            if "is_p2m" not in result:
                result["is_p2m"] = merchant_info["is_p2m"]

        processing_time = (time.perf_counter() - start) * 1000

        return ExtractionResponse(
            success=True,
            data=ExtractionResult(**result),
            raw_text=request.message,
            rag_context=rag_context,
            processing_time_ms=round(processing_time, 2)
        )

    except Exception as e:
        # Failures are reported in the response body rather than raised;
        # logger.exception keeps the traceback in the log.
        logger.exception(f"Extraction failed: {e}")
        return ExtractionResponse(
            success=False,
            error=str(e),
            processing_time_ms=round((time.perf_counter() - start) * 1000, 2)
        )
255
+
256
+
257
@app.post("/extract/batch")
async def extract_batch(request: BatchExtractionRequest):
    """
    Extract entities from multiple messages.

    - **messages**: List of messages to process
    - Returns list of extraction results
    """
    # Messages are processed one after another through the single-message
    # handler, sharing the batch-level flags.
    results = [
        await extract(
            ExtractionRequest(
                message=text,
                use_rag=request.use_rag,
                use_llm=request.use_llm,
            )
        )
        for text in request.messages
    ]

    ok_count = len([item for item in results if item.success])

    return {
        "success": True,
        "total": len(results),
        "successful": ok_count,
        "results": results
    }
282
+
283
+
284
@app.post("/parse/pdf")
async def parse_pdf(file: UploadFile = File(...)):
    """
    Parse bank statement PDF and extract transactions.

    - **file**: PDF file of bank statement
    - Returns list of extracted transactions
    """
    # The filename may be missing on an upload; treat that as "not a PDF"
    # instead of failing on None. The extension is compared
    # case-insensitively so "STATEMENT.PDF" is accepted too.
    if not (file.filename or "").lower().endswith('.pdf'):
        raise HTTPException(400, "Only PDF files are supported")

    try:
        # Read PDF (bytes are not used until parsing is implemented)
        content = await file.read()

        # Parse PDF (placeholder - needs pdfplumber)
        transactions = []

        # TODO: Implement PDF parsing
        # from pdfplumber import open as open_pdf
        # with open_pdf(io.BytesIO(content)) as pdf:
        #     for page in pdf.pages:
        #         text = page.extract_text()
        #         ...

        return {
            "success": True,
            "filename": file.filename,
            "transactions": transactions,
            "message": "PDF parsing not yet implemented"
        }

    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(500, f"PDF parsing failed: {e}") from e
318
+
319
+
320
@app.post("/parse/image")
async def parse_image(file: UploadFile = File(...)):
    """
    Parse screenshot/image using OCR and extract entities.

    - **file**: Image file (PNG, JPG)
    - Returns extracted text and entities
    """
    allowed = ['.png', '.jpg', '.jpeg', '.webp']
    # The filename may be missing on an upload; fall back to "" so the
    # request is rejected with 400 instead of failing inside Path().
    ext = Path(file.filename or "").suffix.lower()

    if ext not in allowed:
        raise HTTPException(400, f"Only {allowed} files are supported")

    try:
        # Bytes are not used until OCR is implemented.
        content = await file.read()

        # OCR (placeholder - needs pytesseract or EasyOCR)
        extracted_text = ""

        # TODO: Implement OCR
        # import pytesseract
        # from PIL import Image
        # image = Image.open(io.BytesIO(content))
        # extracted_text = pytesseract.image_to_string(image)

        # Extract entities from OCR text
        if extracted_text:
            extractor = get_extractor()
            result = extractor.extract(extracted_text)
        else:
            result = {}

        return {
            "success": True,
            "filename": file.filename,
            "extracted_text": extracted_text,
            "entities": result,
            "message": "Image OCR not yet implemented"
        }

    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(500, f"Image parsing failed: {e}") from e
363
+
364
+
365
@app.post("/chat")
async def chat(request: ChatRequest):
    """
    Multi-turn chat for financial analysis.

    - **messages**: Conversation history
    - **context**: Optional transaction context
    """
    try:
        # Get last user message
        user_messages = [m for m in request.messages if m.role == "user"]
        if not user_messages:
            raise HTTPException(400, "No user message found")

        last_message = user_messages[-1].content

        # Simple intent detection
        intent = detect_intent(last_message)

        # Generate response based on intent
        response = generate_response(intent, last_message, request.context)

        return {
            "success": True,
            "response": response,
            "intent": intent,
        }

    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged;
        # the generic handler below would otherwise turn them into a 500.
        raise
    except Exception as e:
        raise HTTPException(500, f"Chat failed: {e}") from e
395
+
396
+
397
@app.post("/analytics")
async def analytics(request: AnalyticsRequest):
    """
    Generate spending analytics from transactions.

    - **transactions**: List of extracted transactions
    - **period**: Time period (week, month, year)
    - **group_by**: Grouping (category, merchant, type)
    """
    try:
        transactions = request.transactions

        if not transactions:
            return {"success": True, "data": {}}

        # Group and aggregate
        groups = {}
        total = 0

        for txn in transactions:
            # An explicit None (a field that was never extracted) is treated
            # like a missing key: the group falls back to "other" and the
            # amount to 0, instead of breaking the addition below.
            key = txn.get(request.group_by)
            if key is None:
                key = "other"
            amount = txn.get("amount") or 0
            txn_type = txn.get("type", "debit")

            bucket = groups.setdefault(
                key, {"total": 0, "count": 0, "transactions": []}
            )

            # Only debits count towards spend; every transaction is still
            # counted and listed in its group.
            if txn_type == "debit":
                bucket["total"] += amount
                total += amount

            bucket["count"] += 1
            bucket["transactions"].append(txn)

        # Calculate percentages (share of total debit spend per group)
        for bucket in groups.values():
            bucket["percentage"] = round(bucket["total"] / total * 100, 1) if total > 0 else 0

        # Sort by total, largest first
        sorted_groups = dict(sorted(groups.items(), key=lambda x: x[1]["total"], reverse=True))

        return {
            "success": True,
            "period": request.period,
            "group_by": request.group_by,
            "total_spent": total,
            "transaction_count": len(transactions),
            "breakdown": sorted_groups
        }

    except Exception as e:
        raise HTTPException(500, f"Analytics failed: {e}") from e
449
+
450
+
451
@app.get("/merchants")
async def list_merchants(
    category: Optional[str] = None,
    limit: int = 50
):
    """
    List known merchants from knowledge base.

    - **category**: Filter by category
    - **limit**: Max results
    """
    rag = get_rag_engine()

    if not rag:
        return {"success": False, "error": "RAG not available"}

    merchants = []
    for _name, merchant in rag.merchant_kb.merchants.items():
        # Check the cap before adding, so limit <= 0 yields an empty list
        # rather than one entry.
        if len(merchants) >= limit:
            break

        if category and merchant.category != category:
            continue

        merchants.append({
            "name": merchant.name,
            "category": merchant.category,
            "vpa": merchant.vpa,
            "is_p2m": merchant.is_p2m,
        })

    return {
        "success": True,
        "count": len(merchants),
        "merchants": merchants
    }
487
+
488
+
489
@app.get("/categories")
async def list_categories():
    """List available transaction categories."""
    try:
        from finee.rag import CategoryTaxonomy
    except ImportError:
        # Report the missing RAG module in the response body, using the same
        # shape as the merchants endpoint, rather than failing the request.
        return {"success": False, "error": "RAG not available"}

    categories = [
        {
            "name": name,
            "parent": info.get("parent"),
            "children": info.get("children", []),
            "keywords": info.get("keywords", [])
        }
        for name, info in CategoryTaxonomy.TAXONOMY.items()
    ]

    return {
        "success": True,
        "categories": categories
    }
507
+
508
+
509
+ # ============================================================================
510
+ # HELPER FUNCTIONS
511
+ # ============================================================================
512
+
513
def detect_intent(message: str) -> str:
    """Classify a chat message into a coarse intent by keyword matching.

    Keyword groups are checked in a fixed order and the first hit wins:
    spending, comparison, category breakdown, extraction.

    Args:
        message: The user's message text.

    Returns:
        One of ``"spending_query"``, ``"comparison"``,
        ``"category_breakdown"``, ``"extraction"`` or ``"general"``.
    """
    import re

    message_lower = message.lower()

    if any(w in message_lower for w in ['how much', 'total', 'spent', 'spending']):
        return "spending_query"

    # "vs" is matched as a whole word only; as a bare substring it would
    # also fire inside ordinary words such as "tvs".
    if (
        any(w in message_lower for w in ['compare', 'versus', 'difference'])
        or re.search(r'\bvs\b', message_lower)
    ):
        return "comparison"

    if any(w in message_lower for w in ['category', 'break', 'breakdown']):
        return "category_breakdown"

    if any(w in message_lower for w in ['extract', 'parse', 'analyze']):
        return "extraction"

    return "general"
527
+
528
+
529
def generate_response(intent: str, message: str, context: Optional[Dict]) -> str:
    """Generate chat response based on intent.

    Args:
        intent: Intent label; unknown labels get the general help text.
        message: The user's message (not used by the current replies).
        context: Optional dict; for spending queries its ``"transactions"``
            list of dicts is summed over entries whose ``type`` is
            ``"debit"``.

    Returns:
        The reply text.
    """
    if intent == "spending_query":
        if context and "transactions" in context:
            # `or 0` covers both a missing amount and an explicit None,
            # which would otherwise make sum() raise TypeError.
            total = sum(
                t.get("amount") or 0
                for t in context["transactions"]
                if t.get("type") == "debit"
            )
            return f"Based on your transactions, you've spent ₹{total:,.2f}"
        return "Please share your transaction data for spending analysis."

    elif intent == "category_breakdown":
        return "I can break down your spending by category. Please share transaction data."

    elif intent == "comparison":
        return "To compare periods, please specify the time ranges you'd like to compare."

    elif intent == "extraction":
        return "Share a bank message and I'll extract the financial details."

    else:
        return "I can help you analyze transactions, extract entities, or provide spending insights. What would you like to know?"
548
+
549
+
550
+ # ============================================================================
551
+ # MAIN
552
+ # ============================================================================
553
+
554
def start_server(host: str = "0.0.0.0", port: int = 8000):
    """Run the FastAPI app under uvicorn on the given host and port."""
    bind_options = {"host": host, "port": port}
    uvicorn.run(app, **bind_options)
557
+
558
+
559
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="FinEE API Server")
    parser.add_argument("--host", default="0.0.0.0", help="Host to bind")
    parser.add_argument("--port", type=int, default=8000, help="Port to bind")
    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")

    args = parser.parse_args()

    if args.reload:
        # Auto-reload is given an import string rather than the app object.
        # When this file is launched as a package module (python -m ...),
        # __spec__ carries its dotted module name; a plain script run has
        # no spec and keeps the original "api" module name.
        module_name = __spec__.name if __spec__ is not None else "api"
        uvicorn.run(f"{module_name}:app", host=args.host, port=args.port, reload=True)
    else:
        start_server(args.host, args.port)