datasciencesage committed on
Commit
36a85aa
·
verified ·
1 Parent(s): 935aa3d

Upload 4 files

Browse files
Files changed (4) hide show
  1. app/__init__.py +0 -0
  2. app/constants.py +134 -0
  3. app/main.py +305 -0
  4. app/models.py +89 -0
app/__init__.py ADDED
File without changes
app/constants.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Compact (value, label, group) rows; expanded below into the dict shape
# that the rest of the application consumes.
_STORE_KEY_ROWS = (
    # business
    ("business.address.pincode", "Business Pincode", "business"),
    ("business.address.state", "Business State", "business"),
    ("business.vintage_in_years", "Business Vintage In Years", "business"),
    ("business.commercial_cibil_score", "Commercial Cibil Score", "business"),
    # primary applicant
    ("primary_applicant.age", "Primary Applicant Age", "primary_applicant"),
    ("primary_applicant.monthly_income", "Primary Applicant Monthly Income", "primary_applicant"),
    ("primary_applicant.tags", "Primary Applicant Tags", "primary_applicant"),
    # bureau
    ("bureau.score", "Bureau Score", "bureau"),
    ("bureau.is_ntc", "Is New to Credit?", "bureau"),
    ("bureau.overdue_amount", "Overdue Amount", "bureau"),
    ("bureau.dpd", "DPD", "bureau"),
    ("bureau.active_accounts", "Active Accounts", "bureau"),
    ("bureau.enquiries", "Enquiries", "bureau"),
    ("bureau.suit_filed", "Suit Filed", "bureau"),
    ("bureau.wilful_default", "Wilful Default", "bureau"),
    # banking
    ("banking.abb", "ABB", "banking"),
    ("banking.avg_monthly_turnover", "Avg Monthly Turnover", "banking"),
    ("banking.total_credits", "Total Credits", "banking"),
    ("banking.total_debits", "Total Debits", "banking"),
    ("banking.inward_bounces", "Inward Bounces", "banking"),
    ("banking.outward_bounces", "Outward Bounces", "banking"),
    # gst
    ("gst.registration_age_months", "Registration Age Months", "gst"),
    ("gst.place_of_supply_count", "Place Of Supply Count", "gst"),
    ("gst.is_gstin", "Is GSTIN", "gst"),
    ("gst.filing_amount", "Filing Amount", "gst"),
    ("gst.missed_returns", "Missed Returns", "gst"),
    ("gst.monthly_turnover_avg", "Monthly Turnover Avg", "gst"),
    ("gst.turnover", "Turnover", "gst"),
    ("gst.turnover_growth_rate", "Turnover Growth Rate", "gst"),
    ("gst.output_tax_liability", "Output Tax Liability", "gst"),
    ("gst.tax_paid_cash_vs_credit_ratio", "Tax Paid Cash Vs Credit Ratio", "gst"),
    ("gst.high_risk_suppliers_count", "High Risk Suppliers Count", "gst"),
    ("gst.supplier_concentration_ratio", "Supplier Concentration Ratio", "gst"),
    ("gst.customer_concentration_ratio", "Customer Concentration Ratio", "gst"),
    # itr
    ("itr.years_filed", "Years Filed", "itr"),
    # derived metrics
    ("foir", "FOIR", "metrics"),
    ("debt_to_income", "Debt To Income", "metrics"),
)

# Catalogue of store keys: one dict per key with its dotted path ("value"),
# display label ("label") and category ("group").
SAMPLE_STORE_KEYS = [
    {"value": path, "label": title, "group": category}
    for path, title, category in _STORE_KEY_ROWS
]
40
+
41
+ # Policy documents for RAG
42
# Plain-text lending policy statements, one policy per list entry.
# Grammar in entries 2 and 4 has been corrected; the thresholds and the
# meaning of every policy are unchanged.
POLICIES = [
    "Minimum bureau score must be 600 for loan approval. Scores below 600 indicate high credit risk.",
    "Business vintage should be at least 2 years for standard loans. New businesses need additional scrutiny.",
    "Applicants with wilful default or suit filed status are automatically rejected regardless of other parameters.",
    "High overdue amount greater than 50000 rupees flags the application as high risk and requires manual review.",
    "Primary applicant age must be between 21 and 65 years. Outside this range applications are not eligible.",
    "DPD (Days Past Due) greater than 90 days indicates serious payment default and leads to rejection.",
    "Monthly income below 25000 for primary applicant is insufficient for loan approval in most cases.",
    "New to Credit (NTC) applicants require bureau score of at least 650 instead of standard 600.",
    "GST registration age should be minimum 12 months for business loan eligibility verification.",
    "Banking average monthly turnover must exceed 100000 rupees for commercial lending approval.",
]
54
+
55
# Mock applicant records keyed by dotted store-key paths. Records are
# deliberately sparse: each one only carries the keys relevant to the
# scenario it represents.
MOCK_STORE_SAMPLES = [
    # Sample 1: Clean profile - high score, no default/suit filed, nothing overdue
    {
        "bureau.score": 750,
        "business.vintage_in_years": 5,
        "primary_applicant.age": 35,
        "primary_applicant.monthly_income": 75000,
        "bureau.wilful_default": False,
        "bureau.suit_filed": False,
        "bureau.overdue_amount": 0,
        "bureau.dpd": 0,
        "primary_applicant.tags": ["regular", "salaried"],
    },
    # Sample 2: Low bureau score (550) with a moderate overdue amount
    {
        "bureau.score": 550,
        "business.vintage_in_years": 3,
        "primary_applicant.age": 40,
        "bureau.wilful_default": False,
        "bureau.overdue_amount": 10000,
    },
    # Sample 3: Wilful default - should fail
    {
        "bureau.score": 720,
        "business.vintage_in_years": 4,
        "primary_applicant.age": 45,
        "bureau.wilful_default": True,
        "bureau.overdue_amount": 5000,
    },
    # Sample 4: High overdue amount
    {
        "bureau.score": 680,
        "business.vintage_in_years": 3,
        "primary_applicant.age": 38,
        "bureau.wilful_default": False,
        "bureau.overdue_amount": 75000,
        "bureau.dpd": 120,
    },
    # Sample 5: Veteran tag with good income
    {
        "bureau.score": 710,
        "primary_applicant.age": 42,
        "primary_applicant.monthly_income": 150000,
        "primary_applicant.tags": ["veteran", "business_owner"],
        "business.vintage_in_years": 6,
    },
    # Sample 6: Edge case - minimum acceptable values
    {
        "bureau.score": 600,
        "business.vintage_in_years": 2,
        "primary_applicant.age": 25,
        "primary_applicant.monthly_income": 50000,
        "bureau.wilful_default": False,
        "bureau.overdue_amount": 0,
    },
    # Sample 7: NTC applicant
    {
        "bureau.score": 655,
        "bureau.is_ntc": True,
        "primary_applicant.age": 28,
        "business.vintage_in_years": 1.5,
    },
    # Sample 8: High DPD
    {
        "bureau.score": 640,
        "bureau.dpd": 95,
        "business.vintage_in_years": 4,
        "primary_applicant.age": 50,
    },
]
124
+
125
def get_key_by_value(value):
    """Return the SAMPLE_STORE_KEYS entry whose "value" equals *value*.

    The first matching entry (in list order) wins; ``None`` is returned
    when no entry matches.
    """
    candidates = (entry for entry in SAMPLE_STORE_KEYS if entry["value"] == value)
    return next(candidates, None)
131
+
132
def build_key_search_text(key):
    """Return the text used to represent a key for search/embeddings.

    The result is the key's label, value and group joined by single
    spaces, in that order.
    """
    label, value, group = key['label'], key['value'], key['group']
    return f"{label} {value} {group}"
app/main.py CHANGED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Depends
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from contextlib import asynccontextmanager
4
+ from typing import Dict, Any
5
+ import time
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+ from dotenv import load_dotenv
10
+ from loguru import logger
11
+
12
+ # Add parent directory to Python path (universal fix)
13
+ ROOT_DIR = Path(__file__).parent.parent
14
+ sys.path.insert(0, str(ROOT_DIR))
15
+
16
+ # Now use absolute imports
17
+ from app.models import GenerateRuleRequest, GenerateRuleResponse, ErrorResponse
18
+ from app.services.embedding_service import EmbeddingService
19
+ from app.services.key_mapper import KeyMapper
20
+ from app.services.rag_service import RAGService
21
+ from app.services.rule_service import RuleGenerationService
22
+
23
+ # Load environment variables
24
+ load_dotenv()
25
+
26
+ # Global service instances
27
+ services = {}
28
+
29
+
30
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler: build services on startup, drop them on shutdown.

    On startup the embedding service, key mapper, RAG service and rule
    generation service are constructed in that order and registered in the
    module-level ``services`` dict under the keys ``'embedding'``,
    ``'key_mapper'``, ``'rag'`` and ``'rule'``.

    Any exception raised during startup is logged with its traceback and
    re-raised so that the application does not start half-initialised.
    """
    banner = "=" * 50
    logger.info(banner)
    logger.info("Initializing JSON Logic Rule Generator API")
    logger.info(banner)

    try:
        # The key mapper and the RAG service both receive the same
        # embedding service instance, so it must be created first.
        logger.info("1. Initializing Embedding Service...")
        embedding_service = EmbeddingService()
        services['embedding'] = embedding_service

        logger.info("2. Initializing Key Mapper...")
        services['key_mapper'] = KeyMapper(embedding_service)

        logger.info("3. Initializing RAG Service...")
        services['rag'] = RAGService(embedding_service)

        logger.info("4. Initializing Rule Generation Service...")
        services['rule'] = RuleGenerationService()

        logger.success(banner)
        logger.success("All services initialized successfully!")
        logger.success("API ready to accept requests")
        logger.success(banner)

    except Exception as e:
        # logger.exception attaches the traceback to the log record, so it
        # reaches every configured sink instead of only stderr.
        logger.exception("FATAL ERROR during initialization: {}", e)
        raise

    try:
        yield
    finally:
        # Runs even when the application exits with an error, so the
        # service registry is always released.
        logger.info("Shutting down services...")
        services.clear()
75
+
76
+
77
+ # Create FastAPI app
78
# The ASGI application; startup/shutdown is delegated to `lifespan`.
app = FastAPI(
    lifespan=lifespan,
    title="JSON Logic Rule Generator API",
    description="AI-powered API for generating JSON Logic rules from natural language with RAG & embeddings",
    version="1.0.0",
)

# CORS - allow everything for now
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive setup; restrict allow_origins before exposing this publicly.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)
93
+
94
+
95
def get_services() -> Dict[str, Any]:
    """Dependency provider: return the module-level service registry.

    The same dict object is returned on every call (no copy is made).
    """
    return services
98
+
99
+
100
@app.get("/", tags=["Health"])
async def root():
    """Root endpoint"""
    # Static service descriptor: status, identity and the main entry points.
    endpoint_index = {
        "generate_rule": "/generate-rule",
        "docs": "/docs",
        "health": "/health",
    }
    descriptor = {
        "status": "online",
        "service": "JSON Logic Rule Generator API",
        "version": "1.0.0",
        "endpoints": endpoint_index,
    }
    return descriptor
113
+
114
+
115
@app.get("/health", tags=["Health"])
async def health_check(svc: Dict = Depends(get_services)):
    """Health check - shows which services are loaded"""
    # Response field name -> key under which that service is registered.
    registry_keys = {
        "embedding": "embedding",
        "key_mapper": "key_mapper",
        "rag": "rag",
        "rule_generation": "rule",
    }
    loaded = {field: key in svc for field, key in registry_keys.items()}

    return {
        # Only claim "healthy" when every service is actually registered;
        # previously the status was "healthy" unconditionally.
        "status": "healthy" if all(loaded.values()) else "degraded",
        "services": loaded,
        "models": {
            "embedding_model": os.getenv("EMBED_MODEL", "all-MiniLM-L6-v2"),
            # NOTE(review): hard-coded here; presumably mirrors the model
            # used by the rule service - confirm.
            "llm_model": "gpt-4o-mini",
        },
    }
131
+
132
+
133
@app.post(
    "/generate-rule",
    response_model=GenerateRuleResponse,
    responses={
        400: {"model": ErrorResponse},
        422: {"model": ErrorResponse},
        500: {"model": ErrorResponse},
    },
    tags=["Rule Generation"],
)
async def generate_rule(
    request: GenerateRuleRequest,
    svc: Dict = Depends(get_services),
) -> GenerateRuleResponse:
    """
    Generate JSON Logic rule from natural language

    Process:
    1. Maps user phrases to store keys (hybrid: embeddings + BM25 + RRF)
    2. Retrieves relevant policies using CRAG
    3. Generates JSON Logic with self-consistency voting
    4. Validates on mock data

    Returns valid JSON Logic + explanation + confidence score
    """
    # Raises HTTPException(400) when no key mapping is found, and
    # HTTPException(500) for any other failure inside the pipeline.
    #
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    banner = "=" * 60

    try:
        logger.info(banner)
        logger.info(f"NEW REQUEST: {request.prompt[:80]}...")
        logger.info(banner)

        # grab services
        key_mapper = svc['key_mapper']
        rag_service = svc['rag']
        rule_service = svc['rule']

        # Step 1: map keys
        logger.info("[1/4] Mapping user phrases to keys...")
        key_mappings = key_mapper.map_keys(request.prompt, top_k=5)

        if not key_mappings:
            # nothing found - suggest closest matches
            # NOTE(review): this repeats the same query with a smaller top_k;
            # presumably it is empty whenever the first call was - confirm
            # against KeyMapper.map_keys.
            all_mappings = key_mapper.map_keys(request.prompt, top_k=3)
            raise HTTPException(
                status_code=400,
                detail={
                    "error": "No matching keys found",
                    "detail": "Prompt contains terms that couldn't be mapped to available keys",
                    "suggestions": [
                        {
                            "key": m.mapped_to,
                            "similarity": m.similarity,
                            "phrase": m.user_phrase,
                        }
                        for m in all_mappings
                    ],
                },
            )

        logger.debug(f"Found {len(key_mappings)} key mappings")
        for m in key_mappings[:3]:
            logger.debug(f" - {m.mapped_to}: {m.similarity:.3f}")

        # Step 2: add extra context if provided
        if request.context_docs:
            logger.info(f"[2/4] Adding {len(request.context_docs)} context documents...")
            # NOTE(review): rag_service is shared across requests; presumably
            # these documents stay indexed for later requests - confirm.
            rag_service.add_documents(request.context_docs)

        # Step 3: get relevant policies
        logger.info("[3/4] Retrieving relevant policies (CRAG)...")
        policy_docs, policy_relevance = rag_service.retrieve_with_crag(
            request.prompt,
            top_k=2,
        )
        policy_context = rag_service.format_context(policy_docs)
        logger.debug(f"Policy relevance: {policy_relevance:.3f}")

        # Step 4: generate the rule
        logger.info("[4/4] Generating JSON Logic rule...")
        rule_result = rule_service.generate_rule(
            prompt=request.prompt,
            key_mappings=key_mappings,
            policy_context=policy_context,
            num_variants=3,
        )

        # calculate final confidence
        confidence_score = rule_service.calculate_confidence_score(
            rule_result,
            key_mappings,
            policy_relevance,
        )

        # build response
        response = GenerateRuleResponse(
            json_logic=rule_result['json_logic'],
            explanation=rule_result['explanation'],
            used_keys=rule_result['used_keys'],
            key_mappings=key_mappings,
            confidence_score=confidence_score,
        )

        elapsed = time.perf_counter() - start_time
        logger.success(banner)
        logger.success(f"SUCCESS - Generated rule in {elapsed:.2f}s")
        logger.success(f"Confidence: {confidence_score:.3f}")
        logger.success(banner)

        return response

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) pass through untouched.
        raise
    except Exception as e:
        # logger.exception records the traceback through the logger's sinks
        # instead of printing it to stderr.
        logger.exception("ERROR: {}", e)

        # NOTE(review): str(e) is returned to the client and may expose
        # internal details; consider a generic message here.
        raise HTTPException(
            status_code=500,
            detail={
                "error": "Internal server error",
                "detail": str(e),
            },
        ) from e
258
+
259
+
260
@app.get("/keys", tags=["Utilities"])
async def list_available_keys():
    """List all available keys grouped by category"""
    from app.constants import SAMPLE_STORE_KEYS

    # Bucket entries by their group; groups appear in first-seen order.
    keys_by_group = {}
    for entry in SAMPLE_STORE_KEYS:
        bucket = keys_by_group.setdefault(entry['group'], [])
        bucket.append({'value': entry['value'], 'label': entry['label']})

    return {
        "total_keys": len(SAMPLE_STORE_KEYS),
        "groups": list(keys_by_group),
        "keys_by_group": keys_by_group,
    }
281
+
282
+
283
@app.get("/policies", tags=["Utilities"])
async def list_policies():
    """List all policy documents used by RAG"""
    from app.constants import POLICIES

    # Policies are numbered from 1 in list order.
    numbered = [
        {"id": number, "text": text}
        for number, text in enumerate(POLICIES, start=1)
    ]
    return {"total_policies": len(POLICIES), "policies": numbered}
295
+
296
+
297
if __name__ == "__main__":
    import uvicorn

    # Script entry point: serve the app on localhost:8000 with auto-reload.
    server_options = {
        "host": "127.0.0.1",
        "port": 8000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run("app.main:app", **server_options)
app/models.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field, ConfigDict
2
+ from typing import List, Dict, Any, Optional
3
+
4
+
5
# Example payload shown in the generated schema for KeyMapping.
_KEY_MAPPING_EXAMPLE = {
    "user_phrase": "bureau score",
    "mapped_to": "bureau.score",
    "similarity": 0.93,
}


class KeyMapping(BaseModel):
    """Maps user phrase to actual store key"""
    # Phrase as it appeared in the user's prompt.
    user_phrase: str
    # Store key the phrase was resolved to.
    mapped_to: str
    similarity: float  # 0 to 1

    model_config = ConfigDict(json_schema_extra={"example": _KEY_MAPPING_EXAMPLE})
20
+
21
+
22
class GenerateRuleRequest(BaseModel):
    """Request for generating a rule"""
    # Natural-language rule description; 10 to 500 characters, measured
    # after surrounding whitespace has been stripped (see model_config).
    prompt: str = Field(min_length=10, max_length=500)
    # Extra policy texts supplied by the caller for this request.
    context_docs: Optional[List[str]] = Field(
        default=None,
        description="Optional additional policy documents to consider"
    )

    model_config = ConfigDict(
        # Strip leading/trailing whitespace from string inputs so that a
        # blank or whitespace-padded prompt cannot satisfy min_length.
        str_strip_whitespace=True,
        json_schema_extra={
            "example": {
                "prompt": "Approve if bureau score > 700 and business vintage at least 3 years",
                "context_docs": ["Custom policy: Minimum age 25"]
            }
        }
    )
39
+
40
+
41
# Example payload shown in the generated schema for GenerateRuleResponse.
_GENERATE_RULE_RESPONSE_EXAMPLE = {
    "json_logic": {
        "and": [
            {">": [{"var": "bureau.score"}, 700]},
            {">=": [{"var": "business.vintage_in_years"}, 3]},
        ]
    },
    "explanation": "Approves applications where bureau score exceeds 700 AND business vintage is at least 3 years.",
    "used_keys": ["bureau.score", "business.vintage_in_years"],
    "key_mappings": [
        {
            "user_phrase": "bureau score",
            "mapped_to": "bureau.score",
            "similarity": 0.93,
        }
    ],
    "confidence_score": 0.89,
}


class GenerateRuleResponse(BaseModel):
    """What we send back after generating a rule"""
    # The generated rule as a JSON Logic object.
    json_logic: Dict[str, Any]
    # Human-readable description of the rule.
    explanation: str
    # Store keys referenced by the rule.
    used_keys: List[str]
    # Phrase-to-key mappings that were used for generation.
    key_mappings: List[KeyMapping]
    confidence_score: float

    model_config = ConfigDict(
        json_schema_extra={"example": _GENERATE_RULE_RESPONSE_EXAMPLE}
    )
71
+
72
+
73
# Example payload shown in the generated schema for ErrorResponse.
_ERROR_RESPONSE_EXAMPLE = {
    "error": "No matching keys found",
    "detail": "Prompt contains fields not in our key list",
    "suggestions": [
        {"key": "bureau.score", "similarity": 0.45},
    ],
}


class ErrorResponse(BaseModel):
    """Error format"""
    # Short error title.
    error: str
    # Optional longer explanation.
    detail: Optional[str] = None
    suggestions: Optional[List[Dict[str, Any]]] = None  # suggested keys when nothing matches

    model_config = ConfigDict(json_schema_extra={"example": _ERROR_RESPONSE_EXAMPLE})