Spaces:

datasciencesage
/

JSON_Logic_Rule_Generator

Sleeping

App Files Files Community

datasciencesage committed on Dec 11, 2025

Commit

36a85aa

verified ·

1 Parent(s): 935aa3d

Upload 4 files

Browse files

Files changed (4) hide show

app/__init__.py +0 -0
app/constants.py +134 -0
app/main.py +305 -0
app/models.py +89 -0

app/__init__.py ADDED Viewed

File without changes

app/constants.py ADDED Viewed

	@@ -0,0 +1,134 @@

+# Catalogue of the store keys exposed by this service.
+# Every entry is a dict with three string fields:
+#   "value" - the key identifier (usually a dotted path such as "bureau.score")
+#   "label" - a human-readable name for the key
+#   "group" - the category the key is listed under
+SAMPLE_STORE_KEYS= [
+    {"value": "business.address.pincode", "label": "Business Pincode", "group": "business"},
+    {"value": "business.address.state", "label": "Business State", "group": "business"},
+    {"value": "business.vintage_in_years", "label": "Business Vintage In Years", "group": "business"},
+    {"value": "business.commercial_cibil_score", "label": "Commercial Cibil Score", "group": "business"},
+    {"value": "primary_applicant.age", "label": "Primary Applicant Age", "group": "primary_applicant"},
+    {"value": "primary_applicant.monthly_income", "label": "Primary Applicant Monthly Income", "group": "primary_applicant"},
+    {"value": "primary_applicant.tags", "label": "Primary Applicant Tags", "group": "primary_applicant"},
+    {"value": "bureau.score", "label": "Bureau Score", "group": "bureau"},
+    {"value": "bureau.is_ntc", "label": "Is New to Credit?", "group": "bureau"},
+    {"value": "bureau.overdue_amount", "label": "Overdue Amount", "group": "bureau"},
+    {"value": "bureau.dpd", "label": "DPD", "group": "bureau"},
+    {"value": "bureau.active_accounts", "label": "Active Accounts", "group": "bureau"},
+    {"value": "bureau.enquiries", "label": "Enquiries", "group": "bureau"},
+    {"value": "bureau.suit_filed", "label": "Suit Filed", "group": "bureau"},
+    {"value": "bureau.wilful_default", "label": "Wilful Default", "group": "bureau"},
+    {"value": "banking.abb", "label": "ABB", "group": "banking"},
+    {"value": "banking.avg_monthly_turnover", "label": "Avg Monthly Turnover", "group": "banking"},
+    {"value": "banking.total_credits", "label": "Total Credits", "group": "banking"},
+    {"value": "banking.total_debits", "label": "Total Debits", "group": "banking"},
+    {"value": "banking.inward_bounces", "label": "Inward Bounces", "group": "banking"},
+    {"value": "banking.outward_bounces", "label": "Outward Bounces", "group": "banking"},
+    {"value": "gst.registration_age_months", "label": "Registration Age Months", "group": "gst"},
+    {"value": "gst.place_of_supply_count", "label": "Place Of Supply Count", "group": "gst"},
+    {"value": "gst.is_gstin", "label": "Is GSTIN", "group": "gst"},
+    {"value": "gst.filing_amount", "label": "Filing Amount", "group": "gst"},
+    {"value": "gst.missed_returns", "label": "Missed Returns", "group": "gst"},
+    {"value": "gst.monthly_turnover_avg", "label": "Monthly Turnover Avg", "group": "gst"},
+    {"value": "gst.turnover", "label": "Turnover", "group": "gst"},
+    {"value": "gst.turnover_growth_rate", "label": "Turnover Growth Rate", "group": "gst"},
+    {"value": "gst.output_tax_liability", "label": "Output Tax Liability", "group": "gst"},
+    {"value": "gst.tax_paid_cash_vs_credit_ratio", "label": "Tax Paid Cash Vs Credit Ratio", "group": "gst"},
+    {"value": "gst.high_risk_suppliers_count", "label": "High Risk Suppliers Count", "group": "gst"},
+    {"value": "gst.supplier_concentration_ratio", "label": "Supplier Concentration Ratio", "group": "gst"},
+    {"value": "gst.customer_concentration_ratio", "label": "Customer Concentration Ratio", "group": "gst"},
+    {"value": "itr.years_filed", "label": "Years Filed", "group": "itr"},
+    {"value": "foir", "label": "FOIR", "group": "metrics"},
+    {"value": "debt_to_income", "label": "Debt To Income", "group": "metrics"},
+]
+# Policy documents for RAG.
+# Each item is one free-text lending policy statement; the list order is the
+# order in which the /policies endpoint numbers them (ids start at 1).
+POLICIES = [
+    "Minimum bureau score must be 600 for loan approval. Scores below 600 indicate high credit risk.",
+    "Business vintage should be at least 2 years for standard loans. New businesses needs additional scrutiny.",
+    "Applicants with wilful default or suit filed status are automatically rejected regardless of other parameters.",
+    "High overdue amount greater than 50000 rupees flags application high risk and requires manual review.",
+    "Primary applicant age must be between 21 and 65 years. Outside this range applications are not eligible.",
+    "DPD (Days Past Due) greater than 90 days indicates serious payment default and leads to rejection.",
+    "Monthly income below 25000 for primary applicant is insufficient for loan approval in most cases.",
+    "New to Credit (NTC) applicants require bureau score of at least 650 instead of standard 600.",
+    "GST registration age should be minimum 12 months for business loan eligibility verification.",
+    "Banking average monthly turnover must exceed 100000 rupees for commercial lending approval.",
+]
+# Mock applicant records, each a flat dict from store-key identifier to value.
+# A sample only sets a subset of the keys in SAMPLE_STORE_KEYS.
+# NOTE(review): presumably these are the records generated rules are validated
+# against - confirm in the rule service.
+MOCK_STORE_SAMPLES = [
+    # Sample 1: Every listed value is within the thresholds stated in POLICIES
+    {
+        "bureau.score": 750,
+        "business.vintage_in_years": 5,
+        "primary_applicant.age": 35,
+        "primary_applicant.monthly_income": 75000,
+        "bureau.wilful_default": False,
+        "bureau.suit_filed": False,
+        "bureau.overdue_amount": 0,
+        "bureau.dpd": 0,
+        "primary_applicant.tags": ["regular", "salaried"],
+    },
+    # Sample 2: Bureau score below the 600 minimum stated in POLICIES
+    {
+        "bureau.score": 550,
+        "business.vintage_in_years": 3,
+        "primary_applicant.age": 40,
+        "bureau.wilful_default": False,
+        "bureau.overdue_amount": 10000,
+    },
+    # Sample 3: Wilful default - should fail
+    {
+        "bureau.score": 720,
+        "business.vintage_in_years": 4,
+        "primary_applicant.age": 45,
+        "bureau.wilful_default": True,
+        "bureau.overdue_amount": 5000,
+    },
+    # Sample 4: High overdue amount
+    {
+        "bureau.score": 680,
+        "business.vintage_in_years": 3,
+        "primary_applicant.age": 38,
+        "bureau.wilful_default": False,
+        "bureau.overdue_amount": 75000,
+        "bureau.dpd": 120,
+    },
+    # Sample 5: Veteran tag with good income
+    {
+        "bureau.score": 710,
+        "primary_applicant.age": 42,
+        "primary_applicant.monthly_income": 150000,
+        "primary_applicant.tags": ["veteran", "business_owner"],
+        "business.vintage_in_years": 6,
+    },
+    # Sample 6: Edge case - minimum acceptable values
+    {
+        "bureau.score": 600,
+        "business.vintage_in_years": 2,
+        "primary_applicant.age": 25,
+        "primary_applicant.monthly_income": 50000,
+        "bureau.wilful_default": False,
+        "bureau.overdue_amount": 0,
+    },
+    # Sample 7: NTC applicant
+    {
+        "bureau.score": 655,
+        "bureau.is_ntc": True,
+        "primary_applicant.age": 28,
+        "business.vintage_in_years": 1.5,
+    },
+    # Sample 8: High DPD
+    {
+        "bureau.score": 640,
+        "bureau.dpd": 95,
+        "business.vintage_in_years": 4,
+        "primary_applicant.age": 50,
+    },
+]
+def get_key_by_value(value):
+    """Return the first SAMPLE_STORE_KEYS entry whose "value" equals *value*, or None."""
+    matching_entries = (
+        entry for entry in SAMPLE_STORE_KEYS if entry["value"] == value
+    )
+    # next() with a default yields None when nothing matched
+    return next(matching_entries, None)
+def build_key_search_text(key):
+    """Combine a key's label, value and group into one space-separated string (used in embeddings)."""
+    search_text = f"{key['label']} {key['value']} {key['group']}"
+    return search_text

app/main.py CHANGED Viewed

	@@ -0,0 +1,305 @@

+from fastapi import FastAPI, HTTPException, Depends
+from fastapi.middleware.cors import CORSMiddleware
+from contextlib import asynccontextmanager
+from typing import Dict, Any
+import time
+import os
+import sys
+from pathlib import Path
+from dotenv import load_dotenv
+from loguru import logger
+# Add parent directory to Python path (universal fix).
+# ROOT_DIR is the parent of the directory holding this file; putting it first on
+# sys.path must happen BEFORE the `app.*` imports below so they can resolve.
+ROOT_DIR = Path(__file__).parent.parent
+sys.path.insert(0, str(ROOT_DIR))
+# Now use absolute imports
+from app.models import GenerateRuleRequest, GenerateRuleResponse, ErrorResponse
+from app.services.embedding_service import EmbeddingService
+from app.services.key_mapper import KeyMapper
+from app.services.rag_service import RAGService
+from app.services.rule_service import RuleGenerationService
+# Load environment variables
+load_dotenv()
+# Global service instances.
+# Filled by `lifespan` at startup under the keys 'embedding', 'key_mapper',
+# 'rag' and 'rule', and emptied again at shutdown.
+services = {}
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Startup and shutdown handler.
+    Startup builds the four services in order and registers them in the
+    module-level `services` dict under 'embedding', 'key_mapper', 'rag' and
+    'rule'. The key mapper and the RAG service both receive the embedding
+    service instance. Shutdown empties the registry.
+    Raises:
+        Exception: any error raised while constructing a service is logged
+            with its traceback and re-raised, so the application does not start.
+    """
+    # Startup - load all services
+    logger.info("="*50)
+    logger.info("Initializing JSON Logic Rule Generator API")
+    logger.info("="*50)
+    try:
+        # Initialize embedding service
+        logger.info("1. Initializing Embedding Service...")
+        embedding_service = EmbeddingService()
+        services['embedding'] = embedding_service
+        # Initialize key mapper
+        logger.info("2. Initializing Key Mapper...")
+        services['key_mapper'] = KeyMapper(embedding_service)
+        # Initialize RAG service
+        logger.info("3. Initializing RAG Service...")
+        services['rag'] = RAGService(embedding_service)
+        # Initialize rule service
+        logger.info("4. Initializing Rule Generation Service...")
+        services['rule'] = RuleGenerationService()
+        logger.success("="*50)
+        logger.success("All services initialized successfully!")
+        logger.success("API ready to accept requests")
+        logger.success("="*50)
+    except Exception as e:
+        # logger.exception sends the traceback through the configured logger
+        # instead of printing it separately to stderr
+        logger.exception(f"FATAL ERROR during initialization: {str(e)}")
+        raise
+    try:
+        yield
+    finally:
+        # Shutdown - in a finally block so the registry is emptied even when
+        # the context is exited because of an error or cancellation
+        logger.info("Shutting down services...")
+        services.clear()
+# Create FastAPI app
+app = FastAPI(
+    title="JSON Logic Rule Generator API",
+    description="AI-powered API for generating JSON Logic rules from natural language with RAG & embeddings",
+    version="1.0.0",
+    lifespan=lifespan
+)
+# CORS - every origin is allowed unless the CORS_ALLOW_ORIGINS environment
+# variable (comma-separated list of origins) narrows it down.
+_cors_origins = [
+    origin.strip()
+    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
+    if origin.strip()
+] or ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=_cors_origins,
+    # Credentialed requests are only enabled for an explicit origin list:
+    # combining credentials with the "*" wildcard would let any website make
+    # credentialed calls to this API.
+    allow_credentials="*" not in _cors_origins,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def get_services():
+    """Dependency provider: hand back the module-level `services` registry."""
+    registry = services
+    return registry
+@app.get("/", tags=["Health"])
+async def root():
+    """Describe the service: status, name, version and the main endpoint paths."""
+    endpoint_paths = {
+        "generate_rule": "/generate-rule",
+        "docs": "/docs",
+        "health": "/health"
+    }
+    service_info = {
+        "status": "online",
+        "service": "JSON Logic Rule Generator API",
+        "version": "1.0.0",
+        "endpoints": endpoint_paths
+    }
+    return service_info
+@app.get("/health", tags=["Health"])
+async def health_check(svc: Dict = Depends(get_services)):
+    """Health check - shows which services are loaded.
+    Args:
+        svc: the service registry injected through `get_services`.
+    Returns:
+        A dict with an overall "status", a per-service loaded flag under
+        "services", and the model names under "models". "status" is "healthy"
+        only when all four services are registered, otherwise "degraded".
+    """
+    loaded = {
+        "embedding": "embedding" in svc,
+        "key_mapper": "key_mapper" in svc,
+        "rag": "rag" in svc,
+        "rule_generation": "rule" in svc
+    }
+    return {
+        # Do not claim "healthy" while a service is missing from the registry
+        "status": "healthy" if all(loaded.values()) else "degraded",
+        "services": loaded,
+        "models": {
+            "embedding_model": os.getenv("EMBED_MODEL", "all-MiniLM-L6-v2"),
+            "llm_model": "gpt-4o-mini"
+        }
+    }
+@app.post(
+    "/generate-rule",
+    response_model=GenerateRuleResponse,
+    responses={
+        400: {"model": ErrorResponse},
+        422: {"model": ErrorResponse},
+        500: {"model": ErrorResponse}
+    },
+    tags=["Rule Generation"]
+)
+async def generate_rule(
+    request: GenerateRuleRequest,
+    svc: Dict = Depends(get_services)
+) -> GenerateRuleResponse:
+    """
+    Generate JSON Logic rule from natural language
+    Process:
+    1. Maps user phrases to store keys (hybrid: embeddings + BM25 + RRF)
+    2. Retrieves relevant policies using CRAG
+    3. Generates JSON Logic with self-consistency voting
+    4. Validates on mock data
+    Returns valid JSON Logic + explanation + confidence score
+    Args:
+        request: validated request body (prompt plus optional context documents).
+        svc: the service registry injected through `get_services`; the
+            'key_mapper', 'rag' and 'rule' entries are used.
+    Raises:
+        HTTPException: 400 when no key mapping is found for the prompt;
+            500 (chained to the original error) for any other failure.
+    """
+    start_time = time.time()
+    try:
+        logger.info("="*60)
+        logger.info(f"NEW REQUEST: {request.prompt[:80]}...")
+        logger.info("="*60)
+        # grab services
+        key_mapper = svc['key_mapper']
+        rag_service = svc['rag']
+        rule_service = svc['rule']
+        # Step 1: map keys
+        logger.info("[1/4] Mapping user phrases to keys...")
+        key_mappings = key_mapper.map_keys(request.prompt, top_k=5)
+        if not key_mappings:
+            # nothing found - suggest closest matches
+            # NOTE(review): this repeats the same lookup with a smaller top_k;
+            # if map_keys is deterministic it will presumably be empty again,
+            # leaving "suggestions" empty - confirm against KeyMapper.
+            all_mappings = key_mapper.map_keys(request.prompt, top_k=3)
+            raise HTTPException(
+                status_code=400,
+                detail={
+                    "error": "No matching keys found",
+                    "detail": "Prompt contains terms that couldn't be mapped to available keys",
+                    "suggestions": [
+                        {
+                            "key": m.mapped_to,
+                            "similarity": m.similarity,
+                            "phrase": m.user_phrase
+                        }
+                        for m in all_mappings
+                    ]
+                }
+            )
+        logger.debug(f"Found {len(key_mappings)} key mappings")
+        for m in key_mappings[:3]:
+            logger.debug(f"  - {m.mapped_to}: {m.similarity:.3f}")
+        # Step 2: add extra context if provided
+        if request.context_docs:
+            logger.info(f"[2/4] Adding {len(request.context_docs)} context documents...")
+            # NOTE(review): rag_service is the shared instance from the registry;
+            # if add_documents stores the documents they would stay visible to
+            # later requests - confirm against RAGService.
+            rag_service.add_documents(request.context_docs)
+        # Step 3: get relevant policies
+        logger.info("[3/4] Retrieving relevant policies (CRAG)...")
+        policy_docs, policy_relevance = rag_service.retrieve_with_crag(
+            request.prompt,
+            top_k=2
+        )
+        policy_context = rag_service.format_context(policy_docs)
+        logger.debug(f"Policy relevance: {policy_relevance:.3f}")
+        # Step 4: generate the rule
+        logger.info("[4/4] Generating JSON Logic rule...")
+        rule_result = rule_service.generate_rule(
+            prompt=request.prompt,
+            key_mappings=key_mappings,
+            policy_context=policy_context,
+            num_variants=3
+        )
+        # calculate final confidence
+        confidence_score = rule_service.calculate_confidence_score(
+            rule_result,
+            key_mappings,
+            policy_relevance
+        )
+        # build response
+        response = GenerateRuleResponse(
+            json_logic=rule_result['json_logic'],
+            explanation=rule_result['explanation'],
+            used_keys=rule_result['used_keys'],
+            key_mappings=key_mappings,
+            confidence_score=confidence_score
+        )
+        elapsed = time.time() - start_time
+        logger.success("="*60)
+        logger.success(f"SUCCESS - Generated rule in {elapsed:.2f}s")
+        logger.success(f"Confidence: {confidence_score:.3f}")
+        logger.success("="*60)
+        return response
+    except HTTPException:
+        # deliberate HTTP errors (e.g. the 400 above) pass through untouched
+        raise
+    except Exception as e:
+        # logger.exception records the traceback through the configured logger
+        logger.exception(f"ERROR: {str(e)}")
+        # chain the cause so the original error stays attached to the 500
+        raise HTTPException(
+            status_code=500,
+            detail={
+                "error": "Internal server error",
+                "detail": str(e)
+            }
+        ) from e
+@app.get("/keys", tags=["Utilities"])
+async def list_available_keys():
+    """Return every store key, bucketed by its group, plus the total count and group names."""
+    from app.constants import SAMPLE_STORE_KEYS
+    keys_by_group = {}
+    for entry in SAMPLE_STORE_KEYS:
+        # setdefault creates the group's bucket the first time the group is seen
+        bucket = keys_by_group.setdefault(entry['group'], [])
+        bucket.append({'value': entry['value'], 'label': entry['label']})
+    return {
+        "total_keys": len(SAMPLE_STORE_KEYS),
+        "groups": list(keys_by_group),
+        "keys_by_group": keys_by_group
+    }
+@app.get("/policies", tags=["Utilities"])
+async def list_policies():
+    """Return the RAG policy documents, each numbered from 1, together with their count."""
+    from app.constants import POLICIES
+    numbered_policies = []
+    for number, text in enumerate(POLICIES, start=1):
+        numbered_policies.append({"id": number, "text": text})
+    return {
+        "total_policies": len(POLICIES),
+        "policies": numbered_policies
+    }
+if __name__ == "__main__":
+    # Script entry point: serve the app on 127.0.0.1:8000 with reload enabled.
+    import uvicorn
+    server_options = {
+        "host": "127.0.0.1",
+        "port": 8000,
+        "reload": True,
+        "log_level": "info",
+    }
+    uvicorn.run("app.main:app", **server_options)

app/models.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from pydantic import BaseModel, Field, ConfigDict
+from typing import List, Dict, Any, Optional
+class KeyMapping(BaseModel):
+    """A single match between a user phrase and a store key."""
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "user_phrase": "bureau score",
+                "mapped_to": "bureau.score",
+                "similarity": 0.93
+            }
+        }
+    )
+    # phrase as the user wrote it
+    user_phrase: str
+    # store key the phrase was matched to
+    mapped_to: str
+    # match score, documented as 0 to 1 (the range is not validated here)
+    similarity: float
+class GenerateRuleRequest(BaseModel):
+    """Body of a rule-generation request."""
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "prompt": "Approve if bureau score > 700 and business vintage at least 3 years",
+                "context_docs": ["Custom policy: Minimum age 25"]
+            }
+        }
+    )
+    # required; must be between 10 and 500 characters long
+    prompt: str = Field(..., min_length=10, max_length=500)
+    # optional; defaults to None when the caller sends no extra documents
+    context_docs: Optional[List[str]] = Field(
+        None,
+        description="Optional additional policy documents to consider"
+    )
+class GenerateRuleResponse(BaseModel):
+    """Payload returned once a rule has been generated."""
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "json_logic": {
+                    "and": [
+                        {">": [{"var": "bureau.score"}, 700]},
+                        {">=": [{"var": "business.vintage_in_years"}, 3]}
+                    ]
+                },
+                "explanation": "Approves applications where bureau score exceeds 700 AND business vintage is at least 3 years.",
+                "used_keys": ["bureau.score", "business.vintage_in_years"],
+                "key_mappings": [
+                    {
+                        "user_phrase": "bureau score",
+                        "mapped_to": "bureau.score",
+                        "similarity": 0.93
+                    }
+                ],
+                "confidence_score": 0.89
+            }
+        }
+    )
+    # the generated rule as a JSON Logic object
+    json_logic: Dict[str, Any]
+    # plain-language description of the rule
+    explanation: str
+    # store keys the rule refers to
+    used_keys: List[str]
+    # phrase-to-key matches found for the prompt
+    key_mappings: List[KeyMapping]
+    # overall confidence in the result (no range is validated here)
+    confidence_score: float
+class ErrorResponse(BaseModel):
+    """Shape of an error payload."""
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "error": "No matching keys found",
+                "detail": "Prompt contains fields not in our key list",
+                "suggestions": [
+                    {"key": "bureau.score", "similarity": 0.45}
+                ]
+            }
+        }
+    )
+    # short error title
+    error: str
+    # optional longer description
+    detail: Optional[str] = None
+    # suggested keys when nothing matches
+    suggestions: Optional[List[Dict[str, Any]]] = None