File size: 7,365 Bytes
64e5ee2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import io
import logging
import os

from fastapi import FastAPI, File, UploadFile, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
import pandas as pd
import uvicorn

from reconciliation import ReconciliationEngine
from anomaly import AnomalyDetector
from llm_explainer import LLMExplainer
from fraud_graph import FraudGraph
from gst_api import GSTGatewayMock

app = FastAPI()

# NOTE(review): wildcard origins combined with allow_credentials=True is the
# most permissive CORS setup possible; confirm this is intended for the
# deployment and not just a development convenience.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize engines
try:
    recon_engine = ReconciliationEngine(threshold=85.0)
except Exception:
    # Keep the module importable when the engine cannot be built;
    # process_data() checks for None and reports the failure to the client.
    # Log the traceback so the root cause is not lost.
    logging.getLogger(__name__).exception(
        "ReconciliationEngine failed to initialize"
    )
    recon_engine = None

anomaly_detector = AnomalyDetector(contamination=0.05)
llm_explainer = LLMExplainer()
fraud_graph = FraudGraph()
gst_api = GSTGatewayMock()

@app.post("/api/reconcile")
async def api_reconcile(books: UploadFile = File(...), gst: UploadFile = File(...)):
    """Reconcile an uploaded books CSV against an uploaded GST CSV.

    Both uploads are read fully into memory and parsed with pandas. If either
    one cannot be parsed, a dict with a single ``error`` key is returned;
    otherwise the result of ``process_data`` is returned.
    """
    raw_books = await books.read()
    raw_gst = await gst.read()

    try:
        # Parse in upload order: books first, then GST.
        parsed = [pd.read_csv(io.BytesIO(raw)) for raw in (raw_books, raw_gst)]
    except Exception:
        return {"error": "Invalid CSV format. Please ensure you are uploading valid CSV files, not PDFs or Excel documents."}

    books_df, gst_df = parsed
    return process_data(books_df, gst_df)

@app.post("/api/explain")
async def api_explain(request: Request):
    """Return an LLM-generated explanation for one reconciliation row.

    The request body must be a JSON object. Recognised keys, each optional:
    ``row`` (default ``{}``), ``match_status`` (default ``"Anomaly"``),
    ``b_vendor`` / ``g_vendor`` (default ``"N/A"``) and ``b_amount`` /
    ``g_amount`` (default ``0``).

    Returns ``{"explanation": ...}`` on success. A body that is not valid
    JSON, or is valid JSON but not an object, yields a 400 response with an
    ``error`` key instead of an unhandled exception.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from ValueError.
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be valid JSON."},
        )

    if not isinstance(data, dict):
        # e.g. a JSON list or string: there are no keys to read from it.
        return JSONResponse(
            status_code=400,
            content={"error": "Request body must be a JSON object."},
        )

    row = data.get("row", {})
    match_status = data.get("match_status", "Anomaly")
    b_vendor = data.get("b_vendor", "N/A")
    g_vendor = data.get("g_vendor", "N/A")
    b_amount = data.get("b_amount", 0)
    g_amount = data.get("g_amount", 0)

    explanation = llm_explainer.explain_discrepancy(
        row, match_status, b_vendor, g_vendor, b_amount, g_amount
    )
    return {"explanation": explanation}

@app.post("/api/fetch_live")
async def api_fetch_live(books: UploadFile = File(...)):
    """Reconcile an uploaded books CSV against data from the GST gateway.

    The books upload is parsed with pandas; the GST side comes from
    ``gst_api.fetch_gst_data``. If the upload cannot be parsed as CSV, a dict
    with a single ``error`` key is returned (same payload as
    ``/api/reconcile``); otherwise the result of ``process_data`` is returned.
    """
    books_content = await books.read()

    try:
        books_df = pd.read_csv(io.BytesIO(books_content))
    except Exception:
        # Mirror /api/reconcile so a bad upload produces the same error
        # payload on both endpoints instead of an unhandled 500 here.
        return {"error": "Invalid CSV format. Please ensure you are uploading valid CSV files, not PDFs or Excel documents."}

    # NOTE(review): the date range and the GSTIN-looking identifier are
    # hard-coded; confirm whether they should come from the request.
    gst_df = gst_api.fetch_gst_data("2023-01-01", "2023-12-31", "27AADCB2230M1Z2")
    return process_data(books_df, gst_df)

def _anomaly_mask(merged_df):
    """Return a boolean Series marking the rows flagged as anomalies.

    The anomaly columns are only merged in when an ``InvoiceID`` join is
    possible, so ``IsAnomaly`` may be absent; in that case an all-False mask
    is returned instead of raising ``KeyError``.
    """
    if 'IsAnomaly' not in merged_df.columns:
        return pd.Series(False, index=merged_df.index, dtype=bool)
    # Element-wise comparison; NaN (rows without a detector result) -> False.
    return merged_df['IsAnomaly'].eq(True)


def _monthly_chart_data(merged_df, anomaly_mask):
    """Build the 12-slot per-month chart series (index 0 is January).

    Adds an integer ``Month`` column to ``merged_df`` in place when an
    ``InvoiceDate`` or ``InvoiceDate_books`` column exists; without one, all
    series stay zero-filled.

    Returns a ``(recon_trend, discrep_trend, anomaly_dist)`` tuple.
    """
    recon_trend = [0] * 12
    discrep_trend = [0] * 12
    anomaly_dist = {"critical": [0] * 12, "high": [0] * 12, "medium": [0] * 12}

    date_col = next(
        (col for col in ('InvoiceDate', 'InvoiceDate_books') if col in merged_df.columns),
        None,
    )
    if date_col is None:
        return recon_trend, discrep_trend, anomaly_dist

    # Unparseable dates become NaT and are bucketed into January (month 1).
    months = pd.to_datetime(merged_df[date_col], errors='coerce').dt.month
    merged_df['Month'] = months.fillna(1).astype(int)

    is_exact = merged_df['MatchStatus'] == 'Exact Match'
    for trend, subset in (
        (recon_trend, merged_df[is_exact]),
        (discrep_trend, merged_df[~is_exact]),
    ):
        for month, count in subset.groupby('Month').size().items():
            if 1 <= month <= 12:
                trend[month - 1] = int(count)

    for _, row in merged_df[anomaly_mask].iterrows():
        month = int(row.get('Month', 1))
        if not 1 <= month <= 12:
            continue
        # NOTE(review): the 0.3 / 0.1 cut-offs depend on the score scale
        # produced by AnomalyDetector; confirm against that module.
        score = row.get('AnomalyScore', 0)
        if score > 0.3:
            severity = "critical"
        elif score > 0.1:
            severity = "high"
        else:
            severity = "medium"
        anomaly_dist[severity][month - 1] += 1

    return recon_trend, discrep_trend, anomaly_dist


def _fraud_network(merged_df):
    """Run the fraud-graph analysis and shape its output for the UI.

    Returns a ``(cycles, fraud_nodes, fraud_edges, overall_risk_score)``
    tuple. Any failure is logged and replaced by empty lists and a score of
    0.0, so a graph problem never fails the whole reconciliation response.
    """
    try:
        fraud_graph.build_graph(
            merged_df,
            source_col='VendorName_books',
            target_col='VendorName_gst',
            amount_col='Amount_books',
        )
        cycles = fraud_graph.detect_cycles()
        risk_scores = fraud_graph.analyze_risk_nodes()

        # Map each node to its position once instead of calling
        # list(...).index() twice per edge.
        nodes = list(fraud_graph.graph.nodes())
        position = {node: idx for idx, node in enumerate(nodes)}

        fraud_nodes = [
            {
                "id": str(node),
                "label": str(node),
                "size": 15,
                "color": "#64748b",
                "risk_score": risk_scores.get(node, 0.0),
            }
            for node in nodes
        ]
        # Edges reference nodes by position in the node list, not by "id".
        fraud_edges = [
            {"from": position[u], "to": position[v], "weight": d.get('weight', 0)}
            for u, v, d in fraud_graph.graph.edges(data=True)
        ]
        max_risk = max(risk_scores.values()) if risk_scores else 0.0
        overall_risk_score = min(10.0, max_risk * 100)  # Arbitrary scale to 0-10
    except Exception:
        logging.getLogger(__name__).exception("Fraud graph analysis failed")
        return [], [], [], 0.0

    return cycles, fraud_nodes, fraud_edges, overall_risk_score


def process_data(books_df, gst_df):
    """Reconcile books against GST data and assemble the dashboard payload.

    Args:
        books_df: DataFrame of book entries; passed to the reconciliation
            engine and to the anomaly detector (amount column ``Amount``).
        gst_df: DataFrame of GST entries; passed to the reconciliation engine.

    Returns:
        ``{"error": ...}`` when the engine is unavailable or reconciliation
        raises. Otherwise a dict with the keys ``summary``, ``charts``,
        ``fraud_network``, ``faiss_stats``, ``reconciliation`` and
        ``anomalies``; the last two are truncated to 50 records each.
    """
    if recon_engine is None:
        return {"error": "Reconciliation engine failed to initialize"}

    try:
        merged_df = recon_engine.reconcile(books_df, gst_df)
    except Exception as e:
        logging.getLogger(__name__).exception("Reconciliation failed")
        return {"error": f"Reconciliation failed: {str(e)}"}

    books_with_anomalies = anomaly_detector.detect_anomalies(books_df, amount_col='Amount')

    # Attach anomaly flags only when the join key and both result columns exist.
    anomaly_cols = ['InvoiceID', 'IsAnomaly', 'AnomalyScore']
    if 'InvoiceID' in merged_df.columns and all(
        col in books_with_anomalies.columns for col in anomaly_cols
    ):
        merged_df = pd.merge(
            merged_df, books_with_anomalies[anomaly_cols], on='InvoiceID', how='left'
        )

    anomaly_mask = _anomaly_mask(merged_df)
    status = merged_df['MatchStatus']

    # Serialise the records before the chart step adds its 'Month' column,
    # so the rows sent to the UI keep their original set of keys.
    recon_results = merged_df.fillna("").infer_objects(copy=False).to_dict(orient="records")
    anomalies = (
        merged_df[anomaly_mask].fillna("").infer_objects(copy=False).to_dict(orient="records")
    )

    # Compute chart data
    recon_trend, discrep_trend, anomaly_dist = _monthly_chart_data(merged_df, anomaly_mask)

    # Run Fraud Graph Analysis
    cycles, fraud_nodes, fraud_edges, overall_risk_score = _fraud_network(merged_df)

    # Get FAISS Stats
    try:
        ntotal = recon_engine.index.ntotal if recon_engine and recon_engine.index else 0
        # NOTE(review): presumably 384-dim float32 vectors (4 bytes each);
        # confirm against the embedding model used by the engine.
        mem_mb = round(ntotal * 384 * 4 / (1024 * 1024), 2)
    except Exception:
        ntotal = 0
        mem_mb = 0

    unmatched_mask = (
        status.str.contains('Mismatch', na=False) | status.str.contains('Missing', na=False)
    )

    return {
        "summary": {
            "total_books": len(books_df),
            "total_gst": len(gst_df),
            "exact": int((status == 'Exact Match').sum()),
            "fuzzy": int(status.str.contains('Fuzzy', na=False).sum()),
            "semantic": int(status.str.contains('Semantic', na=False).sum()),
            "discrepancies": int((status != 'Exact Match').sum()),
            "unmatched": int(unmatched_mask.sum()),
            "anomalies": len(anomalies),
            "fraud_rings": len(cycles),
            "overall_risk_score": overall_risk_score
        },
        "charts": {
            "recon_trend": recon_trend,
            "discrep_trend": discrep_trend,
            "anomaly_dist": anomaly_dist
        },
        "fraud_network": {
            "nodes": fraud_nodes,
            "edges": fraud_edges,
            "cycles": cycles
        },
        "faiss_stats": {
            "ntotal": ntotal,
            "memory_mb": mem_mb
        },
        "reconciliation": recon_results[:50],  # Limit payload for UI
        "anomalies": anomalies[:50]
    }

# Serve the frontend files
# The mount sits at "/" and is registered after the /api/* routes above.
# NOTE(review): presumably it must stay last so this catch-all does not
# shadow the API routes - confirm before reordering.
# NOTE(review): directory="." serves the process's working directory, which
# may include this source file and other non-frontend files; consider a
# dedicated static folder.
app.mount("/", StaticFiles(directory=".", html=True), name="static")

# When run as a script, start uvicorn on all interfaces at port 7860.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)