File size: 7,365 Bytes
64e5ee2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 | from fastapi import FastAPI, File, UploadFile, Request
import io
import logging
import os

import pandas as pd
import uvicorn
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles

from reconciliation import ReconciliationEngine
from anomaly import AnomalyDetector
from llm_explainer import LLMExplainer
from fraud_graph import FraudGraph
from gst_api import GSTGatewayMock
app = FastAPI()

# Cross-origin policy applied to every route: any origin, method and header
# is accepted, and credentialed requests are allowed.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
_cors_options = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_cors_options)
# Initialize engines
try:
    recon_engine = ReconciliationEngine(threshold=85.0)
except Exception:
    # Keep the application importable when the engine cannot start;
    # process_data() checks for None and answers with an error payload.
    # Log the traceback so the root cause is not lost.
    logging.getLogger(__name__).exception(
        "ReconciliationEngine failed to initialize"
    )
    recon_engine = None
anomaly_detector = AnomalyDetector(contamination=0.05)
llm_explainer = LLMExplainer()
fraud_graph = FraudGraph()
gst_api = GSTGatewayMock()
@app.post("/api/reconcile")
async def api_reconcile(books: UploadFile = File(...), gst: UploadFile = File(...)):
    """Reconcile an uploaded books CSV against an uploaded GST CSV.

    Both uploads are read fully into memory and parsed with pandas. If
    either file cannot be parsed, an ``{"error": ...}`` payload is returned
    instead of the reconciliation result.

    Args:
        books: Uploaded CSV with the books records.
        gst: Uploaded CSV with the GST records.

    Returns:
        Whatever ``process_data`` returns for the two parsed frames, or an
        error dict when parsing fails.
    """
    raw_books = await books.read()
    raw_gst = await gst.read()
    try:
        # Unpacking drives the generator inside the try, books first.
        books_df, gst_df = (
            pd.read_csv(io.BytesIO(blob)) for blob in (raw_books, raw_gst)
        )
    except Exception:
        return {"error": "Invalid CSV format. Please ensure you are uploading valid CSV files, not PDFs or Excel documents."}
    return process_data(books_df, gst_df)
@app.post("/api/explain")
async def api_explain(request: Request):
    """Return an LLM-generated explanation for one reconciliation row.

    The JSON body may contain ``row``, ``match_status``, ``b_vendor``,
    ``g_vendor``, ``b_amount`` and ``g_amount``; missing keys fall back to
    the defaults used below.

    Args:
        request: Incoming request whose body is a JSON object.

    Returns:
        ``{"explanation": ...}`` on success, or ``{"error": ...}`` when the
        body is not valid JSON or is not a JSON object.
    """
    try:
        data = await request.json()
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; report them instead of failing with a server error.
        return {"error": "Invalid request: body must be valid JSON."}
    if not isinstance(data, dict):
        # Lists, strings, numbers etc. have no .get(); reject them early.
        return {"error": "Invalid request: JSON body must be an object."}

    explanation = llm_explainer.explain_discrepancy(
        data.get("row", {}),
        data.get("match_status", "Anomaly"),
        data.get("b_vendor", "N/A"),
        data.get("g_vendor", "N/A"),
        data.get("b_amount", 0),
        data.get("g_amount", 0),
    )
    return {"explanation": explanation}
@app.post("/api/fetch_live")
async def api_fetch_live(books: UploadFile = File(...)):
    """Reconcile an uploaded books CSV against data from the GST gateway.

    Args:
        books: Uploaded CSV with the books records.

    Returns:
        Whatever ``process_data`` returns for the parsed books frame and the
        fetched GST frame, or an ``{"error": ...}`` payload when the upload
        cannot be parsed as CSV.
    """
    books_content = await books.read()
    try:
        books_df = pd.read_csv(io.BytesIO(books_content))
    except Exception:
        # Same response as /api/reconcile gives for an unparsable upload,
        # rather than letting the parser error escape the handler.
        return {"error": "Invalid CSV format. Please ensure you are uploading valid CSV files, not PDFs or Excel documents."}
    # Fixed arguments for the mock gateway.
    # NOTE(review): presumably start date, end date and GSTIN — confirm
    # against GSTGatewayMock.fetch_gst_data.
    gst_df = gst_api.fetch_gst_data("2023-01-01", "2023-12-31", "27AADCB2230M1Z2")
    return process_data(books_df, gst_df)
def _to_records(frame):
    """Convert *frame* to a list of row dicts with missing values replaced by ""."""
    return frame.fillna("").infer_objects(copy=False).to_dict(orient="records")


def _anomalous_rows(frame):
    """Return the rows of *frame* whose 'IsAnomaly' value is True.

    When the 'IsAnomaly' column is absent (the anomaly merge is skipped if
    'InvoiceID' is missing) an empty frame with the same columns is returned
    instead of raising KeyError.
    """
    if 'IsAnomaly' not in frame.columns:
        return frame.iloc[0:0]
    # Compare with `== True` rather than truthiness so that NaN left behind
    # by the left merge counts as "not an anomaly".
    return frame[frame['IsAnomaly'] == True]  # noqa: E712


def process_data(books_df, gst_df):
    """Reconcile books against GST data and build the dashboard payload.

    Steps: run the reconciliation engine, flag anomalies on the books
    'Amount' column, aggregate per-month chart series, run the fraud-graph
    analysis and collect FAISS index statistics.

    Args:
        books_df: DataFrame of books records.
        gst_df: DataFrame of GST records.

    Returns:
        A dict with the keys "summary", "charts", "fraud_network",
        "faiss_stats", "reconciliation" and "anomalies" (the last two are
        capped at 50 rows), or ``{"error": ...}`` when the reconciliation
        engine is unavailable or reconciliation raises.
    """
    log = logging.getLogger(__name__)

    if recon_engine is None:
        return {"error": "Reconciliation engine failed to initialize"}

    try:
        merged_df = recon_engine.reconcile(books_df, gst_df)
    except Exception as e:
        log.exception("Reconciliation failed")
        return {"error": f"Reconciliation failed: {e}"}

    books_with_anomalies = anomaly_detector.detect_anomalies(books_df, amount_col='Amount')
    if 'InvoiceID' in merged_df.columns and 'InvoiceID' in books_with_anomalies.columns:
        merged_df = pd.merge(
            merged_df,
            books_with_anomalies[['InvoiceID', 'IsAnomaly', 'AnomalyScore']],
            on='InvoiceID',
            how='left',
        )

    status = merged_df['MatchStatus']
    exact_mask = status == 'Exact Match'
    mismatch_mask = status != 'Exact Match'

    # Serialise before the helper 'Month' column is added below so it does
    # not leak into the row payloads.
    recon_results = _to_records(merged_df)
    anomalies = _to_records(_anomalous_rows(merged_df))

    # Compute chart data: one slot per calendar month (index 0 == January).
    recon_trend = [0] * 12
    discrep_trend = [0] * 12
    anomaly_dist = {"critical": [0] * 12, "high": [0] * 12, "medium": [0] * 12}

    # Try to extract month if an invoice date column exists.
    date_col = next(
        (c for c in ('InvoiceDate', 'InvoiceDate_books') if c in merged_df.columns),
        None,
    )
    if date_col is not None:
        months = pd.to_datetime(merged_df[date_col], errors='coerce').dt.month
        # Rows whose date cannot be parsed are bucketed into January.
        merged_df['Month'] = months.fillna(1).astype(int)

        for mask, trend in ((exact_mask, recon_trend), (mismatch_mask, discrep_trend)):
            for m, count in merged_df[mask].groupby('Month').size().items():
                if 1 <= m <= 12:
                    trend[m - 1] = int(count)

    # 'Month' is absent when no date column was found; such anomalies are
    # counted under January via the default below.
    for _, row in _anomalous_rows(merged_df).iterrows():
        m = int(row.get('Month', 1))
        if not 1 <= m <= 12:
            continue
        score = row.get('AnomalyScore', 0)
        if score > 0.3:
            anomaly_dist["critical"][m - 1] += 1
        elif score > 0.1:
            anomaly_dist["high"][m - 1] += 1
        else:
            anomaly_dist["medium"][m - 1] += 1

    # Run Fraud Graph Analysis (best effort: failures yield an empty network).
    try:
        fraud_graph.build_graph(
            merged_df,
            source_col='VendorName_books',
            target_col='VendorName_gst',
            amount_col='Amount_books',
        )
        cycles = fraud_graph.detect_cycles()
        risk_scores = fraud_graph.analyze_risk_nodes()
        graph = fraud_graph.graph
        # Position of every node, computed once instead of scanning the node
        # list twice per edge.
        node_index = {node: i for i, node in enumerate(graph.nodes())}
        fraud_nodes = [
            {
                "id": str(n),
                "label": str(n),
                "size": 15,
                "color": "#64748b",
                "risk_score": risk_scores.get(n, 0.0),
            }
            for n in graph.nodes()
        ]
        # NOTE(review): edges reference nodes by position while node "id" is
        # the node's string form — confirm the frontend expects this.
        fraud_edges = [
            {"from": node_index[u], "to": node_index[v], "weight": d.get('weight', 0)}
            for u, v, d in graph.edges(data=True)
        ]
        max_risk = max(risk_scores.values()) if risk_scores else 0.0
        overall_risk_score = min(10.0, max_risk * 100)  # Arbitrary scale to 0-10
    except Exception:
        log.exception("Fraud graph analysis failed; returning an empty network")
        cycles = []
        fraud_nodes = []
        fraud_edges = []
        overall_risk_score = 0.0

    # Get FAISS Stats
    try:
        ntotal = recon_engine.index.ntotal if recon_engine and recon_engine.index else 0
        # NOTE(review): assumes 384-dimensional 4-byte vectors — confirm
        # against the embedding model used by the engine.
        mem_mb = round(ntotal * 384 * 4 / (1024 * 1024), 2)
    except Exception:
        log.exception("Could not read FAISS index statistics")
        ntotal = 0
        mem_mb = 0

    return {
        "summary": {
            "total_books": len(books_df),
            "total_gst": len(gst_df),
            "exact": len(merged_df[exact_mask]),
            "fuzzy": len(merged_df[status.str.contains('Fuzzy', na=False)]),
            "semantic": len(merged_df[status.str.contains('Semantic', na=False)]),
            "discrepancies": len(merged_df[mismatch_mask]),
            "unmatched": len(merged_df[status.str.contains('Mismatch|Missing', na=False)]),
            "anomalies": len(anomalies),
            "fraud_rings": len(cycles),
            "overall_risk_score": overall_risk_score,
        },
        "charts": {
            "recon_trend": recon_trend,
            "discrep_trend": discrep_trend,
            "anomaly_dist": anomaly_dist,
        },
        "fraud_network": {
            "nodes": fraud_nodes,
            "edges": fraud_edges,
            "cycles": cycles,
        },
        "faiss_stats": {
            "ntotal": ntotal,
            "memory_mb": mem_mb,
        },
        "reconciliation": recon_results[:50],  # Limit payload for UI
        "anomalies": anomalies[:50],
    }
# Serve the frontend files from the current working directory, mounted at
# "/" after the API routes above have been registered.
# NOTE(review): directory="." exposes every file in the working directory,
# presumably including this module — confirm that is acceptable.
_frontend_files = StaticFiles(directory=".", html=True)
app.mount("/", _frontend_files, name="static")

if __name__ == "__main__":
    # Direct execution: listen on all interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|