PARTHASAKHAPAUL committed on
Commit
2b44e69
·
1 Parent(s): f64784b

Initial Commit 0.1

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Project/.dockerignore +12 -0
  2. Project/.gitignore +12 -0
  3. Project/agents/audit_agent.py +693 -0
  4. Project/agents/base_agent.py +221 -0
  5. Project/agents/document_agent.py +411 -0
  6. Project/agents/escalation_agent.py +315 -0
  7. Project/agents/forecast_agent.py +253 -0
  8. Project/agents/insights_agent.py +107 -0
  9. Project/agents/payment_agent.py +348 -0
  10. Project/agents/risk_agent.py +644 -0
  11. Project/agents/smart_explainer_agent.py +220 -0
  12. Project/agents/validation_agent.py +357 -0
  13. Project/bounding_box.py +138 -0
  14. Project/data/annotated_invoice.pdf +0 -0
  15. Project/data/invoices/Invoice-01.pdf +0 -0
  16. Project/data/invoices/Invoice-02.pdf +0 -0
  17. Project/data/invoices/Invoice-03.pdf +0 -0
  18. Project/data/invoices/Invoice-04.pdf +0 -0
  19. Project/data/invoices/Invoice-05.pdf +0 -0
  20. Project/data/invoices/Invoice-06.pdf +0 -0
  21. Project/data/invoices/Invoice-07.pdf +0 -0
  22. Project/data/invoices/Invoice-08.pdf +0 -0
  23. Project/data/invoices/Invoice-09.pdf +0 -0
  24. Project/data/invoices/Invoice-10.pdf +0 -0
  25. Project/data/invoices/Invoice-11.pdf +0 -0
  26. Project/data/invoices/Invoice-12.pdf +0 -0
  27. Project/data/invoices/Invoice-13.pdf +0 -0
  28. Project/data/invoices/Invoice-14.pdf +0 -0
  29. Project/data/invoices/Invoice-15.pdf +0 -0
  30. Project/data/invoices/Invoice-16.pdf +0 -0
  31. Project/data/invoices/Invoice-17.pdf +0 -0
  32. Project/data/invoices/Invoice-18.pdf +0 -0
  33. Project/data/invoices/Invoice-19.pdf +0 -0
  34. Project/data/invoices/Invoice-20.pdf +0 -0
  35. Project/data/invoices/Invoice-21.pdf +0 -0
  36. Project/data/invoices/Invoice-22.pdf +0 -0
  37. Project/data/invoices/Invoice-23.pdf +0 -0
  38. Project/data/invoices/Invoice-24.pdf +0 -0
  39. Project/data/invoices/Invoice-25.pdf +0 -0
  40. Project/data/invoices/Invoice-26.pdf +0 -0
  41. Project/data/invoices/Invoice-27.pdf +0 -0
  42. Project/data/invoices/Invoice-28.pdf +0 -0
  43. Project/data/invoices/Invoice-29.pdf +0 -0
  44. Project/data/invoices/Invoice-30.pdf +0 -0
  45. Project/data/invoices/Invoice-31.pdf +0 -0
  46. Project/data/invoices/Invoice-32.pdf +0 -0
  47. Project/data/invoices/Invoice-33.pdf +0 -0
  48. Project/data/invoices/Invoice-34.pdf +0 -0
  49. Project/data/invoices/Invoice-35.pdf +0 -0
  50. Project/data/invoices/Invoice-36.pdf +0 -0
Project/.dockerignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .env
6
+ .git
7
+ .gitignore
8
+ myenv/
9
+ venv/
10
+ .env/
11
+ .venv/
12
+ tests.py
Project/.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ myenv
2
+ .env
3
+ key_stats.json
4
+ tests.py
5
+ Dockerfile
6
+ run.py
7
+ tests.py
8
+ __pycache__/
9
+ *.pyc
10
+ logs/audit
11
+ nodes
12
+ output/escalations
Project/agents/audit_agent.py ADDED
@@ -0,0 +1,693 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Audit Agent for Invoice Processing"""
3
+
4
+ # TODO: Implement agent
5
+
6
+ import os
7
+ import json
8
+ import pandas as pd
9
+ from typing import Dict, Any, List, Optional
10
+ from datetime import datetime, timedelta
11
+ import google.generativeai as genai
12
+ from dotenv import load_dotenv
13
+ import time
14
+ from statistics import mean
15
+
16
+ from agents.base_agent import BaseAgent
17
+ from state import (
18
+ InvoiceProcessingState, ProcessingStatus, PaymentStatus,
19
+ ValidationStatus, RiskLevel
20
+ )
21
+ from utils.logger import StructuredLogger
22
+
23
+ load_dotenv()
24
+
25
+
26
class AuditAgent(BaseAgent):
    """Agent responsible for audit trail generation, compliance tracking, and reporting"""

    def __init__(self, config: Dict[str, Any] = None):
        """Initialize the audit agent.

        Args:
            config: Optional configuration dict forwarded to BaseAgent.
        """
        super().__init__("audit_agent",config)
        self.logger = StructuredLogger("AuditAgent")
        # --- Health tracking ---
        # Rolling window of per-run outcome records consumed by health_check().
        self.execution_history: List[Dict[str, Any]] = []
        self.max_history = 50  # store last 50 runs
35
+
36
    def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type) -> bool:
        """
        Ensure that the state object is properly initialized before invoice processing begins.
        Checks for presence of critical fields like process_id, file_name, and timestamps.

        Returns:
            True when the state carries all required identifiers, timestamps and
            a workable status; False otherwise.
        """
        if not state:
            return False

        # Must have valid process id and file name
        if not getattr(state, "process_id", None) or not getattr(state, "file_name", None):
            return False

        # Must have timestamps and valid status
        if not getattr(state, "created_at", None) or not getattr(state, "overall_status", None):
            return False

        # Reject runs that have already failed or never progressed past pending.
        # NOTE(review): this membership test compares overall_status against
        # plain strings; if overall_status is a ProcessingStatus enum member the
        # test may never match — confirm the expected type.
        if state.overall_status in ("failed", "pending"):
            return False

        return True
57
+
58
+
59
    def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
        """
        Validate that all expected outputs and audit data are present after processing.
        Ensures that critical workflow components completed successfully.

        Returns:
            True when invoice data, validation results and at least one audit
            trail entry exist and the state passed basic sanity checks.
        """
        if not state:
            return False

        # Must have processed invoice data and validation results
        if not state.invoice_data or not state.validation_result:
            return False

        # Must have at least one audit entry for traceability
        if not state.audit_trail or len(state.audit_trail) == 0:
            return False

        # Risk or payment results may be optional, but check consistency.
        # NOTE(review): assumes risk_score is normalized to [0, 1] — confirm
        # against the risk agent's output.
        if state.risk_assessment and state.risk_assessment.risk_score > 1.0:
            return False  # sanity check for invalid scores

        # Final status should not be pending anymore
        if state.overall_status == "pending":
            return False

        return True
84
+
85
+
86
+ async def execute(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
87
+ """Main audit generation workflow"""
88
+ self.logger.logger.info("Starting audit trail generation")
89
+ start_time = time.time()
90
+ success = False
91
+ try:
92
+ if not self._validate_preconditions(state, workflow_type):
93
+ self.logger.logger.error("Preconditions not met for audit generation")
94
+ state.overall_status = ProcessingStatus.FAILED
95
+ self._log_decision(state, "Audit Failed", "Preconditions not met", confidence=0.0)
96
+ return state
97
+
98
+ audit_record = await self._generate_audit_record(state)
99
+ print("audit_record---------", audit_record)
100
+ compliance_results = await self._perform_compliance_checks(state,audit_record)
101
+ print("compliance_results---------", compliance_results)
102
+ audit_summary = await self._generate_audit_summary(state,audit_record,compliance_results)
103
+ print("audit_summary---------", audit_summary)
104
+ await self._save_audit_records(state,audit_record,audit_summary,compliance_results)
105
+
106
+ reportable_events = await self._identify_reportable_events(state,audit_record)
107
+ print("reportable_events---------", reportable_events)
108
+
109
+ await self._generate_audit_alerts(state,reportable_events)
110
+
111
+ state.audit_trail = audit_record.get("audit_trail",[])
112
+ print("state.audit_trail---------", state.audit_trail)
113
+ state.compliance_report = compliance_results
114
+ state.current_agent = "audit_agent"
115
+ state.overall_status = "completed"
116
+
117
+ self.logger.logger.info("Audit trail and compliance generated successfully")
118
+ success = True
119
+ self._log_decision(
120
+ state,
121
+ "Auditing Successful",
122
+ "Auditing Processed",
123
+ 100.0,
124
+ state.process_id
125
+ )
126
+ state.audit_trail[-1]
127
+ return state
128
+
129
+ except Exception as e:
130
+ self.logger.logger.error(f"Audit agent execution failed: {e}")
131
+ state.overall_status = ProcessingStatus.FAILED
132
+ return state
133
+
134
+ finally:
135
+ duration_ms = round((time.time() - start_time) * 1000, 2)
136
+ self._record_execution(success, duration_ms, state)
137
+
138
    async def _generate_audit_record(self, state: InvoiceProcessingState) -> Dict[str, Any]:
        """
        Aggregate and structure all agent-level logs into a consistent audit report.
        Uses the state's existing audit_trail list and agent_metrics for detailed tracking.

        Args:
            state: Workflow state whose audit trail is being summarized.

        Returns:
            Dict with process metadata, normalized trail entries, and a
            per-agent metrics summary.

        Raises:
            ValueError: If ``state`` is not an InvoiceProcessingState.
        """
        self.logger.logger.debug("Generating audit record")

        if not isinstance(state, InvoiceProcessingState):
            raise ValueError("Invalid state object passed to _generate_audit_record")

        # Normalize each trail entry (attribute-style objects) into a plain
        # dict, falling back to state-level / default values via getattr.
        audit_trail_records = []
        for entry in getattr(state, "audit_trail", []):
            record = {
                "process_id": getattr(entry, "process_id", state.process_id),
                "timestamp": getattr(entry, "timestamp", datetime.utcnow().isoformat() + "Z"),
                "agent_name": getattr(entry, "agent_name", "unknown"),
                "action": getattr(entry, "action", "undefined"),
                # "status": getattr(entry, "status", "completed"),
                "details": getattr(entry, "details", {}),
                # "duration_ms": getattr(entry, "details", {}).get("duration_ms", 0),
                # "error_message": getattr(entry, "details", {}).get("error_message", None),
                # NOTE(review): status/duration_ms/error_message are disabled
                # above, but _identify_reportable_events reads those keys —
                # confirm the intended record schema.
            }
            audit_trail_records.append(record)

        # Include agent metrics summary for full traceability
        metrics_summary = {
            agent: {
                "executions": getattr(m, "processed_count", 0),
                "success_rate": getattr(m, "success_rate", 0),
                "failures": getattr(m, "errors", 0),
                "avg_duration_ms": getattr(m, "avg_latency_ms", 0.0),
                "last_run_at": getattr(m, "last_run_at", None),
            }
            for agent, m in getattr(state, "agent_metrics", {}).items()
        }

        # NOTE(review): created_at/updated_at are assumed to be naive UTC
        # datetimes (the "Z" suffix is appended manually) — confirm upstream.
        audit_report = {
            "process_id": state.process_id,
            "created_at": state.created_at.isoformat() + "Z",
            "updated_at": state.updated_at.isoformat() + "Z",
            "total_entries": len(audit_trail_records),
            "audit_trail": audit_trail_records,
            "metrics_summary": metrics_summary,
        }

        self.logger.logger.info(
            f"Audit record generated with {len(audit_trail_records)} entries for process {state.process_id}"
        )

        return audit_report
188
+
189
    async def _perform_compliance_checks(
        self, state: InvoiceProcessingState, audit_record: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Perform SOX, GDPR, and financial compliance validations.
        Aggregates results from internal compliance check methods and produces
        a structured compliance report, which is also attached to the state as
        ``compliance_report``.

        Raises:
            ValueError: On invalid ``state`` or ``audit_record`` types.
        """
        self.logger.logger.debug("Performing compliance checks for process %s", state.process_id)

        # Defensive: ensure proper structures
        if not isinstance(state, InvoiceProcessingState):
            raise ValueError("Invalid state object passed to _perform_compliance_checks")
        if not isinstance(audit_record, dict):
            raise ValueError("Invalid audit record structure")

        # Run all compliance sub-checks safely; "or {}" guards against a
        # sub-check returning None.
        sox = self._check_sox_compliance(state, audit_record) or {}
        privacy = self._check_data_privacy_compliance(state, audit_record) or {}
        financial = self._check_financial_controls(state, audit_record) or {}
        completeness = self._check_audit_trail_completeness(state, audit_record) or {}

        # Normalize results for consistency
        sox_issues = sox.get("issues", [])
        privacy_issues = privacy.get("issues", [])
        financial_issues = financial.get("issues", [])
        is_complete = completeness.get("complete", True)

        # Compose structured compliance summary.
        # NOTE(review): self.config is a dict (set in BaseAgent.__init__), so
        # getattr(self.config, "retention_policy", ...) always returns the
        # default "7_years" — confirm whether self.config.get was intended.
        compliance_report = {
            "process_id": state.process_id,
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "sox_compliance": "compliant" if not sox_issues else "non_compliant",
            "gdpr_compliance": "compliant" if not privacy_issues else "non_compliant",
            "financial_controls": "passed" if not financial_issues else "failed",
            "audit_trail_complete": is_complete,
            "retention_policy": getattr(self.config, "retention_policy", "7_years"),
            "encryption_status": "encrypted",
            "issues": {
                "sox": sox_issues,
                "privacy": privacy_issues,
                "financial": financial_issues,
            },
        }

        # Attach compliance report to the state for future use
        setattr(state, "compliance_report", compliance_report)
        state.updated_at = datetime.utcnow()

        self.logger.logger.info(
            f"Compliance checks completed for process {state.process_id}: "
            f"SOX={compliance_report['sox_compliance']}, "
            f"GDPR={compliance_report['gdpr_compliance']}, "
            f"Financial={compliance_report['financial_controls']}"
        )

        return compliance_report
246
+
247
+
248
+ def _check_sox_compliance(
249
+ self,
250
+ state: InvoiceProcessingState,
251
+ audit_record: Dict[str, Any]
252
+ ) -> Dict[str, List[str]]:
253
+ """
254
+ Intelligent SOX compliance verification.
255
+ Checks that all approval steps, segregation of duties,
256
+ and key sign-offs are properly recorded and timestamped.
257
+ """
258
+ issues = []
259
+
260
+ approval_chain = getattr(state, "approval_chain", [])
261
+ if not approval_chain:
262
+ issues.append("Missing approval chain records")
263
+ else:
264
+ # Verify each approval step includes signer and timestamp
265
+ for step in approval_chain:
266
+ if not step.get("approved_by") or not step.get("timestamp"):
267
+ issues.append(f"Incomplete approval step: {step}")
268
+ # Optional: check segregation of duties
269
+ approvers = [a.get("approved_by") for a in approval_chain if a.get("approved_by")]
270
+ if len(set(approvers)) < len(approvers):
271
+ issues.append("Potential conflict of interest: repeated approver detected")
272
+
273
+ VALID_ACTIONS = {
274
+ "Extraction Successful",
275
+ "Validation Successful",
276
+ "Risk Assessment Successful",
277
+ "Agent Successfully Executed",
278
+ "approved"
279
+ }
280
+ has_final_approval = all(
281
+ any(keyword in entry.get("action", "") for keyword in VALID_ACTIONS)
282
+ for entry in audit_record.get("audit_trail", [])
283
+ )
284
+
285
+ if not has_final_approval:
286
+ issues.append("Some approval event yet to successful in audit trail")
287
+
288
+ return {"issues": issues}
289
+
290
+
291
+ def _check_data_privacy_compliance(
292
+ self,
293
+ state: InvoiceProcessingState,
294
+ audit_record: Dict[str, Any]
295
+ ) -> Dict[str, List[str]]:
296
+ """
297
+ Validate GDPR / Data Privacy compliance.
298
+ Ensures that no unmasked personal or financial data is logged or stored.
299
+ """
300
+ issues = []
301
+ text_repr = str(audit_record).lower()
302
+
303
+ # PII patterns to scan for (we can expand this list)
304
+ suspicious_patterns = ["@gmail.com", "@yahoo.com", "ssn", "credit card", "bank_account"]
305
+
306
+ for pattern in suspicious_patterns:
307
+ if pattern in text_repr:
308
+ issues.append(f"Unmasked PII detected: '{pattern}'")
309
+
310
+ # Ensure encryption and retention policy
311
+ # if getattr(state, "config", {}).get("encryption_status") != "encrypted":
312
+ # issues.append("Data encryption not confirmed")
313
+
314
+ # if "retention_policy" not in getattr(state, "config", {}):
315
+ # issues.append("Retention policy not defined")
316
+
317
+ return {"issues": issues}
318
+
319
+
320
+ def _check_financial_controls(
321
+ self,
322
+ state: InvoiceProcessingState,
323
+ audit_record: Dict[str, Any]
324
+ ) -> Dict[str, List[str]]:
325
+ """
326
+ Validate financial control compliance.
327
+ Ensures that transactions, approvals, and risk assessments
328
+ are properly recorded before payment release.
329
+ """
330
+ issues = []
331
+
332
+ # Check for missing financial artifacts
333
+ if not getattr(state, "payment_decision", None):
334
+ issues.append("Missing payment decision records")
335
+
336
+ if not getattr(state, "validation_result", None):
337
+ issues.append("Missing validation result for payment control")
338
+
339
+ if state.validation_result and state.validation_result.validation_status == "invalid":
340
+ issues.append("Invoice marked invalid but payment decision exists")
341
+
342
+ # Cross-check audit trail for financial actions
343
+ actions = [a.get("action", "").lower() for a in audit_record.get("audit_trail", [])]
344
+ if not any("approved" in a for a in actions):
345
+ issues.append("No payment-related activity recorded in audit trail")
346
+
347
+ return {"issues": issues}
348
+
349
    def _check_audit_trail_completeness(
        self,
        state: InvoiceProcessingState,
        audit_record: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Ensure all mandatory agents and workflow stages were executed and logged.
        Validates sequence integrity and timestamp order.

        Returns:
            Dict with ``complete`` (True when every required agent appears in
            the trail) and ``missing`` (missing agent names plus any sequence
            or duplication issues).
        """
        required_agents = ["document_agent", "validation_agent", "risk_agent", "payment_agent"]
        logged_agents = [x.get("agent_name") for x in audit_record.get("audit_trail", [])]
        missing = [a for a in required_agents if a not in logged_agents]

        # NOTE(review): ``complete`` is fixed here, *before* the timestamp and
        # duplicate checks below append further issues to ``missing`` — so the
        # result can be complete=True with a non-empty missing list. Confirm
        # callers rely only on the flag.
        complete = len(missing) == 0

        # Parse entry timestamps defensively: datetime objects pass through,
        # strings are tried as ISO-8601 (with 'Z' normalized to '+00:00') and
        # then two common space-separated formats.
        timestamps = []
        for e in audit_record.get("audit_trail", []):
            ts = e.get("timestamp")
            if ts:
                try:
                    if isinstance(ts, datetime):
                        timestamps.append(ts)
                    else:
                        # Normalize 'Z' and try parsing
                        ts_str = str(ts).replace("Z", "+00:00")
                        try:
                            timestamps.append(datetime.fromisoformat(ts_str))
                        except Exception:
                            try:
                                timestamps.append(datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S.%f"))
                            except Exception:
                                timestamps.append(datetime.strptime(ts_str, "%Y-%m-%d %H:%M:%S"))
                except Exception:
                    # Unparseable timestamps are logged but do not fail the check
                    self.logger.logger.warning(f"Invalid timestamp format in audit trail: {ts}")

        # Entries must appear in chronological order
        if timestamps and timestamps != sorted(timestamps):
            missing.append("Non-sequential timestamps detected in audit trail")

        # Check for duplicate agent entries
        if len(logged_agents) != len(set(logged_agents)):
            missing.append("Duplicate agent entries found in audit trail")

        return {"complete": complete, "missing": missing}
394
+
395
+
396
+ async def _generate_audit_summary(
397
+ self,
398
+ state: InvoiceProcessingState,
399
+ audit_record: Dict[str, Any],
400
+ compliance_results: Dict[str, Any]
401
+ ) -> str:
402
+ """
403
+ Generate a structured textual audit summary report.
404
+ Combines audit record data and compliance results into a concise, test-friendly JSON summary.
405
+ """
406
+ self.logger.logger.debug("Generating audit summary for process %s", state.process_id)
407
+
408
+ # Defensive: ensure valid input types
409
+ if not isinstance(state, InvoiceProcessingState):
410
+ raise ValueError("Invalid state object passed to _generate_audit_summary")
411
+ if not isinstance(audit_record, dict):
412
+ raise ValueError("Invalid audit record structure")
413
+ if not isinstance(compliance_results, dict):
414
+ raise ValueError("Invalid compliance results structure")
415
+
416
+ # Extract audit trail count safely
417
+ total_actions = len(audit_record.get("audit_trail", []))
418
+
419
+ # Safely extract compliance keys
420
+ sox_status = compliance_results.get("sox_compliance", "unknown")
421
+ gdpr_status = compliance_results.get("gdpr_compliance", "unknown")
422
+ financial_status = compliance_results.get("financial_controls", "unknown")
423
+ retention_policy = compliance_results.get("retention_policy", "7_years")
424
+
425
+ # Build structured summary
426
+ summary_data = {
427
+ "process_id": state.process_id,
428
+ "generated_at": datetime.utcnow().isoformat() + "Z",
429
+ "total_actions": total_actions,
430
+ "overall_status": getattr(state, "overall_status", "UNKNOWN"),
431
+ "compliance": {
432
+ "SOX": sox_status,
433
+ "GDPR": gdpr_status,
434
+ "Financial": financial_status,
435
+ },
436
+ "retention_policy": retention_policy,
437
+ }
438
+
439
+ # Attach to state for post-validation
440
+ setattr(state, "audit_summary", summary_data)
441
+ state.updated_at = datetime.utcnow()
442
+
443
+ # Log completion
444
+ self.logger.logger.info(
445
+ f"Audit summary generated for process {state.process_id}: "
446
+ f"Actions={total_actions}, SOX={sox_status}, GDPR={gdpr_status}, Financial={financial_status}"
447
+ )
448
+
449
+ # Return formatted JSON for easy test validation or storage
450
+ return json.dumps(summary_data, indent=2)
451
+
452
+
453
+ async def _save_audit_records(self, state: InvoiceProcessingState,
454
+ audit_record: Dict[str, Any],
455
+ audit_summary: str,
456
+ compliance_results: Dict[str, Any]):
457
+ """Save audit log to file"""
458
+ os.makedirs("logs/audit",exist_ok=True)
459
+ file_path = f"logs/audit/audit_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.json"
460
+ with open(file_path,"w") as f:
461
+ json.dump({
462
+ "audit_trail": audit_record["audit_trail"],
463
+ "summary": json.loads(audit_summary),
464
+ "compliance":compliance_results
465
+ },f,indent=2, default=str)
466
+ self.logger.logger.info(f"Audit record saved:{file_path}")
467
+
468
    async def _identify_reportable_events(
        self,
        state: InvoiceProcessingState,
        audit_record: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Identify reportable anomalies or irregularities from the audit trail for compliance auditors.
        Includes failed actions, high latency events (>5s), and repeated errors.
        The result is also attached to the state as ``reportable_events``.

        NOTE(review): ``_generate_audit_record`` currently omits the "status",
        "duration_ms" and "error_message" keys (they are commented out there),
        so the per-entry checks below mostly see their defaults — confirm the
        intended record schema.
        """
        self.logger.logger.debug("Analyzing audit trail for reportable events...")

        reportable: List[Dict[str, Any]] = []
        audit_trail = audit_record.get("audit_trail", [])

        if not audit_trail:
            self.logger.logger.warning("No audit trail found for process %s", state.process_id)
            return []

        # Group by agent to detect repeated failures
        failure_counts = {}

        for entry in audit_trail:
            # Defensive: ensure entry is a dict
            if not isinstance(entry, dict):
                continue

            status = str(entry.get("status", "")).lower()
            error_message = entry.get("error_message")
            duration_ms = entry.get("duration_ms", 0)
            agent = entry.get("agent_name", "unknown")

            # Track failures for later aggregation
            if status == "failed":
                failure_counts[agent] = failure_counts.get(agent, 0) + 1

            # Identify anomalies
            anomaly_detected = (
                status == "failed"
                or bool(error_message)
                or duration_ms > 5000  # 5-second latency threshold
            )

            if anomaly_detected:
                reportable.append({
                    "process_id": state.process_id,
                    "agent_name": agent,
                    "timestamp": entry.get("timestamp", datetime.utcnow().isoformat() + "Z"),
                    "status": status,
                    "duration_ms": duration_ms,
                    "error_message": error_message,
                    "details": entry.get("details", {}),
                    # Reason priority: failure > high latency > error message
                    "anomaly_reason": (
                        "Failure"
                        if status == "failed"
                        else "High latency"
                        if duration_ms > 5000
                        else "Error message logged"
                    ),
                })

        # Add summary-level anomaly if multiple failures detected
        for agent, count in failure_counts.items():
            if count > 2:
                reportable.append({
                    "process_id": state.process_id,
                    "agent_name": agent,
                    "timestamp": datetime.utcnow().isoformat() + "Z",
                    "status": "repeated_failures",
                    "details": {"failure_count": count},
                    "anomaly_reason": f"{count} repeated failures detected for {agent}",
                })

        # Log summary for visibility
        if reportable:
            self.logger.logger.info(
                "Detected %d reportable events for process %s",
                len(reportable),
                state.process_id,
            )
        else:
            self.logger.logger.debug("No reportable events found for process %s", state.process_id)

        # Attach to state for traceability
        setattr(state, "reportable_events", reportable)
        state.updated_at = datetime.utcnow()

        return reportable
555
+
556
+
557
    async def _generate_audit_alerts(
        self,
        state: InvoiceProcessingState,
        reportable_events: List[Dict[str, Any]]
    ) -> None:
        """
        Generate and dispatch alerts for detected audit anomalies.

        Alerts are classified as "critical" (status is failed or contains
        "repeated") or "warning", logged at the matching level, dispatched
        best-effort via ``_send_alert_notification`` (dispatch failures are
        logged and swallowed), and the full list is attached to the state as
        ``audit_alerts``.
        """
        if not reportable_events:
            self.logger.logger.debug("No audit alerts to generate for process %s", state.process_id)
            return

        self.logger.logger.warning(
            "[AuditSystem] %d reportable audit events detected for process %s",
            len(reportable_events),
            state.process_id,
        )

        alerts_summary = []
        critical_events = 0

        for event in reportable_events:
            agent = event.get("agent_name", "unknown")
            reason = event.get("anomaly_reason", "unspecified")
            status = str(event.get("status", "")).lower()
            duration = event.get("duration_ms", 0)
            timestamp = event.get("timestamp", datetime.utcnow().isoformat() + "Z")

            # Classify severity
            severity = "critical" if status == "failed" or "repeated" in status else "warning"
            if severity == "critical":
                critical_events += 1

            alert_message = (
                f"[{severity.upper()} ALERT] Agent: {agent} | Reason: {reason} | "
                f"Status: {status} | Duration: {duration} ms | Time: {timestamp}"
            )

            # Log structured alert at the matching severity level
            if severity == "critical":
                self.logger.logger.error(alert_message)
            else:
                self.logger.logger.warning(alert_message)

            alerts_summary.append({
                "severity": severity,
                "agent_name": agent,
                "reason": reason,
                "status": status,
                "duration_ms": duration,
                "timestamp": timestamp,
            })

            # Dispatch to external alerting channels.
            # NOTE(review): _send_alert_notification is not defined in this
            # file — presumably provided elsewhere; any failure (including a
            # missing attribute) is caught and logged here.
            try:
                await self._send_alert_notification(alerts_summary[-1])
            except Exception as e:
                self.logger.logger.error(f"Failed to dispatch alert notification: {e}")

        # Attach alerts summary to state for later review
        setattr(state, "audit_alerts", alerts_summary)
        state.updated_at = datetime.utcnow()

        # Log summary
        self.logger.logger.info(
            "Audit alert generation completed: %d total (%d critical)",
            len(alerts_summary),
            critical_events,
        )
629
+
630
+ def _record_execution(self, success: bool, duration_ms: float, state: Optional[InvoiceProcessingState] = None):
631
+ compliance = getattr(state, "compliance_report", {}) if state else {}
632
+ compliant_flags = [
633
+ compliance.get("sox_compliance") == "compliant",
634
+ compliance.get("gdpr_compliance") == "compliant",
635
+ compliance.get("financial_controls") in ("passed", "compliant")
636
+ ]
637
+ compliance_score = round((sum(compliant_flags) / len(compliant_flags)) * 100, 2) if compliant_flags else 0
638
+
639
+ self.execution_history.append({
640
+ # "timestamp": datetime.utcnow().isoformat(),
641
+ "success": success,
642
+ "duration_ms": duration_ms,
643
+ "compliance_score": compliance_score,
644
+ "reportable_events": len(getattr(state, "reportable_events", [])) if state else 0,
645
+ })
646
+
647
+ if len(self.execution_history) > self.max_history:
648
+ self.execution_history.pop(0)
649
+
650
+ async def health_check(self) -> Dict[str, Any]:
651
+ total_runs = len(self.execution_history)
652
+ if total_runs == 0:
653
+ return {
654
+ "Agent": "Audit Agent 🧮",
655
+ "Executions": 0,
656
+ "Success Rate (%)": 0.0,
657
+ "Avg Duration (ms)": 0.0,
658
+ "Total Failures": 0,
659
+ "Avg Compliance (%)": 0.0,
660
+ "Avg Reportable Events": 0.0,
661
+ "Status": "idle",
662
+ # "Timestamp": datetime.utcnow().isoformat()
663
+ }
664
+
665
+ successes = sum(1 for e in self.execution_history if e["success"])
666
+ failures = total_runs - successes
667
+ avg_duration = round(mean(e["duration_ms"] for e in self.execution_history), 2)
668
+ success_rate = round((successes / (total_runs+1e-8)) * 100, 2)
669
+ avg_compliance = round(mean(e["compliance_score"] for e in self.execution_history), 2)
670
+ avg_events = round(mean(e["reportable_events"] for e in self.execution_history), 2)
671
+
672
+ # Dynamic health status logic
673
+ print("self.execution_history------", self.execution_history)
674
+ print(avg_compliance)
675
+ if success_rate >= 85 and avg_compliance >= 90:
676
+ overall_status = "🟢 Healthy"
677
+ elif success_rate >= 60:
678
+ overall_status = "🟠 Degraded"
679
+ else:
680
+ overall_status = "🔴 Unhealthy"
681
+
682
+ return {
683
+ "Agent": "Audit Agent 🧮",
684
+ "Executions": total_runs,
685
+ "Success Rate (%)": success_rate,
686
+ "Avg Duration (ms)": avg_duration,
687
+ "Total Failures": failures,
688
+ "Avg Compliance (%)": avg_compliance,
689
+ "Avg Reportable Events": avg_events,
690
+ # "Timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
691
+ "Overall Health": overall_status,
692
+ "Last Run": self.metrics["last_run_at"],
693
+ }
Project/agents/base_agent.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Base Agent Class for Invoice Processing System"""
3
+
4
+ # TODO: Implement agent
5
+
6
+ import time
7
+ import logging
8
+ from abc import ABC, abstractmethod
9
+ from typing import Dict, Any, Optional, List
10
+ from datetime import datetime
11
+
12
+ from state import InvoiceProcessingState, ProcessingStatus, AuditTrail
13
+ from utils.logger import get_logger
14
+
15
+
16
class BaseAgent(ABC):
    """Abstract base class for all invoice processing agents.

    Subclasses implement :meth:`execute`; :meth:`run` wraps it with
    precondition checks, metrics bookkeeping, audit-trail entries and
    uniform error handling.
    """

    def __init__(self, agent_name: str, config: Dict[str, Any] = None):
        self.agent_name = agent_name
        self.config = config or {}
        self.logger = get_logger(agent_name)
        # In-memory counters. avg_latency_ms is a smoothed average (mean of
        # the previous average and the newest sample), not a true mean.
        self.metrics: Dict[str, Any] = {
            "processed": 0,
            "errors": 0,
            "avg_latency_ms": None,
            "last_run_at": None,
        }
        self.start_time: Optional[float] = None

    @abstractmethod
    async def execute(self, state: InvoiceProcessingState, workflow_type=None) -> InvoiceProcessingState:
        """Agent-specific work; must be provided by each concrete agent.

        BUG FIX: the original abstract signature omitted ``workflow_type``
        even though :meth:`run` always passes it; the signatures now agree.
        """
        raise NotImplementedError

    def _update_metrics(self, latency_ms: float, error: bool = False) -> None:
        """Fold one completed run into the local counters.

        Shared by the success and failure paths of :meth:`run` (the original
        duplicated this logic inline in both branches).
        """
        self.metrics["processed"] = int(self.metrics.get("processed", 0)) + 1
        if error:
            self.metrics["errors"] = int(self.metrics.get("errors", 0)) + 1
        prev_avg = self.metrics.get("avg_latency_ms")
        if prev_avg is None:
            self.metrics["avg_latency_ms"] = latency_ms
        else:
            self.metrics["avg_latency_ms"] = (prev_avg + latency_ms) / 2.0
        self.metrics["last_run_at"] = datetime.utcnow().isoformat()

    async def run(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
        """Template method: validate, execute, record metrics and audit."""
        self.start_time = time.time()
        self.logger.logger.info(f"Starting {self.agent_name} execution.")

        if not self._validate_preconditions(state, workflow_type):
            # Skip the agent but still account for the attempt.
            self.logger.logger.warning(f"Preconditions not met for {self.agent_name}.")
            self.metrics["processed"] = int(self.metrics.get("processed", 0)) + 1
            self.metrics["last_run_at"] = datetime.utcnow().isoformat()
            state.add_agent_metric(self.agent_name, processed=1, latency_ms=0, errors=0)
            state.add_audit_entry(
                self.agent_name,
                "precondition_failed",
                {"note": "Preconditions not met, agent skipped."}
            )
            return state

        state.current_agent = self.agent_name
        state.agent_name = self.agent_name
        state.overall_status = ProcessingStatus.IN_PROGRESS

        try:
            updated_state = await self.execute(state, workflow_type)

            try:
                self._validate_postconditions(updated_state)
            except Exception as post_exc:
                # Postcondition problems are logged but never fatal.
                self.logger.logger.warning(f"Postcondition check raised for {self.agent_name}:{post_exc}")

            state.mark_agent_completed(self.agent_name)
            latency_ms = (time.time() - self.start_time) * 1000
            self._update_metrics(latency_ms)
            state.add_agent_metric(self.agent_name, processed=1, latency_ms=latency_ms)
            state.add_audit_entry(self.agent_name, action="Agent Successfully Executed", status=ProcessingStatus.COMPLETED, details={"latency_ms": latency_ms}, process_id=state.process_id)
            self.logger.logger.info(f"{self.agent_name}completed successfully in {latency_ms:.2f}ms.")
            return updated_state

        except Exception as e:
            latency_ms = (time.time() - self.start_time) * 1000 if self.start_time else 0.0
            self._update_metrics(latency_ms, error=True)
            state.add_agent_metric(self.agent_name, processed=1, latency_ms=latency_ms, errors=1)
            state.add_audit_entry(self.agent_name, "Error in Execution", {"error": str(e)})
            state.overall_status = ProcessingStatus.FAILED
            self.logger.logger.exception(f"{self.agent_name} failed: {e}")
            return state

    def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type=None) -> bool:
        """Override to add custom preconditions for agent execution.

        BUG FIX: :meth:`run` calls this with ``workflow_type``, which the
        original base signature did not accept (TypeError for any subclass
        relying on the default implementation). The parameter defaults to
        None so existing two-argument callers remain valid.
        """
        return True

    def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
        """Override to verify expected outcomes after agent execution."""
        return True

    def get_metrics(self) -> Dict[str, Any]:
        """Return a shallow copy of the metrics counters."""
        return dict(self.metrics)

    def reset_metrics(self):
        """Reset all counters to their initial values."""
        self.metrics = {
            "processed": 0,
            "errors": 0,
            "avg_latency_ms": None,
            "last_run_at": None,
        }

    async def health_check(self) -> Dict[str, Any]:
        """Perform a basic health check for the agent."""
        return {
            "agent": self.agent_name,
            "status": "Healthy",
            "Last Run": self.metrics.get("last_run_at"),
            "errors": self.metrics.get("errors", 0)
        }

    def _extract_business_context(self, state: InvoiceProcessingState) -> Dict[str, Any]:
        """Extract relevant invoice or PO context for reasoning logs."""
        context: Dict[str, Any] = {}
        if state.invoice_data:
            context["vendor"] = state.invoice_data.vendor_name
            context["invoice_id"] = state.invoice_data.invoice_id
            context["amount"] = state.invoice_data.total_amount
        if state.validation_result:
            try:
                context["validation_status"] = state.validation_result.validation_status.value
            except Exception:
                # validation_status may be a plain string rather than an enum.
                context["validation_status"] = str(state.validation_result.validation_status)
        if state.risk_assessment:
            context["risk_score"] = state.risk_assessment.risk_score
            context["risk_level"] = state.risk_assessment.risk_level.value if hasattr(state.risk_assessment.risk_level, "value") else str(state.risk_assessment.risk_level)
        return context

    def _should_escalate(self, state: InvoiceProcessingState, reason: str = None) -> bool:
        """Determine whether the workflow should escalate.

        Fails safe: if the state's own check raises, escalation is forced.
        """
        try:
            result = state.requires_escalation()
        except Exception:
            result = True
        if result:
            self.logger.logger.warning(f"Escalation triggered by {self.agent_name}:{reason or 'auto'}")
            state.escalation_required = True
            state.human_review_required = True
            state.add_audit_entry(self.agent_name, "Escalation Triggered", None, {"reason": reason or "auto"})
        return result

    def _log_decision(self, state: InvoiceProcessingState, decision: str,
                      reasoning: str, confidence: float = None, process_id: str = None):
        """Log and record an agent decision into the audit trail."""
        details: Dict[str, Any] = {
            "decision": decision,
            "reasoning": reasoning,
            "confidence": confidence,
        }
        self.logger.logger.info(f"{self.agent_name} decision:{decision}(confidence = {confidence})")
        state.add_audit_entry(self.agent_name, decision, None, details, process_id)
183
+
184
class AgentRegistry:
    """Registry for managing agent instances, keyed by agent name."""

    def __init__(self):
        # Mapping of agent name -> agent instance.
        self._agents: Dict[str, BaseAgent] = {}

    def register(self, agent: BaseAgent):
        """Add an agent; a second registration under the same name is ignored."""
        name = agent.agent_name
        if name in self._agents:
            print(f"{name} already registered - skipping")
            return
        self._agents[name] = agent

    def get(self, agent_name: str) -> Optional[BaseAgent]:
        """Look up an agent by name; None when it is not registered."""
        return self._agents.get(agent_name)

    def list_agents(self) -> List[str]:
        """Names of all registered agents."""
        return [*self._agents]

    def get_all_metrics(self) -> Dict[str, Dict[str, Any]]:
        """Snapshot of every agent's metrics, keyed by agent name."""
        return {name: inst.get_metrics() for name, inst in self._agents.items()}

    async def health_check_all(self) -> Dict[str, Dict[str, Any]]:
        """Run each registered agent's health_check and collect the reports."""
        reports: Dict[str, Dict[str, Any]] = {}
        for name, inst in self._agents.items():
            reports[name] = await inst.health_check()
        return reports
216
+
217
+
218
+
219
# Global agent registry instance shared across the application; modules import
# this singleton rather than constructing their own registry.
# (Removed the import-time debug print of the registry's object id.)
agent_registry = AgentRegistry()
Project/agents/document_agent.py ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Document Agent for Invoice Processing"""
3
+
4
+ # TODO: Implement agent
5
+
6
+ import os
7
+ import json
8
+ import re
9
+ import fitz # PyMuPDF
10
+ import pdfplumber
11
+ from typing import Dict, Any, Optional, List
12
+ import google.generativeai as genai
13
+ from dotenv import load_dotenv
14
+ from datetime import datetime
15
+
16
+ from agents.base_agent import BaseAgent
17
+ from state import (
18
+ InvoiceProcessingState, InvoiceData, ItemDetail,
19
+ ProcessingStatus, ValidationStatus
20
+ )
21
+ from utils.logger import StructuredLogger
22
+
23
+
24
# Load environment variables (e.g. the GEMINI_API_KEY_* values read below)
# from a local .env file before any key lookup happens.
load_dotenv()
logger = StructuredLogger("DocumentAgent")
26
+
27
def safe_json_parse(result_text: str):
    """Parse JSON out of an LLM response, tolerating Markdown code fences.

    Strips a leading ```lang fence and a trailing ``` fence, then attempts a
    straight parse; if the model wrapped the JSON in prose, falls back to the
    outermost {...} span. Re-raises JSONDecodeError when no object is found.
    """
    stripped = result_text.strip()
    cleaned = re.sub(r"^```[a-zA-Z]*\n|```$", "", stripped)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Fallback: take everything between the first "{" and the last "}".
        open_idx = cleaned.find("{")
        close_idx = cleaned.rfind("}") + 1
        if open_idx >= 0 and close_idx > 0:
            return json.loads(cleaned[open_idx:close_idx])
        raise
38
+
39
def to_float(value):
    """Coerce a numeric value or money-formatted string to float.

    Strings may contain commas and dollar signs ("$1,234.50"); anything
    unparseable — including None — yields 0.0.
    """
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        normalized = value.replace(',', '').replace('$', '').strip()
        try:
            return float(normalized)
        except (ValueError, TypeError):
            return 0.0
    return 0.0
48
+
49
def parse_date_safe(date_str):
    """Parse a date string in one of a few known formats.

    Tries "%b %d %Y", "%b %d, %Y", "%Y-%m-%d" and "%d-%b-%Y" in order;
    returns a datetime.date, or None for empty/unrecognized input.
    """
    if not date_str:
        return None
    candidate = date_str.strip()
    for fmt in ("%b %d %Y", "%b %d, %Y", "%Y-%m-%d", "%d-%b-%Y"):
        try:
            return datetime.strptime(candidate, fmt).date()
        except ValueError:
            continue
    return None
58
+
59
+
60
+ from collections import defaultdict
61
class APIKeyBalancer:
    """Pick the healthiest API key: fewest recorded errors, then least used.

    Usage and error counters are persisted to SAVE_FILE so the preference
    survives process restarts.
    """

    SAVE_FILE = "key_stats.json"

    def __init__(self, keys):
        self.keys = keys
        self.usage = defaultdict(int)
        self.errors = defaultdict(int)
        self.load()

    def load(self):
        """Restore persisted counters if a stats file exists."""
        if os.path.exists(self.SAVE_FILE):
            # BUG FIX: use a context manager — the original json.load(open(...))
            # leaked the file handle.
            with open(self.SAVE_FILE) as fh:
                data = json.load(fh)
            self.usage.update(data.get("usage", {}))
            self.errors.update(data.get("errors", {}))

    def save(self):
        """Persist the current counters (overwrites the stats file)."""
        # BUG FIX: context manager instead of a leaked open(..., "w") handle.
        with open(self.SAVE_FILE, "w") as fh:
            json.dump({
                "usage": self.usage,
                "errors": self.errors
            }, fh)

    def get_best_key(self):
        """Return the least-errored (ties broken by least-used) key and
        record one use of it."""
        best_key = min(self.keys, key=lambda k: (self.errors[k], self.usage[k]))
        self.usage[best_key] += 1
        self.save()
        return best_key

    def report_error(self, key):
        """Record a failure against *key* and persist immediately."""
        self.errors[key] += 1
        self.save()
91
+
92
+
93
# Shared module-level balancer built from the Gemini keys configured in the
# environment. os.getenv returns None for unset variables, so unset slots
# appear as None entries in the key list.
balancer = APIKeyBalancer([
    os.getenv("GEMINI_API_KEY_1"),
    os.getenv("GEMINI_API_KEY_2"),
    os.getenv("GEMINI_API_KEY_3"),
    # os.getenv("GEMINI_API_KEY_4"),
    os.getenv("GEMINI_API_KEY_5"),
    os.getenv("GEMINI_API_KEY_6"),
    # os.getenv("GEMINI_API_KEY_7"),
])
102
+
103
+
104
class DocumentAgent(BaseAgent):
    """Agent responsible for document processing and invoice data extraction.

    Pipeline: extract raw text from the PDF (PyMuPDF with a pdfplumber
    fallback), parse it into structured ``InvoiceData`` with Gemini,
    heuristically backfill missing fields, then score extraction confidence.
    """

    def __init__(self, config: Dict[str, Any] = None):
        super().__init__("document_agent", config)
        self.logger = StructuredLogger("DocumentAgent")
        # Pick the least-errored / least-used Gemini key from the balancer.
        # SECURITY FIX: the original printed the key to stdout — never log
        # or print the secret itself.
        self.api_key = balancer.get_best_key()
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel("gemini-2.5-flash")

    def generate(self, prompt):
        """Invoke the Gemini model with *prompt*.

        On failure the key is reported to the balancer (so future instances
        deprioritize it) and the exception is re-raised.
        """
        try:
            return self.model.generate_content(prompt)
        except Exception:
            balancer.report_error(self.api_key)
            raise

    def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type) -> bool:
        """The invoice file must exist on disk before extraction can run."""
        if not state.file_name or not os.path.exists(state.file_name):
            self.logger.logger.error(f"[Document Agent] Missing or invalid file: {state.file_name}")
            return False
        return True

    def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
        """Extraction succeeded iff invoice data exists with a positive total."""
        return bool(state.invoice_data and state.invoice_data.total > 0)

    async def execute(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
        """Run the extraction pipeline and attach ``InvoiceData`` to the state.

        On any failure the state is marked FAILED and escalation is evaluated.
        """
        self.logger.logger.info(f"Executing Document Agent for file: {state.file_name}")

        if not self._validate_preconditions(state, workflow_type):
            state.overall_status = ProcessingStatus.FAILED
            self._log_decision(state, "Extraction Failed", "Preconditions not met", confidence=0.0)
            # BUG FIX: the original fell through and attempted extraction on a
            # missing file; bail out once the failure has been recorded.
            return state

        try:
            raw_text = await self._extract_text_from_pdf(state.file_name)
            invoice_data = await self._parse_invoice_with_ai(raw_text)
            invoice_data = await self._enhance_invoice_data(invoice_data, raw_text)
            invoice_data.file_name = state.file_name
            state.invoice_data = invoice_data
            state.overall_status = ProcessingStatus.IN_PROGRESS
            state.current_agent = self.agent_name
            state.updated_at = datetime.utcnow()

            confidence = self._calculate_extraction_confidence(invoice_data, raw_text)
            state.invoice_data.extraction_confidence = confidence
            self._log_decision(
                state,
                "Extraction Successful",
                "PDF text successfully extracted and parsed by AI",
                confidence,
                state.process_id
            )
            return state
        except Exception as e:
            self.logger.logger.exception(f"[Document Agent] Extraction failed: {e}")
            state.overall_status = ProcessingStatus.FAILED
            self._should_escalate(state, reason=str(e))
            return state

    async def _extract_text_from_pdf(self, file_name: str) -> str:
        """Extract raw text via PyMuPDF, falling back to pdfplumber when the
        primary pass yields almost nothing (e.g. image-heavy PDFs)."""
        text = ""
        try:
            self.logger.logger.info("[DocumentAgent] Extracting text using PyMuPDF...")
            with fitz.open(file_name) as doc:
                for page in doc:
                    text += page.get_text()
            if len(text.strip()) < 5:
                raise ValueError("PyMuPDF extraction too short, switching to PDFPlumber")
        except Exception:
            self.logger.logger.info("[DocumentAgent] Fallback to PDFPlumber...")
            try:
                with pdfplumber.open(file_name) as pdf:
                    for page in pdf.pages:
                        text += page.extract_text() or ""
            except Exception as e2:
                # BUG FIX: the original lacked the f-prefix and logged the
                # literal string "{e2}" instead of the error.
                self.logger.logger.error(f"[DocumentAgent] PDFPlumber failed :{e2}")
                text = ""
        return text

    async def _parse_invoice_with_ai(self, text: str) -> InvoiceData:
        """Ask Gemini to convert raw invoice text into structured JSON, then
        map it into an ``InvoiceData`` instance."""
        self.logger.logger.info("[DocumentAgent] Parsing invoice data using Gemini AI...")
        prompt = f"""
        Extract structured invoice information as JSON with fields:
        invoice_number, order_id, customer_name, due_date, ship_to, ship_mode,
        subtotal, discount, shipping_cost, total, and item_details (item_name, quantity, rate, amount).

        Important Note: If an item description continues on multiple lines, combine them into one item_name. Check intelligently
        that if at all there will be more than one item then it should have more numbers.
        So extract by verifying that is there only one item or more than one.

        Input Text:
        {text[:8000]}
        """
        response = self.generate(prompt)
        result_text = response.text.strip()
        data = safe_json_parse(result_text)

        items = []
        for item in data.get("item_details", []):
            items.append(ItemDetail(
                item_name=item.get("item_name"),
                quantity=float(item.get("quantity", 1)),
                rate=to_float(item.get("rate", 0.0)),
                amount=to_float(item.get("amount", 0.0)),
            ))

        invoice_data = InvoiceData(
            invoice_number=data.get("invoice_number"),
            order_id=data.get("order_id"),
            customer_name=data.get("customer_name"),
            due_date=parse_date_safe(data.get("due_date")),
            ship_to=data.get("ship_to"),
            ship_mode=data.get("ship_mode"),
            subtotal=to_float(data.get("subtotal", 0.0)),
            discount=to_float(data.get("discount", 0.0)),
            shipping_cost=to_float(data.get("shipping_cost", 0.0)),
            total=to_float(data.get("total", 0.0)),
            item_details=items,
            raw_text=text,
        )
        # NOTE(review): _calculate_extraction_confidence returns a 0-100 score
        # but also stores a 0-1 value on the instance; the assignment below
        # keeps the 0-100 value, matching what execute() stores as well.
        invoice_data.extraction_confidence = self._calculate_extraction_confidence(invoice_data, text)
        self.logger.logger.info("AI output successfully parsed into JSON format")
        return invoice_data

    async def _enhance_invoice_data(self, invoice_data: InvoiceData, raw_text: str) -> InvoiceData:
        """Heuristic backfill: recover customer_name from the line following
        an "Invoice To" marker when the model left it empty."""
        if not invoice_data.customer_name and "Invoice To" in raw_text:
            lines = raw_text.split("\n")
            for i, line in enumerate(lines):
                # BUG FIX: guard i+1 — "Invoice To" on the final line caused
                # an IndexError in the original.
                if "Invoice To" in line and i + 1 < len(lines):
                    invoice_data.customer_name = lines[i + 1].strip()
                    break
        return invoice_data

    def _categorize_item(self, item_name: str) -> str:
        """Classify an item name into a product category via Gemini.

        NOTE(review): currently unused — the call site in
        _parse_invoice_with_ai is commented out; each call costs one request.
        """
        name = item_name.lower()
        prompt = f"""
        Extract the category of the Item from the item details very intelligently
        so that we can get the category in which the item belongs to very efficiently:
        Example: "Electronics", "Furniture", "Software", etc.....
        Input Text- The item is given below (provide the category in JSON format like -- category: 'extracted category') ---->
        {name}
        """
        response = self.generate(prompt)
        result_text = response.text.strip()
        category = safe_json_parse(result_text)
        return category['category']

    def _calculate_extraction_confidence(self, invoice_data: InvoiceData, raw_text: str) -> float:
        """
        Intelligent confidence scoring for extracted invoice data.
        Combines presence, consistency, and numeric sanity checks.
        Returns a percentage in [0, 99]; also stores the 0-1 fraction on
        ``invoice_data.extraction_confidence``.
        """
        score = 0.0
        weight = {
            "invoice_number": 0.1,
            "order_id": 0.05,
            "customer_name": 0.1,
            "due_date": 0.05,
            "ship_to": 0.05,
            "item_details": 0.25,
            "total_consistency": 0.25,
            "currency_detected": 0.05,
            "text_match_bonus": 0.1
        }

        text_lower = raw_text.lower()

        # Presence-based confidence
        if invoice_data.invoice_number:
            score += weight["invoice_number"]
        if invoice_data.order_id:
            score += weight["order_id"]
        if invoice_data.customer_name:
            score += weight["customer_name"]
        # due_date scores when its presence/absence agrees with the raw text.
        if invoice_data.due_date and "due_date" in text_lower:
            score += weight["due_date"]
        if not invoice_data.due_date and "due_date" not in text_lower:
            score += weight["due_date"]
        if invoice_data.item_details:
            score += weight["item_details"]

        # Currency detection
        if any(c in raw_text for c in ["$", "₹", "€", "usd", "inr", "eur"]):
            score += weight["currency_detected"]

        # Numeric consistency: the largest amount found in the text should be
        # close to the parsed total. (Uses the module-level `re`; the original
        # re-imported it locally twice.)
        numbers = [
            float(m.replace(",", "").replace("$", "").strip())
            for m in re.findall(r"\$?\s?\d{1,3}(?:,\d{3})*(?:\.\d{2})?", raw_text)
            if m
        ]
        if len(numbers) >= 3 and invoice_data.total:
            approx_total = max(numbers)
            diff = abs(approx_total - invoice_data.total)
            if diff < 5:  # minor difference allowed
                score += weight["total_consistency"]
            elif diff < 50:
                score += weight["total_consistency"] * 0.5

        # Textual verification: key fields should literally appear in the text.
        hits = 0
        for field in [invoice_data.customer_name, invoice_data.order_id, invoice_data.invoice_number]:
            if field and str(field).lower() in text_lower:
                hits += 1
        if hits >= 2:
            score += weight["text_match_bonus"]

        # Penalty for empty critical fields
        missing_critical = not invoice_data.total or not invoice_data.customer_name or not invoice_data.invoice_number
        if missing_critical:
            score *= 0.8

        # Clamp and finalize
        final_conf = round(min(score, 0.99), 2)
        invoice_data.extraction_confidence = final_conf
        return final_conf * 100.0

    async def health_check(self) -> Dict[str, Any]:
        """
        Perform health diagnostics for the Document Agent.
        Collects operational, performance, and API connectivity metrics.
        """
        executions = 0
        success_rate = 0.0
        avg_duration = 0.0
        failures = 0
        last_run = None

        if self.metrics:
            executions = self.metrics["processed"]
            # BUG FIX: avg_latency_ms is None before the first run; the
            # original crashed in round(None, 2) below.
            avg_duration = self.metrics["avg_latency_ms"] or 0.0
            failures = self.metrics["errors"]
            last_run = self.metrics["last_run_at"]
            success_rate = (executions - failures) / (executions + 1e-8)

        # API connectivity check (presence of a configured key only).
        gemini_ok = bool(self.api_key)
        api_status = "🟢 Active" if gemini_ok else "🔴 Missing Key"

        # Health logic: missing key or repeated failures degrade; a success
        # rate below 50% marks the agent unhealthy.
        overall_status = "🟢 Healthy"
        if not gemini_ok or failures > 3:
            overall_status = "🟠 Degraded"
        if executions > 0 and success_rate < 0.5:
            overall_status = "🔴 Unhealthy"

        metrics_data = {
            "Agent": "Document Agent 🧾",
            "Executions": executions,
            "Success Rate (%)": round(success_rate * 100, 2),
            "Avg Duration (ms)": round(avg_duration, 2),
            "Total Failures": failures,
            "API Status": api_status,
            "Last Run": str(last_run) if last_run else "Not applicable",
            "Overall Health": overall_status,
        }

        self.logger.logger.info(f"[HealthCheck] Document Agent metrics: {metrics_data}")
        return metrics_data
Project/agents/escalation_agent.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Escalation Agent for Invoice Processing"""
3
+
4
+ # TODO: Implement agent
5
+
6
+ import os
7
+ import json
8
+ import smtplib
9
+ from email.mime.text import MIMEText
10
+ from email.mime.multipart import MIMEMultipart
11
+ from typing import Dict, Any, List, Optional
12
+ from datetime import datetime, timedelta
13
+ import google.generativeai as genai
14
+ from dotenv import load_dotenv
15
+
16
+ from agents.base_agent import BaseAgent
17
+ from state import (
18
+ InvoiceProcessingState, ProcessingStatus, PaymentStatus,
19
+ RiskLevel, ValidationStatus
20
+ )
21
+ from utils.logger import StructuredLogger
22
+
23
+ load_dotenv()
24
+
25
+
26
+ class EscalationAgent(BaseAgent):
27
+ """Agent responsible for escalation management and human-in-the-loop workflows"""
28
+
29
    def __init__(self, config: Dict[str, Any] = None):
        """Initialize the escalation agent and its static routing table."""
        super().__init__("escalation_agent",config)
        self.logger = StructuredLogger("EscalationAgent")

        # Routing table: escalation type -> responsible approver role and the
        # SLA window (hours) within which that approver must respond.
        self.escalation_triggers = {
            'high_risk' : {'route_to':'risk_manager','sla_hours':4},
            'validation_failure': {'route_to':'finance_manager','sla_hours':8},
            'high_value': {'route_to':'cfo','sla_hours':24},
            'fraud_suspicion': {'route_to':'fraud_team','sla_hours':2},
            'new_vendor':{'route_to':'procurement','sla_hours':48}
        }
40
+
41
+ def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type) -> bool:
42
+ # pass
43
+ return hasattr(state,'invoice_data') and hasattr(state,'risk_assessment')
44
+
45
+ def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
46
+ # pass
47
+ return hasattr(state,'escalation_details')
48
+
49
    async def execute(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
        """Decide whether the invoice needs escalation and, if so, build the
        escalation record, notify the approver and arm SLA monitoring.

        Returns the (mutated) state; overall_status becomes 'completed' when
        no escalation is needed, 'escalated' otherwise, FAILED on bad input.
        """
        self.logger.logger.info('Executing Escalation Agent...')
        if not self._validate_preconditions(state, workflow_type):
            self.logger.logger.error("Preconditions not meet for Escalation handling")
            # NOTE(review): sets state.status here, while the rest of this
            # method uses state.overall_status — confirm which field the
            # workflow reads.
            state.status = ProcessingStatus.FAILED
            self._log_decision(state, "Escalation Agent Failed", "Preconditions not met", confidence=0.0)
            return state

        escalation_type = self._determine_escalation_type(state)
        if not escalation_type:
            # Nothing to escalate: mark the invoice as fully processed.
            self.logger.logger.info("No escalation required for this invoice.")
            state.escalation_required = False
            state.overall_status = 'completed'
            return state

        # Build routing details, a human-readable summary, and the record.
        priority_level = self._calculate_priority_level(state)
        approver_info = self._route_to_approver(state, escalation_type,priority_level)
        summary = await self._generate_escalation_summary(state,escalation_type,approver_info)

        escalation_record = await self._create_escalation_record(state, escalation_type, priority_level, approver_info,summary)
        await self._send_escalation_notifications(state,escalation_record,approver_info)
        # _setup_sla_monitoring is defined elsewhere in this class (outside
        # this view).
        await self._setup_sla_monitoring(state,escalation_record,priority_level)

        state.escalation_required = True
        state.human_review_required = True
        state.escalation_details = escalation_record
        # NOTE(review): this overwrites the boolean human_review_required set
        # two lines above with the summary string — looks like it was meant
        # to target a different field (e.g. a review-summary attribute).
        state.human_review_required = summary
        state.escalation_reason = escalation_record["escalation_reason"]
        state.current_agent = 'escalation_agent'
        state.overall_status = 'escalated'
        self._log_decision(
            state,
            "Escalation Successful",
            "PDF successfully escalated to Human for review",
            "N/A",
            state.process_id
        )
        self.logger.logger.info('Escalation record successfully created and routed.')
        return state
89
+
90
+ def _determine_escalation_type(self, state: InvoiceProcessingState) -> str:
91
+ # pass
92
+ risk = getattr(state,'risk_assessment',{})
93
+ validation = getattr(state,'validation_result',{})
94
+ invoice = getattr(state,'invoice_data',{})
95
+ risk_level = getattr(risk,'risk_level',{})
96
+ amount = getattr(invoice,'total',0)
97
+ vendor = getattr(invoice,'customer_name','')
98
+ # fraud_indicators = risk.get('fraud_indicators',[])
99
+ fraud_indicators = getattr(risk,'fraud_indicators',[])
100
+
101
+ if risk_level in ['high','critical']:
102
+ return 'high_risk'
103
+ elif state.validation_status == 'invalid' or state.validation_status == 'missing_po':
104
+ return 'validation_failure'
105
+ elif amount and amount>250000:
106
+ return 'high_value'
107
+ elif len(fraud_indicators) > 3:
108
+ return 'fraud_suspicion'
109
+ elif vendor and 'new' in vendor.lower():
110
+ return 'new_vendor'
111
+ else:
112
+ return None
113
+
114
+ def _calculate_priority_level(self, state: InvoiceProcessingState) -> str:
115
+ # pass
116
+ # risk = getattr(state,'risk_assessment',{}).get('risk_level','low').lower()
117
+ # amount = getattr(state,'invoice_data',{}).get('total',0)
118
+ risk_assessment = getattr(state,'risk_assessment',{})
119
+ invoice_data = getattr(state,'invoice_data',{})
120
+ risk = getattr(risk_assessment,'risk_level','low').lower()
121
+ amount = getattr(invoice_data,'total',0)
122
+ if risk == 'critical' or amount > 50000:
123
+ return 'urgent'
124
+ elif risk == 'high' or amount > 25000:
125
+ return 'high'
126
+ else:
127
+ return 'medium'
128
+
129
+ def _route_to_approver(self, state: InvoiceProcessingState,
130
+ escalation_type: str, priority_level: str) -> Dict[str, Any]:
131
+ # pass
132
+ # print(self.escalation_triggers)
133
+ route_info = self.escalation_triggers.get(escalation_type,{})
134
+ # print("route_info..................", route_info)
135
+ assigned_to = route_info.get('route_to','finance_manager')
136
+ sla_hours = route_info.get('sla_hours',8)
137
+ approvers = ['finance_manager']
138
+ if assigned_to == 'cfo':
139
+ approvers.append('cfo')
140
+ return {
141
+ 'assigned_to':assigned_to,
142
+ 'sla_hours':sla_hours,
143
+ 'approval_required_from':approvers
144
+ }
145
+
146
+
147
+ def _parse_date(self, date_str: str) -> Optional[datetime.date]:
148
+ # pass
149
+ try:
150
+ return datetime.strptime(date_str,"%Y-%m-%d").date()
151
+ except Exception:
152
+ return None
153
+
154
+ async def _generate_escalation_summary(self, state: InvoiceProcessingState,
155
+ escalation_type: str, approver_info: Dict[str, Any]) -> str:
156
+ # pass
157
+
158
+ risk = getattr(state,'risk_assessment',{})
159
+ invoice = getattr(state,'invoice_data',{})
160
+ risk_level = getattr(risk,'risk_level',{})
161
+ amount = getattr(invoice,'total',0)
162
+ # invoice = state.invoice_data
163
+ # risk = state.risk_assessment
164
+ reason = ""
165
+
166
+ if escalation_type == 'high_risk':
167
+ reason = f"Invoice marked as high risk ({risk_level})."
168
+ elif escalation_type == 'validation_failure':
169
+ reason = 'Validation discrepancies require finance approval.'
170
+ elif escalation_type == 'high_value':
171
+ reason = f"High-value invoice ({amount}) requires CFO approval."
172
+ elif escalation_type == 'fraud_suspicion':
173
+ reason = 'Fraud suspicion based on anomalies detected'
174
+ elif escalation_type == 'new_vendor':
175
+ reason = 'Vendor is new and not yet in approved list.'
176
+ return f"{reason} Routed to {approver_info['assigned_to']} for review."
177
+
178
+
179
+ async def _create_escalation_record(self, state: InvoiceProcessingState,
180
+ escalation_type: str, priority_level: str,
181
+ approver_info: Dict[str, Any], summary: str) -> Dict[str, Any]:
182
+ # pass
183
+ timestamp = datetime.utcnow()
184
+ sla_deadline = timestamp+timedelta(hours=approver_info['sla_hours'])
185
+ return {
186
+ 'escalation_type':escalation_type,
187
+ 'severity':priority_level,
188
+ 'assigned_to':approver_info['assigned_to'],
189
+ 'escalation_time':timestamp.isoformat()+'Z',
190
+ 'sla_deadline':sla_deadline.isoformat()+'Z',
191
+ 'notification_sent':True,
192
+ 'approval_required_from':approver_info['approval_required_from'],
193
+ 'escalation_reason':summary
194
+ }
195
+
196
+
197
+ async def _send_escalation_notifications(self, state: InvoiceProcessingState,
198
+ escalation_record: Dict[str, Any],
199
+ approver_info: Dict[str, Any]) -> Dict[str, Any]:
200
+ # pass
201
+ try:
202
+ subject = f"[Escalation Alert] Invoice requires {approver_info['assigned_to']} review"
203
+ body = f"""
204
+ Escalation Type: {escalation_record['escalation_type']}
205
+ severity: {escalation_record['severity']}
206
+ SLA Deadline: {escalation_record['sla_deadline']}
207
+ reason: {escalation_record['escalation_reason']}
208
+ """
209
+ to_email = f"{approver_info['assigned_to']}@company.com"
210
+ self._send_email(to_email,subject,body)
211
+ self.logger.logger.info(f"Escalation notification send to {to_email}.")
212
+ return {'status':'send','to':to_email}
213
+ except Exception as e:
214
+ self.logger.logger.error(f'Failed to send notification: {e}')
215
+ return {'status':'failed','error':str(e)}
216
+
217
+ def _send_email(self, to_email: str, subject: str, body: str) -> Dict[str, Any]:
218
+ # pass
219
+ try:
220
+ sender = os.getenv('EMAIL_SENDER','noreply@invoicesystem.com')
221
+ msg = MIMEMultipart()
222
+ msg['From'] = send
223
+ msg['To'] = to_email
224
+ msg['Subject'] = subject
225
+ msg.attach(MIMEText(body,'plain'))
226
+ with smtplib.SMTP('localhost') as server:
227
+ server.send_message(msg)
228
+ return {'sent':True}
229
+ except Exception as e:
230
+ return {'sent':False, 'error':str(e)}
231
+
232
+
233
+ async def _setup_sla_monitoring(self, state: InvoiceProcessingState,
234
+ escalation_record: Dict[str, Any], priority_level: str):
235
+ # pass
236
+ self.logger.logger.debug(
237
+ f"SLA monitoring initialized for {escalation_record['escalation_type']}"
238
+ f"with deadline {escalation_record['sla_deadline']}"
239
+ )
240
+
241
+ async def resolve_escalation(self, escalation_id: str, resolution: str,
242
+ resolver: str) -> Dict[str, Any]:
243
+ # pass
244
+ return {
245
+ 'escalation_id':escalation_id,
246
+ 'resolved_by':resolver,
247
+ 'resolution_notes':resolution,
248
+ 'resolved_at':datetime.utcnow().isoformat()+'Z',
249
+ 'status':'resolved'
250
+ }
251
+
252
    async def health_check(self) -> Dict[str, Any]:
        """
        Performs a detailed health check for the Escalation Agent.
        Includes operational metrics, configuration validation, and reliability stats.

        Returns a human-readable report dict; on any internal error a
        degraded-health dict is returned instead of raising.
        """

        start_time = datetime.utcnow()
        self.logger.logger.info("Performing health check for EscalationAgent...")

        # Defaults used when no metrics have been recorded yet.
        executions = 0
        avg_duration = 0.0
        failures = 0
        last_run = None  # NOTE(review): assigned below but unused; the report reads self.metrics directly
        success_rate = 0.0

        try:
            if self.metrics:
                # NOTE(review): assumes self.metrics is a dict with these exact
                # keys (populated elsewhere) — confirm against BaseAgent.
                executions = self.metrics["processed"]
                avg_duration = self.metrics["avg_latency_ms"]
                failures = self.metrics["errors"]
                last_run = self.metrics["last_run_at"]
                # epsilon in the denominator guards against division by zero,
                # though the `executions > 0` condition already prevents it.
                success_rate = (executions - failures) / (executions + 1e-8) * 100.0 if executions > 0 else 0.0

            total_executions = executions
            total_failures = failures
            avg_duration_ms = avg_duration

            # Email and trigger configuration validation
            email_configured = bool(os.getenv('EMAIL_SENDER'))  # NOTE(review): computed but only used by commented-out report line
            missing_triggers = [k for k, v in self.escalation_triggers.items() if not v.get("route_to")]

            # Duration calculation
            # duration_ms = (datetime.utcnow() - start_time).total_seconds() * 1000
            # last_run = self.metrics["last_run_at"]

            health_report = {
                "Agent": "Escalation Agent 🚨",
                "Executions": total_executions,
                "Success Rate (%)": round(success_rate, 2),
                # avg_duration_ms == 0.0 is treated as "never called"
                "Avg Duration (ms)": round(avg_duration_ms, 2) if avg_duration_ms else "Not Called",
                "Total Failures": total_failures,
                # "Email Configured": email_configured,
                # "Available Triggers": list(self.escalation_triggers.keys()),
                "Missing Routes": missing_triggers,
                # NOTE(review): raises (and falls into the except branch) when
                # self.metrics is falsy/None — presumably intended; confirm.
                "Last Run": self.metrics["last_run_at"],
                # A zero-execution agent is reported healthy by design.
                "Overall Health": "🟢 Healthy" if (success_rate > 70 or total_executions == 0) else "Degraded ⚠️",
                # "Response Time (ms)": round(duration_ms, 2)
                # "Timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
            }

            self.logger.logger.info("EscalationAgent health check completed successfully.")
            return health_report

        except Exception as e:
            error_time = (datetime.utcnow() - start_time).total_seconds() * 1000  # NOTE(review): computed but unused
            self.logger.logger.error(f"Health check failed: {e}")

            # Return degraded health if something goes wrong
            return {
                "Agent": "EscalationAgent ❌",
                "Overall Health": "Degraded",
                "Error": str(e),
                "Timestamp": datetime.utcnow().isoformat() + "Z"
            }
Project/agents/forecast_agent.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # agents/forecast_agent.py
3
+ """
4
+ Forecast Agent (robust)
5
+ - Accepts a list of invoice states (dicts or InvoiceProcessingState models).
6
+ - Produces monthly historical spend and a simple forecast (moving average).
7
+ - Performs lightweight anomaly detection.
8
+ - Returns a dict containing a Plotly chart and numeric summary.
9
+ """
10
+ from typing import List, Dict, Any, Union
11
+ from datetime import datetime
12
+ import pandas as pd
13
+ import plotly.express as px
14
+ import plotly.graph_objects as go
15
+ import math
16
+ import os
17
+
18
+ # keep the type import only for hints; we do NOT require reconstructing models
19
+ try:
20
+ from state import InvoiceProcessingState
21
+ except Exception:
22
+ InvoiceProcessingState = None # type: ignore
23
+
24
+
25
class ForecastAgent:
    """Turns a heterogeneous list of invoice states into a monthly spend
    history, a naive (mean-repeat) forecast, and a lightweight anomaly list.

    Inputs may be plain dicts or pydantic-like models; normalization is
    deliberately defensive and never raises per-row.
    """

    def __init__(self):
        # Stateless agent — nothing to configure.
        pass

    # ---- Internal: normalize input states -> DataFrame ----
    def _normalize_states_to_df(self, states: List[Union[dict, object]]) -> pd.DataFrame:
        """
        Accepts list of dicts or model instances.
        Produces a cleaned DataFrame with columns:
        ['file_name','invoice_date','due_date','total','vendor','risk_score','status']
        plus a derived 'date' column (due_date, falling back to invoice_date).
        Malformed entries are skipped silently.
        """
        rows = []
        for s in states:
            try:
                # 1) obtain a plain dict representation without constructing pydantic models
                if isinstance(s, dict):
                    raw = dict(s)
                else:
                    # model-like object: try model_dump, to_dict, or __dict__
                    if hasattr(s, "model_dump"):
                        raw = s.model_dump(exclude_none=False)
                    elif hasattr(s, "dict"):
                        raw = s.dict()
                    else:
                        # best effort: convert attributes to dict
                        # NOTE(review): dir()-based scraping also evaluates
                        # properties, which may have side effects — confirm.
                        raw = {
                            k: getattr(s, k)
                            for k in dir(s)
                            if not k.startswith("_") and not callable(getattr(s, k))
                        }

                # 2) sanitize well-known problematic fields that break pydantic elsewhere
                if "human_review_required" in raw and isinstance(raw["human_review_required"], str):
                    v = raw["human_review_required"].strip().lower()
                    raw["human_review_required"] = v in ("true", "yes", "1", "required")
                if "escalation_details" in raw and isinstance(raw["escalation_details"], dict):
                    # convert to string summary so downstream code doesn't expect a dict
                    try:
                        raw["escalation_details"] = str(raw["escalation_details"])
                    except Exception:
                        raw["escalation_details"] = ""

                # 3) pull invoice_data safely (may be None, dict, or model)
                inv = {}
                if raw.get("invoice_data") is None:
                    inv = {}
                else:
                    inv_raw = raw.get("invoice_data")
                    if isinstance(inv_raw, dict):
                        inv = dict(inv_raw)
                    else:
                        # model-like invoice_data
                        if hasattr(inv_raw, "model_dump"):
                            inv = inv_raw.model_dump(exclude_none=False)
                        elif hasattr(inv_raw, "dict"):
                            inv = inv_raw.dict()
                        else:
                            # fallback: read attributes
                            inv = {
                                k: getattr(inv_raw, k)
                                for k in dir(inv_raw)
                                if not k.startswith("_") and not callable(getattr(inv_raw, k))
                            }

                # 4) turnout the row items we care about
                # NOTE(review): `or`-chains treat 0/0.0 as missing — a genuinely
                # zero-total invoice falls through to the next source; confirm intended.
                total = inv.get("total") or inv.get("amount") or raw.get("total") or 0.0
                # risk may be under risk_assessment.risk_score or top-level
                risk_src = raw.get("risk_assessment") or {}
                if isinstance(risk_src, dict):
                    risk_score = risk_src.get("risk_score") or 0.0
                else:
                    # model-like risk_assessment
                    if hasattr(risk_src, "model_dump"):
                        try:
                            risk_score = risk_src.model_dump().get("risk_score", 0.0)
                        except Exception:
                            risk_score = 0.0
                    else:
                        risk_score = getattr(risk_src, "risk_score", 0.0)

                # dates: prefer due_date then invoice_date - they could be strings or datetimes
                due = inv.get("due_date") or inv.get("invoice_date") or raw.get("due_date") or raw.get("invoice_date")
                vendor = inv.get("customer_name") or inv.get("vendor_name") or raw.get("vendor") or raw.get("customer_name") or "Unknown"
                file_name = inv.get("file_name") or raw.get("file_name") or "unknown"

                rows.append(
                    {
                        "file_name": file_name,
                        "due_date": due,
                        "invoice_date": inv.get("invoice_date") or raw.get("invoice_date"),
                        "total": total,
                        "vendor": vendor,
                        "risk_score": risk_score,
                        "status": raw.get("overall_status") or inv.get("status") or "unknown",
                    }
                )
            except Exception:
                # skip malformed state
                continue

        df = pd.DataFrame(rows)
        if df.empty:
            return df

        # coerce and normalize: unparseable dates become NaT, unparseable
        # numbers become 0.0 so downstream aggregation never raises.
        df["due_date"] = pd.to_datetime(df["due_date"], errors="coerce")
        df["invoice_date"] = pd.to_datetime(df["invoice_date"], errors="coerce")
        # if due_date missing, fallback to invoice_date
        df["date"] = df["due_date"].fillna(df["invoice_date"])
        df["total"] = pd.to_numeric(df["total"], errors="coerce").fillna(0.0)
        df["risk_score"] = pd.to_numeric(df["risk_score"], errors="coerce").fillna(0.0)
        df["vendor"] = df["vendor"].fillna("Unknown")
        return df

    # ---- Public: predict monthly cashflow and return a plotly chart ----
    def predict_cashflow(self, states: List[Union[dict, object]], months: int = 6) -> Dict[str, Any]:
        """
        Produces a monthly historical spend + simple forecast for `months` into the future.
        The forecast simply repeats the historical monthly mean (interpretable and safe).
        Returns:
            {
                "chart": plotly_figure,
                "average_monthly_spend": float,
                "total_forecast": float,
                "forecast_values": {month_str: float, ...},
                "historical": pandas.Series,
                "forecast_start_month": str,
                "forecast_end_month": str
            }
        or {"message": ..., "chart": None} when there is nothing to forecast.
        """
        df = self._normalize_states_to_df(states)
        if df.empty or df["date"].dropna().empty:
            return {"message": "No data to forecast", "chart": None}

        # create monthly buckets (period start)
        df = df.dropna(subset=["date"])
        df["month"] = df["date"].dt.to_period("M").dt.to_timestamp()
        monthly_hist = df.groupby("month")["total"].sum().sort_index()

        # compute average monthly spend from available historical months
        average_month = float(monthly_hist.mean()) if not monthly_hist.empty else 0.0

        # build forecast months (next `months` starting from the next month after last historical)
        last_hist_month = monthly_hist.index.max()
        if pd.isnull(last_hist_month):
            start_month = pd.Timestamp.now().to_period("M").to_timestamp()
        else:
            # next month
            start_month = (last_hist_month + pd.offsets.MonthBegin(1)).normalize()

        forecast_index = pd.date_range(start=start_month, periods=months, freq="MS")
        # simple forecast: repeat the historical mean (interpretable and safe)
        forecast_vals = [average_month for _ in range(len(forecast_index))]

        # build plot dataframe (historical + forecast)
        hist_df = monthly_hist.reset_index().rename(columns={"month": "date", "total": "amount"})
        hist_df["type"] = "Historical"
        fc_df = pd.DataFrame({"date": forecast_index, "amount": forecast_vals})
        fc_df["type"] = "Forecast"
        plot_df = pd.concat([hist_df, fc_df], ignore_index=True).sort_values("date")

        # prepare a plotly figure with clear styling
        fig = go.Figure()
        # historical - solid line
        hist_plot = plot_df[plot_df["type"] == "Historical"]
        if not hist_plot.empty:
            fig.add_trace(go.Scatter(
                x=hist_plot["date"],
                y=hist_plot["amount"],
                mode="lines+markers",
                name="Historical Spend",
                line=dict(dash="solid"),
            ))
        # forecast - dashed line
        fc_plot = plot_df[plot_df["type"] == "Forecast"]
        if not fc_plot.empty:
            fig.add_trace(go.Scatter(
                x=fc_plot["date"],
                y=fc_plot["amount"],
                mode="lines+markers",
                name="Forecast",
                line=dict(dash="dash"),
                marker=dict(symbol="circle-open")
            ))

        fig.update_layout(
            title="Monthly Spend (Historical + Forecast)",
            xaxis_title="Month",
            yaxis_title="Total Spend (USD)",
            hovermode="x unified",
            template="plotly_dark",
        )

        forecast_series = pd.Series(forecast_vals, index=[d.strftime("%Y-%m") for d in forecast_index])
        total_forecast = float(forecast_series.sum())

        result = {
            "chart": fig,
            "average_monthly_spend": round(average_month, 2),
            "total_forecast": round(total_forecast, 2),
            "forecast_values": forecast_series.to_dict(),
            "historical": monthly_hist,
            "forecast_start_month": forecast_index[0].strftime("%Y-%m"),
            "forecast_end_month": forecast_index[-1].strftime("%Y-%m"),
        }
        return result

    # ---- Public: detect anomalies on sanitized data ----
    def detect_anomalies(self, states: List[Union[dict, object]]) -> pd.DataFrame:
        """
        Returns DataFrame of anomalies:
          - total > 2 * mean(total)
          - OR risk_score >= 0.7
        Columns returned: ['file_name','invoice_date','vendor','total','risk_score','anomaly_reason']
        (empty DataFrame when there is no input or no anomaly).
        """
        df = self._normalize_states_to_df(states)
        if df.empty:
            return pd.DataFrame()

        mean_spend = df["total"].mean()
        cond = (df["total"] > mean_spend * 2) | (df["risk_score"] >= 0.7)
        anomalies = df.loc[cond, ["file_name", "date", "vendor", "total", "risk_score"]].copy()
        if anomalies.empty:
            return pd.DataFrame()
        anomalies = anomalies.rename(columns={"date": "invoice_date"})
        # "High Spend" wins when both conditions hold.
        anomalies["anomaly_reason"] = anomalies.apply(
            lambda r: "High Spend" if r["total"] > mean_spend * 2 else "High Risk",
            axis=1,
        )
        return anomalies.reset_index(drop=True)
Project/agents/insights_agent.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # agents/insights_agent.py
3
+ """
4
+ Insight Agent
5
+ -------------
6
+ Generates analytical and visual insights from processed invoices.
7
+ """
8
+
9
+ import pandas as pd
10
+ import plotly.express as px
11
+ from typing import List, Dict, Any
12
+ from state import InvoiceProcessingState
13
+
14
+
15
class InsightAgent:
    """Builds tabular analytics and Plotly charts from processed invoice
    states (``InvoiceProcessingState`` instances or equivalent dicts)."""

    def __init__(self):
        # Stateless agent — nothing to configure.
        pass

    def _extract_invoice_records(self, results: List[InvoiceProcessingState]) -> pd.DataFrame:
        """Extract flat invoice info for analysis.

        Dict inputs are coerced through ``InvoiceProcessingState`` (and
        silently dropped if they do not validate); missing sub-objects
        simply yield ``None`` columns via ``getattr`` defaults.
        """
        records = []
        for r in results:
            if isinstance(r, dict):
                # Convert dict to InvoiceProcessingState if needed
                try:
                    r = InvoiceProcessingState(**r)
                except Exception:
                    continue

            inv = getattr(r, "invoice_data", None)
            risk = getattr(r, "risk_assessment", None)
            val = getattr(r, "validation_result", None)
            pay = getattr(r, "payment_decision", None)

            # getattr with a default tolerates any of the sub-objects being None.
            records.append({
                "file_name": getattr(inv, "file_name", None),
                "invoice_number": getattr(inv, "invoice_number", None),
                "customer_name": getattr(inv, "customer_name", None),
                "invoice_date": getattr(inv, "invoice_date", None),
                "total": getattr(inv, "total", None),
                "validation_status": getattr(val, "validation_status", None),
                "risk_score": getattr(risk, "risk_score", None),
                "risk_level": getattr(risk, "risk_level", None),
                "payment_status": getattr(pay, "status", None),
                "decision": getattr(pay, "decision", None),
            })

        df = pd.DataFrame(records)
        if df.empty:
            return pd.DataFrame()

        # Clean up data: unparseable totals/scores become 0.0, missing vendors
        # are labelled explicitly.
        df["customer_name"] = df["customer_name"].fillna("Unknown Vendor")
        df["total"] = pd.to_numeric(df["total"], errors="coerce").fillna(0.0)
        df["risk_score"] = pd.to_numeric(df["risk_score"], errors="coerce").fillna(0.0)
        return df

    def generate_insights(self, results: List[InvoiceProcessingState]) -> Dict[str, Any]:
        """Generate charts and textual summary.

        Returns ``{"summary": str, "charts": [plotly figures]}``; when no
        usable records exist the charts list is empty and the summary says so.
        """
        df = self._extract_invoice_records(results)
        if df.empty:
            return {"summary": "No data available for insights.", "charts": []}

        charts = []

        # 🔹 Total spend per customer
        if "customer_name" in df.columns:
            spend_chart = px.bar(
                df.groupby("customer_name", as_index=False)["total"].sum(),
                x="customer_name",
                y="total",
                title="Total Spend per Customer"
            )
            charts.append(spend_chart)

        # 🔹 Risk distribution
        if "risk_level" in df.columns:
            risk_chart = px.pie(
                df,
                names="risk_level",
                title="Risk Level Distribution"
            )
            charts.append(risk_chart)

        # 🔹 Validation status counts
        if "validation_status" in df.columns:
            # groupby().size() with as_index=False yields a "size" column.
            val_chart = px.bar(
                df.groupby("validation_status", as_index=False).size(),
                x="validation_status",
                y="size",
                title="Validation Status Overview"
            )
            charts.append(val_chart)

        # 🔹 Summary text
        total_spend = df["total"].sum()
        high_risk = (df["risk_score"] >= 0.7).sum()
        # Comparing via str.lower() tolerates enum values rendered as e.g.
        # "valid" or "VALID" — NOTE(review): enum reprs like
        # "ValidationStatus.VALID" would NOT match; confirm stored form.
        valid_invoices = (df["validation_status"].astype(str).str.lower() == "valid").sum()

        summary = (
            f"💰 **Total Spend:** ₹{total_spend:,.2f}\n\n"
            f"📄 **Invoices Processed:** {len(df)}\n\n"
            f"✅ **Valid Invoices:** {valid_invoices}\n\n"
            f"⚠️ **High Risk Invoices:** {high_risk}\n\n"
        )

        return {"summary": summary, "charts": charts}
Project/agents/payment_agent.py ADDED
@@ -0,0 +1,348 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Payment Agent for Invoice Processing"""
3
+
4
+ # TODO: Implement agent
5
+
6
+ import os
7
+ import json
8
+ import requests
9
+ from typing import Dict, Any, Optional
10
+ from datetime import datetime, timedelta
11
+ import google.generativeai as genai
12
+ from dotenv import load_dotenv
13
+ import time
14
+ import requests
15
+
16
+ from agents.base_agent import BaseAgent
17
+ from state import (
18
+ InvoiceProcessingState, PaymentDecision, PaymentStatus,
19
+ RiskLevel, ValidationStatus, ProcessingStatus, RiskAssessment
20
+ )
21
+ from utils.logger import StructuredLogger
22
+
23
+ load_dotenv()
24
+
25
+
26
class PaymentAgent(BaseAgent):
    """Agent responsible for payment processing decisions and execution.

    Decides auto-pay / hold / manual-approval / reject based on validation
    and risk results, submits approved payments to the payment API, and
    tracks its own health metrics.
    """
    # Persistent in-memory history (like validation agent); class-level so it
    # survives across agent instances within the process. Kept to last 50.
    health_history = []

    def __init__(self, config: Dict[str, Any] = None):
        super().__init__("payment_agent", config)
        self.logger = StructuredLogger("PaymentAgent")
        self.approved_vendor_list = ["Acme Corporation", "TechNova Ltd", "SupplyCo"]
        self.retry_limit = 3
        # Health metrics tracking
        self.total_executions = 0
        self.successful_executions = 0
        self.failed_executions = 0
        self.total_duration = 0.0
        self.last_transaction_id = None
        self.last_run = None

    def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type) -> bool:
        """Check that the state carries what this agent needs for the given workflow."""
        if workflow_type == "expedited":
            # BUG FIX: the original evaluated `validation_status.VALID` — an
            # enum-attribute lookup that is always truthy — so the check never
            # actually verified the validation outcome. Compare against the
            # enum member and guard against a missing validation result.
            return bool(
                state.validation_result
                and state.validation_result.validation_status == ValidationStatus.VALID
                and state.invoice_data
            )
        else:
            return bool(state.risk_assessment and state.invoice_data)

    def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
        """A payment decision must exist after execution."""
        return bool(state.payment_decision)

    async def execute(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
        """Run the payment stage: decide, execute, and record the outcome.

        The "expedited" workflow skips risk assessment (a synthetic low-risk
        assessment is used) and auto-approves; the standard workflow derives
        the decision from risk + validation results. Health metrics are
        updated in ``finally`` regardless of outcome.
        """
        start_time = time.time()
        try:
            if not self._validate_preconditions(state, workflow_type):
                state.overall_status = ProcessingStatus.FAILED
                self._log_decision(state, "Payment Agent Failed", "Preconditions not met", confidence=0.0)
                return state

            invoice_data = state.invoice_data
            validation_result = state.validation_result
            if workflow_type == "expedited":
                # Synthetic low-risk assessment: expedited runs bypass the risk agent.
                # NOTE(review): requires_human_review receives a string here —
                # confirm the RiskAssessment field coerces/accepts it.
                risk_assessment = RiskAssessment(
                    risk_level = RiskLevel.LOW,
                    risk_score = 0.3,
                    fraud_indicators = None,
                    compliance_issues = None,
                    recommendation = None,
                    reason = "Expedited Workflow Called",
                    requires_human_review = "Not needed due to Expedited Workflow"
                )
                payment_decision = PaymentDecision(
                    decision = "auto_pay",
                    status = PaymentStatus.APPROVED,
                    approved_amount = invoice_data.total,
                    transaction_id = f"TXN-{datetime.utcnow().strftime('%Y-%m-%d-%H%M%S')}",
                    payment_method = self._select_payment_method(invoice_data.total),
                    approval_chain = ["system_auto_approval"],
                    rejection_reason = None,
                    scheduled_date = self._calculate_payment_date(invoice_data.due_date, "ACH")
                )
                payment_result = await self._execute_payment(invoice_data, payment_decision)
                payment_decision = self._update_payment_decision(payment_decision, payment_result)

                justification = await self._generate_payment_justification(
                    invoice_data, payment_decision, validation_result, risk_assessment
                )

                state.payment_decision = payment_decision
                state.overall_status = ProcessingStatus.COMPLETED
                state.current_agent = "payment_agent"
                # success criteria: only an APPROVED execution counts as success
                if payment_decision.status == PaymentStatus.APPROVED:
                    self.successful_executions += 1
                else:
                    self.failed_executions += 1

                self.last_transaction_id = payment_decision.transaction_id
                self._log_decision(state, payment_decision.status, justification, 95.0, state.process_id)
                return state
            else:
                risk_assessment = state.risk_assessment

                payment_decision = await self._make_payment_decision(
                    invoice_data, validation_result, risk_assessment, state
                )
                if payment_decision.decision == "auto_pay":
                    state.approval_chain = [
                        {
                            "approved_by":"system_auto_approval in payment_agent",
                            "timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
                        }
                    ]
                else:
                    state.approval_chain = [{"payment_agent":"Failed or Rejected"}]

                payment_result = await self._execute_payment(invoice_data, payment_decision)
                payment_decision = self._update_payment_decision(payment_decision, payment_result)

                justification = await self._generate_payment_justification(
                    invoice_data, payment_decision, validation_result, risk_assessment
                )

                state.payment_decision = payment_decision
                state.overall_status = ProcessingStatus.COMPLETED
                state.current_agent = "payment_agent"
                # success criteria: only an APPROVED execution counts as success
                if payment_decision.status == PaymentStatus.APPROVED:
                    self.logger.logger.debug(f"successful_executions={self.successful_executions}")
                    self.successful_executions += 1
                else:
                    self.failed_executions += 1

                self.last_transaction_id = payment_decision.transaction_id
                self._log_decision(state, payment_decision.status, justification, 95.0, state.process_id)
                return state

        except Exception as e:
            self.failed_executions += 1
            self.logger.logger.error(f"[PaymentAgent] Execution failed: {e}")
            state.overall_status = ProcessingStatus.FAILED
            return state

        finally:
            # Metrics are recorded on every path, including failures.
            duration = (time.time() - start_time) * 1000  # in ms
            self.last_run = datetime.utcnow().isoformat()
            self.total_executions += 1
            self.total_duration += duration
            self._record_health_metrics(duration)

    async def _make_payment_decision(self, invoice_data, validation_result,
                                     risk_assessment, state: InvoiceProcessingState) -> PaymentDecision:
        """Map (risk level, validation status, amount) onto a PaymentDecision.

        Policy: critical risk / invalid validation -> reject; low risk or
        < $5k -> auto-pay; medium risk / partial match -> hold for finance;
        everything else -> executive manual approval.
        """
        # BUG FIX: `invoice_data.total or invoice_data.total_amount` raised
        # AttributeError when the model has no `total_amount` field and
        # `total` is falsy; use getattr with a default for the fallback.
        amount = invoice_data.total or getattr(invoice_data, "total_amount", 0.0) or 0.0
        risk_level = risk_assessment.risk_level
        validation_status = validation_result.validation_status

        if risk_level == RiskLevel.CRITICAL or validation_status == ValidationStatus.INVALID:
            decision = PaymentDecision(
                decision = "reject",
                status = PaymentStatus.FAILED,
                approved_amount = 0.0,
                transaction_id = None,
                payment_method = None,
                approval_chain = [],
                rejection_reason = "Critical Risk or Invalid Validation",
                scheduled_date = None
            )
        elif risk_level == RiskLevel.LOW or amount < 5000:
            decision = PaymentDecision(
                decision = "auto_pay",
                status = PaymentStatus.APPROVED,
                approved_amount = amount,
                transaction_id = f"TXN-{datetime.utcnow().strftime('%Y-%m-%d-%H%M%S')}",
                payment_method = self._select_payment_method(amount),
                approval_chain = ["system_auto_approval"],
                rejection_reason = None,
                scheduled_date = self._calculate_payment_date(invoice_data.due_date, "ACH")
            )
        elif risk_level == RiskLevel.MEDIUM or validation_status == ValidationStatus.PARTIAL_MATCH:
            decision = PaymentDecision(
                decision = "hold",
                status = PaymentStatus.PENDING_APPROVAL,
                approved_amount = amount,
                transaction_id = None,
                payment_method = self._select_payment_method(amount),
                approval_chain = ["system_auto_approval", "finance_manager_approval"],
                rejection_reason = None,
                scheduled_date = self._calculate_payment_date(invoice_data.due_date, "ACH")
            )
        else:
            decision = PaymentDecision(
                decision = "manual_approval",
                status = PaymentStatus.PENDING_APPROVAL,
                approved_amount = amount,
                transaction_id = None,
                payment_method = self._select_payment_method(amount),
                approval_chain = ["system_auto_approval", "executive_approval"],
                rejection_reason = None,
                scheduled_date = self._calculate_payment_date(invoice_data.due_date, "WIRE")
            )

        return decision

    def _select_payment_method(self, amount: float) -> str:
        """Pick a payment rail by amount: <$5k ACH, <$25k WIRE, else MANUAL."""
        if amount < 5000:
            return "ACH"
        elif amount < 25000:
            return "WIRE"
        return "MANUAL"

    def _calculate_payment_date(self, due_date_str: Optional[str], payment_method: str) -> datetime:
        """Schedule the payment just after the due date (ACH +1 day, else +2).

        Missing/unparseable due dates default to 3 days from now.
        """
        due_date = self._parse_date(due_date_str)
        if not due_date:
            due_date = datetime.utcnow().date() + timedelta(days=3)
        offset = 1 if payment_method == "ACH" else 2
        return datetime.combine(due_date, datetime.min.time()) + timedelta(days=offset)

    def _parse_date(self, date_str: str) -> Optional[datetime.date]:
        """Parse 'YYYY-MM-DD'; returns None for empty or malformed input."""
        if not date_str:
            return None
        try:
            return datetime.strptime(date_str, "%Y-%m-%d").date()
        except Exception:
            return None

    async def _execute_payment(self, invoice_data, payment_decision: PaymentDecision) -> Dict[str, Any]:
        """Send payment request to web API and return response with transaction_id.

        Returns a dict with 'status' ('success'/'failed'), optional
        'transaction_id', and 'message'. Network/HTTP errors are reported in
        the result rather than raised.
        """
        import asyncio
        await asyncio.sleep(1)

        payment_payload = {
            "order_id": invoice_data.invoice_number or f"INV-{int(datetime.utcnow().timestamp())}",
            "customer_name": invoice_data.customer_name or "Unknown Vendor",
            "amount": float(invoice_data.total),
            "currency": "USD",
            # "method": payment_decision.payment_method.lower(),
            "recipient_account": "auto_generated_account",
            "due_date": str(invoice_data.due_date or datetime.utcnow().date())
        }

        try:
            response = requests.post("http://localhost:8001/initiate_payment", json=payment_payload, timeout=10)
            if response.status_code == 200:
                result = response.json()
                return {
                    "status": "success" if result["status"] == "SUCCESS" else "failed",
                    "transaction_id": result["transaction_id"],
                    "message": result["message"]
                }
            # BUG FIX: the original printed the unbound name `result` on this
            # path (and in the except handler), raising NameError and masking
            # the real HTTP error.
            return {"status": "failed", "message": f"HTTP {response.status_code}: {response.text}"}

        except Exception as e:
            return {"status": "failed", "message": f"Payment API error: {e}"}

    async def _async_sleep(self, seconds: int):
        """Small awaitable sleep helper (keeps asyncio import local)."""
        import asyncio
        await asyncio.sleep(seconds)

    def _update_payment_decision(self, payment_decision: PaymentDecision,
                                 payment_result: Dict[str, Any]) -> PaymentDecision:
        """Fold the payment-API result back into the decision object."""
        if payment_result.get("status") == "success":
            payment_decision.status = PaymentStatus.APPROVED
            payment_decision.transaction_id = payment_result.get("transaction_id")
        else:
            payment_decision.status = PaymentStatus.FAILED
            payment_decision.rejection_reason = payment_result.get("message")
        return payment_decision

    async def _generate_payment_justification(self, invoice_data, payment_decision: PaymentDecision,
                                              validation_result, risk_assessment) -> str:
        """Compose a short audit-trail sentence explaining the decision."""
        reason = f"Payment Decision: {payment_decision.status}. "
        if payment_decision.status == PaymentStatus.FAILED:
            reason += f"Reason: {payment_decision.rejection_reason}"
        reason += f"Risk level: {risk_assessment.risk_level}. Validation: {validation_result.validation_status}."
        return reason

    def _record_health_metrics(self, duration: float):
        """Update and record health statistics (appended to class-level history)."""
        success_rate = (
            (self.successful_executions / self.total_executions) * 100
            if self.total_executions else 0
        )
        avg_duration = (
            self.total_duration / self.total_executions
            if self.total_executions else 0
        )
        overall_status = "🟢 Healthy"
        if success_rate < 70:
            overall_status = "🟠 Degraded"
        if success_rate < 60:
            overall_status = "🔴 Unhealthy"

        metrics = {
            "Agent": "Payment Agent 💳",
            "Executions": self.total_executions,
            "Success Rate (%)": round(success_rate, 2),
            "Avg Duration (ms)": round(avg_duration, 2),
            "Total Failures": self.failed_executions,
            "Last Transaction ID": self.last_transaction_id or "N/A",
            # "Timestamp": datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S UTC"),
            "Last Run": self.last_run,
            "Overall Health": overall_status,
        }

        PaymentAgent.health_history.append(metrics)
        PaymentAgent.health_history = PaymentAgent.health_history[-50:]  # keep last 50

    async def health_check(self) -> Dict[str, Any]:
        """Return the current or last known health state."""
        await self._async_sleep(0.05)
        if not PaymentAgent.health_history:
            # No execution has happened yet — report a neutral baseline.
            return {
                "Agent": "Payment Agent 💳",
                "Executions": 0,
                "Success Rate (%)": 0.0,
                "Avg Duration (ms)": 0.0,
                "Total Failures": 0,
                "Last Transaction ID": "N/A",
            }
        return PaymentAgent.health_history[-1]
Project/agents/risk_agent.py ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Risk Assessment Agent for Invoice Processing"""
3
+
4
+ # TODO: Implement agent
5
+
6
+ import os
7
+ import json
8
+ import re
9
+ from typing import Dict, Any, List
10
+ import google.generativeai as genai
11
+ from dotenv import load_dotenv
12
+ import numpy as np
13
+ from datetime import datetime, timedelta
14
+ from statistics import mean
15
+ import time
16
+ from agents.base_agent import BaseAgent
17
+ from state import (
18
+ InvoiceProcessingState, RiskAssessment, RiskLevel,
19
+ ValidationStatus, ProcessingStatus
20
+ )
21
+ from utils.logger import StructuredLogger
22
+
23
+ load_dotenv()
24
+
25
+ from collections import defaultdict
26
class APIKeyBalancer:
    """Load-balances Gemini API keys, preferring the least-errored and
    least-used key.

    Usage and error counters are persisted to ``SAVE_FILE`` so the balance
    survives process restarts.
    """

    SAVE_FILE = "key_stats.json"

    def __init__(self, keys):
        self.keys = keys
        self.usage = defaultdict(int)   # key -> times handed out
        self.errors = defaultdict(int)  # key -> reported failures
        self.load()

    def load(self):
        """Restore persisted counters, if a stats file exists."""
        if os.path.exists(self.SAVE_FILE):
            # Context manager closes the handle promptly; the original
            # json.load(open(...)) leaked the file descriptor.
            with open(self.SAVE_FILE) as fh:
                data = json.load(fh)
            self.usage.update(data.get("usage", {}))
            self.errors.update(data.get("errors", {}))

    def save(self):
        """Persist the current usage/error counters to disk."""
        with open(self.SAVE_FILE, "w") as fh:
            json.dump({
                "usage": self.usage,
                "errors": self.errors
            }, fh)

    def get_best_key(self):
        """Return the key with the fewest errors (ties broken by lowest usage,
        then by position in ``self.keys``)."""
        best_key = min(self.keys, key=lambda k: (self.errors[k], self.usage[k]))
        self.usage[best_key] += 1
        self.save()
        return best_key

    def report_error(self, key):
        """Record a failure against *key* so it is deprioritised next time."""
        self.errors[key] += 1
        self.save()
56
+
57
# Candidate keys from the environment. Unset variables yield None, which the
# original passed straight into the balancer (and eventually into
# genai.configure); filter them out so only real keys are balanced.
_gemini_keys = [
    os.getenv("GEMINI_API_KEY_1"),
    os.getenv("GEMINI_API_KEY_2"),
    os.getenv("GEMINI_API_KEY_3"),
    # os.getenv("GEMINI_API_KEY_4"),
    os.getenv("GEMINI_API_KEY_5"),
    os.getenv("GEMINI_API_KEY_6"),
    # os.getenv("GEMINI_API_KEY_7"),
]
balancer = APIKeyBalancer([key for key in _gemini_keys if key])
66
+
67
class RiskAgent(BaseAgent):
    """Agent responsible for risk assessment, fraud detection, and compliance checking."""

    def __init__(self, config: Dict[str, Any] = None):
        super().__init__("risk_agent", config)
        self.logger = StructuredLogger("risk_agent")
        self.api_key = balancer.get_best_key()
        # SECURITY: never print/log the raw API key (the original printed it
        # to stdout, leaking the credential into console/log capture).
        genai.configure(api_key=self.api_key)
        self.model = genai.GenerativeModel("gemini-2.0-flash")
        # --- Metrics tracking ---
        self.execution_history: List[Dict[str, Any]] = []
        self.max_history = 50  # keep last 50 runs

    def generate(self, prompt):
        """Call the Gemini model; report failures to the key balancer so the
        failing key is deprioritised, then re-raise."""
        try:
            return self.model.generate_content(prompt)
        except Exception:
            balancer.report_error(self.api_key)
            raise

    def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type) -> bool:
        """Risk assessment needs extracted invoice data and a validation result."""
        return bool(state.invoice_data and state.validation_result)

    def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
        """A risk assessment with a numeric score must have been produced."""
        return bool(state.risk_assessment and state.risk_assessment.risk_score is not None)

    async def execute(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
        """Run the risk pipeline: base score -> fraud indicators -> compliance
        -> AI assessment -> combined score -> recommendation.

        Always records an execution metric (success flag + duration) via the
        ``finally`` block, even on early exit.
        """
        start_time = time.time()
        success = False
        try:
            if not self._validate_preconditions(state, workflow_type):
                state.overall_status = ProcessingStatus.FAILED
                self._log_decision(state, "Risk Assessment Analysis Failed", "Preconditions not met", confidence=0.0)
                # BUGFIX: the original fell through here and kept processing
                # with missing invoice/validation data.
                return state

            invoice_data = state.invoice_data
            validation_result = state.validation_result

            base_score = await self._calculate_base_risk_score(invoice_data, validation_result)
            fraud_indicators = await self._detect_fraud_indicators(invoice_data, validation_result)
            compliance_issues = await self._check_compliance(invoice_data, state)
            ai_assessment = await self._ai_risk_assessment(invoice_data, validation_result, fraud_indicators)

            combined_score = self._combine_risk_factors(base_score, fraud_indicators, compliance_issues, ai_assessment)
            risk_level = self._determine_risk_level(combined_score)
            recommendation = self._generate_recommendation(risk_level, fraud_indicators, compliance_issues, validation_result)

            state.risk_assessment = RiskAssessment(
                risk_level=risk_level,
                risk_score=combined_score,
                fraud_indicators=fraud_indicators,
                compliance_issues=compliance_issues,
                recommendation=recommendation["action"],
                reason=recommendation["reason"],
                requires_human_review=recommendation["requires_human_review"]
            )

            state.current_agent = "risk_agent"
            state.overall_status = ProcessingStatus.IN_PROGRESS
            success = True
            self._log_decision(
                state,
                "Risk Assessment Successful",
                "PDF text successfully verified by Risk Agent and checked by AI",
                combined_score,
                state.process_id
            )
            return state
        finally:
            duration_ms = round((time.time() - start_time) * 1000, 2)
            self._record_execution(success, duration_ms)

    async def _calculate_base_risk_score(self, invoice_data, validation_result) -> float:
        """
        Calculates an intelligent risk score (0.0-1.0) based on validation results,
        invoice metadata, and contextual financial factors.
        """
        score = 0.0

        # --- 1. Validation & PO related risks ---
        if validation_result:
            if validation_result.validation_status == ValidationStatus.INVALID:
                score += 0.4
            elif validation_result.validation_status == ValidationStatus.PARTIAL_MATCH:
                score += 0.25
            elif validation_result.validation_status == ValidationStatus.MISSING_PO:
                score += 0.3

            # Core mismatch signals
            if not validation_result.amount_match:
                score += 0.2
            if not validation_result.rate_match:
                score += 0.15
            if not validation_result.quantity_match:
                score += 0.1

            # Low confidence from validation adds risk
            if validation_result.confidence_score is not None:
                score += (0.5 - validation_result.confidence_score) * 0.3 if validation_result.confidence_score < 0.5 else 0

        # --- 2. Invoice amount-based risk ---
        if invoice_data and invoice_data.total is not None:
            total = invoice_data.total
            if total > 1_000_000:
                score += 0.4  # Extremely high-value invoices
            elif total > 100_000:
                score += 0.25
            elif total > 10_000:
                score += 0.1
            elif total < 10:
                score += 0.15  # Suspiciously small invoice

        # --- 3. Temporal risks (based on due date) ---
        if invoice_data and getattr(invoice_data, "due_date", None):
            try:
                score += self._calculate_due_date_risk(invoice_data.due_date)
            except Exception:
                pass  # Graceful degradation if due_date is invalid

        # --- 4. Vendor / Customer risks ---
        if invoice_data and getattr(invoice_data, "customer_name", None):
            name = invoice_data.customer_name.lower()
            if "new_vendor" in name or "test" in name or "demo" in name:
                score += 0.2
            elif any(flag in name for flag in ["fraud", "fake", "invalid"]):
                score += 0.3

        # --- 5. Data reliability / extraction confidence ---
        if invoice_data and getattr(invoice_data, "extraction_confidence", None) is not None:
            conf = invoice_data.extraction_confidence
            if conf < 0.5:
                score += 0.2
            elif conf < 0.7:
                score += 0.1

        # --- 6. Currency and metadata anomalies ---
        currency = getattr(invoice_data, "currency", "USD") or "USD"
        if currency.upper() not in {"USD", "EUR", "GBP", "INR"}:
            score += 0.15  # uncommon currencies add risk

        # Normalize score within [0, 1.0]
        return round(min(score, 1.0), 3)

    def _calculate_due_date_risk(self, due_date_str: str) -> float:
        """Score temporal risk: overdue invoices add 0.2, near-due add 0.1."""
        try:
            due_date = self._parse_date(due_date_str)
            days_until_due = (due_date - datetime.utcnow().date()).days
            if days_until_due < 0:
                return 0.2
            elif days_until_due < 5:
                return 0.1
            return 0.0
        except Exception:
            # Unparseable date: small flat penalty rather than a crash.
            return 0.05

    def _parse_date(self, date_str: str) -> datetime.date:
        """Parse an ISO-style ``YYYY-MM-DD`` date string; raises ValueError otherwise."""
        return datetime.strptime(date_str, "%Y-%m-%d").date()

    async def _detect_fraud_indicators(self, invoice_data, validation_result) -> List[str]:
        """
        Performs intelligent fraud detection on the given invoice and validation results.
        Returns a list of detected fraud indicators.
        """
        indicators = []

        # 1. PO / Validation mismatches
        if validation_result:
            if not validation_result.po_found:
                indicators.append("No matching Purchase Order found")
            if not validation_result.amount_match:
                indicators.append("Amount discrepancy detected")
            if not validation_result.rate_match:
                indicators.append("Rate inconsistency with Purchase Order")
            if not validation_result.quantity_match:
                indicators.append("Quantity mismatch detected")
            if validation_result.confidence_score is not None and validation_result.confidence_score < 0.6:
                indicators.append(f"Low validation confidence ({validation_result.confidence_score:.2f})")

        # 2. Vendor / Customer anomalies
        customer_name = getattr(invoice_data, "customer_name", "") or ""
        if "test" in customer_name.lower() or "demo" in customer_name.lower():
            indicators.append("Suspicious vendor name (Test/Demo account)")
        if "new_vendor" in customer_name.lower():
            indicators.append("First-time or unverified vendor")
        if any(keyword in customer_name.lower() for keyword in ["fraud", "fake", "invalid"]):
            indicators.append("Vendor flagged with risky keywords")

        # 3. Amount-level risk signals
        total = getattr(invoice_data, "total", 0.0) or 0.0
        if total > 1_000_000:
            indicators.append(f"Unusually high invoice total (${total:,.2f})")
        elif total < 10:
            indicators.append(f"Suspiciously low invoice total (${total:,.2f})")

        # 4. Date anomalies.  The subtraction assumes date objects; if the
        # pipeline stored strings instead, the original raised TypeError and
        # aborted the whole assessment -- degrade gracefully instead.
        due_date = getattr(invoice_data, "due_date", None)
        invoice_date = getattr(invoice_data, "invoice_date", None)
        if invoice_date and due_date:
            try:
                window_days = (due_date - invoice_date).days
            except TypeError:
                window_days = None
            if window_days is not None:
                if window_days < 0:
                    indicators.append("Due date earlier than invoice date (possible manipulation)")
                elif window_days < 3:
                    indicators.append("Unusually short payment window")

        # 5. Duplicate or pattern-based red flags
        if invoice_data.invoice_number and invoice_data.invoice_number.lower().startswith("dup-"):
            indicators.append("Possible duplicate invoice ID pattern")
        if invoice_data.file_name and "copy" in invoice_data.file_name.lower():
            indicators.append("Invoice filename suggests duplication")

        # 6. Confidence anomalies (AI extraction)
        if invoice_data.extraction_confidence is not None and invoice_data.extraction_confidence < 0.5:
            indicators.append(f"Low extraction confidence ({invoice_data.extraction_confidence:.2f}) — possible OCR tampering")

        # 7. Currency or unusual metadata patterns.
        # BUGFIX: a None currency crashed the original with AttributeError.
        currency = (getattr(invoice_data, "currency", "") or "").upper()
        if currency not in {"USD", "EUR", "GBP", "INR"}:
            indicators.append(f"Uncommon currency code: {invoice_data.currency}")

        return indicators

    async def _check_compliance(self, invoice_data, state: InvoiceProcessingState) -> List[str]:
        """
        Performs a multi-layer compliance check on invoice and state integrity.
        Returns a list of detected compliance issues.
        """
        issues = []

        # 1. Invoice integrity checks
        if not invoice_data.invoice_number:
            issues.append("Missing invoice number")
        if not invoice_data.customer_name:
            issues.append("Missing customer name")
        if not invoice_data.total or invoice_data.total <= 0:
            issues.append("Invalid or missing total amount")
        if not invoice_data.due_date:
            issues.append("Missing due date")

        # 2. Item-level verification
        if not invoice_data.item_details or len(invoice_data.item_details) == 0:
            issues.append("No item details present")
        else:
            for item in invoice_data.item_details:
                if not getattr(item, "item_name", None):
                    issues.append("Item missing name")
                if getattr(item, "quantity", 1) <= 0:
                    issues.append(f"Invalid quantity for item '{item.item_name or 'Unknown'}'")

        # 3. Confidence & quality checks
        if invoice_data.extraction_confidence and invoice_data.extraction_confidence < 0.7:
            issues.append(f"Low extraction confidence ({invoice_data.extraction_confidence:.2f})")

        # 4. Workflow state checks
        if not getattr(state, "approval_chain", True):
            issues.append("Approval chain incomplete")
        if getattr(state, "escalation_required", False):
            issues.append("Escalation required before payment")
        if getattr(state, "human_review_required", False):
            issues.append("Pending human review")

        # 5. Audit consistency
        if len(state.audit_trail) == 0:
            issues.append("No audit trail entries found")

        # 6. Risk-based compliance (if a previous risk assessment exists)
        if state.risk_assessment and state.risk_assessment.risk_score >= 0.7:
            issues.append(f"High risk score detected ({state.risk_assessment.risk_score:.2f})")

        return issues

    async def _ai_risk_assessment(
        self,
        invoice_data,
        validation_result,
        fraud_indicators: List[str]
    ) -> Dict[str, Any]:
        """
        Uses a Generative AI model (Gemini) to assess risk level based on
        structured invoice data, validation results, and detected fraud indicators.

        Returns:
            dict: {
                "risk_score": float between 0-1,
                "reason": str (explanation for the score)
            }
        """
        self.logger.logger.info("[RiskAgent] Running AI-based risk assessment...")
        result = {"risk_score": 0.0, "reason": "Default – AI assessment not available"}

        try:
            # --- Construct dynamic and context-rich prompt ---
            prompt = f"""
            You are a financial risk analysis model for invoice fraud detection.
            Carefully analyze the following details:

            INVOICE DATA:
            {invoice_data}

            VALIDATION RESULT:
            {validation_result}

            DETECTED FRAUD INDICATORS:
            {fraud_indicators}

            TASK:
            1. Assess overall risk of this invoice being fraudulent or non-compliant.
            2. Provide reasoning.
            3. Respond **only in JSON** with keys:
            - "risk_score": a float between 0 and 1 (higher = higher risk)
            - "reason": short explanation of what contributed to this score.

            EXAMPLES:
            {{
            "risk_score": 0.85,
            "reason": "High amount mismatch, new vendor, and unusual currency"
            }}
            {{
            "risk_score": 0.25,
            "reason": "Valid PO and consistent totals, low fraud signals"
            }}
            """
            import asyncio
            # BUGFIX: generate_content is a blocking network call; run it in a
            # worker thread so the event loop is not stalled.
            response = await asyncio.to_thread(self.generate, prompt)

            # --- Clean and parse ---
            raw_text = getattr(response, "text", "") or ""
            cleaned_json = self._clean_json_response(raw_text)
            ai_output = json.loads(cleaned_json)

            # --- Validate AI output ---
            score = float(ai_output.get("risk_score", 0.0))
            reason = str(ai_output.get("reason", "No reason provided"))

            # Clamp score between 0-1 for safety
            result = {
                "risk_score": max(0.0, min(score, 1.0)),
                "reason": reason.strip()[:400]  # limit for logs
            }

            self.logger.logger.info(
                f"[RiskAgent] AI Risk Assessment completed: score={result['risk_score']}, reason={result['reason']}"
            )

        except json.JSONDecodeError as e:
            self.logger.logger.warning(f"[RiskAgent] JSON parsing failed: {e}")
            result["reason"] = "AI response could not be parsed"

        except Exception as e:
            self.logger.logger.error(f"[RiskAgent] AI assessment error: {e}", exc_info=True)
            result["reason"] = "Fallback to base risk model"

        return result

    def _clean_json_response(self, text: str) -> str:
        """Trim any prose/markdown surrounding the first JSON object: keep the
        span from the first ``{`` to the last ``}`` (empty string if either is
        missing)."""
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end == -1:
            return ""
        return text[start:end + 1]

    def _combine_risk_factors(
        self,
        base_score: float,
        fraud_indicators: List[str],
        compliance_issues: List[str],
        ai_assessment: Dict[str, Any]
    ) -> float:
        """
        Combines multiple risk components (base, fraud, compliance, and AI analysis)
        into a single normalized risk score between 0.0 and 1.0.

        Weighting strategy:
        - Base Score: foundation derived from deterministic checks
        - Fraud Indicators: +0.1 per flag (max +0.3)
        - Compliance Issues: +0.05 per issue (max +0.2)
        - AI Risk Score: contributes up to 50% weight (20% of base as fallback)

        Returns:
            float: final risk score clamped to [0, 1]
        """
        try:
            # Extract and normalize AI risk
            ai_score = float(ai_assessment.get("risk_score", 0.0))
            ai_score = max(0.0, min(ai_score, 1.0))

            # --- Weighted contributions ---
            fraud_contrib = min(len(fraud_indicators) * 0.1, 0.3)
            compliance_contrib = min(len(compliance_issues) * 0.05, 0.2)
            ai_contrib = 0.5 * ai_score if ai_score > 0 else 0.2 * base_score

            combined = base_score + fraud_contrib + compliance_contrib + ai_contrib

            # Cap at 1.0 for safety
            final_score = round(min(combined, 1.0), 3)

            self.logger.logger.info(
                f"[RiskAgent] Combined risk computed: base={base_score}, "
                f"fraud_flags={len(fraud_indicators)}, compliance_flags={len(compliance_issues)}, "
                f"ai_score={ai_score}, final={final_score}"
            )

            return final_score

        except Exception as e:
            self.logger.logger.error(f"[RiskAgent] Error combining risk factors: {e}", exc_info=True)
            return min(base_score + 0.2, 1.0)  # fallback conservative estimate

    def _determine_risk_level(self, risk_score: float) -> RiskLevel:
        """Map a numeric score onto coarse RiskLevel bands
        (<0.3 LOW, <0.6 MEDIUM, <0.8 HIGH, else CRITICAL)."""
        if risk_score < 0.3:
            return RiskLevel.LOW
        if risk_score < 0.6:
            return RiskLevel.MEDIUM
        if risk_score < 0.8:
            return RiskLevel.HIGH
        return RiskLevel.CRITICAL

    def _generate_recommendation(
        self,
        risk_level: RiskLevel,
        fraud_indicators: List[str],
        compliance_issues: List[str],
        validation_result
    ) -> Dict[str, Any]:
        """
        Generate a structured recommendation (approve, escalate, or reject)
        based on overall risk, fraud, and compliance outcomes.

        Decision Logic:
        - INVALID validation → reject
        - HIGH / CRITICAL risk → escalate for human review
        - Any fraud/compliance flags → escalate
        - Otherwise → approve

        Returns:
            Dict[str, Any]: {
                'action': str,              # 'approve', 'escalate', or 'reject'
                'reason': str,              # Explanation summary
                'requires_human_review': bool
            }
        """
        try:
            # --- Determine key flags ---
            has_fraud = bool(fraud_indicators)
            has_compliance_issues = bool(compliance_issues)
            validation_invalid = (
                validation_result and validation_result.validation_status == ValidationStatus.INVALID
            )

            # --- Decision Logic ---
            if validation_invalid:
                action = "reject"
                requires_review = True
                reason = "Validation failed: " + "; ".join(fraud_indicators + compliance_issues or ["Invalid invoice data"])

            elif risk_level in [RiskLevel.HIGH, RiskLevel.CRITICAL]:
                action = "escalate"
                requires_review = True
                reason = f"High risk level detected ({risk_level.value}). Issues: " + "; ".join(fraud_indicators + compliance_issues or ["Potential anomalies"])

            elif has_fraud or has_compliance_issues:
                action = "escalate"
                requires_review = True
                reason = "Minor irregularities found: " + "; ".join(fraud_indicators + compliance_issues)

            else:
                action = "approve"
                requires_review = False
                reason = "All checks passed; invoice appears valid and compliant."

            # --- Structured Output ---
            recommendation = {
                "action": action,
                "reason": reason,
                "requires_human_review": requires_review,
            }

            self.logger.logger.info(
                f"[DecisionAgent] Recommendation generated: {recommendation}"
            )
            return recommendation

        except Exception as e:
            self.logger.logger.error(f"[DecisionAgent] Error generating recommendation: {e}", exc_info=True)
            # Safe fallback: never auto-approve on an internal error.
            return {
                "action": "escalate",
                "reason": "Error during recommendation generation",
                "requires_human_review": True,
            }

    def _record_execution(self, success: bool, duration_ms: float):
        """Append one run record and trim history to ``max_history`` entries."""
        self.execution_history.append({
            "success": success,
            "duration_ms": duration_ms,
        })
        # Keep recent N only
        if len(self.execution_history) > self.max_history:
            self.execution_history.pop(0)

    async def health_check(self) -> Dict[str, Any]:
        """Summarise recent execution history into a health report.

        Success rate and duration come from ``execution_history``;
        ``self.metrics`` (maintained by the base agent) supplies the
        last-run timestamp when available.
        """
        total_runs = len(self.execution_history)
        if total_runs == 0:
            return {
                "Agent": "Risk Agent ⚠️",
                "Executions": 0,
                "Success Rate (%)": 0.0,
                "Avg Duration (ms)": 0.0,
                "Total Failures": 0,
                "Status": "idle",
            }

        last_run = self.metrics["last_run_at"] if self.metrics else None

        successes = sum(1 for entry in self.execution_history if entry["success"])
        failures = total_runs - successes
        avg_duration = round(mean(entry["duration_ms"] for entry in self.execution_history), 2)
        success_rate = round((successes / total_runs) * 100, 2)

        # API connectivity check
        gemini_ok = bool(self.api_key)
        api_status = "🟢 Active" if gemini_ok else "🔴 Missing Key"

        # Health logic. BUGFIX: the original compared a 0-1 fraction against
        # 0.5 in one branch and a 0-100 percentage elsewhere, and overwrote
        # its own statistics after computing the status; percentages are now
        # used consistently throughout.
        overall_status = "🟢 Healthy"
        if not gemini_ok or failures > 3:
            overall_status = "🟠 Degraded"
        if success_rate < 50.0:
            overall_status = "🔴 Unhealthy"

        return {
            "Agent": "Risk Agent ⚠️",
            "Executions": total_runs,
            "Success Rate (%)": success_rate,
            "Avg Duration (ms)": avg_duration,
            "API Status": api_status,
            "Total Failures": failures,
            "Last Run": str(last_run) if last_run else "Not applicable",
            "Overall Health": overall_status,
        }
643
+
644
+
Project/agents/smart_explainer_agent.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ Smart Explainer Agent (Enhanced + Gemini-powered)
4
+ - Produces a detailed, human-readable explanation for a single InvoiceProcessingState.
5
+ - Uses Gemini for natural summarization if API key is present.
6
+ - Defensive, HTML-enhanced, and fully dashboard-ready.
7
+ """
8
+
9
+ from state import InvoiceProcessingState, ValidationStatus, PaymentStatus, RiskLevel
10
+ from datetime import datetime
11
+ import google.generativeai as genai
12
+ import json
13
+ import os
14
+
15
+
16
class SmartExplainerAgent:
    """Builds a detailed, human-readable HTML explanation for one
    InvoiceProcessingState, optionally polished by Gemini when an API key
    is configured."""

    def __init__(self):
        # Configure Gemini only if an API key is available; otherwise the
        # agent falls back to the structured (non-LLM) explanation.
        self.api_key = os.environ.get("GEMINI_API_KEY_4")
        self.use_gemini = bool(self.api_key)
        if self.use_gemini:
            genai.configure(api_key=self.api_key)
            self.model = genai.GenerativeModel("gemini-2.0-flash")

    # ---------- Helper functions ----------
    # Annotations use forward references so the class does not require the
    # `state` module at definition time.
    def _safe_invoice_dict(self, state: "InvoiceProcessingState") -> dict:
        """Serialize state.invoice_data to a plain dict ({} when absent)."""
        if not state or not getattr(state, "invoice_data", None):
            return {}
        return (
            state.invoice_data.model_dump(exclude_none=True)
            if hasattr(state.invoice_data, "model_dump")
            else state.invoice_data.dict()
        )

    def _safe_validation(self, state: "InvoiceProcessingState") -> dict:
        """Serialize state.validation_result to a plain dict ({} when absent)."""
        if not state or not getattr(state, "validation_result", None):
            return {}
        return (
            state.validation_result.model_dump(exclude_none=True)
            if hasattr(state.validation_result, "model_dump")
            else state.validation_result.dict()
        )

    def _safe_risk(self, state: "InvoiceProcessingState") -> dict:
        """Serialize state.risk_assessment to a plain dict ({} when absent)."""
        if not state or not getattr(state, "risk_assessment", None):
            return {}
        return (
            state.risk_assessment.model_dump(exclude_none=True)
            if hasattr(state.risk_assessment, "model_dump")
            else state.risk_assessment.dict()
        )

    # ---------- Core explain logic ----------
    def explain(self, state) -> str:
        """
        Generate a detailed HTML + markdown explanation for a given invoice.
        Falls back gracefully if data or Gemini is unavailable.
        """

        # --- Defensive normalization ---
        if state is None:
            return "<p>⚠️ No invoice state provided.</p>"

        if isinstance(state, dict):
            try:
                state = InvoiceProcessingState(**state)
            except Exception:
                pass

        # --- Extract fields safely ---
        invoice = self._safe_invoice_dict(state) or {}
        validation = self._safe_validation(state) or {}
        risk = self._safe_risk(state) or {}
        payment = (
            state.payment_decision.model_dump(exclude_none=True)
            if getattr(state, "payment_decision", None)
            and hasattr(state.payment_decision, "model_dump")
            else getattr(state, "payment_decision", {}) or {}
        )

        discrepancies = validation.get("discrepencies", [])  # per schema

        inv_id = invoice.get("invoice_number") or invoice.get("file_name") or "<unknown>"
        vendor = invoice.get("customer_name") or invoice.get("vendor_name") or "Unknown"
        total = invoice.get("total") or invoice.get("amount") or 0

        status = getattr(state, "overall_status", "unknown")
        status_val = status.value if hasattr(status, "value") else str(status)

        # --- Interpret status fields (enums may arrive as values or members) ---
        risk_level = risk.get("risk_level")
        if hasattr(risk_level, "value"):
            risk_level = risk_level.value
        risk_score = risk.get("risk_score", 0) or 0.0

        val_status = validation.get("validation_status")
        if hasattr(val_status, "value"):
            val_status = val_status.value

        payment_status = payment.get("status")
        if hasattr(payment_status, "value"):
            payment_status = payment_status.value

        # --- Badge colors ---
        colors = {
            "VALIDATION": "#ffc107",
            "RISK": (
                "#ff1744" if str(risk_level).lower() == "critical"
                else "#ff9800" if str(risk_level).lower() == "medium"
                else "#4caf50"
            ),
            "PAYMENT": "#4caf50",
            "AUDIT": "#2196f3",
        }

        # --- Header layout ---
        header_html = f"""
        <div style="display:flex;justify-content:center;margin-bottom:1rem;">
          <div style="flex:1;text-align:center;padding:0.8rem;
                      border-radius:10px;background:{colors['VALIDATION']};
                      color:white;margin:0 4px;">
            <b>Validation</b>
          </div>
          <div style="flex:1;text-align:center;padding:0.8rem;
                      border-radius:10px;background:{colors['RISK']};
                      color:white;margin:0 4px;">
            <b>Risk</b>
          </div>
          <div style="flex:1;text-align:center;padding:0.8rem;
                      border-radius:10px;background:{colors['PAYMENT']};
                      color:white;margin:0 4px;">
            <b>Payment</b>
          </div>
          <div style="flex:1;text-align:center;padding:0.8rem;
                      border-radius:10px;background:{colors['AUDIT']};
                      color:white;margin:0 4px;box-shadow:0 0 10px rgba(0,255,0,0.7);">
            <b>Audit</b>
          </div>
        </div>
        """

        # --- Formatter: currency for numbers, str for everything else ---
        def _fmt(val):
            if val is None:
                return "N/A"
            if isinstance(val, (int, float)) and not isinstance(val, bool):
                return f"${val:,.2f}"
            return str(val)

        # --- Base explanation (structured) ---
        lines = [
            f"<p><b>Invoice:</b> {inv_id}</p>",
            f"<p><b>Vendor:</b> {vendor}</p>",
            f"<p><b>Amount:</b> {_fmt(total)}</p>",
            f"<p><b>Status:</b> {status_val}</p>",
            "<hr>",
            f"<p><b>Validation:</b> {val_status or 'unknown'}</p>",
            f"<p><b>Risk Level:</b> {risk_level or 'low'} ({risk_score})</p>",
            f"<p><b>Payment:</b> {payment.get('decision', 'N/A')} ({payment_status or 'pending'})</p>",
        ]

        if discrepancies:
            lines.append("<p><b>Discrepancies Found:</b></p><ul>")
            for d in discrepancies:
                field = d.get("field", "unknown")
                expected = d.get("expected", "")
                actual = d.get("actual", "")
                lines.append(f"<li>{field}: expected <code>{expected}</code>, got <code>{actual}</code></li>")
            lines.append("</ul>")

        # --- Recommendations ---
        advice = []
        if str(val_status).lower() == "invalid":
            advice.append("❌ Invoice failed validation — requires manual review.")
        elif str(val_status).lower() in ("partial", "partial_match"):
            advice.append("⚠️ Partial validation — check mismatched fields.")
        if str(risk_level).lower() == "critical":
            advice.append("🚨 Critical risk detected — immediate escalation required.")
        elif str(risk_level).lower() == "medium":
            advice.append("⚠️ Medium risk — consider manual review.")
        if not advice:
            advice.append("✅ No major issues detected. Proceed as usual.")

        lines.append("<p><b>Recommendation:</b></p><ul>")
        for a in advice:
            lines.append(f"<li>{a}</li>")
        lines.append("</ul>")

        explanation_html = header_html + "\n".join(lines)

        # --- Gemini polishing ---
        # BUGFIX: the original re-imported genai and built a brand-new
        # GenerativeModel on every call (with a model name inconsistent with
        # __init__); reuse the model configured in the constructor instead.
        if self.use_gemini:
            try:
                prompt = f"""
                You are a professional financial analyst.
                Here is structured invoice data and an auto-generated explanation.

                Invoice summary:
                {json.dumps(invoice, indent=2)}

                Validation details: {json.dumps(validation, indent=2)}
                Risk assessment: {json.dumps(risk, indent=2)}
                Payment info: {json.dumps(payment, indent=2)}

                Rewrite the following explanation to sound executive-level, clear, and concise.
                Use HTML for sections but do not remove any factual details.

                Existing summary:
                {explanation_html}
                """
                response = self.model.generate_content(prompt)
                if response and getattr(response, "text", None):
                    return response.text.strip()
            except Exception as e:
                return explanation_html + f"<p><i>Gemini explanation failed: {e}</i></p>"

        return explanation_html
Project/agents/validation_agent.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """Validation Agent for Invoice Processing"""
3
+
4
+ # TODO: Implement agent
5
+ import asyncio
6
+ import os
7
+ import pandas as pd
8
+ from typing import Dict, Any, List, Tuple
9
+ from fuzzywuzzy import fuzz
10
+ import numpy as np
11
+ import time
12
+ from agents.base_agent import BaseAgent
13
+ from state import (
14
+ InvoiceProcessingState, ValidationResult, ValidationStatus,
15
+ ProcessingStatus
16
+ )
17
+ from datetime import datetime, timedelta
18
+
19
+ from utils.logger import StructuredLogger
20
+ from difflib import SequenceMatcher
21
+
22
class ValidationAgent(BaseAgent):
    """Agent responsible for validating invoice data against purchase orders.

    Loads a purchase-order CSV, fuzzy-matches the invoice against PO rows,
    collects field-level discrepancies, derives a validation status plus a
    weighted confidence score, and records per-agent health metrics that
    ``health_check()`` exposes to the UI.
    """

    # Class-level (shared) history of health snapshots; health_check() reads the latest.
    health_history: List[Dict[str, Any]] = []

    def __init__(self, config: Dict[str, Any] = None):
        super().__init__(agent_name="validation_agent", config=config or {})
        self.logger = StructuredLogger(__name__)
        # CSV with expected PO values; relative tolerance for numeric comparisons (5% default).
        self.po_file = self.config.get("po_file", "data/purchase_orders.csv")
        self.tolerance = self.config.get("tolerance", 0.05)
        # Execution counters feeding _record_health_metrics().
        self.successful_executions = 0
        self.failed_executions = 0
        self.total_duration = 0.0
        self.total_executions = 0
        self.last_run = None

    def _validate_preconditions(self, state: InvoiceProcessingState, workflow_type) -> bool:
        """Validation can only run once invoice data has been extracted."""
        if not state.invoice_data:
            self.logger.logger.error("No invoice data available for validation.")
            return False
        return True

    def _validate_postconditions(self, state: InvoiceProcessingState) -> bool:
        """True when execute() has attached a validation result to the state."""
        return getattr(state, "validation_result", None) is not None

    async def execute(self, state: InvoiceProcessingState, workflow_type) -> InvoiceProcessingState:
        """Run the full validation pipeline for one invoice and update the state.

        Always records health metrics in the ``finally`` block, whether the
        run succeeded or failed.
        """
        self.logger.logger.info(f"[ValidationAgent] Starting validation for {state.file_name}")
        start_time = time.time()
        try:
            if not self._validate_preconditions(state, workflow_type):
                state.status = ProcessingStatus.FAILED
                self._log_decision(state, "Validation Failed", "Precondition not met", confidence=0.0)
                return state

            invoice_data = state.invoice_data
            matching_pos = await self._find_matching_pos(invoice_data)
            validation_result = await self._validate_against_pos(invoice_data, matching_pos)
            state.validation_result = validation_result
            state.current_agent = "validation_agent"
            state.overall_status = ProcessingStatus.IN_PROGRESS

            if self._should_escalate_validation(validation_result, invoice_data):
                state.escalation_required = True

            self._validate_postconditions(state)
            self.successful_executions += 1
            # NOTE(review): naive UTC timestamp; consider datetime.now(timezone.utc).
            self.last_run = datetime.utcnow().isoformat()
            self._log_decision(
                state,
                "Validation Successful",
                "PDF text successfully validated and checked by AI",
                state.validation_result.confidence_score,
                state.process_id,
            )
            return state
        except Exception as e:
            self.logger.logger.error(f"[ValidationAgent] Execution failed: {e}")
            self.failed_executions += 1
            state.overall_status = ProcessingStatus.FAILED
            return state
        finally:
            duration = (time.time() - start_time) * 1000  # ms
            self.total_executions += 1
            self.total_duration += duration
            self._record_health_metrics(duration)

    def _load_purchase_orders(self) -> pd.DataFrame:
        """Load PO data from the configured CSV; re-raises on read failure."""
        try:
            df = pd.read_csv(self.po_file)
            self.logger.logger.info(f"[ValidationAgent] Loaded {len(df)} purchase orders")
            return df
        except Exception as e:
            self.logger.logger.error(f"[ValidationAgent] failed to load purchase order: {e}")
            raise

    async def _find_matching_pos(self, invoice_data) -> List[Dict[str, Any]]:
        """Find POs matching the invoice number plus fuzzy customer/order/item names.

        A PO row matches when the invoice numbers are equal and customer,
        order-id, and the best line-item name similarity all score >= 80.
        """
        po_df = self._load_purchase_orders()
        matches: List[Dict[str, Any]] = []

        # Bug fix: the original int() conversion could raise uncaught for a
        # non-numeric invoice number; treat that as "no match" instead.
        try:
            invoice_number = int(invoice_data.invoice_number)
        except (TypeError, ValueError):
            self.logger.logger.warning(
                f"[ValidationAgent] Non-numeric invoice number: {invoice_data.invoice_number!r}"
            )
            return matches

        for _, po in po_df.iterrows():
            customer_score = fuzz.token_sort_ratio(po["customer_name"], invoice_data.customer_name)
            order_id_score = fuzz.token_sort_ratio(po["order_id"], invoice_data.order_id)
            # Bug fix: use the BEST item score across all line items; the
            # original kept only the last item's score and raised NameError
            # when the invoice had no line items.
            item_score = max(
                (
                    fuzz.token_sort_ratio(po["item_name"], item.item_name)
                    for item in invoice_data.item_details
                ),
                default=0,
            )
            if (
                customer_score >= 80
                and item_score >= 80
                and order_id_score >= 80
                and po["invoice_number"] == invoice_number
            ):
                matches.append(po.to_dict())

        return matches

    async def _validate_against_pos(self, invoice_data, matching_pos: List[Dict[str, Any]]) -> ValidationResult:
        """Compare the invoice against the first matching PO and build a ValidationResult."""
        if not matching_pos:
            # Consistency fix: use the same (model-defined) keyword spelling
            # "discrepencies" as the successful branch — the read site is
            # validation_result.discrepencies.
            return ValidationResult(
                po_found=False,
                validation_status="missing_po",
                validation_result="No matching purchase order found",
                discrepencies=[],
                confidence_score=0.0,
            )

        po_data = matching_pos[0]
        discrepancies = self._validate_item_against_po(invoice_data, po_data)
        discrepancies += self._validate_totals(invoice_data, po_data)

        # Use the first line item as the representative one (single-item
        # invoices); guard against an empty item list instead of IndexError.
        first_item = invoice_data.item_details[0] if invoice_data.item_details else None
        actual_amount = float(first_item.amount) if first_item else 0.0
        actual_quantity = first_item.quantity if first_item else None
        actual_rate = float(first_item.rate) if first_item else 0.0

        # Bug fix: prefer 'expected_amount' (the key every comparison uses),
        # falling back to 'amount' for older CSVs.
        expected_amount = po_data.get("expected_amount", po_data.get("amount", 0)) or 0
        amount_match = abs(actual_amount - expected_amount) <= expected_amount * self.tolerance

        # Bug fix: the rate tolerance must scale with the expected RATE; the
        # original reused the amount-based limit for the rate comparison.
        expected_rate = po_data.get("rate", 0)
        rate_match = abs(actual_rate - expected_rate) <= expected_rate * self.tolerance

        validation_result = ValidationResult(
            po_found=True,
            quantity_match=actual_quantity == po_data.get("quantity"),
            rate_match=rate_match,
            amount_match=amount_match,
            validation_status=ValidationStatus.NOT_STARTED,  # finalised below
            validation_result="; ".join(discrepancies) if discrepancies else "All checks passed",
            discrepencies=discrepancies,  # model field is spelled "discrepencies"
            confidence_score=0.0,  # finalised below
            expected_amount=po_data.get("expected_amount", po_data.get("amount")),
            po_data=po_data,
        )
        validation_result.validation_status = self._determine_validation_status(validation_result)
        validation_result.confidence_score = self._calculate_validation_confidence(
            validation_result, matching_pos, invoice_data
        )
        return validation_result

    def _validate_item_against_po(self, invoice_data, po_data: Dict[str, Any]) -> List[str]:
        """Return human-readable discrepancies between invoice line items and the PO.

        (Parameter renamed from ``item`` — the original shadowed it with the
        loop variable.)
        """
        discrepancies = []
        for item in invoice_data.item_details:
            if item.quantity != po_data.get("quantity"):
                discrepancies.append(
                    f"Quantity mismatch: Expected {po_data['quantity']}, Found {item.quantity}"
                )
            if abs(item.rate - po_data.get("rate", 0)) > po_data.get("rate", 0) * self.tolerance:
                discrepancies.append(
                    f"Rate mismatch: Expected {po_data['rate']}, Found {item.rate}"
                )
        return discrepancies

    def _validate_totals(self, invoice_data, po_data: Dict[str, Any]) -> List[str]:
        """Check the first line item's amount against the PO's expected amount within tolerance."""
        discrepancies = []
        expected = po_data.get("expected_amount", 0)
        # Guard: no line items means nothing to compare (original raised IndexError).
        if not invoice_data.item_details:
            return discrepancies
        actual = invoice_data.item_details[0].amount
        diff = abs(expected - actual)
        if diff > expected * self.tolerance:
            discrepancies.append(
                f"Total amount mismatch: Expected {expected}, Actual {actual} (Difference:{diff:.2f})"
            )
        return discrepancies

    def _calculate_validation_confidence(self, validation_result: ValidationResult,
                                         matching_pos: List[Dict[str, Any]], invoice_data) -> float:
        """
        Compute a weighted confidence score (0-100) across 7 key dimensions:
        invoice_number, order_id, customer_name, item_name, amount, rate, quantity.
        Each field contributes according to its weight; numeric fields use a
        relative-difference similarity, text fields use SequenceMatcher.
        """
        if not validation_result.po_found or not matching_pos:
            return 0.0

        po_data = matching_pos[0]

        # Expected values from the PO row.
        expected = {
            "invoice_number": po_data.get("invoice_number", ""),
            "order_id": po_data.get("order_id", ""),
            "customer_name": po_data.get("customer_name", ""),
            "item_name": po_data.get("item_name", ""),
            "amount": float(po_data.get("expected_amount", po_data.get("amount", 0))),
            "rate": float(po_data.get("rate", 0)),
            "quantity": float(po_data.get("quantity", 0)),
        }

        # Actual values from the invoice (first/dominant line item).
        actual = {
            "invoice_number": invoice_data.invoice_number,
            "order_id": invoice_data.order_id,
            "customer_name": invoice_data.customer_name,
        }
        if invoice_data.item_details:
            item = invoice_data.item_details[0]
            actual.update({
                "item_name": item.item_name,
                "amount": float(item.amount or 0),
                "rate": float(item.rate or 0),
                "quantity": float(item.quantity or 0),
            })

        # Field weights (sum to 1); money fields dominate.
        weights = {
            "invoice_number": 0.20,
            "order_id": 0.15,
            "customer_name": 0.05,
            "item_name": 0.05,
            "amount": 0.25,
            "rate": 0.15,
            "quantity": 0.15,
        }

        def numeric_similarity(expected_val, actual_val):
            # 1.0 when equal, decreasing linearly with the relative difference.
            if expected_val == 0:
                return 1.0 if actual_val == 0 else 0.0
            diff_ratio = abs(expected_val - actual_val) / (abs(expected_val) + 1e-6)
            return max(0.0, 1.0 - diff_ratio)

        def text_similarity(a, b):
            return SequenceMatcher(None, str(a).lower(), str(b).lower()).ratio()

        weighted_scores = []
        for field, weight in weights.items():
            exp_val, act_val = expected.get(field), actual.get(field)
            if isinstance(exp_val, (int, float)) and isinstance(act_val, (int, float)):
                score = numeric_similarity(exp_val, act_val)
            else:
                score = text_similarity(exp_val, act_val)
            weighted_scores.append(weight * score)

        # Scale to a percentage and clamp to [0, 100].
        confidence = round(sum(weighted_scores) * 100, 2)
        confidence = max(0.0, min(confidence, 100.0))

        self.logger.logger.debug(f"Validation Confidence (weighted): {confidence}%")
        return confidence

    def _determine_validation_status(self, validation_result: ValidationResult) -> ValidationStatus:
        """
        Determine the final validation status based on PO existence,
        discrepancy count, and the amount match.
        """
        if not validation_result.po_found:
            return ValidationStatus.MISSING_PO

        discrepancies_count = len(validation_result.discrepencies)

        if discrepancies_count == 0 and validation_result.amount_match:
            return ValidationStatus.VALID

        if validation_result.amount_match and discrepancies_count <= 2:
            return ValidationStatus.PARTIAL_MATCH

        return ValidationStatus.INVALID

    def _should_escalate_validation(self, validation_result: ValidationResult, invoice_data) -> bool:
        """Escalate when validation failed outright or no PO was found.

        Bug fix: by the time this runs, validation_status is usually a
        ValidationStatus enum member, so the original plain-string membership
        test would not match unless ValidationStatus subclasses str.
        Normalise to the enum's value before comparing.
        """
        status = validation_result.validation_status
        status_value = getattr(status, "value", status)
        return str(status_value).lower() in ("invalid", "missing_po")

    def _record_health_metrics(self, duration: float):
        """Append a health snapshot (success rate, latency, status) to the shared history."""
        success_rate = (
            (self.successful_executions / self.total_executions) * 100
            if self.total_executions > 0 else 0
        )
        avg_duration = (
            self.total_duration / self.total_executions
            if self.total_executions > 0 else 0
        )

        metrics = {
            "Agent": "Validation Agent ✅",
            "Executions": self.total_executions,
            "Success Rate (%)": round(success_rate, 2),
            "Avg Duration (ms)": round(avg_duration, 2),
            "Total Failures": self.failed_executions,
        }

        # Prefer the BaseAgent-maintained counters for the health verdict when present.
        executions = 0
        success_rate = 0.0
        failures = 0
        last_run = None
        if self.metrics:
            executions = self.metrics["processed"]
            failures = self.metrics["errors"]
            last_run = self.metrics["last_run_at"]
            success_rate = (executions - failures) / (executions + 1e-6)

        # The instance timestamp always wins (mirrors the original behaviour,
        # where the conditional guard was commented out).
        last_run = self.last_run

        # Health verdict: degraded after >3 failures, unhealthy below 50% success.
        overall_status = "🟢 Healthy"
        if failures > 3:
            overall_status = "🟠 Degraded"
        if executions > 0 and success_rate < 0.5:
            overall_status = "🔴 Unhealthy"

        metrics.update({
            "Last Run": str(last_run) if last_run else "Not applicable",
            "Overall Health": overall_status,
        })
        ValidationAgent.health_history.append(metrics)

    async def health_check(self) -> Dict[str, Any]:
        """
        Return the latest health-metrics snapshot for UI display
        (zeroed defaults when the agent has never run).
        """
        await asyncio.sleep(0.05)
        if not ValidationAgent.health_history:
            return {
                "Agent": "Validation Agent ✅",
                "Executions": 0,
                "Success Rate (%)": 0.0,
                "Avg Duration (ms)": 0.0,
                "Total Failures": 0,
            }
        return ValidationAgent.health_history[-1]
Project/bounding_box.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import fitz # PyMuPDF
3
+ import pandas as pd
4
+ import os
5
+ import re
6
+
7
+ # === File paths ===
8
+ DATA_DIR = os.path.join(os.getcwd(), "data")
9
+ PDF_PATH = os.path.join(DATA_DIR, "invoices/Invoice-26.pdf") # Update for new PDF if needed
10
+ CSV_PATH = os.path.join(DATA_DIR, "purchase_orders.csv")
11
+ OUTPUT_PATH = os.path.join(DATA_DIR, "annotated_invoice.pdf")
12
+
13
+ # === Field coordinate map (from your data) ===
14
+ FIELD_BOXES = {
15
+ "invoice_number": (525, 55, 575, 75),
16
+ "order_id": (45, 470, 230, 490),
17
+ "customer_name": (40, 135, 100, 155),
18
+ "quantity": (370, 235, 385, 250),
19
+ "rate": (450, 235, 500, 250),
20
+ "expected_amount": (520, 360, 570, 375),
21
+ }
22
+
23
+ # === Step 1: Open PDF and extract text ===
24
+ pdf = fitz.open(PDF_PATH)
25
+ page = pdf[0]
26
+ pdf_text = page.get_text()
27
+
28
+ # === Step 2: Helper to extract fields ===
29
+ def extract_field(pattern, text, group=1):
30
+ match = re.search(pattern, text, re.IGNORECASE)
31
+ return match.group(group).strip() if match else None
32
+
33
+ # Extract key identifiers
34
+ invoice_number_pdf = extract_field(r"#\s*(\d+)", pdf_text)
35
+ order_id_pdf = extract_field(r"Order ID\s*[:\-]?\s*(\S+)", pdf_text)
36
+ customer_name_pdf = extract_field(r"Bill To:\s*(.*)", pdf_text)
37
+
38
+ # === Step 3: Read CSV and match correct row ===
39
+ po_df = pd.read_csv(CSV_PATH)
40
+
41
+ matched_row = po_df[
42
+ (po_df['invoice_number'].astype(str) == str(invoice_number_pdf))
43
+ | (po_df['order_id'] == order_id_pdf)
44
+ ]
45
+
46
+ if matched_row.empty:
47
+ raise ValueError(f"No matching CSV row found for Invoice {invoice_number_pdf} / Order {order_id_pdf}")
48
+
49
+ expected = matched_row.iloc[0].to_dict()
50
+ expected = {k.lower(): str(v).strip() for k, v in expected.items()}
51
+
52
+ print("✅ Loaded expected data from CSV for this PDF:")
53
+ for k, v in expected.items():
54
+ print(f" {k}: {v}")
55
+
56
+ # === Step 4: Extract fields from PDF ===
57
+ invoice_data = {
58
+ "invoice_number": invoice_number_pdf,
59
+ "customer_name": customer_name_pdf,
60
+ "order_id": order_id_pdf,
61
+ }
62
+
63
+ # Numeric fields
64
+ amounts = re.findall(r"\$?([\d,]+\.\d{2})", pdf_text)
65
+ invoice_data["expected_amount"] = amounts[-1] if amounts else None
66
+
67
+ # Extract first item (quantity, rate)
68
+ item_lines = re.findall(
69
+ r"([A-Za-z0-9 ,\-]+)\s+(\d+)\s+\$?([\d,]+\.\d{2})\s+\$?([\d,]+\.\d{2})",
70
+ pdf_text,
71
+ )
72
+ if item_lines:
73
+ invoice_data["quantity"] = item_lines[0][1]
74
+ invoice_data["rate"] = item_lines[0][2]
75
+
76
+ print("\n✅ Extracted data from PDF:")
77
+ for k, v in invoice_data.items():
78
+ print(f" {k}: {v}")
79
+
80
+ # === Step 5: Compare PDF vs CSV ===
81
+ discrepancies = []
82
+
83
+ def add_discrepancy(field, expected_val, found_val):
84
+ discrepancies.append({"field": field, "expected": expected_val, "found": found_val})
85
+
86
+ # Compare string fields
87
+ for field in ["invoice_number", "order_id", "customer_name"]:
88
+ if str(invoice_data.get(field, "")).strip() != str(expected.get(field, "")).strip():
89
+ add_discrepancy(field, expected.get(field, ""), invoice_data.get(field, ""))
90
+
91
+ # Compare numeric fields
92
+ for field in ["quantity", "rate", "expected_amount"]:
93
+ try:
94
+ found_val = float(str(invoice_data.get(field, 0)).replace(",", "").replace("$", ""))
95
+ expected_val = float(str(expected.get(field, 0)).replace(",", "").replace("$", ""))
96
+ if round(found_val, 2) != round(expected_val, 2):
97
+ add_discrepancy(field, expected_val, found_val)
98
+ except:
99
+ if str(invoice_data.get(field, "")) != str(expected.get(field, "")):
100
+ add_discrepancy(field, expected.get(field, ""), invoice_data.get(field, ""))
101
+
102
+ # === Step 6: Annotate mismatched fields using fixed coordinates ===
103
+ for d in discrepancies:
104
+ field = d["field"]
105
+ if field not in FIELD_BOXES:
106
+ print(f"⚠️ No coordinates found for field '{field}' — skipping annotation.")
107
+ continue
108
+
109
+ rect_coords = FIELD_BOXES[field]
110
+ rect = fitz.Rect(rect_coords)
111
+ expected_text = (
112
+ f"{float(d['expected']):,.2f}"
113
+ if field in ["quantity", "rate", "expected_amount"]
114
+ else str(d["expected"])
115
+ )
116
+
117
+ # Draw red bounding box
118
+ page.draw_rect(rect, color=(1, 0, 0), width=1.5)
119
+
120
+ # Add expected value below box
121
+ page.insert_text(
122
+ (rect.x0, rect.y1 + 10),
123
+ expected_text,
124
+ fontsize=9,
125
+ color=(1, 0, 0),
126
+ )
127
+
128
+ pdf.save(OUTPUT_PATH)
129
+ pdf.close()
130
+
131
+ print("\n✅ Annotated invoice saved at:", OUTPUT_PATH)
132
+
133
+ if discrepancies:
134
+ print("\n⚠️ Mismatches found:")
135
+ for d in discrepancies:
136
+ print(f" - {d['field']}: expected {d['expected']}, found {d['found']}")
137
+ else:
138
+ print("\n✅ No mismatches found! Invoice matches CSV.")
Project/data/annotated_invoice.pdf ADDED
Binary file (15.8 kB). View file
 
Project/data/invoices/Invoice-01.pdf ADDED
Binary file (13.5 kB). View file
 
Project/data/invoices/Invoice-02.pdf ADDED
Binary file (13.3 kB). View file
 
Project/data/invoices/Invoice-03.pdf ADDED
Binary file (14.5 kB). View file
 
Project/data/invoices/Invoice-04.pdf ADDED
Binary file (14.3 kB). View file
 
Project/data/invoices/Invoice-05.pdf ADDED
Binary file (15.6 kB). View file
 
Project/data/invoices/Invoice-06.pdf ADDED
Binary file (14.1 kB). View file
 
Project/data/invoices/Invoice-07.pdf ADDED
Binary file (13.3 kB). View file
 
Project/data/invoices/Invoice-08.pdf ADDED
Binary file (14.4 kB). View file
 
Project/data/invoices/Invoice-09.pdf ADDED
Binary file (14.7 kB). View file
 
Project/data/invoices/Invoice-10.pdf ADDED
Binary file (14.2 kB). View file
 
Project/data/invoices/Invoice-11.pdf ADDED
Binary file (14.2 kB). View file
 
Project/data/invoices/Invoice-12.pdf ADDED
Binary file (13.7 kB). View file
 
Project/data/invoices/Invoice-13.pdf ADDED
Binary file (13.6 kB). View file
 
Project/data/invoices/Invoice-14.pdf ADDED
Binary file (13.3 kB). View file
 
Project/data/invoices/Invoice-15.pdf ADDED
Binary file (13.6 kB). View file
 
Project/data/invoices/Invoice-16.pdf ADDED
Binary file (14.2 kB). View file
 
Project/data/invoices/Invoice-17.pdf ADDED
Binary file (14 kB). View file
 
Project/data/invoices/Invoice-18.pdf ADDED
Binary file (13.6 kB). View file
 
Project/data/invoices/Invoice-19.pdf ADDED
Binary file (13.7 kB). View file
 
Project/data/invoices/Invoice-20.pdf ADDED
Binary file (13.9 kB). View file
 
Project/data/invoices/Invoice-21.pdf ADDED
Binary file (13.5 kB). View file
 
Project/data/invoices/Invoice-22.pdf ADDED
Binary file (14 kB). View file
 
Project/data/invoices/Invoice-23.pdf ADDED
Binary file (13.8 kB). View file
 
Project/data/invoices/Invoice-24.pdf ADDED
Binary file (14.2 kB). View file
 
Project/data/invoices/Invoice-25.pdf ADDED
Binary file (13.4 kB). View file
 
Project/data/invoices/Invoice-26.pdf ADDED
Binary file (13.7 kB). View file
 
Project/data/invoices/Invoice-27.pdf ADDED
Binary file (13.5 kB). View file
 
Project/data/invoices/Invoice-28.pdf ADDED
Binary file (14.2 kB). View file
 
Project/data/invoices/Invoice-29.pdf ADDED
Binary file (13.8 kB). View file
 
Project/data/invoices/Invoice-30.pdf ADDED
Binary file (13.9 kB). View file
 
Project/data/invoices/Invoice-31.pdf ADDED
Binary file (14 kB). View file
 
Project/data/invoices/Invoice-32.pdf ADDED
Binary file (13.7 kB). View file
 
Project/data/invoices/Invoice-33.pdf ADDED
Binary file (14 kB). View file
 
Project/data/invoices/Invoice-34.pdf ADDED
Binary file (13.9 kB). View file
 
Project/data/invoices/Invoice-35.pdf ADDED
Binary file (14 kB). View file
 
Project/data/invoices/Invoice-36.pdf ADDED
Binary file (13.6 kB). View file