parthib07 commited on
Commit
61411b5
·
verified ·
1 Parent(s): 83a98e9

Upload 52 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. __init__.py +2 -0
  2. agents/__init__.py +2 -0
  3. agents/__pycache__/__init__.cpython-311.pyc +0 -0
  4. agents/__pycache__/decision_agent.cpython-311.pyc +0 -0
  5. agents/__pycache__/extraction_agent.cpython-311.pyc +0 -0
  6. agents/__pycache__/reporting_agent.cpython-311.pyc +0 -0
  7. agents/__pycache__/validation_agent.cpython-311.pyc +0 -0
  8. agents/__pycache__/vendor_verification_agent.cpython-311.pyc +0 -0
  9. agents/decision_agent.py +53 -0
  10. agents/extraction_agent.py +61 -0
  11. agents/reporting_agent.py +45 -0
  12. agents/validation_agent.py +84 -0
  13. agents/vendor_verification_agent.py +59 -0
  14. app.py +17 -0
  15. embeddings/__init__.py +2 -0
  16. embeddings/__pycache__/__init__.cpython-311.pyc +0 -0
  17. embeddings/__pycache__/embedding_model.cpython-311.pyc +0 -0
  18. embeddings/embedding_model.py +28 -0
  19. llm.py +14 -0
  20. prompts/__init__.py +2 -0
  21. prompts/__pycache__/__init__.cpython-311.pyc +0 -0
  22. prompts/__pycache__/decision_prompt.cpython-311.pyc +0 -0
  23. prompts/__pycache__/extraction_prompt.cpython-311.pyc +0 -0
  24. prompts/__pycache__/reporting_prompt.cpython-311.pyc +0 -0
  25. prompts/__pycache__/validation_prompt.cpython-311.pyc +0 -0
  26. prompts/__pycache__/vendor_prompt.cpython-311.pyc +0 -0
  27. prompts/decision_prompt.py +44 -0
  28. prompts/extraction_prompt.py +53 -0
  29. prompts/reporting_prompt.py +40 -0
  30. prompts/validation_prompt.py +57 -0
  31. prompts/vendor_prompt.py +39 -0
  32. tools/__init__.py +2 -0
  33. tools/__pycache__/__init__.cpython-311.pyc +0 -0
  34. tools/__pycache__/erp_tool.cpython-311.pyc +0 -0
  35. tools/__pycache__/web_search_tool.cpython-311.pyc +0 -0
  36. tools/erp_tool.py +27 -0
  37. tools/web_search_tool.py +36 -0
  38. ui/__init__.py +2 -0
  39. ui/__pycache__/__init__.cpython-311.pyc +0 -0
  40. ui/__pycache__/streamlit_dashboard.cpython-311.pyc +0 -0
  41. ui/streamlit_dashboard.py +413 -0
  42. utils.py +87 -0
  43. vectorstore/__init__.py +2 -0
  44. vectorstore/__pycache__/__init__.cpython-311.pyc +0 -0
  45. vectorstore/__pycache__/pinecone_client.cpython-311.pyc +0 -0
  46. vectorstore/pinecone_client.py +141 -0
  47. workflow/__init__.py +2 -0
  48. workflow/__pycache__/__init__.cpython-311.pyc +0 -0
  49. workflow/__pycache__/graph_builder.cpython-311.pyc +0 -0
  50. workflow/__pycache__/state_schema.cpython-311.pyc +0 -0
__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """AI Business Process Automation Agent package."""
2
+
agents/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Agent implementations."""
2
+
agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (192 Bytes). View file
 
agents/__pycache__/decision_agent.cpython-311.pyc ADDED
Binary file (3.05 kB). View file
 
agents/__pycache__/extraction_agent.cpython-311.pyc ADDED
Binary file (3.36 kB). View file
 
agents/__pycache__/reporting_agent.cpython-311.pyc ADDED
Binary file (3.15 kB). View file
 
agents/__pycache__/validation_agent.cpython-311.pyc ADDED
Binary file (4.93 kB). View file
 
agents/__pycache__/vendor_verification_agent.cpython-311.pyc ADDED
Binary file (3.45 kB). View file
 
agents/decision_agent.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict
6
+
7
+ from langchain_core.messages import HumanMessage
8
+
9
+ from ai_business_automation_agent.prompts.decision_prompt import DECISION_PROMPT
10
+ from ai_business_automation_agent.utils import append_agent_log, parse_llm_json
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def run_decision_agent(state: Dict[str, Any], llm) -> Dict[str, Any]:
    """Ask the LLM to map validation + vendor-verification results onto a
    final decision (approved / manual_review / rejected).

    Returns a partial state update containing ``decision`` plus audit-log
    entries produced via ``append_agent_log``; the caller merges it into
    the workflow state.
    """
    validation_result = state.get("validation_status") or {}
    vendor_result = state.get("vendor_verification") or {}

    rendered_prompt = DECISION_PROMPT.format(
        validation_json=json.dumps(validation_result, ensure_ascii=False),
        vendor_verification_json=json.dumps(vendor_result, ensure_ascii=False),
    )
    response = llm.invoke([HumanMessage(content=rendered_prompt)])
    raw_text = getattr(response, "content", str(response))
    payload, parse_error = parse_llm_json(raw_text)

    out: Dict[str, Any] = {}
    if parse_error:
        logger.warning("Decision JSON parse error: %s", parse_error)
        # Fail safe: anything unparseable is routed to a human reviewer.
        out["decision"] = {
            "decision": "manual_review",
            "reason": f"Parsing failed: {parse_error}",
            "routing": {"requires_human_review": True, "queue": "ap"},
            "raw_model_output": raw_text,
        }
        out.update(append_agent_log(state, agent="decision", event="error", payload={"error": parse_error}))
    else:
        out["decision"] = payload
        out.update(append_agent_log(state, agent="decision", event="ok", payload=payload))

    # Always record the prompt and raw response for auditability.
    out.update(append_agent_log(state, agent="decision", event="prompt", payload={"prompt": rendered_prompt}))
    out.update(append_agent_log(state, agent="decision", event="raw_response", payload={"text": raw_text}))
    return out
44
+
45
+
46
def decision_route(state: Dict[str, Any]) -> str:
    """Map the decision agent's output onto a LangGraph edge name.

    Unknown or missing decisions deliberately fall through to "rejected".
    """
    outcome = (state.get("decision") or {}).get("decision")
    if outcome in ("approved", "manual_review"):
        return outcome
    return "rejected"
53
+
agents/extraction_agent.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict
6
+
7
+ from langchain_core.messages import HumanMessage
8
+
9
+ from ai_business_automation_agent.prompts.extraction_prompt import EXTRACTION_PROMPT
10
+ from ai_business_automation_agent.utils import append_agent_log, parse_llm_json
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def run_extraction_agent(state: Dict[str, Any], llm) -> Dict[str, Any]:
    """Extract structured invoice/vendor fields from the raw email body.

    Produces ``extracted_data`` (or a low-confidence empty fallback when the
    model output cannot be parsed as JSON) plus audit-log entries.
    """
    body = state.get("email_content", "")
    rendered = EXTRACTION_PROMPT.format(email_content=body)

    reply = llm.invoke([HumanMessage(content=rendered)])
    raw = getattr(reply, "content", str(reply))
    data, parse_err = parse_llm_json(raw)

    out: Dict[str, Any] = {}
    if parse_err:
        logger.warning("Extraction JSON parse error: %s", parse_err)
        # Degrade gracefully: empty payload, low confidence, raw output kept.
        out["extracted_data"] = {
            "invoice": {},
            "vendor": {},
            "extraction_confidence": "low",
            "notes": f"Parsing failed: {parse_err}",
            "raw_model_output": raw,
        }
        out.update(append_agent_log(state, agent="extraction", event="error", payload={"error": parse_err}))
    else:
        out["extracted_data"] = data
        out.update(append_agent_log(state, agent="extraction", event="ok", payload=data))

    # Prompt and raw response are always logged for auditability.
    out.update(append_agent_log(state, agent="extraction", event="prompt", payload={"prompt": rendered}))
    out.update(append_agent_log(state, agent="extraction", event="raw_response", payload={"text": raw}))
    return out
42
+
43
+
44
def compact_extracted_summary(extracted: Dict[str, Any]) -> str:
    """Return a one-line JSON digest of the key invoice/vendor fields.

    Any failure (unexpected shape, non-dict input) yields "{}" instead of
    raising, since the digest is only used for logging/display.
    """
    try:
        inv = extracted.get("invoice", {})
        ven = extracted.get("vendor", {})
        digest = {
            "invoice_number": inv.get("invoice_number"),
            "invoice_date": inv.get("invoice_date"),
            "total": inv.get("total"),
            "currency": inv.get("currency"),
            "vendor_name": ven.get("name"),
            "vendor_website": ven.get("website"),
        }
        return json.dumps(digest, ensure_ascii=False)
    except Exception:
        return "{}"
61
+
agents/reporting_agent.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict
6
+
7
+ from langchain_core.messages import HumanMessage
8
+
9
+ from ai_business_automation_agent.prompts.reporting_prompt import REPORTING_PROMPT
10
+ from ai_business_automation_agent.utils import append_agent_log, parse_llm_json
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def run_reporting_agent(state: Dict[str, Any], llm) -> Dict[str, Any]:
    """Produce the final executive report from every upstream agent result.

    On a JSON parse failure the raw model output is embedded in a plain-text
    error report instead of being discarded.
    """
    rendered = REPORTING_PROMPT.format(
        email_content=state.get("email_content", ""),
        extracted_json=json.dumps(state.get("extracted_data") or {}, ensure_ascii=False),
        vendor_verification_json=json.dumps(state.get("vendor_verification") or {}, ensure_ascii=False),
        validation_json=json.dumps(state.get("validation_status") or {}, ensure_ascii=False),
        decision_json=json.dumps(state.get("decision") or {}, ensure_ascii=False),
        erp_json=json.dumps(state.get("erp_update_status") or {}, ensure_ascii=False),
    )
    reply = llm.invoke([HumanMessage(content=rendered)])
    raw = getattr(reply, "content", str(reply))
    data, parse_err = parse_llm_json(raw)

    out: Dict[str, Any] = {}
    if parse_err:
        logger.warning("Reporting JSON parse error: %s", parse_err)
        out["report"] = (
            "REPORT GENERATION FAILED\n\n"
            f"Error: {parse_err}\n\n"
            "Raw model output:\n"
            f"{raw}"
        )
        out.update(append_agent_log(state, agent="reporting", event="error", payload={"error": parse_err}))
    else:
        out["report"] = data.get("report", "")
        out.update(append_agent_log(state, agent="reporting", event="ok", payload=data))

    # Prompt and raw response are always logged for auditability.
    out.update(append_agent_log(state, agent="reporting", event="prompt", payload={"prompt": rendered}))
    out.update(append_agent_log(state, agent="reporting", event="raw_response", payload={"text": raw}))
    return out
45
+
agents/validation_agent.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from typing import Any, Dict, List
7
+
8
+ from langchain_core.messages import HumanMessage
9
+
10
+ from ai_business_automation_agent.prompts.validation_prompt import VALIDATION_PROMPT
11
+ from ai_business_automation_agent.utils import append_agent_log, parse_llm_json
12
+ from ai_business_automation_agent.vectorstore.pinecone_client import PineconeVectorStore
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def _format_policy_context(chunks: List[Dict[str, Any]]) -> str:
18
+ if not chunks:
19
+ return "No policy context available."
20
+ lines = []
21
+ for c in chunks:
22
+ score = c.get("score")
23
+ text = (c.get("text") or "").strip()
24
+ if text:
25
+ lines.append(f"- (score={score}) {text}")
26
+ return "\n".join(lines).strip() or "No policy context available."
27
+
28
+
29
def run_validation_agent(state: Dict[str, Any], llm) -> Dict[str, Any]:
    """Validate extracted invoice data against RAG-retrieved policy context.

    Pinecone retrieval is best-effort: if it fails for any reason, validation
    still runs with a placeholder context and the error is recorded under the
    ``rag`` key of the result and in the agent log.
    """
    extracted_payload = state.get("extracted_data") or {}
    vendor_payload = state.get("vendor_verification") or {}

    context_text = "No policy context available."
    try:
        store = PineconeVectorStore(namespace="policies")
        # Optional one-time seeding, controlled by an env flag (default on).
        if os.getenv("SEED_VECTORSTORE", "true").lower() in {"1", "true", "yes"}:
            store.seed_default_policies()
        retrieval_query = json.dumps(
            {
                "invoice": extracted_payload.get("invoice", {}),
                "vendor": extracted_payload.get("vendor", {}),
                "vendor_verification": vendor_payload,
            },
            ensure_ascii=False,
        )
        retrieved = store.retrieve(retrieval_query, top_k=5)
        context_text = _format_policy_context(retrieved)
        rag_info = {"retrieved": retrieved}
    except Exception as exc:
        logger.warning("Pinecone retrieval unavailable: %s", exc)
        rag_info = {"error": str(exc)}

    rendered = VALIDATION_PROMPT.format(
        extracted_json=json.dumps(extracted_payload, ensure_ascii=False),
        vendor_verification_json=json.dumps(vendor_payload, ensure_ascii=False),
        policy_context=context_text,
    )
    reply = llm.invoke([HumanMessage(content=rendered)])
    raw = getattr(reply, "content", str(reply))
    data, parse_err = parse_llm_json(raw)

    out: Dict[str, Any] = {}
    if parse_err:
        logger.warning("Validation JSON parse error: %s", parse_err)
        # Conservative fallback: force manual review on unparseable output.
        out["validation_status"] = {
            "status": "needs_review",
            "issues": [{"code": "PARSING_ERROR", "severity": "high", "message": parse_err}],
            "compliance_flags": [],
            "validated_fields": {},
            "recommendation": "manual_review",
            "raw_model_output": raw,
            "rag": rag_info,
        }
        out.update(append_agent_log(state, agent="validation", event="error", payload={"error": parse_err}))
    else:
        data["rag"] = rag_info
        out["validation_status"] = data
        out.update(append_agent_log(state, agent="validation", event="ok", payload=data))

    # RAG payload, prompt, and raw response all go into the audit log.
    out.update(append_agent_log(state, agent="validation", event="rag", payload=rag_info))
    out.update(append_agent_log(state, agent="validation", event="prompt", payload={"prompt": rendered}))
    out.update(append_agent_log(state, agent="validation", event="raw_response", payload={"text": raw}))
    return out
84
+
agents/vendor_verification_agent.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from typing import Any, Dict, Optional
6
+
7
+ from langchain_core.messages import HumanMessage
8
+
9
+ from ai_business_automation_agent.prompts.vendor_prompt import VENDOR_VERIFICATION_PROMPT
10
+ from ai_business_automation_agent.tools.web_search_tool import TavilyWebSearchTool
11
+ from ai_business_automation_agent.utils import append_agent_log, parse_llm_json
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def run_vendor_verification_agent(
    state: Dict[str, Any], llm, web_search: Optional[TavilyWebSearchTool]
) -> Dict[str, Any]:
    """Assess vendor legitimacy using an optional Tavily web search.

    When ``web_search`` is None (Tavily not configured) the LLM is told so
    explicitly and still produces an assessment from the extracted vendor
    fields alone.
    """
    vendor_fields = (state.get("extracted_data") or {}).get("vendor") or {}
    name = vendor_fields.get("name") or "unknown vendor"

    search_query = f"{name} company legitimacy business registration"
    if web_search is None:
        evidence = "Tavily not configured; no web verification performed."
    else:
        results = web_search.search(query=search_query, max_results=5)
        evidence = web_search.summarize(results)

    rendered = VENDOR_VERIFICATION_PROMPT.format(
        vendor_json=json.dumps(vendor_fields, ensure_ascii=False),
        web_summary=evidence,
    )
    reply = llm.invoke([HumanMessage(content=rendered)])
    raw = getattr(reply, "content", str(reply))
    data, parse_err = parse_llm_json(raw)

    out: Dict[str, Any] = {}
    if parse_err:
        logger.warning("Vendor verification JSON parse error: %s", parse_err)
        # Unparseable output defaults to a flagged, manual-review verdict.
        out["vendor_verification"] = {
            "status": "flagged",
            "risk_score": 5,
            "reason": f"Parsing failed: {parse_err}",
            "evidence_summary": "Vendor verification could not be reliably parsed; defaulting to manual review.",
            "recommended_action": "manual_review",
            "raw_model_output": raw,
            "web_search": {"query": search_query, "summary": evidence},
        }
        out.update(append_agent_log(state, agent="vendor_verification", event="error", payload={"error": parse_err}))
    else:
        data["web_search"] = {"query": search_query, "summary": evidence}
        out["vendor_verification"] = data
        out.update(append_agent_log(state, agent="vendor_verification", event="ok", payload=data))

    # Prompt and raw response are always logged for auditability.
    out.update(append_agent_log(state, agent="vendor_verification", event="prompt", payload={"prompt": rendered}))
    out.update(append_agent_log(state, agent="vendor_verification", event="raw_response", payload={"text": raw}))
    return out
59
+
app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from __future__ import annotations

import sys
from pathlib import Path

# When running `streamlit run ai_business_automation_agent/app.py`, Python's sys.path
# may not include the project root, so absolute package imports can fail.
# Prepend the parent of this package directory before importing anything
# from the package itself.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# NOTE: this import must come after the sys.path fix above.
from ai_business_automation_agent.ui.streamlit_dashboard import main


if __name__ == "__main__":
    main()
17
+
embeddings/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Embedding model utilities."""
2
+
embeddings/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (200 Bytes). View file
 
embeddings/__pycache__/embedding_model.cpython-311.pyc ADDED
Binary file (1.77 kB). View file
 
embeddings/embedding_model.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from functools import lru_cache
5
+ from typing import List
6
+
7
+ from sentence_transformers import SentenceTransformer
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
@lru_cache(maxsize=1)
def get_embedding_model(model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> SentenceTransformer:
    """Load (once) and return the shared SentenceTransformers model.

    The single-slot LRU cache avoids repeated model loads, which keeps the
    Streamlit app responsive across reruns.
    """
    logger.info("Loading embedding model: %s", model_name)
    return SentenceTransformer(model_name)
22
+
23
+
24
def embed_texts(texts: List[str], model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> List[List[float]]:
    """Embed *texts* with the cached model, returning normalized vectors
    (``normalize_embeddings=True``) as plain Python lists."""
    encoder = get_embedding_model(model_name=model_name)
    return [vec.tolist() for vec in encoder.encode(texts, normalize_embeddings=True)]
28
+
llm.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from functools import lru_cache
5
+ from langchain_groq import ChatGroq
6
+
7
+
8
@lru_cache(maxsize=1)
def get_groq_llm(model: str = "llama-3.3-70b-versatile", temperature: float = 0.0) -> ChatGroq:
    """Build (once) and return the shared ChatGroq client.

    Raises:
        ValueError: if the GROQ_API_KEY environment variable is unset or empty.
    """
    key = os.getenv("GROQ_API_KEY", "")
    if not key:
        raise ValueError("Missing GROQ_API_KEY.")
    return ChatGroq(model=model, temperature=temperature, api_key=key)
14
+
prompts/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Prompt templates for agents."""
2
+
prompts/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (199 Bytes). View file
 
prompts/__pycache__/decision_prompt.cpython-311.pyc ADDED
Binary file (1.2 kB). View file
 
prompts/__pycache__/extraction_prompt.cpython-311.pyc ADDED
Binary file (1.6 kB). View file
 
prompts/__pycache__/reporting_prompt.cpython-311.pyc ADDED
Binary file (844 Bytes). View file
 
prompts/__pycache__/validation_prompt.cpython-311.pyc ADDED
Binary file (1.68 kB). View file
 
prompts/__pycache__/vendor_prompt.cpython-311.pyc ADDED
Binary file (1.38 kB). View file
 
prompts/decision_prompt.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the decision agent. Substituted at runtime via
# str.format() with {validation_json} and {vendor_verification_json};
# the doubled braces ({{ }}) are literal braces escaped for .format().
DECISION_PROMPT = """\
SYSTEM ROLE
You are the enterprise Financial Decision Agent.

OBJECTIVE
Determine whether an invoice should be approved, rejected, or sent for manual review.

CONTEXT
The decision should be deterministic and auditable.

INSTRUCTIONS
- Output MUST be strict JSON (no markdown, no extra text).
- Use these deterministic decision rules:

DECISION RULES
- APPROVED:
  - validation_status.status == "pass"
  - vendor_verification.status == "verified"
- MANUAL_REVIEW:
  - vendor_verification.status == "flagged"
  - OR validation_status.status == "needs_review"
  - OR vendor evidence is insufficient/ambiguous
- REJECTED:
  - vendor_verification.status == "suspicious"
  - OR validation_status.status == "fail"

INPUT
validation_status:
{validation_json}

vendor_verification:
{vendor_verification_json}

OUTPUT FORMAT (STRICT JSON)
{{
  "decision": "approved|manual_review|rejected",
  "reason": "string",
  "routing": {{
    "requires_human_review": true,
    "queue": "ap|compliance|vendor_management|none"
  }}
}}
"""
44
+
prompts/extraction_prompt.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the extraction agent. Substituted at runtime via
# str.format() with {email_content}; the doubled braces ({{ }}) are
# literal braces escaped for .format().
EXTRACTION_PROMPT = """\
SYSTEM ROLE
You are an enterprise-grade Invoice Data Extraction Agent.

OBJECTIVE
Extract structured invoice fields from unstructured business email or invoice text.

CONTEXT
The user provides the full email body and/or invoice text. You must extract fields reliably and conservatively.

INSTRUCTIONS
- Output MUST be strict JSON (no markdown, no extra text).
- If a field is missing, set it to null.
- Do not hallucinate addresses, tax IDs, or totals.
- Normalize dates to ISO 8601 if possible (YYYY-MM-DD). Otherwise null.
- Currency should be a 3-letter code when known (e.g., USD, EUR, INR), otherwise null.

INPUT
{email_content}

OUTPUT FORMAT (STRICT JSON)
{{
  "invoice": {{
    "invoice_number": "string|null",
    "invoice_date": "YYYY-MM-DD|null",
    "due_date": "YYYY-MM-DD|null",
    "currency": "string|null",
    "subtotal": "number|null",
    "tax": "number|null",
    "total": "number|null",
    "purchase_order_number": "string|null",
    "line_items": [
      {{
        "description": "string|null",
        "quantity": "number|null",
        "unit_price": "number|null",
        "amount": "number|null"
      }}
    ]
  }},
  "vendor": {{
    "name": "string|null",
    "email": "string|null",
    "phone": "string|null",
    "address": "string|null",
    "website": "string|null",
    "tax_id": "string|null"
  }},
  "extraction_confidence": "low|medium|high",
  "notes": "string"
}}
"""
53
+
prompts/reporting_prompt.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the reporting agent. Substituted at runtime via
# str.format() with the six *_json / email_content placeholders below;
# the doubled braces ({{ }}) are literal braces escaped for .format().
REPORTING_PROMPT = """\
SYSTEM ROLE
You are an enterprise Reporting Agent.

OBJECTIVE
Generate a professional, executive-ready report of the invoice processing outcome.

CONTEXT
The report will be shown in a dashboard and stored for audit.

INSTRUCTIONS
- Output MUST be strict JSON (no markdown, no extra text).
- Keep it concise, clear, and business-friendly.
- Include a short "Next steps" section.

INPUT
email_content:
{email_content}

extracted_data:
{extracted_json}

vendor_verification:
{vendor_verification_json}

validation_status:
{validation_json}

decision:
{decision_json}

erp_update_status:
{erp_json}

OUTPUT FORMAT (STRICT JSON)
{{
  "report": "string"
}}
"""
40
+
prompts/validation_prompt.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the validation agent. Substituted at runtime via
# str.format() with {extracted_json}, {vendor_verification_json} and
# {policy_context}; the doubled braces ({{ }}) are literal braces
# escaped for .format().
VALIDATION_PROMPT = """\
SYSTEM ROLE
You are an enterprise Invoice Validation & Compliance Agent.

OBJECTIVE
Validate extracted invoice fields against business rules and compliance policies.

CONTEXT
You will receive:
- extracted invoice data
- vendor verification result
- retrieved policy/compliance context (RAG)

INSTRUCTIONS
- Output MUST be strict JSON (no markdown, no extra text).
- Apply the provided policy context. If a rule isn't mentioned, do not invent it.
- Validate: presence of key fields, total consistency (subtotal + tax ≈ total), and vendor risk.
- If totals are present, allow small rounding tolerance up to 0.02.
- Vendor risk interpretation:
  - vendor_verification.status == "verified": proceed normally
  - "flagged": bias towards needs_review unless everything else is clean
  - "suspicious": bias towards fail unless policy context explicitly allows proceeding

INPUT
extracted_data:
{extracted_json}

vendor_verification:
{vendor_verification_json}

policy_context:
{policy_context}

OUTPUT FORMAT (STRICT JSON)
{{
  "status": "pass|fail|needs_review",
  "issues": [
    {{
      "code": "string",
      "severity": "low|medium|high",
      "message": "string"
    }}
  ],
  "compliance_flags": [
    "string"
  ],
  "validated_fields": {{
    "invoice_number_present": true,
    "invoice_date_present": false,
    "vendor_name_present": true,
    "total_present": true,
    "total_consistency": "ok|mismatch|unknown"
  }},
  "recommendation": "approve|reject|manual_review"
}}
"""
57
+
prompts/vendor_prompt.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt template for the vendor verification agent. Substituted at
# runtime via str.format() with {vendor_json} and {web_summary}; the
# doubled braces ({{ }}) are literal braces escaped for .format().
# NOTE(review): "risk_score": 1 in the output schema appears to be an
# example value; the parsing fallback elsewhere uses a 1-10 scale — confirm.
VENDOR_VERIFICATION_PROMPT = """\
SYSTEM ROLE
You are an enterprise Vendor Verification Agent.

OBJECTIVE
Assess vendor legitimacy using third-party search evidence and the extracted vendor identity.

CONTEXT
You will be provided:
- extracted vendor fields (may be incomplete)
- a summarized web search result set

INSTRUCTIONS
- Output MUST be strict JSON (no markdown, no extra text).
- Base your assessment on evidence in the search summary.
- Company names may appear in different but equivalent formats. Treat these as matches:
  - capitalization differences (NetCore vs netcore)
  - abbreviations (Pvt Ltd ≈ Private Limited, Inc ≈ Incorporated, LLC)
  - punctuation differences and minor spacing
- Only mark a vendor as suspicious when there is clear negative evidence (scam/fraud reports, blacklists, fake registration).
- If evidence is insufficient, mark status as "flagged" (manual review) and explain what is missing.

INPUT
vendor:
{vendor_json}

web_search_summary:
{web_summary}

OUTPUT FORMAT (STRICT JSON)
{{
  "status": "verified|flagged|suspicious",
  "risk_score": 1,
  "reason": "short explanation",
  "evidence_summary": "string",
  "recommended_action": "proceed|manual_review|block"
}}
"""
39
+
tools/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """External tools used by agents."""
2
+
tools/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (199 Bytes). View file
 
tools/__pycache__/erp_tool.cpython-311.pyc ADDED
Binary file (1.52 kB). View file
 
tools/__pycache__/web_search_tool.cpython-311.pyc ADDED
Binary file (2.81 kB). View file
 
tools/erp_tool.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
def simulate_erp_update(extracted_data: Dict[str, Any]) -> Dict[str, Any]:
    """Pretend to post the invoice to an ERP system and return its status.

    In production, replace with a real ERP connector (SAP/Oracle/Dynamics)
    and robust idempotency keys. Non-dict input is treated as empty, and a
    missing invoice number yields the reference "ERP-SIM-UNKNOWN".
    """
    if isinstance(extracted_data, dict):
        invoice_info = (extracted_data or {}).get("invoice", {})
        vendor_info = (extracted_data or {}).get("vendor", {})
    else:
        invoice_info = {}
        vendor_info = {}
    inv_no = invoice_info.get("invoice_number")
    ven_name = vendor_info.get("name")

    logging.getLogger(__name__).info(
        "Simulating ERP update for invoice=%s vendor=%s", inv_no, ven_name
    )
    return {
        "status": "updated",
        "erp_reference_id": f"ERP-SIM-{inv_no or 'UNKNOWN'}",
        "message": "ERP update simulated successfully.",
    }
27
+
tools/web_search_tool.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from tavily import TavilyClient
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class TavilyWebSearchTool:
    """Thin wrapper around the Tavily search API used for vendor checks."""

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Create a client from *api_key* or the TAVILY_API_KEY env var.

        Raises:
            ValueError: if no API key is available.
        """
        resolved = api_key or os.getenv("TAVILY_API_KEY", "")
        if not resolved:
            raise ValueError("Missing TAVILY_API_KEY.")
        self._client = TavilyClient(api_key=resolved)

    def search(self, query: str, *, max_results: int = 5) -> Dict[str, Any]:
        """Run a Tavily search and return the raw response dict."""
        logger.info("Tavily search: %s", query)
        return self._client.search(query=query, max_results=max_results)

    @staticmethod
    def summarize(search_result: Dict[str, Any]) -> str:
        """Collapse a Tavily response into a short bullet list.

        At most 8 hits are kept and each content snippet is truncated to
        400 characters. Returns "No results." for an empty response.
        """
        hits: List[Dict[str, Any]] = search_result.get("results", []) or []
        bullets = []
        for hit in hits[:8]:
            heading = hit.get("title") or ""
            link = hit.get("url") or ""
            snippet = (hit.get("content") or "").strip()
            if len(snippet) > 400:
                snippet = snippet[:400] + "..."
            bullets.append(f"- {heading} ({link})\n  {snippet}")
        return "\n".join(bullets).strip() or "No results."
36
+
ui/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Streamlit UI."""
2
+
ui/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (179 Bytes). View file
 
ui/__pycache__/streamlit_dashboard.cpython-311.pyc ADDED
Binary file (19.6 kB). View file
 
ui/streamlit_dashboard.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from typing import Any, Dict, Optional
7
+
8
+ import streamlit as st
9
+
10
+ from ai_business_automation_agent.utils import load_environment, setup_logging
11
+ from ai_business_automation_agent.workflow.graph_builder import run_workflow
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ def _read_uploaded_text(upload) -> Optional[str]:
17
+ if upload is None:
18
+ return None
19
+ raw = upload.read()
20
+ if not raw:
21
+ return None
22
+ try:
23
+ return raw.decode("utf-8")
24
+ except Exception:
25
+ try:
26
+ return raw.decode("latin-1")
27
+ except Exception:
28
+ return None
29
+
30
+
31
def _status_badge(label: str, status: str) -> None:
    """Render a small colored dot + label for a pipeline status value.

    Status-to-color mapping: green = success-like, orange = review-like,
    red = failure-like; any unknown status falls back to gray.
    """
    if status in ("ok", "pass", "approved", "updated"):
        color = "green"
    elif status in ("needs_review", "unknown", "flagged", "manual_review"):
        color = "orange"
    elif status in ("suspicious", "fail", "rejected", "failed"):
        color = "red"
    else:
        color = "gray"
    st.markdown(
        f"""
        <div class="status-badge">
          <span class="status-dot" style="background:{color};"></span>
          <span class="status-label">{label}</span>
          <span class="status-text">({status})</span>
        </div>
        """,
        unsafe_allow_html=True,
    )
56
+
57
+
58
def _render_pipeline_timeline(
    extracted: Dict[str, Any],
    vendor_ver: Dict[str, Any],
    validation: Dict[str, Any],
    decision: Dict[str, Any],
    erp: Dict[str, Any],
) -> None:
    """Render a horizontal stepper for the LangGraph pipeline."""

    stages = [
        ("Extraction", bool(extracted)),
        ("Vendor", bool(vendor_ver)),
        ("Validation", bool(validation)),
        ("Decision", bool(decision)),
        ("ERP/Report", bool(erp) or bool(decision)),
    ]

    # Current step = first incomplete stage, or the last stage if all done.
    active = next((i for i, (_, done) in enumerate(stages) if not done), len(stages) - 1)

    pieces = []
    last = len(stages) - 1
    for idx, (label, done) in enumerate(stages):
        if idx == active:
            css_state = "current"
        elif done:
            css_state = "done"
        else:
            css_state = "pending"
        pieces.append(
            f'<div class="step step-{css_state}"><div class="step-dot"></div><div class="step-label">{label}</div></div>'
        )
        if idx < last:
            pieces.append('<div class="step-connector"></div>')

    st.markdown('<div class="pipeline-timeline">' + "".join(pieces) + "</div>", unsafe_allow_html=True)
98
+
99
+
100
def main() -> None:
    """Streamlit entry point.

    Configures the page, collects API keys and invoice text, runs the
    multi-agent workflow on demand, and renders each agent's output.

    Fix: the ``<style>`` block below is a *plain* triple-quoted string (not an
    f-string), so the previous ``{{``/``}}`` escapes were emitted literally
    into the page and produced invalid CSS. Braces are now single so the
    styling actually applies.
    """
    load_environment()
    setup_logging()

    st.set_page_config(page_title="AI Business Process Automation Agent", layout="wide")

    # Global lightweight styling
    st.markdown(
        """
        <style>
        /* App background + typography */
        .stApp {
            background: radial-gradient(circle at top, #1f2937 0, #020617 42%, #020617 100%);
            color: #e5e7eb;
        }

        /* Center container width */
        .block-container {
            max-width: 1180px;
            padding-top: 1.2rem;
        }

        /* Status badges */
        .status-badge {
            display: flex;
            gap: 0.4rem;
            align-items: center;
            margin: 0.2rem 0 0.6rem 0;
            font-size: 0.86rem;
        }
        .status-dot {
            width: 10px;
            height: 10px;
            border-radius: 999px;
            display: inline-block;
        }
        .status-label {
            font-weight: 600;
        }
        .status-text {
            color: #9ca3af;
        }

        /* Card look */
        .card {
            background: radial-gradient(circle at top left, rgba(56,189,248,0.12), rgba(15,23,42,0.98));
            border-radius: 0.9rem;
            border: 1px solid rgba(56,189,248,0.45);
            padding: 1rem 1.2rem;
            box-shadow: 0 24px 65px rgba(15,23,42,0.95);
        }
        .card-soft {
            background: #020617;
            border-radius: 0.9rem;
            border: 1px solid #1f2937;
            padding: 1rem 1.2rem;
        }

        /* Tabs */
        .stTabs [data-baseweb="tab-list"] {
            gap: 0.5rem;
        }
        .stTabs [data-baseweb="tab"] {
            padding: 0.45rem 0.9rem;
            border-radius: 999px;
            background: #020617;
            color: #e5e7eb;
        }
        .stTabs [aria-selected="true"] {
            background: #1e293b !important;
            border: 1px solid #38bdf8 !important;
        }

        /* Metric tweaks */
        div[data-testid="stMetric"] {
            background: #020617;
            border-radius: 0.9rem;
            border: 1px solid #1f2937;
            padding: 0.6rem 0.6rem 0.2rem 0.6rem;
        }

        /* Text areas */
        textarea{background: #020617 !important; color: #e5e7eb !important;}

        /* Pipeline timeline */
        .pipeline-timeline {
            display: flex;
            align-items: center;
            gap: 0.45rem;
            margin-top: 0.4rem;
            padding: 0.45rem 0.6rem 0.2rem;
        }
        .step {
            display: flex;
            flex-direction: column;
            align-items: center;
            gap: 0.15rem;
            font-size: 0.78rem;
        }
        .step-dot {
            width: 12px;
            height: 12px;
            border-radius: 999px;
            border: 2px solid #4b5563;
            background: #020617;
        }
        .step-label {
            color: #e5e7eb;
        }
        .step-connector {
            flex: 1;
            height: 2px;
            background: linear-gradient(90deg, #1f2937, #4b5563, #1f2937);
            opacity: 0.7;
        }
        .step-done .step-dot {
            background: #22c55e;
            border-color: #22c55e;
        }
        .step-current .step-dot {
            background: #38bdf8;
            border-color: #38bdf8;
            box-shadow: 0 0 0 4px rgba(56,189,248,0.25);
        }
        .step-current .step-label {
            color: #e5e7eb;
            font-weight: 600;
        }
        .step-pending .step-dot {
            background: #020617;
            border-color: #4b5563;
        }
        .step-pending .step-label {
            color: #9ca3af;
        }

        </style>
        """,
        unsafe_allow_html=True,
    )

    # Hero header
    st.markdown(
        """
        <div style="display:flex;justify-content:space-between;align-items:flex-start;gap:1.5rem;margin-bottom:0.8rem;">
            <div>
                <div style="font-size:0.78rem;font-weight:600;color:#38bdf8;letter-spacing:0.18em;text-transform:uppercase;margin-bottom:0.45rem;">
                    AI BUSINESS PROCESS AUTOMATION
                </div>
                <div style="font-size:1.7rem;font-weight:650;color:#f9fafb;margin-bottom:0.35rem;">
                    Invoice & Vendor Workflow Orchestration
                </div>
                <div style="font-size:0.9rem;color:#9ca3af;max-width:36rem;">
                    Multi-agent pipeline powered by LangGraph, Groq, Tavily, and Pinecone to extract, validate,
                    and route business invoices like an enterprise workflow engine.
                </div>
            </div>
            <div style="text-align:right;font-size:0.78rem;color:#9ca3af;">
                <div style="font-weight:600;color:#e5e7eb;margin-bottom:0.15rem;">Stack</div>
                <div>LangGraph · LangChain</div>
                <div>Groq llama-3.3-70b-versatile</div>
                <div>Tavily · Pinecone · Streamlit</div>
            </div>
        </div>
        """,
        unsafe_allow_html=True,
    )

    with st.sidebar:
        st.markdown("### Configuration")
        st.caption("Keys are kept in memory for this session only.")

        groq_key = st.text_input("GROQ_API_KEY", type="password", help="Required to run agents.")
        tavily_key = st.text_input("TAVILY_API_KEY", type="password", help="Optional (vendor verification).")
        pinecone_key = st.text_input("PINECONE_API_KEY", type="password", help="Optional (policy RAG).")

        # Only export non-empty keys so blank inputs don't clobber .env values.
        if groq_key.strip():
            os.environ["GROQ_API_KEY"] = groq_key.strip()
        if tavily_key.strip():
            os.environ["TAVILY_API_KEY"] = tavily_key.strip()
        if pinecone_key.strip():
            os.environ["PINECONE_API_KEY"] = pinecone_key.strip()

        st.markdown("---")
        st.markdown("### Input")
        upload = st.file_uploader("Upload email/invoice text (.txt)", type=["txt"])
        uploaded_text = _read_uploaded_text(upload)

        default_example = """Subject: Invoice INV-10492 - ACME Supplies

Hello Accounts Payable,

Please find below invoice details:
- Vendor: ACME Supplies Ltd
- Invoice Number: INV-10492
- Invoice Date: 2026-03-10
- Due Date: 2026-04-09
- Currency: USD
- Subtotal: 1200.00
- Tax: 96.00
- Total: 1296.00

Line items:
1) Office chairs (qty 4) @ 300.00 = 1200.00

Regards,
ACME Billing
billing@acmesupplies.example
"""

        email_content = st.text_area(
            "Paste email / invoice content",
            value=uploaded_text or default_example,
            height=280,
        )
        run_clicked = st.button("Run automation workflow", type="primary", use_container_width=True)

    if run_clicked:
        if not email_content.strip():
            st.error("Please provide invoice/email text.")
            st.stop()

        with st.spinner("Running multi-agent workflow..."):
            try:
                result = run_workflow(email_content=email_content)
                st.session_state["last_result"] = result
            except Exception as e:
                logger.exception("Workflow failed")
                st.error(f"Workflow failed: {e}")
                st.stop()

    # Render from session state so results survive Streamlit reruns.
    result: Dict[str, Any] = st.session_state.get("last_result") or {}

    extracted = result.get("extracted_data") or {}
    vendor_ver = result.get("vendor_verification") or {}
    validation = result.get("validation_status") or {}
    decision = result.get("decision") or {}
    erp = result.get("erp_update_status") or {}

    # Top decision summary card
    with st.container():
        col_a, col_b, col_c = st.columns([1.2, 1, 1], gap="medium")
        with col_a:
            st.markdown("#### Decision overview")
            with st.container():
                st.metric("Final decision", decision.get("decision", "unknown"))
                reason = decision.get("reason") or "Run the workflow to see a decision."
                st.markdown(
                    f"<div style='font-size:0.9rem;color:#cbd5f5;margin-top:0.35rem;'>{reason}</div>",
                    unsafe_allow_html=True,
                )
        with col_b:
            st.markdown("#### Validation")
            _status_badge("Validation", validation.get("status", "unknown"))
            st.caption(f"Recommendation: {validation.get('recommendation', 'n/a')}")
        with col_c:
            st.markdown("#### Vendor risk")
            _status_badge("Vendor", vendor_ver.get("status", "unknown"))
            st.caption(vendor_ver.get("reason", vendor_ver.get("evidence_summary", "No vendor assessment yet.")))

    # Visual pipeline timeline
    _render_pipeline_timeline(extracted, vendor_ver, validation, decision, erp)

    st.markdown("")  # spacer

    # Main content tabs
    tabs = st.tabs(["🧠 Agents", "📄 Report", "📊 Logs & JSON"])

    with tabs[0]:
        st.markdown("### Agent pipeline outputs")

        col1, col2 = st.columns([0.6, 0.4], gap="large")
        with col1:
            st.markdown("##### Extraction & Vendor")
            with st.expander("1) Extraction Agent", expanded=True):
                _status_badge("Extraction", (extracted.get("extraction_confidence") or "unknown"))
                st.json(extracted)

            with st.expander("2) Vendor Verification Agent", expanded=True):
                _status_badge("Vendor verification", (vendor_ver.get("status") or "unknown"))
                st.json(vendor_ver)

        with col2:
            st.markdown("##### Validation, Decision & ERP")
            with st.expander("3) Validation Agent", expanded=True):
                _status_badge("Validation", (validation.get("status") or "unknown"))
                st.json(validation)

            with st.expander("4) Decision Agent", expanded=True):
                _status_badge("Decision", (decision.get("decision") or "unknown"))
                st.json(decision)

            with st.expander("5) ERP Update Tool", expanded=True):
                _status_badge("ERP update", (erp.get("status") or "unknown"))
                st.json(erp)

    with tabs[1]:
        st.markdown("### Generated business report")
        report = result.get("report") or ""
        if report:
            st.text_area("Report", value=report, height=420)
        else:
            st.info("Run the workflow to generate a report.")

    with tabs[2]:
        st.markdown("### Agent logs (audit trail)")
        logs = result.get("agent_logs") or []
        if logs:
            st.dataframe(logs, use_container_width=True, hide_index=True)
            with st.expander("Raw result JSON"):
                st.code(json.dumps(result, indent=2, ensure_ascii=False))
        else:
            st.caption("Logs will appear after running the workflow.")
413
+
utils.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ from pathlib import Path
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Dict, Optional, Tuple
9
+
10
+ from dotenv import load_dotenv
11
+
12
+
13
def load_environment() -> None:
    """Load environment variables from a local .env if present.

    The package-local .env (ai_business_automation_agent/.env) takes priority
    so behavior does not depend on Streamlit's working directory; otherwise
    python-dotenv's default search is used. ``override=True`` ensures .env
    values replace empty process env vars.
    """
    package_env = Path(__file__).resolve().parent / ".env"
    if package_env.exists():
        load_dotenv(dotenv_path=package_env, override=True)
        return
    load_dotenv(override=True)
24
+
25
+
26
def setup_logging() -> None:
    """Configure root logging from the LOG_LEVEL env var (default: INFO).

    An unrecognized level name (e.g. ``LOG_LEVEL=verbose``) previously made
    ``logging.basicConfig(level=...)`` raise ``ValueError`` and crash startup;
    it now falls back to INFO instead.
    """
    level_name = os.getenv("LOG_LEVEL", "INFO").upper().strip()
    # getattr maps "DEBUG"/"INFO"/... to the numeric constant; anything else
    # (including attributes that are not ints) falls back to INFO.
    level = getattr(logging, level_name, None)
    if not isinstance(level, int):
        level = logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    )
32
+
33
+
34
def utc_now_iso() -> str:
    """Current UTC time as a timezone-aware ISO-8601 string."""
    now = datetime.now(tz=timezone.utc)
    return now.isoformat()
36
+
37
+
38
+ def _extract_first_json_object(text: str) -> Optional[str]:
39
+ """Best-effort extraction of first top-level JSON object from text."""
40
+
41
+ start = text.find("{")
42
+ if start == -1:
43
+ return None
44
+ depth = 0
45
+ for i in range(start, len(text)):
46
+ ch = text[i]
47
+ if ch == "{":
48
+ depth += 1
49
+ elif ch == "}":
50
+ depth -= 1
51
+ if depth == 0:
52
+ return text[start : i + 1]
53
+ return None
54
+
55
+
56
def parse_llm_json(text: str) -> Tuple[Dict[str, Any], Optional[str]]:
    """Parse strict JSON from an LLM response.

    First attempts to parse the whole (stripped) response; if that fails,
    falls back to extracting the first top-level JSON object embedded in it.

    Returns:
        ``(obj, error)`` — on failure ``obj`` is ``{}`` and ``error`` holds a
        human-readable message.
    """
    stripped = text.strip()
    try:
        return json.loads(stripped), None
    except Exception:
        pass

    fragment = _extract_first_json_object(stripped)
    if not fragment:
        return {}, "No JSON object found in model output."
    try:
        return json.loads(fragment), None
    except Exception as exc:
        return {}, f"Failed to parse JSON: {exc}"
74
+
75
+
76
def append_agent_log(state: Dict[str, Any], *, agent: str, event: str, payload: Any) -> Dict[str, Any]:
    """Return a state-delta with a new timestamped entry on the audit trail.

    The incoming ``state`` is not mutated: the existing ``agent_logs`` list
    (if any) is shallow-copied and extended with one new record.
    """
    entry = {
        "ts": utc_now_iso(),
        "agent": agent,
        "event": event,
        "payload": payload,
    }
    existing = state.get("agent_logs") or []
    return {"agent_logs": [*existing, entry]}
87
+
vectorstore/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Vector store integrations (Pinecone)."""
2
+
vectorstore/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (212 Bytes). View file
 
vectorstore/__pycache__/pinecone_client.cpython-311.pyc ADDED
Binary file (8.21 kB). View file
 
vectorstore/pinecone_client.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from ai_business_automation_agent.embeddings.embedding_model import embed_texts
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class PineconeVectorStore:
    """
    Minimal Pinecone wrapper for policy/compliance retrieval.

    Supports both:
    - pinecone-client (legacy) import style: import pinecone
    - newer pinecone SDK import style: from pinecone import Pinecone
    """

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        index_name: Optional[str] = None,
        cloud: Optional[str] = None,
        region: Optional[str] = None,
        namespace: str = "policies",
    ) -> None:
        """Resolve configuration (explicit args win over env vars) and connect.

        Raises:
            ValueError: when no Pinecone API key is available.
        """
        # Explicit arguments take priority; env vars are the fallback.
        self.api_key = api_key or os.getenv("PINECONE_API_KEY", "")
        self.index_name = index_name or os.getenv("PINECONE_INDEX_NAME", "ai-bpa-agent")
        self.cloud = cloud or os.getenv("PINECONE_CLOUD", "aws")
        self.region = region or os.getenv("PINECONE_REGION", "us-east-1")
        self.namespace = namespace

        if not self.api_key:
            raise ValueError("Missing PINECONE_API_KEY.")

        # Eagerly connect (and create the index if missing) at construction.
        self._index = self._init_index()

    def _init_index(self):
        """Return a ready-to-use index handle, creating the index if needed.

        Tries the newer `pinecone` SDK first; on any failure falls through to
        the legacy `pinecone-client` API.
        """
        # Newer SDK
        try:
            from pinecone import Pinecone  # type: ignore

            pc = Pinecone(api_key=self.api_key)
            # list_indexes shape varies by pinecone SDK version
            raw = pc.list_indexes()  # type: ignore[call-arg]
            # Normalize the various return shapes (dict / list of str or
            # objects / object with `.indexes`) into a set of index names.
            existing: set[str] = set()
            if isinstance(raw, dict):
                for i in raw.get("indexes", []) or []:
                    if isinstance(i, dict) and i.get("name"):
                        existing.add(str(i["name"]))
            elif isinstance(raw, list):
                for i in raw:
                    if isinstance(i, str):
                        existing.add(i)
                    else:
                        name = getattr(i, "name", None)
                        if name:
                            existing.add(str(name))
            else:
                # Some versions return an object with `.indexes`
                indexes = getattr(raw, "indexes", None)
                if isinstance(indexes, list):
                    for i in indexes:
                        if isinstance(i, dict) and i.get("name"):
                            existing.add(str(i["name"]))
                        else:
                            name = getattr(i, "name", None)
                            if name:
                                existing.add(str(name))
            if self.index_name not in existing:
                logger.info("Creating Pinecone index '%s' (cloud=%s region=%s)", self.index_name, self.cloud, self.region)
                # dimension=384 must match the vector size produced by
                # embed_texts (used in seed/retrieve below) — TODO confirm.
                pc.create_index(
                    name=self.index_name,
                    dimension=384,
                    metric="cosine",
                    spec={"serverless": {"cloud": self.cloud, "region": self.region}},
                )
            return pc.Index(self.index_name)
        except Exception:
            # NOTE(review): this broad except also swallows *runtime* errors
            # from the new SDK (auth/quota/network), not just ImportError, and
            # then attempts the legacy API below — which will fail with an
            # AttributeError on new-SDK installs. Worth confirming intent.
            pass

        # Legacy pinecone-client
        import pinecone  # type: ignore

        pinecone.init(api_key=self.api_key, environment=os.getenv("PINECONE_ENVIRONMENT", ""))
        if self.index_name not in pinecone.list_indexes():
            logger.info("Creating Pinecone index '%s' (legacy)", self.index_name)
            pinecone.create_index(self.index_name, dimension=384, metric="cosine")
        return pinecone.Index(self.index_name)

    def seed_default_policies(self) -> None:
        """
        Idempotently seed a small set of example policy/rule documents.
        In production, replace this with your real corp policies and compliance corpus.
        """

        # (id, text, metadata) triples; fixed ids make re-seeding idempotent
        # via upsert semantics.
        docs = [
            (
                "policy-1",
                "Invoices must include invoice number, invoice date, vendor name, and total amount.",
                {"type": "policy", "topic": "required_fields"},
            ),
            (
                "policy-2",
                "If vendor is flagged or unknown, route invoice to manual review or reject based on risk severity.",
                {"type": "policy", "topic": "vendor_risk"},
            ),
            (
                "rule-1",
                "Reject invoices where subtotal + tax differs from total by more than 0.02 (rounding tolerance).",
                {"type": "rule", "topic": "totals_consistency"},
            ),
            (
                "rule-2",
                "For high-severity compliance issues (e.g., missing total, missing invoice number), reject the invoice.",
                {"type": "rule", "topic": "compliance"},
            ),
        ]

        texts = [d[1] for d in docs]
        vectors = embed_texts(texts)
        # The source text is stored alongside each vector in metadata so
        # retrieve() can return it without a second lookup.
        upserts = []
        for (doc_id, text, meta), vec in zip(docs, vectors):
            upserts.append({"id": doc_id, "values": vec, "metadata": {"text": text, **meta}})

        self._index.upsert(vectors=upserts, namespace=self.namespace)

    def retrieve(self, query: str, *, top_k: int = 5) -> List[Dict[str, Any]]:
        """Embed *query* and return up to *top_k* scored policy snippets.

        Each result dict has keys ``score``, ``text`` (may be None when
        metadata lacks it), and ``metadata``.
        """
        vec = embed_texts([query])[0]
        res = self._index.query(vector=vec, top_k=top_k, include_metadata=True, namespace=self.namespace)
        # Query responses are dicts in some SDK versions and objects in
        # others; handle both shapes for the match list and its fields.
        matches = res.get("matches", []) if isinstance(res, dict) else getattr(res, "matches", [])
        out: List[Dict[str, Any]] = []
        for m in matches:
            md = m.get("metadata", {}) if isinstance(m, dict) else getattr(m, "metadata", {})  # type: ignore
            score = m.get("score") if isinstance(m, dict) else getattr(m, "score", None)  # type: ignore
            out.append({"score": score, "text": md.get("text"), "metadata": md})
        return out
+
workflow/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """LangGraph workflow components."""
2
+
workflow/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (202 Bytes). View file
 
workflow/__pycache__/graph_builder.cpython-311.pyc ADDED
Binary file (5.87 kB). View file
 
workflow/__pycache__/state_schema.cpython-311.pyc ADDED
Binary file (1.12 kB). View file