Seth0330 committed on
Commit
dd32a84
·
verified ·
1 Parent(s): 50aaed9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -78
app.py CHANGED
@@ -5,23 +5,17 @@ import re
5
  import os
6
  import time
7
  import mimetypes
 
 
 
 
 
 
8
 
9
  st.set_page_config(page_title="PDF Tools", layout="wide")
10
 
11
- # -------- LLM Model Setup (same as before) --------
12
  MODELS = {
13
- "DeepSeek v3": {
14
- "api_url": "https://api.deepseek.com/v1/chat/completions",
15
- "model": "deepseek-chat",
16
- "key_env": "DEEPSEEK_API_KEY",
17
- "response_format": {"type": "json_object"},
18
- },
19
- "DeepSeek R1": {
20
- "api_url": "https://api.deepseek.com/v1/chat/completions",
21
- "model": "deepseek-reasoner",
22
- "key_env": "DEEPSEEK_API_KEY",
23
- "response_format": None,
24
- },
25
  "OpenAI GPT-4.1": {
26
  "api_url": "https://api.openai.com/v1/chat/completions",
27
  "model": "gpt-4-1106-preview",
@@ -29,16 +23,6 @@ MODELS = {
29
  "response_format": None,
30
  "extra_headers": {},
31
  },
32
- "Mistral Small": {
33
- "api_url": "https://openrouter.ai/api/v1/chat/completions",
34
- "model": "mistralai/ministral-8b",
35
- "key_env": "OPENROUTER_API_KEY",
36
- "response_format": {"type": "json_object"},
37
- "extra_headers": {
38
- "HTTP-Referer": "https://huggingface.co",
39
- "X-Title": "Invoice Extractor",
40
- },
41
- },
42
  }
43
 
44
  def get_api_key(model_choice):
@@ -68,10 +52,7 @@ def query_llm(model_choice, prompt):
68
  with st.spinner(f"🔍 Querying {model_choice}..."):
69
  r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
70
  if r.status_code != 200:
71
- if "No instances available" in r.text or r.status_code == 503:
72
- st.error(f"{model_choice} is currently unavailable. Please try again later or select another model.")
73
- else:
74
- st.error(f"🚨 API Error {r.status_code}: {r.text}")
75
  return None
76
  content = r.json()["choices"][0]["message"]["content"]
77
  st.session_state.last_api = content
@@ -201,18 +182,6 @@ def extract_invoice_info(model_choice, text):
201
  data = clean_json_response(raw)
202
  if not data:
203
  return None
204
-
205
- if model_choice.startswith("DeepSeek"):
206
- header = {k: v for k, v in data.items() if k != "line_items"}
207
- items = data.get("line_items", [])
208
- if not isinstance(items, list):
209
- items = []
210
- for itm in items:
211
- if not isinstance(itm, dict):
212
- continue
213
- for k in ("description","quantity","unit_price","total_price"):
214
- itm.setdefault(k, None)
215
- return {"invoice_header": header, "line_items": items}
216
  hdr = data.get("invoice_header", {})
217
  if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
218
  hdr = data
@@ -230,32 +199,27 @@ def extract_invoice_info(model_choice, text):
230
  itm.setdefault(k, None)
231
  return {"invoice_header": hdr, "line_items": items}
232
 
233
- # --------- File type/content-type detection ---------
234
  def get_content_type(filename):
235
  mime, _ = mimetypes.guess_type(filename)
236
  ext = filename.lower().split('.')[-1]
237
- # Special case for PDF (Unstract quirk)
238
  if ext == "pdf":
239
  return "text/plain"
240
  if mime is None:
241
  return "application/octet-stream"
242
  return mime
243
 
244
- # --------- UNSTRACT API Multi-file PDF/Doc/Image-to-Text ---------
245
  UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
246
- UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY") # Set this in your environment!
247
 
248
  def extract_text_from_unstract(uploaded_file):
249
  filename = getattr(uploaded_file, "name", "uploaded_file")
250
  file_bytes = uploaded_file.read()
251
  content_type = get_content_type(filename)
252
-
253
  headers = {
254
  "unstract-key": UNSTRACT_API_KEY,
255
  "Content-Type": content_type,
256
  }
257
  url = f"{UNSTRACT_BASE}/whisper"
258
-
259
  with st.spinner("Uploading and processing document with Unstract..."):
260
  r = requests.post(url, headers=headers, data=file_bytes)
261
  if r.status_code != 202:
@@ -265,9 +229,8 @@ def extract_text_from_unstract(uploaded_file):
265
  if not whisper_hash:
266
  st.error("Unstract: No whisper_hash received.")
267
  return None
268
-
269
  status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
270
- for i in range(30): # Wait up to 60s (2s x 30)
271
  status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
272
  if status_r.status_code != 200:
273
  st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
@@ -280,7 +243,6 @@ def extract_text_from_unstract(uploaded_file):
280
  else:
281
  st.error("Unstract: Timeout waiting for OCR to finish.")
282
  return None
283
-
284
  retrieve_url = f"{UNSTRACT_BASE}/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
285
  r = requests.get(retrieve_url, headers={"unstract-key": UNSTRACT_API_KEY})
286
  if r.status_code != 200:
@@ -292,11 +254,23 @@ def extract_text_from_unstract(uploaded_file):
292
  except Exception:
293
  return r.text
294
 
295
- # --------- INVOICE EXTRACTOR UI ---------
 
 
 
 
 
 
 
 
 
 
 
 
296
  st.title("Invoice/Document Extractor")
297
  mdl = st.selectbox("Model", list(MODELS.keys()), key="extract_model")
298
  inv_file = st.file_uploader(
299
- "Invoice or Document File",
300
  type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
301
  )
302
  extracted_info = None
@@ -314,34 +288,75 @@ if st.button("Extract") and inv_file:
314
  st.table(extracted_info["line_items"])
315
  st.session_state["last_extracted_info"] = extracted_info # store in session
316
 
317
- # If we've already extracted info, or in this session, show further controls
318
  extracted_info = extracted_info or st.session_state.get("last_extracted_info", None)
319
- if extracted_info:
320
- st.markdown("---")
321
- st.subheader("📝 Fine-tune Extracted Data with Your Own Prompt")
322
- user_prompt = st.text_area(
323
- "Enter your prompt for further processing or transformation (the extracted JSON will be available as context).",
324
- height=120,
325
- key="custom_prompt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  )
327
- model_2 = st.selectbox("Model for Fine-Tuning Prompt", list(MODELS.keys()), key="refine_model")
328
- if st.button("Run Custom Prompt"):
329
- refine_input = (
330
- "Here is an extracted invoice in JSON format:\n"
331
- f"{json.dumps(extracted_info, indent=2)}\n"
332
- "Follow this instruction and return the result as a JSON object only (no explanation):\n"
333
- f"{user_prompt}"
334
- )
335
- result = query_llm(model_2, refine_input)
336
- refined_json = clean_json_response(result)
337
- st.subheader("Fine-Tuned Output")
338
- if refined_json:
339
- st.json(refined_json)
340
- else:
341
- st.error("Could not parse a valid JSON output from the model.")
342
- st.caption("The prompt is run on the above-extracted fields as JSON. Try instructions like: 'Add a new field for net_amount (amount minus tax) to each line item', or 'Summarize the total quantity ordered', etc.")
343
 
344
- if "last_api" in st.session_state:
345
- with st.expander("Debug"):
346
- st.code(st.session_state.last_api)
347
- st.code(st.session_state.last_raw)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import os
6
  import time
7
  import mimetypes
8
+ import pandas as pd
9
+
10
+ # NEW: LangGraph & LangChain imports
11
+ from langchain_community.chat_models import ChatOpenAI
12
+ from langgraph.graph import StateGraph, END
13
+ from langgraph.prebuilt import create_react_agent
14
 
15
  st.set_page_config(page_title="PDF Tools", layout="wide")
16
 
17
+ # -------- LLM Model Setup (your unchanged code) --------
18
  MODELS = {
 
 
 
 
 
 
 
 
 
 
 
 
19
  "OpenAI GPT-4.1": {
20
  "api_url": "https://api.openai.com/v1/chat/completions",
21
  "model": "gpt-4-1106-preview",
 
23
  "response_format": None,
24
  "extra_headers": {},
25
  },
 
 
 
 
 
 
 
 
 
 
26
  }
27
 
28
  def get_api_key(model_choice):
 
52
  with st.spinner(f"🔍 Querying {model_choice}..."):
53
  r = requests.post(cfg["api_url"], headers=headers, json=payload, timeout=90)
54
  if r.status_code != 200:
55
+ st.error(f"🚨 API Error {r.status_code}: {r.text}")
 
 
 
56
  return None
57
  content = r.json()["choices"][0]["message"]["content"]
58
  st.session_state.last_api = content
 
182
  data = clean_json_response(raw)
183
  if not data:
184
  return None
 
 
 
 
 
 
 
 
 
 
 
 
185
  hdr = data.get("invoice_header", {})
186
  if not hdr and any(k in data for k in ("invoice_number","supplier_name","customer_name")):
187
  hdr = data
 
199
  itm.setdefault(k, None)
200
  return {"invoice_header": hdr, "line_items": items}
201
 
 
202
def get_content_type(filename):
    """Return the HTTP Content-Type header value to send for *filename*.

    PDFs are deliberately reported as ``text/plain`` (quirk required by the
    Unstract upload endpoint); anything :mod:`mimetypes` cannot identify
    falls back to ``application/octet-stream``.
    """
    guessed, _ = mimetypes.guess_type(filename)
    extension = filename.lower().rpartition(".")[-1]
    if extension == "pdf":
        return "text/plain"
    return guessed if guessed is not None else "application/octet-stream"
210
 
 
211
  UNSTRACT_BASE = "https://llmwhisperer-api.us-central.unstract.com/api/v2"
212
+ UNSTRACT_API_KEY = os.getenv("UNSTRACT_API_KEY")
213
 
214
  def extract_text_from_unstract(uploaded_file):
215
  filename = getattr(uploaded_file, "name", "uploaded_file")
216
  file_bytes = uploaded_file.read()
217
  content_type = get_content_type(filename)
 
218
  headers = {
219
  "unstract-key": UNSTRACT_API_KEY,
220
  "Content-Type": content_type,
221
  }
222
  url = f"{UNSTRACT_BASE}/whisper"
 
223
  with st.spinner("Uploading and processing document with Unstract..."):
224
  r = requests.post(url, headers=headers, data=file_bytes)
225
  if r.status_code != 202:
 
229
  if not whisper_hash:
230
  st.error("Unstract: No whisper_hash received.")
231
  return None
 
232
  status_url = f"{UNSTRACT_BASE}/whisper-status?whisper_hash={whisper_hash}"
233
+ for i in range(30):
234
  status_r = requests.get(status_url, headers={"unstract-key": UNSTRACT_API_KEY})
235
  if status_r.status_code != 200:
236
  st.error(f"Unstract: Error checking status: {status_r.status_code} - {status_r.text}")
 
243
  else:
244
  st.error("Unstract: Timeout waiting for OCR to finish.")
245
  return None
 
246
  retrieve_url = f"{UNSTRACT_BASE}/whisper-retrieve?whisper_hash={whisper_hash}&text_only=true"
247
  r = requests.get(retrieve_url, headers={"unstract-key": UNSTRACT_API_KEY})
248
  if r.status_code != 200:
 
254
  except Exception:
255
  return r.text
256
 
257
# --------- Step 1: upload the active purchase orders (sidebar) ---------
st.sidebar.header("Step 1: Upload Active Purchase Orders (POs)")
po_file = st.sidebar.file_uploader(
    "Upload POs CSV (must include PO number, Supplier, Items, etc.)",
    type=["csv"],
    key="po_csv",
)
po_df = None  # DataFrame of active POs; stays None until a valid CSV is loaded
if po_file:
    try:
        po_df = pd.read_csv(po_file)
    except Exception as exc:
        # BUGFIX: a malformed/empty CSV previously raised and crashed the
        # whole Streamlit script; surface the problem in the sidebar instead.
        st.sidebar.error(f"Could not read PO CSV: {exc}")
        po_df = None
    else:
        st.sidebar.success(f"Loaded {len(po_df)} Purchase Orders.")
        st.sidebar.dataframe(po_df.head())
270
  st.title("Invoice/Document Extractor")
271
  mdl = st.selectbox("Model", list(MODELS.keys()), key="extract_model")
272
  inv_file = st.file_uploader(
273
+ "Step 2: Upload Invoice or Document File",
274
  type=["pdf", "docx", "xlsx", "xls", "png", "jpg", "jpeg", "tiff"]
275
  )
276
  extracted_info = None
 
288
  st.table(extracted_info["line_items"])
289
  st.session_state["last_extracted_info"] = extracted_info # store in session
290
 
 
291
  extracted_info = extracted_info or st.session_state.get("last_extracted_info", None)
292
+
293
+ # -------------------------------
294
+ # LANGGRAPH ReAct DECISION AGENT
295
+ # -------------------------------
296
+
297
def po_match_tool(query: str, context: dict):
    """Look up a purchase order matching the invoice supplied in *context*.

    Parameters
    ----------
    query : str
        The agent's tool query string (currently unused; matching is driven
        entirely by the invoice fields in *context*).
    context : dict
        Must contain ``"invoice"`` — an extracted invoice dict with an
        ``"invoice_header"`` key — and ``"po_df"`` — a pandas DataFrame of
        active POs (expected columns: ``"PO Number"``, ``"Supplier Name"``).

    Returns
    -------
    str
        The matched PO row as a dict, prefixed with the reason it matched,
        or ``"No matching PO found."``.
    """
    invoice = context["invoice"]
    po_df = context["po_df"]
    inv_hdr = invoice["invoice_header"]
    # The PO number may appear under several header keys depending on which
    # field name the extraction LLM chose.
    inv_po_number = (
        inv_hdr.get("purchase_order_number")
        or inv_hdr.get("order_number")
        or inv_hdr.get("our_order_number")
    )
    inv_supplier = inv_hdr.get("supplier_name")
    explanation = ""
    matched_po = None
    # First preference: exact PO-number match, ignoring case and spaces.
    if inv_po_number:
        target = str(inv_po_number).lower().replace(" ", "")
        for _, row in po_df.iterrows():
            if str(row.get("PO Number", "")).lower().replace(" ", "") == target:
                matched_po = row
                explanation += f"Matched on PO Number: {inv_po_number}\n"
                break
    # Fallback: first PO from the same supplier (case/whitespace insensitive).
    # BUGFIX: guard the column access — a CSV without "Supplier Name"
    # previously raised KeyError here.
    if matched_po is None and inv_supplier and "Supplier Name" in po_df.columns:
        potential_matches = po_df[
            po_df["Supplier Name"].str.lower().str.strip()
            == inv_supplier.lower().strip()
        ]
        if not potential_matches.empty:
            matched_po = potential_matches.iloc[0]
            explanation += f"Matched on Supplier Name: {inv_supplier}\n"
    if matched_po is not None:
        # BUGFIX: include the match explanation in the result — it was
        # computed on both match paths but previously discarded.
        return f"{explanation}PO matched: {matched_po.to_dict()}"
    return "No matching PO found."
321
+
322
def build_decision_agent():
    """Construct and compile the LangGraph ReAct agent used for PO decisions.

    Reads ``OPENAI_API_KEY`` from the environment and wires a single tool
    (``po_match_tool``) into a ReAct agent, then wraps it in a ``StateGraph``.

    NOTE(review): several calls here look inconsistent with the published
    LangGraph API and should be verified against the installed version:
    ``StateGraph(...)`` normally takes a state *schema*, not an agent;
    ``set_entry_point``/``add_node`` normally take node *names* (strings);
    and ``create_react_agent`` typically expects LangChain tool objects
    rather than plain dicts. Preserved as-is pending confirmation.
    """
    openai_api_key = os.getenv("OPENAI_API_KEY")
    # NOTE(review): langchain_community.chat_models.ChatOpenAI is deprecated
    # upstream in favour of langchain_openai.ChatOpenAI — confirm before upgrading.
    llm = ChatOpenAI(
        openai_api_key=openai_api_key,
        model="gpt-4-1106-preview",
        temperature=0,
        streaming=False,
    )
    tools = [
        {
            "name": "po_match_tool",
            "description": "Looks up a PO for a given invoice context.",
            "func": po_match_tool,
        }
    ]
    agent = create_react_agent(llm, tools)

    def finish_decision(state, context):
        # Terminal node: pass the state through unchanged.
        return END, state

    builder = StateGraph(agent)
    builder.add_node("finish", finish_decision)
    builder.set_entry_point(agent)
    builder.add_edge(agent, END)
    return builder.compile()
 
345
 
346
# Run the decision agent only once both inputs exist: an extracted invoice
# and an uploaded PO list.
if extracted_info and po_df is not None:
    if st.button("Make a decision (AI Agent)"):
        with st.spinner("Reasoning and making a decision with LangGraph agent..."):
            agent_graph = build_decision_agent()
            task = (
                "Here is an invoice JSON and a list of active POs in context. "
                "Step by step, reason whether the invoice matches an active PO and can be approved. "
                "If there is a match, state the matched PO, otherwise explain why not. "
                "Give a clear final decision: APPROVED or REJECTED."
            )
            context = {
                "invoice": extracted_info,
                "po_df": po_df,
            }
            # NOTE(review): compiled LangGraph graphs normally take a single
            # state dict in invoke(); confirm this call signature works with
            # the installed langgraph version.
            out = agent_graph.invoke(task, context=context)
            st.subheader("AI Decision")
            st.write(out)