Spaces:

itsalissonsilva
/

test

Sleeping

App Files Files Community

itsalissonsilva committed on Jun 9, 2025

Commit

83202f8

verified ·

1 Parent(s): f3f9999

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +72 -34

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,78 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
 """
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import json
+import openai
+import os
+# Read the API key from the environment; os.getenv returns None when
+# OPENAI_API_KEY is unset, so the key may be None at this point.
+openai.api_key = os.getenv("OPENAI_API_KEY")
+# Instruction preamble for the anomaly-detection request. The serialized
+# transaction data is appended after it under a "DATA:" heading when the
+# prompt is built. The JSON shape described at the end ("anomalies" list) is
+# what the result-handling code looks for.
+# NOTE(review): the text promises "known false positives", but the visible
+# prompt construction appends only the data - confirm whether examples are
+# meant to be supplied.
+PROMPT_INSTRUCTIONS_TEXT = """
+You are a forensic auditor AI with deep domain expertise and a sharp eye for irregularities. Your job is to identify **anomalies** in financial transaction data.
+Approach the task in four logical steps:
+1. Understand the dataset: infer normal ranges, typical patterns, and common behaviors.
+2. Spot transactions that deviate significantly from these norms.
+3. Evaluate if the deviation is meaningful enough to be flagged.
+4. Provide a clear explanation for each anomaly you identify.
+You must detect:
+- **Numerical outliers**: suspicious values, e.g., round numbers in contexts where rounding is unusual, or extremely high/low values.
+- **Value outliers**: transactions significantly outside typical ranges, such as those 5x higher than the median in context.
+- **Duplicates**: repeated transactions (same amount, date, vendor, department, etc.).
+- **Rare combinations**: unusual or infrequent category pairings (e.g., Department + Vendor + Category).
+- **Temporal anomalies**: large transactions on weekends, holidays, or unusual hours.
+- **Categorical inconsistencies**: transactions where the categorical labels are at odds with the amount or vendor characteristics.
+Emphasize *contextual anomalies* — those that conventional rule-based systems might overlook.
+You will also be provided with some **known false positives**. Learn from these and avoid flagging similar patterns.
+Output your findings in this JSON format:
+{
+  "anomalies": [
+    {
+      "transaction_identifier": {"Transaction_No": "...", "Payment_Date": "..."},
+      "amount": 1234.56,
+      "anomaly_type": "Value Outlier",
+      "explanation": "Amount is significantly higher than historical average for this department and vendor",
+      "confidence": 0.88
+    }
+  ]
+}
 """
+def query_openai(prompt: str) -> dict:
+    """Send ``prompt`` to the chat model and return its reply parsed as JSON.
+
+    Args:
+        prompt: Full user message (instructions plus serialized data).
+
+    Returns:
+        The decoded JSON object from the first choice, or
+        ``{"error": ...}`` when the reply is empty, is not valid JSON, or
+        decodes to something other than a JSON object.
+
+    Exceptions raised by the OpenAI request itself are not caught here.
+    """
+    # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK interface;
+    # confirm the pinned ``openai`` version before upgrading the dependency.
+    response = openai.ChatCompletion.create(
+        model="gpt-4-turbo",
+        messages=[
+            {"role": "system", "content": "You analyze financial transactions for anomalies."},
+            {"role": "user", "content": prompt}
+        ],
+        temperature=0.2,
+        max_tokens=2048
+    )
+    # The content may be None; coerce so json.loads never receives None
+    # (which would raise TypeError rather than JSONDecodeError).
+    raw = (response.choices[0].message["content"] or "").strip()
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError:
+        # Replies are sometimes wrapped in a Markdown fence or surrounding
+        # prose; retry on the outermost {...} span before giving up.
+        first, last = raw.find("{"), raw.rfind("}")
+        try:
+            parsed = json.loads(raw[first:last + 1]) if 0 <= first < last else None
+        except json.JSONDecodeError:
+            parsed = None
+    # Honour the ``-> dict`` contract: lists, strings, numbers, etc. are
+    # reported as a parse failure instead of leaking to the caller.
+    if not isinstance(parsed, dict):
+        return {"error": "Failed to parse JSON from LLM response."}
+    return parsed
+# --- Streamlit page: upload a CSV, preview it, and run LLM anomaly detection.
+st.set_page_config(page_title="LLM Financial Anomaly Detector", layout="wide")
+st.title("LLM-Powered Financial Anomaly Detector")
+st.write("Upload a CSV file containing transaction data. The model will analyze and return possible anomalies.")
+uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
+if uploaded_file:
+    df = pd.read_csv(uploaded_file)
+    st.subheader("Preview of Uploaded Data")
+    # Only the first 20 rows are previewed; the full frame is sent to the model.
+    st.dataframe(df.head(20), use_container_width=True)
+    if st.button("Run Anomaly Detection"):
+        with st.spinner("Analyzing transactions..."):
+            df_json = df.to_dict(orient="records")
+            full_prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nDATA:\n" + json.dumps(df_json, indent=2)
+            result = query_openai(full_prompt)
+            # Guard against a non-dict result and a non-list "anomalies"
+            # value so malformed model output cannot crash the page.
+            anomalies = result.get("anomalies") if isinstance(result, dict) else None
+            if isinstance(anomalies, list):
+                st.success(f"Found {len(anomalies)} anomalies.")
+                anomalies_df = pd.json_normalize(anomalies)
+                st.dataframe(anomalies_df, use_container_width=True)
+            elif isinstance(result, dict) and "error" in result:
+                # Surface the parse failure explicitly instead of folding it
+                # into the ambiguous "no anomalies" warning.
+                st.error(result["error"])
+            else:
+                st.warning("No anomalies found or response could not be parsed.")
+else:
+    st.info("Please upload a CSV file to begin.")