itsalissonsilva committed on
Commit
83202f8
·
verified ·
1 Parent(s): f3f9999

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +72 -34
src/streamlit_app.py CHANGED
@@ -1,40 +1,78 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
  import streamlit as st
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
- # Welcome to Streamlit!
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import json
4
+ import openai
5
+ import os
6
 
7
# The API key is read from the environment so it never has to live in the
# repository. When OPENAI_API_KEY is not set, this assigns None.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Instruction preamble that is prepended to the serialized transaction data
# before it is sent to the model. It describes the anomaly classes to look for
# and the JSON shape the reply must follow.
# NOTE(review): the text promises "known false positives", but the prompt built
# further down in this file appends only the uploaded data — confirm whether
# examples are meant to be supplied.
PROMPT_INSTRUCTIONS_TEXT = """
You are a forensic auditor AI with deep domain expertise and a sharp eye for irregularities. Your job is to identify **anomalies** in financial transaction data.
Approach the task in four logical steps:
1. Understand the dataset: infer normal ranges, typical patterns, and common behaviors.
2. Spot transactions that deviate significantly from these norms.
3. Evaluate if the deviation is meaningful enough to be flagged.
4. Provide a clear explanation for each anomaly you identify.
You must detect:
- **Numerical outliers**: suspicious values, e.g., round numbers in contexts where rounding is unusual, or extremely high/low values.
- **Value outliers**: transactions significantly outside typical ranges, such as those 5x higher than the median in context.
- **Duplicates**: repeated transactions (same amount, date, vendor, department, etc.).
- **Rare combinations**: unusual or infrequent category pairings (e.g., Department + Vendor + Category).
- **Temporal anomalies**: large transactions on weekends, holidays, or unusual hours.
- **Categorical inconsistencies**: transactions where the categorical labels are at odds with the amount or vendor characteristics.
Emphasize *contextual anomalies* — those that conventional rule-based systems might overlook.
You will also be provided with some **known false positives**. Learn from these and avoid flagging similar patterns.
Output your findings in this JSON format:
{
"anomalies": [
{
"transaction_identifier": {"Transaction_No": "...", "Payment_Date": "..."},
"amount": 1234.56,
"anomaly_type": "Value Outlier",
"explanation": "Amount is significantly higher than historical average for this department and vendor",
"confidence": 0.88
}
]
}
"""
 
38
 
39
def query_openai(prompt: str) -> dict:
    """Send *prompt* to the chat model and return its reply parsed as a dict.

    The request uses the legacy ``openai.ChatCompletion`` interface, matching
    the rest of this file. Errors raised by the API call itself are not caught
    here and propagate to the caller.

    Args:
        prompt: Full user message (instructions plus serialized data).

    Returns:
        The JSON object decoded from the model's reply, or
        ``{"error": "Failed to parse JSON from LLM response."}`` when the
        reply is missing, is not valid JSON, or is not a JSON object.
    """
    response = openai.ChatCompletion.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": "You analyze financial transactions for anomalies."},
            {"role": "user", "content": prompt},
        ],
        temperature=0.2,
        max_tokens=2048,
    )
    parse_failure = {"error": "Failed to parse JSON from LLM response."}

    content = response.choices[0].message["content"]
    if not isinstance(content, str):
        # A reply without text content would make json.loads raise TypeError.
        return parse_failure

    text = content.strip()
    if text.startswith("```"):
        # Chat models frequently wrap JSON in a Markdown fence (```json ... ```).
        # Drop the opening fence line and everything from the last fence on.
        newline_at = text.find("\n")
        text = text[newline_at + 1:] if newline_at != -1 else ""
        text = text.rsplit("```", 1)[0]

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return parse_failure

    # Callers test for keys, so a JSON list or scalar counts as a parse failure.
    if not isinstance(parsed, dict):
        return parse_failure
    return parsed
53
 
54
# ---------------------------------------------------------------------------
# Streamlit page: upload a CSV, preview it, and on request send the rows to
# the model and show whatever anomalies it reports.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="LLM Financial Anomaly Detector", layout="wide")
st.title("LLM-Powered Financial Anomaly Detector")
st.write("Upload a CSV file containing transaction data. The model will analyze and return possible anomalies.")

uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])

if uploaded_file is None:
    st.info("Please upload a CSV file to begin.")
else:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Show a readable message instead of a traceback for unreadable files.
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()

    st.subheader("Preview of Uploaded Data")
    st.dataframe(df.head(20), use_container_width=True)

    if st.button("Run Anomaly Detection"):
        if df.empty:
            # Nothing to analyze; avoid a model call on a header-only file.
            st.warning("The uploaded CSV contains no rows to analyze.")
            st.stop()

        with st.spinner("Analyzing transactions..."):
            # DataFrame.to_json writes missing values as null and serializes
            # NumPy scalars itself, whereas json.dumps on to_dict() records
            # emits the non-standard token NaN and can reject NumPy types.
            records_json = df.to_json(orient="records", indent=2)
            full_prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nDATA:\n" + records_json
            result = query_openai(full_prompt)

        # Only treat the reply as a success when "anomalies" is really a list.
        anomalies = result.get("anomalies") if isinstance(result, dict) else None
        if isinstance(anomalies, list):
            st.success(f"Found {len(anomalies)} anomalies.")
            anomalies_df = pd.json_normalize(anomalies)
            st.dataframe(anomalies_df, use_container_width=True)
        elif isinstance(result, dict) and "error" in result:
            # Report a parse failure as an error rather than as "no anomalies".
            st.error(result["error"])
        else:
            st.warning("No anomalies found or response could not be parsed.")