Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +44 -60
src/streamlit_app.py
CHANGED
|
@@ -8,8 +8,8 @@ os.environ["STREAMLIT_HOME"] = "/tmp"
|
|
| 8 |
import streamlit as st
|
| 9 |
import pandas as pd
|
| 10 |
import json
|
| 11 |
-
from sklearn.ensemble import IsolationForest
|
| 12 |
from openai import OpenAI
|
|
|
|
| 13 |
|
| 14 |
# Initialize OpenAI client
|
| 15 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
@@ -51,94 +51,78 @@ def query_openai(prompt: str) -> dict:
|
|
| 51 |
max_tokens=2048
|
| 52 |
)
|
| 53 |
raw_output = response.choices[0].message.content
|
| 54 |
-
print("
|
| 55 |
|
| 56 |
json_start = raw_output.find("{")
|
| 57 |
json_end = raw_output.rfind("}")
|
| 58 |
if json_start != -1 and json_end != -1:
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
return {"error": "Could not locate JSON structure in LLM response."}
|
| 62 |
except json.JSONDecodeError as e:
|
| 63 |
-
return {"error": f"Failed to parse JSON: {str(e)}"}
|
| 64 |
except Exception as e:
|
| 65 |
return {"error": str(e)}
|
| 66 |
|
| 67 |
-
|
| 68 |
-
st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
|
| 69 |
-
st.title("π§ LLM-Assisted + π‘οΈ Isolation Forest Anomaly Detector")
|
| 70 |
|
|
|
|
| 71 |
st.markdown("""
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
- π€ **LLM Analysis** to detect unusual values in a **single column** (like odd formats or rare entries)
|
| 75 |
-
|
| 76 |
-
Get started by uploading your own dataset or trying our sample one.
|
| 77 |
""")
|
| 78 |
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
df = None
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
use_sample = st.button("π Use sample dataset")
|
| 86 |
-
|
| 87 |
-
if use_uploaded:
|
| 88 |
-
uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
|
| 89 |
-
if uploaded_file:
|
| 90 |
-
try:
|
| 91 |
-
df = pd.read_csv(uploaded_file)
|
| 92 |
-
st.success("File uploaded successfully.")
|
| 93 |
-
except Exception as e:
|
| 94 |
-
st.error(f"Could not read uploaded CSV. Error: {e}")
|
| 95 |
-
elif use_sample:
|
| 96 |
-
sample_path = "src/df_crypto.csv"
|
| 97 |
-
try:
|
| 98 |
-
df = pd.read_csv(sample_path)
|
| 99 |
-
st.success("Sample dataset loaded from `src/df_crypto.csv`.")
|
| 100 |
-
except Exception as e:
|
| 101 |
-
st.error(f"Could not load sample dataset: {e}")
|
| 102 |
|
| 103 |
-
# ---------------- MAIN ANALYSIS ----------------
|
| 104 |
if df is not None:
|
| 105 |
-
st.subheader("
|
| 106 |
st.dataframe(df, use_container_width=True)
|
| 107 |
|
| 108 |
-
#
|
| 109 |
-
st.subheader("
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
-
#
|
| 123 |
-
st.subheader("
|
| 124 |
-
selected_column = st.selectbox("Select a column to analyze
|
| 125 |
|
| 126 |
if st.button("Run LLM Anomaly Detection"):
|
| 127 |
-
with st.spinner("Analyzing with LLM..."):
|
| 128 |
-
values = df[selected_column].dropna().tolist()[:500]
|
| 129 |
-
value_list_with_index = [
|
| 130 |
-
{"index": idx, "value": str(val)} for idx, val in enumerate(values)
|
| 131 |
-
]
|
| 132 |
|
| 133 |
prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nVALUES:\n" + json.dumps(value_list_with_index, indent=2)
|
| 134 |
result = query_openai(prompt)
|
| 135 |
|
| 136 |
if "anomalies" in result:
|
| 137 |
-
st.success(f"Found {len(result['anomalies'])} anomalies in
|
| 138 |
st.dataframe(pd.json_normalize(result["anomalies"]), use_container_width=True)
|
| 139 |
else:
|
| 140 |
-
st.warning("No anomalies found or
|
| 141 |
st.subheader("Raw Model Output")
|
| 142 |
st.json(result)
|
| 143 |
else:
|
| 144 |
-
st.info("Please
|
|
|
|
import streamlit as st
import pandas as pd
import json

from openai import OpenAI
from sklearn.ensemble import IsolationForest

# Initialize OpenAI client
# Shared OpenAI client for every LLM call; the API key is read from the
# environment (never hard-coded).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
|
|
|
| 51 |
max_tokens=2048
|
| 52 |
)
|
| 53 |
raw_output = response.choices[0].message.content
|
| 54 |
+
print("π΅ RAW OUTPUT:\n", raw_output)
|
| 55 |
|
| 56 |
json_start = raw_output.find("{")
|
| 57 |
json_end = raw_output.rfind("}")
|
| 58 |
if json_start != -1 and json_end != -1:
|
| 59 |
+
json_str = raw_output[json_start:json_end + 1]
|
| 60 |
+
return json.loads(json_str)
|
| 61 |
|
| 62 |
return {"error": "Could not locate JSON structure in LLM response."}
|
| 63 |
except json.JSONDecodeError as e:
|
| 64 |
+
return {"error": f"Failed to parse JSON: {str(e)}"}
|
| 65 |
except Exception as e:
|
| 66 |
return {"error": str(e)}
|
| 67 |
|
st.set_page_config(page_title="LLM Financial Anomaly Detector", layout="wide")

st.title("LLM-Assisted Financial Anomaly Detector")
st.markdown("""
This app helps you detect unusual transactions in financial datasets. First, it applies an **Isolation Forest** model to highlight statistical anomalies.
Then, you can choose a specific column and let an **LLM** (Large Language Model) inspect the values and report unusual entries based on format, rarity, or inconsistency.
""")

# Data source selection: a bundled sample dataset or a user-supplied CSV.
use_sample = st.button("Use Sample Dataset")

uploaded_file = st.file_uploader("Or upload your own CSV file", type=["csv"])

# Load the chosen dataset. A failed read (missing sample file, malformed
# upload) must not crash the app: report the error and leave df as None so
# the main section falls through to the "load a dataset" hint.
df = None
if use_sample:
    try:
        df = pd.read_csv("richer_500_crypto.csv")
    except Exception as e:
        st.error(f"Could not load sample dataset: {e}")
elif uploaded_file:
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Could not read uploaded CSV. Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------- MAIN ANALYSIS ----------------
if df is not None:
    st.subheader("Dataset Preview")
    st.dataframe(df, use_container_width=True)

    # Isolation Forest - simple numeric anomaly detection
    st.subheader("Anomalies Detected with Isolation Forest")
    try:
        # include="number" covers every numeric dtype (int32, float32, ...),
        # not just the 64-bit ones; .copy() makes the frame independent of
        # df so the column assignments below cannot trigger
        # SettingWithCopyWarning.
        numeric_df = df.select_dtypes(include="number").dropna().copy()
        if not numeric_df.empty:
            iso_model = IsolationForest(contamination=0.05, random_state=42)
            preds = iso_model.fit_predict(numeric_df)
            scores = iso_model.decision_function(numeric_df)

            # fit_predict marks anomalies with -1; lower decision-function
            # scores mean "more anomalous".
            numeric_df["anomaly"] = preds
            numeric_df["score"] = scores
            st.write("Isolation Forest applied to numeric columns only:")
            st.dataframe(numeric_df[numeric_df["anomaly"] == -1], use_container_width=True)
        else:
            st.warning("No numeric columns found to run Isolation Forest.")
    except Exception as e:
        st.error(f"Error running Isolation Forest: {e}")

    # LLM-based anomaly detection
    st.subheader("LLM-Based Column Anomaly Detection")
    selected_column = st.selectbox("Select a column to analyze:", df.columns)

    if st.button("Run LLM Anomaly Detection"):
        with st.spinner("Analyzing column with LLM..."):
            # Cap at 500 values to keep the prompt within the model's
            # context window.
            values = df[selected_column].dropna().tolist()[:500]
            value_list_with_index = [{"index": idx, "value": str(val)} for idx, val in enumerate(values)]

            prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nVALUES:\n" + json.dumps(value_list_with_index, indent=2)
            result = query_openai(prompt)

            if "anomalies" in result:
                st.success(f"Found {len(result['anomalies'])} anomalies in `{selected_column}`.")
                st.dataframe(pd.json_normalize(result["anomalies"]), use_container_width=True)
            else:
                st.warning("No anomalies found or LLM response was invalid.")
                # Surface the raw payload so a bad model response can be
                # debugged from the UI.
                st.subheader("Raw Model Output")
                st.json(result)
else:
    st.info("Please load a dataset to begin.")
|