Spaces:

itsalissonsilva
/

test

Sleeping

App Files Files Community

itsalissonsilva committed on Jun 10, 2025

Commit

c4559e8

verified ·

1 Parent(s): 4e3122d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +54 -45

src/streamlit_app.py CHANGED Viewed

@@ -67,7 +67,6 @@ def query_openai(prompt: str) -> dict:
         return {"error": str(e)}
 def apply_isolation_forest(df):
-    # Copy and encode categorical columns
     df_encoded = df.copy()
     for col in df_encoded.select_dtypes(include=["object", "category"]).columns:
         df_encoded[col] = LabelEncoder().fit_transform(df_encoded[col].astype(str))
@@ -86,54 +85,64 @@ def apply_isolation_forest(df):
         st.error(f"Isolation Forest failed: {e}")
         return None
 st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
 st.title("🧠 LLM-Assisted + 🛡️ Isolation Forest Anomaly Detector")
-uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
-if uploaded_file:
-    st.write("Uploaded file:", uploaded_file.name)
     try:
-        df = pd.read_csv(uploaded_file)
-        st.subheader("Full Dataset")
-        st.dataframe(df, use_container_width=True)
-        # ------------------------ Isolation Forest Section ------------------------
-        st.markdown("### 🛡️ Anomaly Detection with Isolation Forest (whole dataset)")
-        iforest_df = apply_isolation_forest(df)
-        if iforest_df is not None:
-            st.success("Isolation Forest analysis completed.")
-            st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
-        # ------------------------ LLM Analysis Section ------------------------
-        st.markdown("### 🔍 LLM-Based Anomaly Detection (specific column)")
-        selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
-        if st.button("Run LLM Anomaly Detection on selected column"):
-            with st.spinner("Analyzing column with LLM..."):
-                values = df[selected_column].dropna().tolist()
-                values = values[:500]
-                value_list_with_index = [
-                    {"index": idx, "value": str(val)} for idx, val in enumerate(values)
-                ]
-                prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nVALUES:\n" + json.dumps(value_list_with_index, indent=2)
-                result = query_openai(prompt)
-                if "anomalies" in result:
-                    st.success(f"LLM found {len(result['anomalies'])} anomalies in `{selected_column}`.")
-                    st.dataframe(pd.json_normalize(result["anomalies"]), use_container_width=True)
-                else:
-                    st.warning("No anomalies found or invalid response from LLM.")
-                    st.subheader("Raw Model Output")
-                    st.json(result)
     except Exception as e:
-        st.error(f"Could not read CSV. Error: {e}")
 else:
-    st.info("Please upload a CSV file to begin.")

         return {"error": str(e)}
 def apply_isolation_forest(df):
     df_encoded = df.copy()
     for col in df_encoded.select_dtypes(include=["object", "category"]).columns:
         df_encoded[col] = LabelEncoder().fit_transform(df_encoded[col].astype(str))
         st.error(f"Isolation Forest failed: {e}")
         return None
+# ---------------- Streamlit UI ----------------
 st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
 st.title("🧠 LLM-Assisted + 🛡️ Isolation Forest Anomaly Detector")
+use_sample = st.checkbox("Use built-in sample dataset (df_crypto.csv)?", value=False)
+df = None
+if use_sample:
+    sample_path = "src/df_crypto.csv"
     try:
+        df = pd.read_csv(sample_path)
+        st.success("Sample dataset loaded from `src/df_crypto.csv`.")
     except Exception as e:
+        st.error(f"Could not load sample dataset: {e}")
+else:
+    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
+    if uploaded_file:
+        try:
+            df = pd.read_csv(uploaded_file)
+        except Exception as e:
+            st.error(f"Could not read uploaded CSV. Error: {e}")
+if df is not None:
+    st.subheader("Full Dataset")
+    st.dataframe(df, use_container_width=True)
+    # ---------------- Isolation Forest ----------------
+    st.markdown("### 🛡️ Anomaly Detection with Isolation Forest (whole dataset)")
+    iforest_df = apply_isolation_forest(df)
+    if iforest_df is not None:
+        st.success("Isolation Forest analysis completed.")
+        st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
+    # ---------------- LLM Section ----------------
+    st.markdown("### 🔍 LLM-Based Anomaly Detection (specific column)")
+    selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
+    if st.button("Run LLM Anomaly Detection on selected column"):
+        with st.spinner("Analyzing column with LLM..."):
+            values = df[selected_column].dropna().tolist()
+            values = values[:500]  # keep within token limits
+            value_list_with_index = [
+                {"index": idx, "value": str(val)} for idx, val in enumerate(values)
+            ]
+            prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nVALUES:\n" + json.dumps(value_list_with_index, indent=2)
+            result = query_openai(prompt)
+            if "anomalies" in result:
+                st.success(f"LLM found {len(result['anomalies'])} anomalies in `{selected_column}`.")
+                st.dataframe(pd.json_normalize(result["anomalies"]), use_container_width=True)
+            else:
+                st.warning("No anomalies found or invalid response from LLM.")
+                st.subheader("Raw Model Output")
+                st.json(result)
 else:
+    st.info("Please upload a CSV or use the sample dataset.")