itsalissonsilva committed on
Commit
c4559e8
·
verified ·
1 Parent(s): 4e3122d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +54 -45
src/streamlit_app.py CHANGED
@@ -67,7 +67,6 @@ def query_openai(prompt: str) -> dict:
67
  return {"error": str(e)}
68
 
69
  def apply_isolation_forest(df):
70
- # Copy and encode categorical columns
71
  df_encoded = df.copy()
72
  for col in df_encoded.select_dtypes(include=["object", "category"]).columns:
73
  df_encoded[col] = LabelEncoder().fit_transform(df_encoded[col].astype(str))
@@ -86,54 +85,64 @@ def apply_isolation_forest(df):
86
  st.error(f"Isolation Forest failed: {e}")
87
  return None
88
 
 
89
  st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
90
  st.title("🧠 LLM-Assisted + 🛡️ Isolation Forest Anomaly Detector")
91
 
92
- uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
 
93
 
94
- if uploaded_file:
95
- st.write("Uploaded file:", uploaded_file.name)
96
  try:
97
- df = pd.read_csv(uploaded_file)
98
- st.subheader("Full Dataset")
99
- st.dataframe(df, use_container_width=True)
100
-
101
- # ------------------------ Isolation Forest Section ------------------------
102
- st.markdown("### 🛡️ Anomaly Detection with Isolation Forest (whole dataset)")
103
-
104
- iforest_df = apply_isolation_forest(df)
105
-
106
- if iforest_df is not None:
107
- st.success("Isolation Forest analysis completed.")
108
- st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
109
-
110
- # ------------------------ LLM Analysis Section ------------------------
111
- st.markdown("### 🔍 LLM-Based Anomaly Detection (specific column)")
112
-
113
- selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
114
-
115
- if st.button("Run LLM Anomaly Detection on selected column"):
116
- with st.spinner("Analyzing column with LLM..."):
117
- values = df[selected_column].dropna().tolist()
118
- values = values[:500]
119
-
120
- value_list_with_index = [
121
- {"index": idx, "value": str(val)} for idx, val in enumerate(values)
122
- ]
123
-
124
- prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nVALUES:\n" + json.dumps(value_list_with_index, indent=2)
125
-
126
- result = query_openai(prompt)
127
-
128
- if "anomalies" in result:
129
- st.success(f"LLM found {len(result['anomalies'])} anomalies in `{selected_column}`.")
130
- st.dataframe(pd.json_normalize(result["anomalies"]), use_container_width=True)
131
- else:
132
- st.warning("No anomalies found or invalid response from LLM.")
133
- st.subheader("Raw Model Output")
134
- st.json(result)
135
-
136
  except Exception as e:
137
- st.error(f"Could not read CSV. Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  else:
139
- st.info("Please upload a CSV file to begin.")
 
67
  return {"error": str(e)}
68
 
69
  def apply_isolation_forest(df):
 
70
  df_encoded = df.copy()
71
  for col in df_encoded.select_dtypes(include=["object", "category"]).columns:
72
  df_encoded[col] = LabelEncoder().fit_transform(df_encoded[col].astype(str))
 
85
  st.error(f"Isolation Forest failed: {e}")
86
  return None
87
 
# ---------------- Streamlit UI ----------------
# Top-level page script. Flow:
#   1. choose a data source (bundled sample CSV or an uploaded CSV),
#   2. display the loaded DataFrame,
#   3. run apply_isolation_forest() on the whole frame and show flagged rows,
#   4. on demand, send one column's values to the LLM via query_openai().
st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
st.title("🧠 LLM-Assisted + 🛡️ Isolation Forest Anomaly Detector")

use_sample = st.checkbox("Use built-in sample dataset (df_crypto.csv)?", value=False)
df = None  # stays None until a dataset has been read successfully

if use_sample:
    # NOTE(review): relative path, resolved against the process working
    # directory -- presumably the repository root; confirm for the deployment.
    sample_path = "src/df_crypto.csv"
    try:
        df = pd.read_csv(sample_path)
        st.success("Sample dataset loaded from `src/df_crypto.csv`.")
    except Exception as e:
        st.error(f"Could not load sample dataset: {e}")
else:
    uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file:
        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Could not read uploaded CSV. Error: {e}")

if df is not None:
    st.subheader("Full Dataset")
    st.dataframe(df, use_container_width=True)

    # ---------------- Isolation Forest ----------------
    st.markdown("### 🛡️ Anomaly Detection with Isolation Forest (whole dataset)")
    iforest_df = apply_isolation_forest(df)

    # apply_isolation_forest() returns None after reporting its own error,
    # so only render results when a frame came back.
    if iforest_df is not None:
        st.success("Isolation Forest analysis completed.")
        st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)

    # ---------------- LLM Section ----------------
    st.markdown("### 🔍 LLM-Based Anomaly Detection (specific column)")

    selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)

    if st.button("Run LLM Anomaly Detection on selected column"):
        with st.spinner("Analyzing column with LLM..."):
            # Number rows by 0-based position *before* dropping missing values,
            # so every "index" sent to the LLM still identifies the row of `df`
            # the value came from. Enumerating after dropna() would shift the
            # indices as soon as the column contains a missing value.
            column_values = df[selected_column].reset_index(drop=True)
            non_null_values = column_values.dropna().head(500)  # keep within token limits

            value_list_with_index = [
                {"index": int(row_pos), "value": str(val)}
                for row_pos, val in non_null_values.items()
            ]

            if not value_list_with_index:
                # Nothing to analyze; avoid a pointless LLM request.
                st.warning(f"Column `{selected_column}` has no non-empty values to analyze.")
            else:
                prompt = PROMPT_INSTRUCTIONS_TEXT + "\n\nVALUES:\n" + json.dumps(value_list_with_index, indent=2)

                result = query_openai(prompt)

                if "anomalies" in result:
                    st.success(f"LLM found {len(result['anomalies'])} anomalies in `{selected_column}`.")
                    st.dataframe(pd.json_normalize(result["anomalies"]), use_container_width=True)
                else:
                    # Covers both an error payload and a response without
                    # an "anomalies" key; show the raw output for debugging.
                    st.warning("No anomalies found or invalid response from LLM.")
                    st.subheader("Raw Model Output")
                    st.json(result)
else:
    st.info("Please upload a CSV or use the sample dataset.")