itsalissonsilva committed on
Commit
ebbcc75
·
verified ·
1 Parent(s): 031cc89

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +5 -9
src/streamlit_app.py CHANGED
@@ -1,6 +1,5 @@
1
  import os
2
 
3
- # Fix permission issues in Hugging Face Spaces or Docker
4
  os.environ["XDG_CONFIG_HOME"] = "/tmp"
5
  os.environ["STREAMLIT_RUNTIME_CONFIG_DIR"] = "/tmp"
6
  os.environ["STREAMLIT_HOME"] = "/tmp"
@@ -12,7 +11,6 @@ from openai import OpenAI
12
  from sklearn.ensemble import IsolationForest
13
  from sklearn.preprocessing import LabelEncoder
14
 
15
- # Initialize OpenAI client
16
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
17
 
18
  PROMPT_INSTRUCTIONS_TEXT = """
@@ -87,18 +85,16 @@ def apply_isolation_forest(df):
87
 
88
  # ---------------- Streamlit UI ----------------
89
  st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
90
- st.title("🧠 LLM-Assisted + 🛡️ Isolation Forest Anomaly Detector")
91
 
92
  st.markdown("""
93
- This tool combines **machine learning** and **large language models** to detect financial anomalies:
94
- - We first apply **Isolation Forest** to the full dataset to flag data-level outliers.
95
- - Then, you can select **one column** to perform a second pass of analysis using **OpenAI's GPT-4**, which focuses on semantic and contextual anomalies within that column only.
96
  """)
97
 
98
  # Button to load sample data
99
  df = None
100
  sample_loaded = False
101
- if st.button("Load sample dataset (df_crypto.csv)"):
102
  sample_path = "src/df_crypto.csv"
103
  try:
104
  df = pd.read_csv(sample_path)
@@ -121,7 +117,7 @@ if df is not None:
121
  st.dataframe(df, use_container_width=True)
122
 
123
  # ---------------- Isolation Forest ----------------
124
- st.markdown("### 🛡️ Anomaly Detection with Isolation Forest (whole dataset)")
125
  iforest_df = apply_isolation_forest(df)
126
 
127
  if iforest_df is not None:
@@ -129,7 +125,7 @@ if df is not None:
129
  st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
130
 
131
  # ---------------- LLM Section ----------------
132
- st.markdown("### 🔍 LLM-Based Anomaly Detection (specific column)")
133
 
134
  selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
135
 
 
1
  import os
2
 
 
3
  os.environ["XDG_CONFIG_HOME"] = "/tmp"
4
  os.environ["STREAMLIT_RUNTIME_CONFIG_DIR"] = "/tmp"
5
  os.environ["STREAMLIT_HOME"] = "/tmp"
 
11
  from sklearn.ensemble import IsolationForest
12
  from sklearn.preprocessing import LabelEncoder
13
 
 
14
  client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
15
 
16
  PROMPT_INSTRUCTIONS_TEXT = """
 
85
 
86
  # ---------------- Streamlit UI ----------------
87
  st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
88
+ st.title("LLM-Assisted Anomaly Detector")
89
 
90
  st.markdown("""
91
+ This tool combines **machine learning** and **large language models** to detect anomalies in datasets. We first apply **Isolation Forest** to the full dataset to flag data-level outliers. Then, you can select **one column** to perform a second pass of analysis using **OpenAI's GPT-4**, which focuses on semantic and contextual anomalies within that column only.
 
 
92
  """)
93
 
94
  # Button to load sample data
95
  df = None
96
  sample_loaded = False
97
+ if st.button("Load sample dataset"):
98
  sample_path = "src/df_crypto.csv"
99
  try:
100
  df = pd.read_csv(sample_path)
 
117
  st.dataframe(df, use_container_width=True)
118
 
119
  # ---------------- Isolation Forest ----------------
120
+ st.markdown("### Anomaly Detection with Isolation Forest (whole dataset)")
121
  iforest_df = apply_isolation_forest(df)
122
 
123
  if iforest_df is not None:
 
125
  st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
126
 
127
  # ---------------- LLM Section ----------------
128
+ st.markdown("### LLM-Based Anomaly Detection (specific column)")
129
 
130
  selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
131