Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +5 -9
src/streamlit_app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
-
# Fix permission issues in Hugging Face Spaces or Docker
|
| 4 |
os.environ["XDG_CONFIG_HOME"] = "/tmp"
|
| 5 |
os.environ["STREAMLIT_RUNTIME_CONFIG_DIR"] = "/tmp"
|
| 6 |
os.environ["STREAMLIT_HOME"] = "/tmp"
|
|
@@ -12,7 +11,6 @@ from openai import OpenAI
|
|
| 12 |
from sklearn.ensemble import IsolationForest
|
| 13 |
from sklearn.preprocessing import LabelEncoder
|
| 14 |
|
| 15 |
-
# Initialize OpenAI client
|
| 16 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 17 |
|
| 18 |
PROMPT_INSTRUCTIONS_TEXT = """
|
|
@@ -87,18 +85,16 @@ def apply_isolation_forest(df):
|
|
| 87 |
|
| 88 |
# ---------------- Streamlit UI ----------------
|
| 89 |
st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
|
| 90 |
-
st.title("
|
| 91 |
|
| 92 |
st.markdown("""
|
| 93 |
-
This tool combines **machine learning** and **large language models** to detect
|
| 94 |
-
- We first apply **Isolation Forest** to the full dataset to flag data-level outliers.
|
| 95 |
-
- Then, you can select **one column** to perform a second pass of analysis using **OpenAI's GPT-4**, which focuses on semantic and contextual anomalies within that column only.
|
| 96 |
""")
|
| 97 |
|
| 98 |
# Button to load sample data
|
| 99 |
df = None
|
| 100 |
sample_loaded = False
|
| 101 |
-
if st.button("Load sample dataset
|
| 102 |
sample_path = "src/df_crypto.csv"
|
| 103 |
try:
|
| 104 |
df = pd.read_csv(sample_path)
|
|
@@ -121,7 +117,7 @@ if df is not None:
|
|
| 121 |
st.dataframe(df, use_container_width=True)
|
| 122 |
|
| 123 |
# ---------------- Isolation Forest ----------------
|
| 124 |
-
st.markdown("###
|
| 125 |
iforest_df = apply_isolation_forest(df)
|
| 126 |
|
| 127 |
if iforest_df is not None:
|
|
@@ -129,7 +125,7 @@ if df is not None:
|
|
| 129 |
st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
|
| 130 |
|
| 131 |
# ---------------- LLM Section ----------------
|
| 132 |
-
st.markdown("###
|
| 133 |
|
| 134 |
selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
|
| 135 |
|
|
|
|
| 1 |
import os
|
| 2 |
|
|
|
|
| 3 |
os.environ["XDG_CONFIG_HOME"] = "/tmp"
|
| 4 |
os.environ["STREAMLIT_RUNTIME_CONFIG_DIR"] = "/tmp"
|
| 5 |
os.environ["STREAMLIT_HOME"] = "/tmp"
|
|
|
|
| 11 |
from sklearn.ensemble import IsolationForest
|
| 12 |
from sklearn.preprocessing import LabelEncoder
|
| 13 |
|
|
|
|
| 14 |
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
| 15 |
|
| 16 |
PROMPT_INSTRUCTIONS_TEXT = """
|
|
|
|
| 85 |
|
| 86 |
# ---------------- Streamlit UI ----------------
|
| 87 |
st.set_page_config(page_title="LLM-Assisted Anomaly Detector", layout="wide")
|
| 88 |
+
st.title("LLM-Assisted Anomaly Detector")
|
| 89 |
|
| 90 |
st.markdown("""
|
| 91 |
+
This tool combines **machine learning** and **large language models** to detect anomalies in datasets. We first apply **Isolation Forest** to the full dataset to flag data-level outliers. Then, you can select **one column** to perform a second pass of analysis using **OpenAI's GPT-4**, which focuses on semantic and contextual anomalies within that column only.
|
|
|
|
|
|
|
| 92 |
""")
|
| 93 |
|
| 94 |
# Button to load sample data
|
| 95 |
df = None
|
| 96 |
sample_loaded = False
|
| 97 |
+
if st.button("Load sample dataset"):
|
| 98 |
sample_path = "src/df_crypto.csv"
|
| 99 |
try:
|
| 100 |
df = pd.read_csv(sample_path)
|
|
|
|
| 117 |
st.dataframe(df, use_container_width=True)
|
| 118 |
|
| 119 |
# ---------------- Isolation Forest ----------------
|
| 120 |
+
st.markdown("### Anomaly Detection with Isolation Forest (whole dataset)")
|
| 121 |
iforest_df = apply_isolation_forest(df)
|
| 122 |
|
| 123 |
if iforest_df is not None:
|
|
|
|
| 125 |
st.dataframe(iforest_df[iforest_df["Anomaly"] == "Yes"], use_container_width=True)
|
| 126 |
|
| 127 |
# ---------------- LLM Section ----------------
|
| 128 |
+
st.markdown("### LLM-Based Anomaly Detection (specific column)")
|
| 129 |
|
| 130 |
selected_column = st.selectbox("Select a column to analyze with LLM:", df.columns)
|
| 131 |
|