Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 9 |
import torch
|
| 10 |
|
| 11 |
# ---------------------------------------------------------------------
|
| 12 |
-
# MUST BE FIRST STREAMLIT
|
| 13 |
# ---------------------------------------------------------------------
|
| 14 |
st.set_page_config(page_title="Smart Log Copilot", layout="wide")
|
| 15 |
|
|
@@ -28,7 +28,7 @@ def load_llm():
|
|
| 28 |
|
| 29 |
tokenizer, model = load_llm()
|
| 30 |
|
| 31 |
-
def llm(prompt, max_new_tokens=150):
|
| 32 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 33 |
outputs = model.generate(
|
| 34 |
**inputs,
|
|
@@ -71,7 +71,6 @@ Explain risks clearly + list recommended actions.
|
|
| 71 |
PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
|
| 72 |
|
| 73 |
# ------------------ CSV & ANALYTICS ------------------
|
| 74 |
-
# Auto-map CSVs with different column names
|
| 75 |
COLUMN_MAP = {
|
| 76 |
"username": "user",
|
| 77 |
"userid": "user",
|
|
@@ -89,11 +88,20 @@ def normalize(df):
|
|
| 89 |
df.rename(columns={old: new}, inplace=True)
|
| 90 |
return df
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
def basic_filter(df, users):
|
| 93 |
-
if users == "any":
|
| 94 |
-
|
| 95 |
-
if isinstance(users, str):
|
| 96 |
-
users = [users]
|
| 97 |
return df[df["user"].str.lower().isin([u.lower() for u in users])]
|
| 98 |
|
| 99 |
def detect_anomalies(df):
|
|
@@ -113,9 +121,10 @@ def detect_anomalies(df):
|
|
| 113 |
anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
|
| 114 |
|
| 115 |
if "country" in df.columns:
|
| 116 |
-
|
| 117 |
-
if any(
|
| 118 |
-
anomalies.append({"type": "impossible_travel", "details": "multiple countries in
|
|
|
|
| 119 |
return anomalies
|
| 120 |
|
| 121 |
def risk_score(anoms):
|
|
@@ -123,7 +132,7 @@ def risk_score(anoms):
|
|
| 123 |
if len(anoms) <= 2: return "🟡", "Medium"
|
| 124 |
return "🔴", "High"
|
| 125 |
|
| 126 |
-
# ------------------ PDF
|
| 127 |
def build_pdf(risk_icon, risk_label, summary, anomalies):
|
| 128 |
pdf = FPDF()
|
| 129 |
pdf.add_page()
|
|
@@ -165,14 +174,16 @@ with col1:
|
|
| 165 |
if chat_input and df is not None:
|
| 166 |
with st.spinner("🧠 Analyzing logs using AI..."):
|
| 167 |
try:
|
| 168 |
-
|
| 169 |
-
intent
|
|
|
|
|
|
|
|
|
|
| 170 |
params = intent["parameters"]
|
| 171 |
filtered = basic_filter(df, params["users"])
|
| 172 |
anomalies = detect_anomalies(filtered)
|
| 173 |
icon, label = risk_score(anomalies)
|
| 174 |
|
| 175 |
-
# Summary
|
| 176 |
p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
|
| 177 |
summary = llm(p)
|
| 178 |
|
|
|
|
| 9 |
import torch
|
| 10 |
|
| 11 |
# ---------------------------------------------------------------------
|
| 12 |
+
# MUST BE FIRST STREAMLIT COMMAND
|
| 13 |
# ---------------------------------------------------------------------
|
| 14 |
st.set_page_config(page_title="Smart Log Copilot", layout="wide")
|
| 15 |
|
|
|
|
| 28 |
|
| 29 |
tokenizer, model = load_llm()
|
| 30 |
|
| 31 |
+
def llm(prompt, max_new_tokens=150):
|
| 32 |
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 33 |
outputs = model.generate(
|
| 34 |
**inputs,
|
|
|
|
| 71 |
PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
|
| 72 |
|
| 73 |
# ------------------ CSV & ANALYTICS ------------------
|
|
|
|
| 74 |
COLUMN_MAP = {
|
| 75 |
"username": "user",
|
| 76 |
"userid": "user",
|
|
|
|
| 88 |
df.rename(columns={old: new}, inplace=True)
|
| 89 |
return df
|
| 90 |
|
| 91 |
+
# ---- Intent safe decoding ----
|
| 92 |
+
def extract_intent_safe(question):
    """Ask the LLM for the user's intent and decode its reply as JSON.

    The prompt is INTENT_SYSTEM_PROMPT followed by the question. The reply
    may wrap the JSON in extra text, so only the span from the first "{" to
    the last "}" is handed to the JSON decoder.

    Args:
        question: The user's free-text question.

    Returns:
        The decoded JSON object, or None when the reply contains no
        brace-delimited span or that span is not valid JSON.
    """
    raw = llm(INTENT_SYSTEM_PROMPT + "\nUSER QUESTION: " + question + "\nReturn JSON only:")

    start = raw.find("{")
    end = raw.rfind("}")
    # No opening brace, no closing brace, or the only "}" precedes the "{":
    # there is nothing that could be a JSON object.
    if start == -1 or end < start:
        return None

    try:
        return json.loads(raw[start:end + 1])
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically nested output. Anything else (KeyboardInterrupt,
        # SystemExit, ...) is deliberately left to propagate.
        return None
|
| 101 |
+
|
| 102 |
def basic_filter(df, users):
    """Return the rows of ``df`` whose "user" column matches ``users``.

    Matching is case-insensitive. Values are compared by their string form,
    so numeric user IDs work both in the column and in ``users``.

    Args:
        df: DataFrame with a "user" column.
        users: "any" (in any letter case) or None to skip filtering, a
            single user name, or an iterable of user names/IDs.

    Returns:
        ``df`` itself when no filtering is requested; otherwise the subset
        of rows whose user matches one of the requested names. Rows with a
        missing user never match.
    """
    if users is None:
        # A JSON null from the intent parser means "no user constraint".
        return df
    if isinstance(users, str):
        if users.strip().lower() == "any":
            return df
        users = [users]

    wanted = {str(u).lower() for u in users}
    column = df["user"]
    # astype(str) lets .str work on non-string columns; the notna() guard
    # stops missing values from matching a literal "nan"/"none".
    mask = column.astype(str).str.lower().isin(wanted) & column.notna()
    return df[mask]
|
| 106 |
|
| 107 |
def detect_anomalies(df):
|
|
|
|
| 121 |
anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
|
| 122 |
|
| 123 |
if "country" in df.columns:
|
| 124 |
+
loc = df.groupby(df["timestamp"].dt.date).country.nunique()
|
| 125 |
+
if any(loc >= 2):
|
| 126 |
+
anomalies.append({"type": "impossible_travel", "details": "multiple countries in one day"})
|
| 127 |
+
|
| 128 |
return anomalies
|
| 129 |
|
| 130 |
def risk_score(anoms):
|
|
|
|
| 132 |
if len(anoms) <= 2: return "🟡", "Medium"
|
| 133 |
return "🔴", "High"
|
| 134 |
|
| 135 |
+
# ------------------ PDF GENERATION ------------------
|
| 136 |
def build_pdf(risk_icon, risk_label, summary, anomalies):
|
| 137 |
pdf = FPDF()
|
| 138 |
pdf.add_page()
|
|
|
|
| 174 |
if chat_input and df is not None:
|
| 175 |
with st.spinner("🧠 Analyzing logs using AI..."):
|
| 176 |
try:
|
| 177 |
+
intent = extract_intent_safe(chat_input)
|
| 178 |
+
if intent is None:
|
| 179 |
+
intent = {"action": "run_log_query",
|
| 180 |
+
"parameters": {"users": "any", "time_range": "all_time", "focus": "general", "extra": chat_input}}
|
| 181 |
+
|
| 182 |
params = intent["parameters"]
|
| 183 |
filtered = basic_filter(df, params["users"])
|
| 184 |
anomalies = detect_anomalies(filtered)
|
| 185 |
icon, label = risk_score(anomalies)
|
| 186 |
|
|
|
|
| 187 |
p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
|
| 188 |
summary = llm(p)
|
| 189 |
|