SuriRaja commited on
Commit
bc24340
·
verified ·
1 Parent(s): 567e525

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -14
app.py CHANGED
@@ -9,7 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
9
  import torch
10
 
11
  # ---------------------------------------------------------------------
12
- # MUST BE FIRST STREAMLIT UI CALL
13
  # ---------------------------------------------------------------------
14
  st.set_page_config(page_title="Smart Log Copilot", layout="wide")
15
 
@@ -28,7 +28,7 @@ def load_llm():
28
 
29
  tokenizer, model = load_llm()
30
 
31
- def llm(prompt, max_new_tokens=150): # faster responses
32
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
33
  outputs = model.generate(
34
  **inputs,
@@ -71,7 +71,6 @@ Explain risks clearly + list recommended actions.
71
  PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
72
 
73
  # ------------------ CSV & ANALYTICS ------------------
74
- # Auto-map CSVs with different column names
75
  COLUMN_MAP = {
76
  "username": "user",
77
  "userid": "user",
@@ -89,11 +88,20 @@ def normalize(df):
89
  df.rename(columns={old: new}, inplace=True)
90
  return df
91
 
 
 
 
 
 
 
 
 
 
 
 
92
  def basic_filter(df, users):
93
- if users == "any":
94
- return df
95
- if isinstance(users, str):
96
- users = [users]
97
  return df[df["user"].str.lower().isin([u.lower() for u in users])]
98
 
99
  def detect_anomalies(df):
@@ -113,9 +121,10 @@ def detect_anomalies(df):
113
  anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
114
 
115
  if "country" in df.columns:
116
- locations = df.groupby(df["timestamp"].dt.date).country.nunique()
117
- if any(locations >= 2):
118
- anomalies.append({"type": "impossible_travel", "details": "multiple countries in a day"})
 
119
  return anomalies
120
 
121
  def risk_score(anoms):
@@ -123,7 +132,7 @@ def risk_score(anoms):
123
  if len(anoms) <= 2: return "🟡", "Medium"
124
  return "🔴", "High"
125
 
126
- # ------------------ PDF EXPORT ------------------
127
  def build_pdf(risk_icon, risk_label, summary, anomalies):
128
  pdf = FPDF()
129
  pdf.add_page()
@@ -165,14 +174,16 @@ with col1:
165
  if chat_input and df is not None:
166
  with st.spinner("🧠 Analyzing logs using AI..."):
167
  try:
168
- # Intent + log reasoning
169
- intent = json.loads(llm(INTENT_SYSTEM_PROMPT + "\nUSER: " + chat_input))
 
 
 
170
  params = intent["parameters"]
171
  filtered = basic_filter(df, params["users"])
172
  anomalies = detect_anomalies(filtered)
173
  icon, label = risk_score(anomalies)
174
 
175
- # Summary
176
  p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
177
  summary = llm(p)
178
 
 
9
  import torch
10
 
11
  # ---------------------------------------------------------------------
12
+ # MUST BE FIRST STREAMLIT COMMAND
13
  # ---------------------------------------------------------------------
14
  st.set_page_config(page_title="Smart Log Copilot", layout="wide")
15
 
 
28
 
29
  tokenizer, model = load_llm()
30
 
31
+ def llm(prompt, max_new_tokens=150):
32
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
33
  outputs = model.generate(
34
  **inputs,
 
71
  PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
72
 
73
  # ------------------ CSV & ANALYTICS ------------------
 
74
  COLUMN_MAP = {
75
  "username": "user",
76
  "userid": "user",
 
88
  df.rename(columns={old: new}, inplace=True)
89
  return df
90
 
91
# ---- Intent safe decoding ----
def extract_intent_safe(question):
    """Ask the LLM to classify *question* and return the parsed intent dict.

    The model is prompted to answer with JSON only, but small models often
    wrap the object in extra prose, so the first '{' .. last '}' span is
    extracted before parsing.

    Returns None when the output contains no JSON object or the extracted
    span is not valid JSON, letting the caller fall back to a default intent.
    """
    raw = llm(INTENT_SYSTEM_PROMPT + "\nUSER QUESTION: " + question + "\nReturn JSON only:")
    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end == -1:
        return None
    try:
        return json.loads(raw[start:end + 1])
    # was a bare `except:`, which also swallows SystemExit/KeyboardInterrupt;
    # only a parse failure should map to None
    except json.JSONDecodeError:
        return None
101
+
102
def basic_filter(df, users):
    """Return the rows of *df* whose 'user' column matches *users*.

    Matching is case-insensitive. *users* may be a single name, a list of
    names, or the sentinel string "any", which disables filtering and
    returns *df* unchanged.
    """
    if users == "any":
        return df
    names = [users] if isinstance(users, str) else users
    wanted = {name.lower() for name in names}
    mask = df["user"].str.lower().isin(wanted)
    return df[mask]
106
 
107
  def detect_anomalies(df):
 
121
  anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
122
 
123
  if "country" in df.columns:
124
+ loc = df.groupby(df["timestamp"].dt.date).country.nunique()
125
+ if any(loc >= 2):
126
+ anomalies.append({"type": "impossible_travel", "details": "multiple countries in one day"})
127
+
128
  return anomalies
129
 
130
  def risk_score(anoms):
 
132
  if len(anoms) <= 2: return "🟡", "Medium"
133
  return "🔴", "High"
134
 
135
+ # ------------------ PDF GENERATION ------------------
136
  def build_pdf(risk_icon, risk_label, summary, anomalies):
137
  pdf = FPDF()
138
  pdf.add_page()
 
174
  if chat_input and df is not None:
175
  with st.spinner("🧠 Analyzing logs using AI..."):
176
  try:
177
+ intent = extract_intent_safe(chat_input)
178
+ if intent is None:
179
+ intent = {"action": "run_log_query",
180
+ "parameters": {"users": "any", "time_range": "all_time", "focus": "general", "extra": chat_input}}
181
+
182
  params = intent["parameters"]
183
  filtered = basic_filter(df, params["users"])
184
  anomalies = detect_anomalies(filtered)
185
  icon, label = risk_score(anomalies)
186
 
 
187
  p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
188
  summary = llm(p)
189