Spaces:

SuriRaja
/

PharmaScientistLogIntelligenceCopilot

Sleeping

App Files Files Community

SuriRaja committed on Dec 5, 2025

Commit

bc24340

verified ·

1 Parent(s): 567e525

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -14

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 # ---------------------------------------------------------------------
-# MUST BE FIRST STREAMLIT UI CALL
 # ---------------------------------------------------------------------
 st.set_page_config(page_title="Smart Log Copilot", layout="wide")
@@ -28,7 +28,7 @@ def load_llm():
 tokenizer, model = load_llm()
-def llm(prompt, max_new_tokens=150):   # faster responses
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
@@ -71,7 +71,6 @@ Explain risks clearly + list recommended actions.
 PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
 # ------------------ CSV & ANALYTICS ------------------
-# Auto-map CSVs with different column names
 COLUMN_MAP = {
     "username": "user",
     "userid": "user",
@@ -89,11 +88,20 @@ def normalize(df):
             df.rename(columns={old: new}, inplace=True)
     return df
 def basic_filter(df, users):
-    if users == "any":
-        return df
-    if isinstance(users, str):
-        users = [users]
     return df[df["user"].str.lower().isin([u.lower() for u in users])]
 def detect_anomalies(df):
@@ -113,9 +121,10 @@ def detect_anomalies(df):
         anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
     if "country" in df.columns:
-        locations = df.groupby(df["timestamp"].dt.date).country.nunique()
-        if any(locations >= 2):
-            anomalies.append({"type": "impossible_travel", "details": "multiple countries in a day"})
     return anomalies
 def risk_score(anoms):
@@ -123,7 +132,7 @@ def risk_score(anoms):
     if len(anoms) <= 2: return "🟡", "Medium"
     return "🔴", "High"
-# ------------------ PDF EXPORT ------------------
 def build_pdf(risk_icon, risk_label, summary, anomalies):
     pdf = FPDF()
     pdf.add_page()
@@ -165,14 +174,16 @@ with col1:
     if chat_input and df is not None:
         with st.spinner("🧠 Analyzing logs using AI..."):
             try:
-                # Intent + log reasoning
-                intent = json.loads(llm(INTENT_SYSTEM_PROMPT + "\nUSER: " + chat_input))
                 params = intent["parameters"]
                 filtered = basic_filter(df, params["users"])
                 anomalies = detect_anomalies(filtered)
                 icon, label = risk_score(anomalies)
-                # Summary
                 p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
                 summary = llm(p)

 import torch
 # ---------------------------------------------------------------------
+# MUST BE FIRST STREAMLIT COMMAND
 # ---------------------------------------------------------------------
 st.set_page_config(page_title="Smart Log Copilot", layout="wide")
 tokenizer, model = load_llm()
+def llm(prompt, max_new_tokens=150):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
 PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
 # ------------------ CSV & ANALYTICS ------------------
 COLUMN_MAP = {
     "username": "user",
     "userid": "user",
             df.rename(columns={old: new}, inplace=True)
     return df
+# ---- Intent safe decoding ----
def extract_intent_safe(question):
    """Ask the LLM for an intent object and parse it defensively.

    The model is prompted with ``INTENT_SYSTEM_PROMPT`` plus the user's
    question and asked to return JSON only. Because the reply may still be
    wrapped in extra text, only the span from the first ``{`` to the last
    ``}`` is handed to the JSON parser.

    Args:
        question: The user's free-text question.

    Returns:
        The decoded JSON value, or ``None`` when the reply contains no
        brace-delimited span or that span is not valid JSON. Callers are
        expected to substitute their own default intent on ``None``.
    """
    raw = llm(INTENT_SYSTEM_PROMPT + "\nUSER QUESTION: " + question + "\nReturn JSON only:")

    start = raw.find("{")
    end = raw.rfind("}")
    if start == -1 or end == -1:
        return None

    # If the last "}" precedes the first "{", the slice is empty and the
    # parser rejects it below, which yields None as well.
    candidate = raw[start:end + 1]
    try:
        return json.loads(candidate)
    except json.JSONDecodeError:
        # Only malformed JSON is an expected failure here; anything else
        # (e.g. KeyboardInterrupt) must not be silently swallowed.
        return None
 def basic_filter(df, users):
+    if users == "any": return df
+    if isinstance(users, str): users = [users]
     return df[df["user"].str.lower().isin([u.lower() for u in users])]
 def detect_anomalies(df):
         anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
     if "country" in df.columns:
+        loc = df.groupby(df["timestamp"].dt.date).country.nunique()
+        if any(loc >= 2):
+            anomalies.append({"type": "impossible_travel", "details": "multiple countries in one day"})
     return anomalies
 def risk_score(anoms):
     if len(anoms) <= 2: return "🟡", "Medium"
     return "🔴", "High"
+# ------------------ PDF GENERATION ------------------
 def build_pdf(risk_icon, risk_label, summary, anomalies):
     pdf = FPDF()
     pdf.add_page()
     if chat_input and df is not None:
         with st.spinner("🧠 Analyzing logs using AI..."):
             try:
+                intent = extract_intent_safe(chat_input)
+                if intent is None:
+                    intent = {"action": "run_log_query",
+                              "parameters": {"users": "any", "time_range": "all_time", "focus": "general", "extra": chat_input}}
                 params = intent["parameters"]
                 filtered = basic_filter(df, params["users"])
                 anomalies = detect_anomalies(filtered)
                 icon, label = risk_score(anomalies)
                 p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
                 summary = llm(p)