Spaces:

SuriRaja
/

PharmaScientistLogIntelligenceCopilot

Paused

App Files Community

SuriRaja committed on Dec 5, 2025

Commit

954b14a

verified ·

1 Parent(s): 82077f0

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -525

app.py CHANGED Viewed

@@ -1,582 +1,195 @@
-import json
-from typing import Any, Dict, List, Optional, Tuple
-from io import BytesIO
-import tempfile
-import gradio as gr
 import pandas as pd
-import matplotlib
-matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-from transformers import AutoTokenizer, AutoModelForCausalLM
 from fpdf import FPDF
 # ------------------ MODEL LOADING ------------------
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
-print("Loading model... This runs once at startup.")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    device_map="auto",
-    torch_dtype="auto"
-)
-# ------------------ LLM HELPERS ------------------
-def generate_llm(
-    prompt: str,
-    max_new_tokens: int = 512,
-    temperature: float = 0.1
-) -> str:
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
-        do_sample=(temperature > 0),
-        temperature=temperature,
-        pad_token_id=tokenizer.eos_token_id
     )
-    full = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return full[len(prompt):].strip()
 INTENT_SYSTEM_PROMPT = """
-You are a Log Intelligence Planner for a pharma company.
-You receive a natural-language question from a user about login/access activity
-of scientists or employees across multiple systems and time ranges.
-Your job is to convert the question into a JSON object describing WHAT to do.
-ALLOWED actions:
-- "run_log_query"      : Basic filtered query on logs.
-- "scan_anomalies"     : Scan for suspicious behaviour (off-hours, many systems, failures).
-- "user_risk_report"   : High-level risk report for one or more users.
-- "global_risk_report" : High-level risk report for all users.
-JSON SCHEMA (always follow this):
 {
-  "action": "<one of the above>",
-  "parameters": {
-    "users": "any" OR ["Name1", "Name2"],
-    "time_range": "all_time" OR natural text like "last_7_days", "yesterday", "this_week",
-    "focus": "login_failures" | "off_hours" | "many_systems" | "impossible_travel" | "general",
-    "extra": "<free text, optional>"
-  }
 }
 RULES:
-- ALWAYS output ONLY valid JSON. No explanation, no markdown, no comments.
-- If you are unsure, choose a reasonable default:
-  - users = "any"
-  - time_range = "all_time"
-  - focus = "general"
-- If question is not about logs at all, still output JSON with action "run_log_query"
-  and parameters filled with "any"/"all_time"/"general".
 """
-def extract_intent(user_message: str) -> Dict[str, Any]:
-    user_block = f'USER_QUESTION: "{user_message}"\n\nReturn ONLY the JSON object now:'
-    prompt = INTENT_SYSTEM_PROMPT + "\n" + user_block
-    raw = generate_llm(prompt, max_new_tokens=256, temperature=0.1)
-    try:
-        first = raw.find("{")
-        last = raw.rfind("}")
-        if first != -1 and last != -1:
-            raw_json = raw[first:last + 1]
-        else:
-            raw_json = raw
-        data = json.loads(raw_json)
-    except Exception:
-        data = {
-            "action": "run_log_query",
-            "parameters": {
-                "users": "any",
-                "time_range": "all_time",
-                "focus": "general",
-                "extra": user_message
-            }
-        }
-    return data
 SUMMARY_SYSTEM_PROMPT = """
-You are a Security & Compliance Analyst for a pharma company.
-You receive:
-1) The original user question.
-2) A short description of how the logs were filtered.
-3) A small sample of matching rows (already filtered from CSV).
-4) A list of detected anomalies (if any).
-You must:
-- Explain findings in clear, simple language for HR / Security managers.
-- Highlight suspicious behaviour and why it might be risky.
-- Suggest 2–5 next actions (e.g., confirm travel, reset password, investigate device, etc.).
-FORMAT:
-- Start with a 1–2 line summary.
-- Then bullet points of key observations.
-- Then "Recommended actions:" with bullet points.
 """
-def generate_summary(
-    user_question: str,
-    filter_description: str,
-    sample_rows: pd.DataFrame,
-    anomalies: List[Dict[str, Any]]
-) -> str:
-    if not sample_rows.empty:
-        sample_text = sample_rows.to_markdown(index=False)
-    else:
-        sample_text = "No matching rows."
-    anomalies_text = json.dumps(anomalies, indent=2) if anomalies else "[]"
-    prompt = SUMMARY_SYSTEM_PROMPT + "\n\n"
-    prompt += "USER QUESTION:\n" + user_question + "\n\n"
-    prompt += "FILTER DESCRIPTION:\n" + filter_description + "\n\n"
-    prompt += "SAMPLE MATCHING ROWS (first few):\n" + sample_text + "\n\n"
-    prompt += "DETECTED ANOMALIES (JSON list):\n" + anomalies_text + "\n\n"
-    prompt += "Now write the report:\n"
-    return generate_llm(prompt, max_new_tokens=512, temperature=0.2)
-# ------------------ CSV & ANOMALY ENGINE ------------------
-def normalize_column_names(df: pd.DataFrame) -> pd.DataFrame:
-    df = df.copy()
-    df.columns = [c.strip().lower() for c in df.columns]
     return df
-def basic_time_filter(df: pd.DataFrame, time_range: str) -> pd.DataFrame:
-    if "timestamp" not in df.columns:
-        return df
-    df = df.copy()
-    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
-    df = df.dropna(subset=["timestamp"])
-    if time_range in ["all_time", None, "unknown"]:
         return df
-    now = df["timestamp"].max()
-    if pd.isna(now):
-        return df
-    if time_range in ["last_7_days", "this_week"]:
-        cutoff = now - pd.Timedelta(days=7)
-        return df[df["timestamp"] >= cutoff]
-    elif time_range in ["yesterday"]:
-        start = (now - pd.Timedelta(days=1)).normalize()
-        end = start + pd.Timedelta(days=1)
-        return df[(df["timestamp"] >= start) & (df["timestamp"] < end)]
-    elif time_range in ["last_30_days", "this_month"]:
-        cutoff = now - pd.Timedelta(days=30)
-        return df[df["timestamp"] >= cutoff]
-    else:
-        return df
-def basic_user_filter(df: pd.DataFrame, users: Any) -> pd.DataFrame:
-    df = df.copy()
-    user_col = None
-    for cand in ["user", "username", "scientist", "employee"]:
-        if cand in df.columns:
-            user_col = cand
-            break
-    if user_col is None:
-        return df
-    if users == "any" or users is None:
-        return df
     if isinstance(users, str):
         users = [users]
-    users_norm = [u.strip().lower() for u in users]
-    return df[df[user_col].astype(str).str.lower().isin(users_norm)]
-def detect_anomalies(
-    df: pd.DataFrame,
-    focus: str = "general"
-) -> List[Dict[str, Any]]:
-    anomalies: List[Dict[str, Any]] = []
-    if df.empty:
-        return anomalies
-    df = df.copy()
-    if "timestamp" in df.columns:
-        df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
-    # 1) Login failures
-    if focus in ["general", "login_failures"]:
-        fail_mask = False
-        for col in ["status", "result", "action"]:
-            if col in df.columns:
-                fail_mask = fail_mask | df[col].astype(str).str.lower().str.contains("fail")
-        failed = df[fail_mask]
-        if not failed.empty:
-            user_col = None
-            for cand in ["user", "username", "scientist", "employee"]:
-                if cand in df.columns:
-                    user_col = cand
-                    break
-            if user_col:
-                by_user = failed.groupby(user_col)
-                for user, group in by_user:
-                    if len(group) >= 3:
-                        anomalies.append({
-                            "type": "login_failures",
-                            "user": str(user),
-                            "count": int(len(group)),
-                            "details": f"{len(group)} failed events found for {user}"
-                        })
-    # 2) Off-hours (23:00–06:00)
-    if "timestamp" in df.columns and focus in ["general", "off_hours"]:
-        df["hour"] = df["timestamp"].dt.hour
-        off = df[(df["hour"] >= 23) | (df["hour"] < 6)]
-        if not off.empty:
-            user_col = None
-            for cand in ["user", "username", "scientist", "employee"]:
-                if cand in df.columns:
-                    user_col = cand
-                    break
-            if user_col:
-                off_counts = off.groupby(user_col).size().reset_index(name="count")
-                for _, row in off_counts.iterrows():
-                    anomalies.append({
-                        "type": "off_hours",
-                        "user": str(row[user_col]),
-                        "count": int(row["count"]),
-                        "details": f"{row['count']} off-hours events"
-                    })
-    # 3) Many systems in a day (>= 5)
-    if focus in ["general", "many_systems"]:
-        user_col = None
-        for cand in ["user", "username", "scientist", "employee"]:
-            if cand in df.columns:
-                user_col = cand
-                break
-        sys_col = None
-        for cand in ["system", "application", "app"]:
-            if cand in df.columns:
-                sys_col = cand
-                break
-        if user_col and sys_col and "timestamp" in df.columns:
-            df["date"] = df["timestamp"].dt.date
-            combo = df.groupby([user_col, "date"])[sys_col].nunique().reset_index(name="system_count")
-            many = combo[combo["system_count"] >= 5]
-            for _, row in many.iterrows():
-                anomalies.append({
-                    "type": "many_systems",
-                    "user": str(row[user_col]),
-                    "date": str(row["date"]),
-                    "system_count": int(row["system_count"]),
-                    "details": f"Accessed {row['system_count']} systems on {row['date']}"
-                })
-    # 4) Impossible travel – same user, 2 locations in same day
-    if focus in ["general", "impossible_travel"]:
-        user_col = None
-        for cand in ["user", "username", "scientist", "employee"]:
-            if cand in df.columns:
-                user_col = cand
-                break
-        loc_col = None
-        for cand in ["country", "location", "geo"]:
-            if cand in df.columns:
-                loc_col = cand
-                break
-        if user_col and loc_col and "timestamp" in df.columns:
-            df["date"] = df["timestamp"].dt.date
-            grouped = df.groupby([user_col, "date"])
-            for (user, date), group in grouped:
-                locations = group[loc_col].astype(str).str.strip().str.lower().unique()
-                if len(locations) >= 2:
-                    anomalies.append({
-                        "type": "impossible_travel",
-                        "user": str(user),
-                        "date": str(date),
-                        "locations": list(map(str, locations)),
-                        "details": f"Multiple locations {list(locations)} in single day"
-                    })
     return anomalies
-def apply_intent_to_dataframe(
-    df: pd.DataFrame,
-    intent: Dict[str, Any]
-) -> Tuple[pd.DataFrame, List[Dict[str, Any]], str]:
-    df = normalize_column_names(df)
-    action = intent.get("action", "run_log_query")
-    params = intent.get("parameters", {})
-    users = params.get("users", "any")
-    time_range = params.get("time_range", "all_time")
-    focus = params.get("focus", "general")
-    filtered = basic_time_filter(df, time_range)
-    filtered = basic_user_filter(filtered, users)
-    filter_desc = f"Action: {action}, Users: {users}, Time: {time_range}, Focus: {focus}"
-    anomalies: List[Dict[str, Any]] = []
-    if action in ["scan_anomalies", "user_risk_report", "global_risk_report", "run_log_query"]:
-        anomalies = detect_anomalies(filtered, focus=focus)
-    return filtered, anomalies, filter_desc
-def calculate_risk_score(anomalies: List[Dict[str, Any]]):
-    if not anomalies:
-        return "🟢", "Low", 0
-    count = len(anomalies)
-    if count <= 2:
-        return "🟡", "Medium", count
-    return "🔴", "High", count
-def generate_bar_chart(df: pd.DataFrame):
-    if df.empty or "system" not in df.columns:
-        return None
-    fig, ax = plt.subplots(figsize=(6, 3))
-    data = df["system"].value_counts()
-    ax.bar(data.index, data.values)
-    ax.set_title("Events per System")
-    ax.set_xlabel("System")
-    ax.set_ylabel("Events")
-    plt.xticks(rotation=20)
-    fig.tight_layout()
-    return fig
-def build_pdf_report(summary_text, anomalies, risk_icon, risk_label):
     pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Arial", size=12)
-    pdf.multi_cell(0, 10, "Security Report – Smart Log Copilot", align="L")
-    pdf.ln(2)
-    pdf.multi_cell(0, 10, f"Risk Level: {risk_icon} {risk_label}", align="L")
-    pdf.ln(5)
-    pdf.set_font("Arial", size=11)
-    pdf.multi_cell(0, 7, "Summary:", align="L")
-    pdf.set_font("Arial", size=10)
-    pdf.multi_cell(0, 6, summary_text)
-    pdf.ln(5)
-    pdf.set_font("Arial", size=11)
-    pdf.multi_cell(0, 7, "Detected Anomalies:", align="L")
-    pdf.set_font("Arial", size=10)
     if anomalies:
-        for an in anomalies:
-            line = f"- {an.get('type', '')}: {an.get('details', '')}"
-            pdf.multi_cell(0, 6, line)
     else:
-        pdf.multi_cell(0, 6, "No anomalies detected.")
     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
     pdf.output(tmp.name)
     return tmp.name
-# ------------------ DEMO DESCRIPTION ------------------
-DESCRIPTION_MD = """
-# 🔍 Smart Log Copilot (CSV Demo)
-**Use case:** Pharma / corporate security teams analysing login & access logs.
-1. Upload a **CSV log file** (with columns like `timestamp`, `user`, `system`, `status`, `country`, etc.)
-2. Ask questions in **plain English**, e.g.:
-   - *"Was Dr. Rao doing anything suspicious this week?"*
-   - *"Who logged in late at night?"*
-   - *"Who accessed too many systems in a day?"*
-3. The app will:
-   - Interpret your question via a local LLM (Qwen 1.5B)
-   - Filter & analyse the CSV with Pandas
-   - Run anomaly rules (off-hours, failures, many systems, impossible travel)
-   - Return an easy-to-read summary + risk level + optional PDF report.
-> For demo: a **placeholder anomaly screenshot** is shown whenever anomalies are found.
-"""
-PLACEHOLDER_IMAGE_URL = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot+Placeholder"
-# ------------------ CORE CHAT LOGIC ------------------
-def load_csv(file_obj):
-    if file_obj is None:
-        return pd.DataFrame(), pd.DataFrame(), "No file uploaded yet."
-    try:
-        df = pd.read_csv(file_obj.name)
-        df = normalize_column_names(df)
-        info = f"Loaded CSV with {len(df)} rows and {len(df.columns)} columns."
-        return df, df.head(20), info
-    except Exception as e:
-        return pd.DataFrame(), pd.DataFrame(), f"Error loading CSV: {e}"
-def chat_logic(user_message: str, df_state: pd.DataFrame):
-    intent = extract_intent(user_message)
-    filtered_df, anomalies, filter_desc = apply_intent_to_dataframe(df_state, intent)
-    sample = filtered_df.head(30)
-    summary = generate_summary(
-        user_question=user_message,
-        filter_description=filter_desc,
-        sample_rows=sample,
-        anomalies=anomalies
-    )
-    img = PLACEHOLDER_IMAGE_URL if anomalies else ""
-    return summary, img, filtered_df, anomalies
-def on_user_message(user_message, chat_history, df):
-    # Append user message
-    chat_history = chat_history + [{"role": "user", "content": user_message}]
-    if df is None or df.empty:
-        reply = "📂 Please upload a CSV file with logs first."
-        chat_history = chat_history + [{"role": "assistant", "content": reply}]
-        return chat_history, gr.update(visible=False), gr.update(visible=False), None
-    summary_text, img, filtered_df, anomalies = chat_logic(user_message, df)
-    risk_icon, risk_label, _ = calculate_risk_score(anomalies)
-    reply_text = f"{risk_icon} **Risk Level: {risk_label}**\n\n" + summary_text
-    chat_history = chat_history + [{"role": "assistant", "content": reply_text}]
-    # Chart
-    fig = generate_bar_chart(filtered_df)
-    if fig is not None:
-        chart_update = gr.update(value=fig, visible=True)
-    else:
-        chart_update = gr.update(visible=False)
-    # Report meta state
-    report_meta = (reply_text, anomalies, risk_icon, risk_label)
-    # Screenshot
-    if img:
-        img_update = gr.update(value=img, visible=True)
-    else:
-        img_update = gr.update(visible=False)
-    return chat_history, img_update, chart_update, report_meta
-def on_generate_report(report_meta):
-    if not report_meta:
-        return gr.update(visible=False)
-    summary_text, anomalies, risk_icon, risk_label = report_meta
-    pdf_path = build_pdf_report(summary_text, anomalies, risk_icon, risk_label)
-    return gr.update(value=pdf_path, visible=True)
-# ------------------ GRADIO UI ------------------
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="gray")) as demo:
-    gr.Markdown(DESCRIPTION_MD)
-    with gr.Row():
-        with gr.Column(scale=2):
-            file_input = gr.File(label="Upload CSV log file", file_types=[".csv"])
-            load_btn = gr.Button("Load CSV")
-            load_info = gr.Markdown("No file loaded.")
-        with gr.Column(scale=3):
-            df_preview = gr.Dataframe(
-                label="CSV Preview (first 20 rows)",
-                interactive=False,
-                visible=True
-            )
-    df_state = gr.State(pd.DataFrame())
-    def on_load_csv(file_obj):
-        df, preview, info = load_csv(file_obj)
-        return df, preview, info
-    load_btn.click(
-        fn=on_load_csv,
-        inputs=[file_input],
-        outputs=[df_state, df_preview, load_info]
-    )
-    gr.Markdown("---")
-    gr.Markdown("### 💬 Smart Log Copilot")
-    with gr.Row():
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(
-                label=None,
-                type="messages",
-            )
-            msg = gr.Textbox(
-                placeholder="Ask a question like: Who logged in late at night?",
-                show_label=False,
-                lines=2
-            )
-            send_btn = gr.Button("Send", variant="primary")
-        with gr.Column(scale=2):
-            anomaly_image = gr.Image(
-                label="Anomaly Screenshot (placeholder)",
-                visible=False
-            )
-            chart_plot = gr.Plot(
-                label="Log Activity Chart",
-                visible=False
-            )
-            report_btn = gr.Button("Generate PDF Report", variant="secondary")
-            pdf_file = gr.File(label="Download Security Report", visible=False)
-    report_state = gr.State()
-    send_btn.click(
-        fn=on_user_message,
-        inputs=[msg, chatbot, df_state],
-        outputs=[chatbot, anomaly_image, chart_plot, report_state]
-    )
-    msg.submit(
-        fn=on_user_message,
-        inputs=[msg, chatbot, df_state],
-        outputs=[chatbot, anomaly_image, chart_plot, report_state]
-    )
-    report_btn.click(
-        fn=on_generate_report,
-        inputs=[report_state],
-        outputs=[pdf_file]
-    )
-    gr.Markdown(
-        """
-        **Tip:** Use a demo CSV with columns like:
-        `timestamp, user, system, status, country`
-        and deliberately add:
-        - multiple failed logins,
-        - some late-night logins,
-        - same user in 2 countries on same day,
-        - a day where a user touches 5+ systems.
-        Then ask natural questions and let the system explain.
-        """
-    )
-if __name__ == "__main__":
-    demo.launch()

+import streamlit as st
 import pandas as pd
+import json
 import matplotlib.pyplot as plt
 from fpdf import FPDF
+import tempfile
+from io import BytesIO
+from transformers import AutoTokenizer, AutoModelForCausalLM
 # ------------------ MODEL LOADING ------------------
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+@st.cache_resource
+def load_llm():
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto",
+        torch_dtype="auto"
+    )
+    return tokenizer, model
+tokenizer, model = load_llm()
+def llm(prompt, max_new_tokens=400):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=max_new_tokens,
+        pad_token_id=tokenizer.eos_token_id,
+        do_sample=False,
     )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True).replace(prompt, "").strip()
 INTENT_SYSTEM_PROMPT = """
+You convert natural-language questions into a JSON task plan for log analysis.
+VALID actions:
+- "run_log_query"
+- "scan_anomalies"
+- "user_risk_report"
+- "global_risk_report"
+OUTPUT FORMAT:
 {
+ "action": "",
+ "parameters": {
+   "users": "any" or ["name"],
+   "time_range": "all_time" or natural text,
+   "focus": "general" or "login_failures" or "off_hours" or "many_systems" or "impossible_travel",
+   "extra": "<free>"
+ }
 }
 RULES:
+- ONLY output JSON.
 """
 SUMMARY_SYSTEM_PROMPT = """
+You write human-friendly summaries for security managers.
+Explain risks clearly + list recommended actions.
 """
+PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
+def extract_intent(msg):
+    p = INTENT_SYSTEM_PROMPT + "\nUSER QUESTION: " + msg + "\nReturn JSON now:"
+    raw = llm(p)
+    try:
+        raw_json = raw[raw.find("{"): raw.rfind("}") + 1]
+        return json.loads(raw_json)
+    except:
+        return {"action": "run_log_query", "parameters": {"users": "any", "time_range": "all_time", "focus": "general", "extra": msg}}
+# ------------------ CSV + ANALYTICS ------------------
+def normalize(df):
+    df.columns = [c.lower().strip() for c in df.columns]
     return df
+def basic_filter(df, users):
+    if users == "any":
         return df
     if isinstance(users, str):
         users = [users]
+    users = [u.lower() for u in users]
+    return df[df["user"].str.lower().isin(users)]
+def detect_anomalies(df):
+    anomalies = []
+    # failed logins
+    fails = df[df["status"].str.contains("fail", case=False, na=False)]
+    if len(fails) >= 3:
+        anomalies.append({"type": "login_failures", "details": f"{len(fails)} failed logins found"})
+    # off-hours
+    df["timestamp"] = pd.to_datetime(df["timestamp"])
+    off = df[(df["timestamp"].dt.hour >= 23) | (df["timestamp"].dt.hour < 6)]
+    if len(off) > 0:
+        anomalies.append({"type": "off_hours", "details": f"{len(off)} off-hours logins"})
+    # many systems
+    sys_count = df.groupby(df["timestamp"].dt.date).system.nunique()
+    if any(sys_count >= 5):
+        anomalies.append({"type": "many_systems", "details": "5+ systems accessed in a day"})
+    # impossible travel
+    if "country" in df.columns:
+        locations = df.groupby(df["timestamp"].dt.date).country.nunique()
+        if any(locations >= 2):
+            anomalies.append({"type": "impossible_travel", "details": "Multiple countries in one day"})
     return anomalies
+def risk_score(anoms):
+    if not anoms:
+        return "🟢", "Low"
+    if len(anoms) <= 2:
+        return "🟡", "Medium"
+    return "🔴", "High"
+def build_pdf(risk_icon, risk_label, summary, anomalies):
     pdf = FPDF()
     pdf.add_page()
     pdf.set_font("Arial", size=12)
+    pdf.multi_cell(0, 8, f"Security Report – Smart Log Copilot")
+    pdf.multi_cell(0, 8, f"Risk Level: {risk_icon} {risk_label}")
+    pdf.ln(4)
+    pdf.multi_cell(0, 6, summary)
+    pdf.ln(4)
+    pdf.multi_cell(0, 6, "Detected Anomalies:")
     if anomalies:
+        for a in anomalies:
+            pdf.multi_cell(0, 6, f"- {a['type']}: {a['details']}")
     else:
+        pdf.multi_cell(0, 6, "None")
     tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
     pdf.output(tmp.name)
     return tmp.name
+# ------------------ STREAMLIT UI ------------------
+st.set_page_config(page_title="Smart Log Copilot", layout="wide")
+st.title("🔍 Smart Log Copilot (CSV-powered LLM Demo)")
+uploaded = st.file_uploader("Upload CSV log file", type=["csv"])
+df = None
+if uploaded:
+    df = normalize(pd.read_csv(uploaded))
+    st.success(f"CSV loaded ({len(df)} rows)")
+    st.dataframe(df.head(20))
+st.markdown("---")
+chat_input = st.text_input("Ask a question about the logs:")
+report_slot = st.empty()
+if "history" not in st.session_state:
+    st.session_state.history = []
+col1, col2 = st.columns([3, 2])
+with col1:
+    if chat_input and df is not None:
+        intent = extract_intent(chat_input)
+        params = intent["parameters"]
+        filtered = basic_filter(df, params["users"])
+        anomalies = detect_anomalies(filtered)
+        icon, label = risk_score(anomalies)
+        p = SUMMARY_SYSTEM_PROMPT + f"\nQUESTION: {chat_input}\nMATCHED: {len(filtered)} rows\nANOMALIES: {json.dumps(anomalies)}\n\nWrite summary:"
+        summary = llm(p)
+        bot_reply = f"{icon} **Risk Level: {label}**\n\n{summary}"
+        st.session_state.history.append(("user", chat_input))
+        st.session_state.history.append(("assistant", bot_reply))
+    for role, text in st.session_state.history:
+        if role == "user":
+            st.chat_message("user").write(text)
+        else:
+            st.chat_message("assistant").write(text)
+with col2:
+    if df is not None and chat_input:
+        if anomalies:
+            st.image(PLACEHOLDER_IMG, caption="Anomaly Screenshot")
+        fig, ax = plt.subplots(figsize=(4, 2))
+        df["system"].value_counts().plot(kind="bar", ax=ax)
+        st.pyplot(fig)
+        pdf_btn = st.button("📄 Download PDF Report")
+        if pdf_btn:
+            pdf_path = build_pdf(icon, label, summary, anomalies)
+            with open(pdf_path, "rb") as f:
+                st.download_button("Download PDF", f, file_name="security_report.pdf", mime="application/pdf")