Update app.py
app.py
CHANGED
@@ -1,11 +1,18 @@
 import json
-import textwrap
 from typing import Any, Dict, List, Optional, Tuple
+from io import BytesIO
+import tempfile
 
 import gradio as gr
 import pandas as pd
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
 from transformers import AutoTokenizer, AutoModelForCausalLM
+from fpdf import FPDF
 
+# ------------------ MODEL LOADING ------------------
 
 MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
 
@@ -17,15 +24,13 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype="auto"
 )
 
-
-# ---------- LLM HELPERS ----------
+# ------------------ LLM HELPERS ------------------
 
 def generate_llm(
     prompt: str,
     max_new_tokens: int = 512,
     temperature: float = 0.1
 ) -> str:
-    """Simple text generation helper."""
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(
         **inputs,
@@ -35,7 +40,6 @@ def generate_llm(
         pad_token_id=tokenizer.eos_token_id
     )
     full = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Return only the new text after the prompt
     return full[len(prompt):].strip()
 
 
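For orientation, every other function in the file funnels through this helper; an illustrative call, mirroring the arguments `extract_intent()` passes in the next hunk (the question text is made up):

    # Illustrative only – the calling pattern used below:
    prompt = INTENT_SYSTEM_PROMPT + "\n" + 'USER_QUESTION: "Who failed to log in?"'
    raw = generate_llm(prompt, max_new_tokens=256, temperature=0.1)  # completion text only, prompt stripped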
@@ -74,14 +78,11 @@ RULES:
 and parameters filled with "any"/"all_time"/"general".
 """
 
-
 def extract_intent(user_message: str) -> Dict[str, Any]:
-    """Call LLM to convert user message → intent JSON."""
     user_block = f'USER_QUESTION: "{user_message}"\n\nReturn ONLY the JSON object now:'
     prompt = INTENT_SYSTEM_PROMPT + "\n" + user_block
     raw = generate_llm(prompt, max_new_tokens=256, temperature=0.1)
 
-    # Try to extract JSON from model output
     try:
         first = raw.find("{")
         last = raw.rfind("}")
@@ -91,7 +92,6 @@ def extract_intent(user_message: str) -> Dict[str, Any]:
             raw_json = raw
         data = json.loads(raw_json)
     except Exception:
-        # Fallback safe default
         data = {
             "action": "run_log_query",
             "parameters": {
@@ -130,8 +130,11 @@ def generate_summary(
     sample_rows: pd.DataFrame,
     anomalies: List[Dict[str, Any]]
 ) -> str:
-
-
+    if not sample_rows.empty:
+        sample_text = sample_rows.to_markdown(index=False)
+    else:
+        sample_text = "No matching rows."
+
     anomalies_text = json.dumps(anomalies, indent=2) if anomalies else "[]"
 
     prompt = SUMMARY_SYSTEM_PROMPT + "\n\n"
@@ -144,7 +147,7 @@ def generate_summary(
     return generate_llm(prompt, max_new_tokens=512, temperature=0.2)
 
 
-#
+# ------------------ CSV & ANOMALY ENGINE ------------------
 
 def normalize_column_names(df: pd.DataFrame) -> pd.DataFrame:
     df = df.copy()
@@ -153,10 +156,6 @@ def normalize_column_names(df: pd.DataFrame) -> pd.DataFrame:
 
 
 def basic_time_filter(df: pd.DataFrame, time_range: str) -> pd.DataFrame:
-    """
-    Expect a 'timestamp' column in a parseable datetime format.
-    For demo, support a few simple ranges; otherwise return df.
-    """
     if "timestamp" not in df.columns:
         return df
 
@@ -182,14 +181,10 @@ def basic_time_filter(df: pd.DataFrame, time_range: str) -> pd.DataFrame:
         cutoff = now - pd.Timedelta(days=30)
         return df[df["timestamp"] >= cutoff]
     else:
-        # Unknown text → just return df for MVP
         return df
 
 
 def basic_user_filter(df: pd.DataFrame, users: Any) -> pd.DataFrame:
-    """
-    Expect 'user' or 'username' or 'scientist' column.
-    """
     df = df.copy()
     user_col = None
     for cand in ["user", "username", "scientist", "employee"]:
@@ -206,44 +201,38 @@ def basic_user_filter(df: pd.DataFrame, users: Any) -> pd.DataFrame:
         users = [users]
 
     users_norm = [u.strip().lower() for u in users]
-    return df[df[user_col].str.lower().isin(users_norm)]
+    return df[df[user_col].astype(str).str.lower().isin(users_norm)]
 
 
 def detect_anomalies(
     df: pd.DataFrame,
     focus: str = "general"
 ) -> List[Dict[str, Any]]:
-    """
-    Very simple rule-based anomaly engine for demo.
-    Expectations:
-      - 'timestamp' datetime column
-      - 'status' or 'result' for failures
-      - 'system' or 'application' column
-      - 'country' or 'location' for impossible travel (demo-level)
-    """
     anomalies: List[Dict[str, Any]] = []
     if df.empty:
         return anomalies
 
-    # Ensure needed columns exist
     df = df.copy()
     if "timestamp" in df.columns:
         df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")
 
     # 1) Login failures
     if focus in ["general", "login_failures"]:
-        # interpret failed rows
         fail_mask = False
         for col in ["status", "result", "action"]:
             if col in df.columns:
                 fail_mask = fail_mask | df[col].astype(str).str.lower().str.contains("fail")
         failed = df[fail_mask]
         if not failed.empty:
-
-
-
+            user_col = None
+            for cand in ["user", "username", "scientist", "employee"]:
+                if cand in df.columns:
+                    user_col = cand
+                    break
+            if user_col:
+                by_user = failed.groupby(user_col)
             for user, group in by_user:
-            if len(group) >= 3:
+                if len(group) >= 3:
                 anomalies.append({
                     "type": "login_failures",
                     "user": str(user),
@@ -251,7 +240,7 @@ def detect_anomalies(
                     "details": f"{len(group)} failed events found for {user}"
                 })
 
-    # 2) Off-hours
+    # 2) Off-hours (23:00–06:00)
     if "timestamp" in df.columns and focus in ["general", "off_hours"]:
         df["hour"] = df["timestamp"].dt.hour
         off = df[(df["hour"] >= 23) | (df["hour"] < 6)]
@@ -273,7 +262,6 @@ def detect_anomalies(
 
     # 3) Many systems in a day (>= 5)
     if focus in ["general", "many_systems"]:
-        # Need user + system
         user_col = None
         for cand in ["user", "username", "scientist", "employee"]:
             if cand in df.columns:
@@ -297,7 +285,7 @@ def detect_anomalies(
                 "details": f"Accessed {row['system_count']} systems on {row['date']}"
             })
 
-    # 4) Impossible travel –
+    # 4) Impossible travel – same user, 2 locations in same day
     if focus in ["general", "impossible_travel"]:
         user_col = None
         for cand in ["user", "username", "scientist", "employee"]:
@@ -313,14 +301,14 @@ def detect_anomalies(
         df["date"] = df["timestamp"].dt.date
         grouped = df.groupby([user_col, "date"])
         for (user, date), group in grouped:
-
-            if len(
+            locations = group[loc_col].astype(str).str.strip().str.lower().unique()
+            if len(locations) >= 2:
                 anomalies.append({
                     "type": "impossible_travel",
                     "user": str(user),
                     "date": str(date),
-                    "locations": list(map(str,
-                    "details": f"Multiple locations {
+                    "locations": list(map(str, locations)),
+                    "details": f"Multiple locations {list(locations)} in single day"
                 })
 
     return anomalies
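For reference between hunks: `detect_anomalies()` returns a list of plain dicts; an illustrative result for a log with repeated failures and a same-day country switch (values invented, keys as visible in the diff above):

    [
        {"type": "login_failures", "user": "dr.rao",
         "details": "4 failed events found for dr.rao"},
        {"type": "impossible_travel", "user": "dr.rao", "date": "2024-05-02",
         "locations": ["india", "germany"],
         "details": "Multiple locations ['india', 'germany'] in single day"},
    ]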
@@ -330,9 +318,6 @@ def apply_intent_to_dataframe(
     df: pd.DataFrame,
     intent: Dict[str, Any]
 ) -> Tuple[pd.DataFrame, List[Dict[str, Any]], str]:
-    """
-    Return: (filtered_df, anomalies, filter_description)
-    """
     df = normalize_column_names(df)
     action = intent.get("action", "run_log_query")
     params = intent.get("parameters", {})
@@ -340,7 +325,6 @@ def apply_intent_to_dataframe(
     time_range = params.get("time_range", "all_time")
     focus = params.get("focus", "general")
 
-    # Basic filters
     filtered = basic_time_filter(df, time_range)
     filtered = basic_user_filter(filtered, users)
 
@@ -353,23 +337,77 @@ def apply_intent_to_dataframe(
     return filtered, anomalies, filter_desc
 
 
-
+def calculate_risk_score(anomalies: List[Dict[str, Any]]):
+    if not anomalies:
+        return "🟢", "Low", 0
+    count = len(anomalies)
+    if count <= 2:
+        return "🟡", "Medium", count
+    return "🔴", "High", count
+
+
+def generate_bar_chart(df: pd.DataFrame):
+    if df.empty or "system" not in df.columns:
+        return None
+    fig, ax = plt.subplots(figsize=(6, 3))
+    data = df["system"].value_counts()
+    ax.bar(data.index, data.values)
+    ax.set_title("Events per System")
+    ax.set_xlabel("System")
+    ax.set_ylabel("Events")
+    plt.xticks(rotation=20)
+    fig.tight_layout()
+    return fig
+
+
+def build_pdf_report(summary_text, anomalies, risk_icon, risk_label):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", size=12)
+
+    pdf.multi_cell(0, 10, "Security Report – Smart Log Copilot", align="L")
+    pdf.ln(2)
+    pdf.multi_cell(0, 10, f"Risk Level: {risk_icon} {risk_label}", align="L")
+    pdf.ln(5)
+
+    pdf.set_font("Arial", size=11)
+    pdf.multi_cell(0, 7, "Summary:", align="L")
+    pdf.set_font("Arial", size=10)
+    pdf.multi_cell(0, 6, summary_text)
+    pdf.ln(5)
+
+    pdf.set_font("Arial", size=11)
+    pdf.multi_cell(0, 7, "Detected Anomalies:", align="L")
+    pdf.set_font("Arial", size=10)
+    if anomalies:
+        for an in anomalies:
+            line = f"- {an.get('type', '')}: {an.get('details', '')}"
+            pdf.multi_cell(0, 6, line)
+    else:
+        pdf.multi_cell(0, 6, "No anomalies detected.")
+
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
+    pdf.output(tmp.name)
+    return tmp.name
+
+
+# ------------------ DEMO DESCRIPTION ------------------
 
 DESCRIPTION_MD = """
 # 🔍 Smart Log Copilot (CSV Demo)
 
-**Use case:** Pharma / corporate security teams
+**Use case:** Pharma / corporate security teams analysing login & access logs.
 
 1. Upload a **CSV log file** (with columns like `timestamp`, `user`, `system`, `status`, `country`, etc.)
 2. Ask questions in **plain English**, e.g.:
    - *"Was Dr. Rao doing anything suspicious this week?"*
-   - *"
-   - *"Who accessed too many systems in a
+   - *"Who logged in late at night?"*
+   - *"Who accessed too many systems in a day?"*
 3. The app will:
    - Interpret your question via a local LLM (Qwen 1.5B)
    - Filter & analyse the CSV with Pandas
-   - Run
-   - Return an easy-to-read summary +
+   - Run anomaly rules (off-hours, failures, many systems, impossible travel)
+   - Return an easy-to-read summary + risk level + optional PDF report.
 
 > For demo: a **placeholder anomaly screenshot** is shown whenever anomalies are found.
 """
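The three helpers added in this hunk are plain functions, so they can be sanity-checked outside the UI; a hypothetical session (the `anomalies`, `filtered_df`, and `summary` values are assumed to come from the functions above):

    icon, label, n = calculate_risk_score(anomalies)  # e.g. ("🟡", "Medium", 2) – 1–2 findings is Medium, 3+ is High
    fig = generate_bar_chart(filtered_df)             # None unless the frame has a "system" column
    pdf_path = build_pdf_report(summary, anomalies, icon, label)  # path to a temp .pdf file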
@@ -377,38 +415,25 @@ DESCRIPTION_MD = """
 PLACEHOLDER_IMAGE_URL = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot+Placeholder"
 
 
-
+# ------------------ CORE CHAT LOGIC ------------------
+
+def load_csv(file_obj):
     if file_obj is None:
-        return pd.DataFrame(), "No file uploaded yet."
+        return pd.DataFrame(), pd.DataFrame(), "No file uploaded yet."
     try:
         df = pd.read_csv(file_obj.name)
         df = normalize_column_names(df)
         info = f"Loaded CSV with {len(df)} rows and {len(df.columns)} columns."
-        return df, info
+        return df, df.head(20), info
     except Exception as e:
-        return pd.DataFrame(), f"Error loading CSV: {e}"
-
-
-def chat_logic(
-    user_message: str,
-    history: List[List[str]],
-    df_state: Optional[pd.DataFrame]
-) -> Tuple[str, str]:
-    """
-    Main chat handler.
-    Returns: (assistant_reply, anomaly_image_or_empty)
-    """
-    if df_state is None or df_state.empty:
-        return "Please upload a CSV file with logs first.", ""
-
-    # 1) Extract intent from LLM
-    intent = extract_intent(user_message)
+        return pd.DataFrame(), pd.DataFrame(), f"Error loading CSV: {e}"
+
+
+def chat_logic(user_message: str, df_state: pd.DataFrame):
+    intent = extract_intent(user_message)
 
     filtered_df, anomalies, filter_desc = apply_intent_to_dataframe(df_state, intent)
 
-    sample = filtered_df.head(30)  # small sample
+    sample = filtered_df.head(30)
     summary = generate_summary(
         user_question=user_message,
         filter_description=filter_desc,
@@ -416,18 +441,62 @@ def chat_logic(
         anomalies=anomalies
     )
 
-
-
-
+    img = PLACEHOLDER_IMAGE_URL if anomalies else ""
+    return summary, img, filtered_df, anomalies
+
+
+def on_user_message(user_message, chat_history, df):
+    # Append user message
+    chat_history = chat_history + [{"role": "user", "content": user_message}]
+
+    if df is None or df.empty:
+        reply = "📂 Please upload a CSV file with logs first."
+        chat_history = chat_history + [{"role": "assistant", "content": reply}]
+        return chat_history, gr.update(visible=False), gr.update(visible=False), None
+
+    summary_text, img, filtered_df, anomalies = chat_logic(user_message, df)
+
+    risk_icon, risk_label, _ = calculate_risk_score(anomalies)
+    reply_text = f"{risk_icon} **Risk Level: {risk_label}**\n\n" + summary_text
+
+    chat_history = chat_history + [{"role": "assistant", "content": reply_text}]
+
+    # Chart
+    fig = generate_bar_chart(filtered_df)
+    if fig is not None:
+        chart_update = gr.update(value=fig, visible=True)
+    else:
+        chart_update = gr.update(visible=False)
+
+    # Report meta state
+    report_meta = (reply_text, anomalies, risk_icon, risk_label)
+
+    # Screenshot
+    if img:
+        img_update = gr.update(value=img, visible=True)
+    else:
+        img_update = gr.update(visible=False)
+
+    return chat_history, img_update, chart_update, report_meta
+
+
+def on_generate_report(report_meta):
+    if not report_meta:
+        return gr.update(visible=False)
+    summary_text, anomalies, risk_icon, risk_label = report_meta
+    pdf_path = build_pdf_report(summary_text, anomalies, risk_icon, risk_label)
+    return gr.update(value=pdf_path, visible=True)
+
+
+# ------------------ GRADIO UI ------------------
+
-with gr.Blocks() as demo:
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="gray")) as demo:
     gr.Markdown(DESCRIPTION_MD)
 
     with gr.Row():
         with gr.Column(scale=2):
             file_input = gr.File(label="Upload CSV log file", file_types=[".csv"])
+            load_btn = gr.Button("Load CSV")
             load_info = gr.Markdown("No file loaded.")
         with gr.Column(scale=3):
             df_preview = gr.Dataframe(
@@ -439,11 +508,9 @@ with gr.Blocks() as demo:
     df_state = gr.State(pd.DataFrame())
 
     def on_load_csv(file_obj):
-        df, info = load_csv(file_obj)
-        preview = df.head(20) if not df.empty else pd.DataFrame()
+        df, preview, info = load_csv(file_obj)
         return df, preview, info
 
-    load_btn = gr.Button("Load CSV")
     load_btn.click(
         fn=on_load_csv,
         inputs=[file_input],
@@ -451,49 +518,56 @@ with gr.Blocks() as demo:
     )
 
     gr.Markdown("---")
-    gr.Markdown("### 💬
+    gr.Markdown("### 💬 Smart Log Copilot")
 
     with gr.Row():
         with gr.Column(scale=3):
-            chatbot = gr.Chatbot(
+            chatbot = gr.Chatbot(
+                label=None,
+                type="messages",
+            )
             msg = gr.Textbox(
-
-
+                placeholder="Ask a question like: Who logged in late at night?",
+                show_label=False,
                 lines=2
             )
-            send_btn = gr.Button("Send")
+            send_btn = gr.Button("Send", variant="primary")
         with gr.Column(scale=2):
             anomaly_image = gr.Image(
                 label="Anomaly Screenshot (placeholder)",
-                value=None,
                 visible=False
             )
+            chart_plot = gr.Plot(
+                label="Log Activity Chart",
+                visible=False
+            )
+            report_btn = gr.Button("Generate PDF Report", variant="secondary")
+            pdf_file = gr.File(label="Download Security Report", visible=False)
 
-    def on_user_message(user_message, chat_history, df):
-        reply, img = chat_logic(user_message, chat_history, df)
-        chat_history = chat_history + [[user_message, reply]]
-        # Show image only if URL returned
-        if img:
-            return chat_history, gr.update(value=img, visible=True)
-        else:
-            return chat_history, gr.update(visible=False)
+    report_state = gr.State()
 
     send_btn.click(
         fn=on_user_message,
         inputs=[msg, chatbot, df_state],
-        outputs=[chatbot, anomaly_image]
+        outputs=[chatbot, anomaly_image, chart_plot, report_state]
     )
 
     msg.submit(
         fn=on_user_message,
         inputs=[msg, chatbot, df_state],
-        outputs=[chatbot, anomaly_image]
+        outputs=[chatbot, anomaly_image, chart_plot, report_state]
+    )
+
+    report_btn.click(
+        fn=on_generate_report,
+        inputs=[report_state],
+        outputs=[pdf_file]
     )
 
     gr.Markdown(
         """
-    **Tip:**
-    `timestamp, user, system, status, country
+    **Tip:** Use a demo CSV with columns like:
+    `timestamp, user, system, status, country`
     and deliberately add:
     - multiple failed logins,
     - some late-night logins,