File size: 10,652 Bytes
37a70cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# src/app.py
import streamlit as st
import pandas as pd
import json
from pathlib import Path
from data_loader import load_tickets
from classifier import classify_ticket, classify_all_and_save

# NEW: import RAG handler
try:
    from rag import handle_rag_query
except Exception:
    handle_rag_query = None  # we'll handle absence gracefully

# Config
# NOTE: st.set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="Atlan - Support Copilot (Phase 3)", layout="wide")
ROOT = Path(__file__).parent.parent.resolve()   # project root
# Pre-computed Phase 2 output written by classify_all_and_save() at the project root.
CLASSIFIED_PATH = ROOT.joinpath("classified_tickets_phase2.json")

st.title("Atlan β€” Support Copilot (Phase 3)")
st.markdown(
    "**Phase 3:** Zero-shot topic classification + HF sentiment + rule-based priority + RAG (retrieval-augmented generation). "
    "This demo shows bulk classification and an interactive agent with RAG."
)

# Sidebar controls
# NOTE: widget call order defines on-screen order; keep these in sequence.
st.sidebar.header("Controls")
use_saved = st.sidebar.checkbox("Load pre-saved classified file (if available)", value=True)
run_classify_all = st.sidebar.button("Classify ALL tickets & Save (Phase 2)")
reload_ui = st.sidebar.button("Reload UI")

# NEW: RAG options in sidebar
st.sidebar.markdown("### RAG options")
use_openai = st.sidebar.checkbox("Use OpenAI for generation (if API key set)", value=False)
top_k = st.sidebar.slider("RAG: number of passages to retrieve", min_value=1, max_value=10, value=5)

# Safe reload: prefer the modern st.rerun() (Streamlit >= 1.27), fall back to
# the legacy st.experimental_rerun() on older versions, and finally to a
# manual-refresh hint. Feature detection via getattr avoids wrapping the call
# in try/except, which could swallow Streamlit's internal rerun control-flow
# exception and silently cancel the rerun.
if reload_ui:
    rerun = getattr(st, "rerun", None) or getattr(st, "experimental_rerun", None)
    if rerun is not None:
        rerun()
    else:
        st.info("Automatic reload isn't supported by this Streamlit version. Please refresh the browser page to reload the UI.")

# Load tickets (original) β€” call the loader without forcing a path so it uses its default
try:
    tickets = load_tickets()   # loader default = ../sample_tickets.json (project root)
except Exception as e:
    # Keep the app alive with an empty ticket list so the UI still renders.
    st.error("Could not load sample tickets. Ensure sample_tickets.json exists at the project root (one level above src/).")
    st.exception(e)
    tickets = []

# If user asked to classify all, run classification and save (uses classifier defaults)
if run_classify_all:
    with st.spinner("Running classification on all tickets (models may load on first run)..."):
        try:
            # Writes the Phase 2 JSON next to the project root; returned path is shown to the user.
            out_path = classify_all_and_save()  # defaults -> saves to ../classified_tickets_phase2.json
            st.success(f"Classified and saved to: {out_path}")
        except Exception as e:
            st.error("Error during batch classification. See details below.")
            st.exception(e)

# Try to load pre-saved classified file (if requested and exists)
# classified_data stays None on any failure, which triggers live classification below.
classified_data = None
if use_saved and CLASSIFIED_PATH.exists():
    try:
        classified_data = json.loads(CLASSIFIED_PATH.read_text(encoding="utf-8"))
    except Exception as e:
        st.warning("Could not read the saved classified file; falling back to live classification.")
        st.exception(e)

tab1, tab2 = st.tabs(["Bulk Classification Dashboard", "Interactive Agent (demo + RAG)"])

with tab1:
    st.header("Bulk ticket classification")
    st.write("This view shows all tickets with their inferred topic tags, sentiment, and priority.")

    # Flatten tickets into table rows: id, subject, comma-joined topic tags,
    # sentiment, and priority.
    rows = []
    # If we have pre-classified data, use that (faster). Otherwise classify on the fly.
    if classified_data:
        for entry in classified_data:
            c = entry.get("classification", {})
            rows.append({
                "id": entry.get("id"),
                "subject": entry.get("subject"),
                "topic_tags": ", ".join(c.get("topic_tags", [])),
                "sentiment": c.get("sentiment", ""),
                "priority": c.get("priority", ""),
            })
    else:
        # Live classify (will call HF pipelines lazily)
        with st.spinner("Classifying tickets (zero-shot)... this may take a few seconds on first run"):
            for t in tickets:
                try:
                    c = classify_ticket(t)
                except Exception as e:
                    # One bad ticket must not abort the whole table; mark it and continue.
                    st.error(f"Error classifying ticket {t.get('id')}: {e}")
                    c = {"topic_tags": [], "sentiment": "Error", "priority": "Error"}
                rows.append({
                    "id": t.get("id"),
                    "subject": t.get("subject"),
                    "topic_tags": ", ".join(c.get("topic_tags", [])),
                    "sentiment": c.get("sentiment", ""),
                    "priority": c.get("priority", ""),
                })

    # Build the table with an explicit column order so the filter/lookup code
    # below works even when `rows` is empty: a bare pd.DataFrame([]) has no
    # columns at all, and the "subject"/"topic_tags"/"id" lookups would raise
    # KeyError, crashing the app when no tickets are loaded.
    df = pd.DataFrame(rows, columns=["id", "subject", "topic_tags", "sentiment", "priority"])
    # basic filters
    cols = st.columns([2, 1, 1, 1])
    with cols[0]:
        q = st.text_input("Filter by subject/text contains")
    with cols[1]:
        sel_topic = st.selectbox("Filter by topic (contains)", options=["(any)"] + sorted({t for row in rows for t in row["topic_tags"].split(", ") if t}))
    with cols[2]:
        sel_sent = st.selectbox("Filter by sentiment", options=["(any)","Angry","Frustrated","Neutral","Curious","Positive"])
    with cols[3]:
        sel_prio = st.selectbox("Filter by priority", options=["(any)","P0","P1","P2"])

    df_display = df.copy()
    if q:
        # regex=False: treat the user's text as a literal substring — otherwise
        # typing a regex metacharacter like "(" would raise re.error.
        df_display = df_display[df_display["subject"].str.contains(q, case=False, na=False, regex=False) | df_display["topic_tags"].str.contains(q, case=False, na=False, regex=False)]
    if sel_topic and sel_topic != "(any)":
        # regex=False so topic labels such as "API/SDK" match literally.
        df_display = df_display[df_display["topic_tags"].str.contains(sel_topic, na=False, regex=False)]
    if sel_sent and sel_sent != "(any)":
        df_display = df_display[df_display["sentiment"] == sel_sent]
    if sel_prio and sel_prio != "(any)":
        df_display = df_display[df_display["priority"] == sel_prio]

    st.dataframe(df_display.reset_index(drop=True), use_container_width=True, height=420)

    st.markdown("### Sample ticket detail")
    # choose ticket from the (possibly filtered) table
    ids = df_display["id"].tolist()
    if ids:
        sel = st.selectbox("Select ticket", ids)
        # Find the original ticket object, preferring the pre-classified entry.
        # Use .get("id") (consistent with the row-building code above) so a
        # malformed entry without an "id" key is skipped instead of raising
        # KeyError mid-scan.
        selected_full = None
        if classified_data:
            selected_full = next((x for x in classified_data if x.get("id") == sel), None)
        if not selected_full:
            selected_full = next((x for x in tickets if x.get("id") == sel), None)

        st.write(selected_full)
        st.markdown("**Classification (raw)**")
        if selected_full and "classification" in selected_full:
            st.json(selected_full["classification"])
        elif selected_full:
            # classify on-the-fly for selected ticket if no classification exists
            with st.spinner("Classifying selected ticket..."):
                try:
                    c = classify_ticket(selected_full)
                except Exception as e:
                    st.error("Error during classification of selected ticket.")
                    st.exception(e)
                    c = {}
                st.json(c)
        else:
            # The id came from the table but no matching ticket object exists;
            # don't hand None to classify_ticket (which would have crashed).
            st.warning("Could not locate the full ticket object for this id.")
    else:
        st.info("No tickets to display with current filters.")

with tab2:
    st.header("Interactive Agent (Phase 3 - analysis + RAG)")
    st.markdown(
        "Paste a ticket subject and body (or type). The backend analysis will show topic tags, sentiment and priority. "
        "If the topic is one of the RAG-enabled categories (How-to, Product, Best practices, API/SDK, SSO), the app will run RAG and show a cited answer."
    )

    user_input = st.text_area("Paste a ticket subject + body (or type a new one)", height=220, placeholder="Subject line on first line, body below...")
    analyze = st.button("Analyze input")

    if analyze:
        if not user_input.strip():
            st.warning("Enter some ticket text to analyze.")
        else:
            # Infer subject/body: first line = subject
            # For single-line input, the full text doubles as the body so the
            # classifier still receives some content.
            lines = user_input.strip().split("\n")
            subject = lines[0]
            body = "\n".join(lines[1:]).strip() if len(lines) > 1 else user_input.strip()
            # Synthetic ticket dict matching the shape classify_ticket expects.
            demo_ticket = {"id": "TEMP", "subject": subject, "body": body}
            with st.spinner("Analyzing (zero-shot + sentiment)..."):
                try:
                    c = classify_ticket(demo_ticket)
                except Exception as e:
                    st.error("Error during classification.")
                    st.exception(e)
                    c = {"topic_tags": [], "sentiment": "Error", "priority": "Error"}

            st.subheader("Internal analysis (backend view)")
            st.json(c)

            st.subheader("Final response (frontend view)")
            # RAG-enabled topics: only these categories trigger retrieval.
            allowed_rag = {"How-to", "Product", "Best practices", "API/SDK", "SSO"}

            # If ticket topic is RAG-enabled -> run RAG
            if any(lbl in allowed_rag for lbl in c.get("topic_tags", [])):
                if handle_rag_query is None:
                    st.error("RAG handler not found. Make sure src/rag.py exists and is importable.")
                else:
                    st.info("RAG triggered β€” retrieving docs and generating an answer...")
                    with st.spinner("Retrieving + generating answer (may take a few seconds)..."):
                        # Use the combined subject+body as the query
                        query_text = f"{subject}\n\n{body}"
                        try:
                            rag_res = handle_rag_query(query_text, top_k=top_k, use_openai=use_openai)
                        except Exception as e:
                            st.error("Error during RAG operation.")
                            st.exception(e)
                            rag_res = {"answer": "RAG failed.", "sources": [], "retrieved": []}

                    st.subheader("Answer")
                    st.markdown(rag_res.get("answer", "No answer returned."))

                    st.subheader("Sources (citations)")
                    for s in rag_res.get("sources", []):
                        st.write(s)

                    st.subheader("Top retrieved passages (debug view)")
                    for r in rag_res.get("retrieved", [])[:top_k]:
                        # Format the score defensively: r.get('score') may be
                        # missing/None, and applying ":.4f" to None raises
                        # TypeError and aborts the whole debug view.
                        score = r.get("score")
                        score_str = f"{score:.4f}" if isinstance(score, (int, float)) else "n/a"
                        st.markdown(f"**Title:** {r.get('title','(no title)')}  \n**URL:** {r.get('url')}  \n**Score:** {score_str}")
                        text = r.get("text", "")
                        st.write(text[:800] + ("..." if len(text) > 800 else ""))

            else:
                st.success(f"This ticket has been classified as {c.get('topic_tags', [])} and routed to the appropriate team.")

# Footer: model/stack summary shown under every view.
st.markdown("---")
st.caption(
    "Phase 3 demo β€” zero-shot topic classification (facebook/bart-large-mnli), sentiment (distilbert SST-2), and RAG using local FAISS + sentence-transformers. "
    "Toggle 'Use OpenAI' in the sidebar to use the OpenAI API for generation (requires OPENAI_API_KEY in env)."
)