File size: 10,246 Bytes
ccab3d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
"""
app.py — Gradio UI for BERTopic Agentic Thematic Analysis
"""

import gradio as gr
import pandas as pd
from agent import run_agent

def format_chat_history(history):
    """Return ``history`` exactly as received.

    This is a compatibility shim: it performs no conversion. The event
    handlers normalise the chat history themselves.
    """
    return history


# Column layout of the review table; used to build the empty placeholder frame.
_REVIEW_COLUMNS = ["#", "Topic Label", "Top Evidence", "Sentences", "Papers",
                   "Approve", "Rename To", "Reasoning"]

# Role names accepted when pairing role/content messages into exchanges.
_USER_ROLES = ("user", "human")
_ASSISTANT_ROLES = ("assistant", "ai")


def _to_agent_history(hist):
    """Convert the chatbot history into the form handed to ``run_agent``.

    When the first entry is a dict, the history is treated as role/content
    messages: each user message immediately followed by an assistant message
    becomes one ``(user_content, assistant_content)`` tuple, and messages that
    do not form such a pair are skipped. Any other non-empty history is
    returned unchanged. Empty or ``None`` history yields ``[]``.
    """
    if not hist:
        return []
    if not isinstance(hist[0], dict):
        return hist

    pairs = []
    i = 0
    while i < len(hist) - 1:
        first, second = hist[i], hist[i + 1]
        if (first.get("role", "") in _USER_ROLES
                and second.get("role", "") in _ASSISTANT_ROLES):
            pairs.append((first.get("content", ""), second.get("content", "")))
            i += 2
        else:
            # Not a user->assistant pair: slide the window by one message.
            i += 1
    return pairs


def _to_gradio_history(pairs):
    """Expand ``(user, assistant)`` pairs into role/content message dicts."""
    messages = []
    for pair in pairs or []:
        if isinstance(pair, (list, tuple)) and len(pair) >= 2:
            messages.append({"role": "user", "content": pair[0]})
            messages.append({"role": "assistant", "content": pair[1]})
    return messages


def _extend_chat(chat_history, agent_history, user_text, assistant_text):
    """Return a new message list: the prior history plus one more exchange.

    A history that is already a list of dicts is shallow-copied, so the
    caller's list is not mutated; otherwise the message list is rebuilt from
    ``agent_history``.
    """
    if isinstance(chat_history, list) and chat_history and isinstance(chat_history[0], dict):
        new_chat = chat_history.copy()
    else:
        new_chat = _to_gradio_history(agent_history)
    new_chat.append({"role": "user", "content": user_text})
    new_chat.append({"role": "assistant", "content": assistant_text})
    return new_chat


def send_message(user_message, chat_history, file_path, thread_id):
    """Forward a user message to the agent and return the updated UI values.

    Args:
        user_message: Text typed by the user. ``None``, empty, or
            whitespace-only input is ignored.
        chat_history: Current chatbot value (list of role/content dicts, list
            of pairs, or empty/``None``).
        file_path: Path of the uploaded CSV, passed to the agent as context.
        thread_id: Conversation identifier, passed to the agent as context.

    Returns:
        A 4-tuple ``(chat, textbox_value, review_df, phase_html)``. For ignored
        input the history is returned untouched, the textbox is cleared and the
        last two outputs are no-op ``gr.update()`` values.
    """
    # Guard against None as well as blank text before calling .strip().
    if not user_message or not user_message.strip():
        return chat_history, "", gr.update(), gr.update()

    agent_chat_history = _to_agent_history(chat_history)
    context = {"file_path": file_path, "thread_id": thread_id}
    response, review_data, phase_html = run_agent(user_message, context, agent_chat_history)

    new_chat = _extend_chat(chat_history, agent_chat_history, user_message, response)

    # NOTE(review): when the agent returns no review data the table output is
    # reset to an empty frame, whereas submit_review keeps the current table.
    # Confirm this difference is intended.
    if review_data:
        review_df = pd.DataFrame(review_data)
    else:
        review_df = pd.DataFrame(columns=_REVIEW_COLUMNS)
    return new_chat, "", review_df, phase_html


def submit_review(review_df, chat_history, file_path, thread_id):
    """Send the edited review table back to the agent.

    The table is serialised as JSON records and sent as a message prefixed
    with ``[REVIEW_TABLE_SUBMITTED]``. The chat shows a short
    "(Review table submitted)" entry instead of the raw JSON.

    Args:
        review_df: DataFrame holding the current review table.
        chat_history: Current chatbot value (same shapes as in send_message).
        file_path: Path of the uploaded CSV, passed to the agent as context.
        thread_id: Conversation identifier, passed to the agent as context.

    Returns:
        A 3-tuple ``(chat, review_df, phase_html)``. The submitted table is
        returned unchanged when the agent supplies no new review data.
    """
    table_json = review_df.to_json(orient="records")
    review_message = f"[REVIEW_TABLE_SUBMITTED]\n{table_json}"
    context = {"file_path": file_path, "thread_id": thread_id}

    agent_chat_history = _to_agent_history(chat_history)
    response, new_review_data, phase_html = run_agent(review_message, context, agent_chat_history)

    new_chat = _extend_chat(
        chat_history, agent_chat_history, "(Review table submitted)", response
    )
    new_df = pd.DataFrame(new_review_data) if new_review_data else review_df
    return new_chat, new_df, phase_html


def get_download_files():
    """Collect output files available for download.

    Looks for ``*.csv``, ``*.json`` and ``*.txt`` files directly inside the
    ``outputs`` directory, relative to the current working directory.

    Returns:
        A list of matching paths grouped by extension (csv, json, txt) and
        sorted within each group, or ``None`` when nothing matches.
    """
    import glob

    files = []
    for pattern in ("outputs/*.csv", "outputs/*.json", "outputs/*.txt"):
        # glob.glob returns matches in arbitrary order; sort so the list shown
        # to the user is stable between refreshes.
        files.extend(sorted(glob.glob(pattern)))
    return files or None

# UI definition. Components are created in the order they are declared, so
# the statement order below is also the layout order.
with gr.Blocks(title="BERTopic Agentic Thematic Analysis") as demo:
    # Per-session state: a fixed conversation id and the uploaded CSV path.
    thread_id_state = gr.State("thread-001")
    uploaded_path_state = gr.State(None)

    gr.Markdown(
        "# 🔬 BERTopic Agentic Thematic Analysis\n"
        "Upload your Scopus CSV and follow the agent through Braun & Clarke's 6 phases."
    )

    # Initial phase tracker with every phase unchecked; the handlers replace
    # it with the HTML returned by the agent.
    phase_bar = gr.HTML(
        value="""
        <div style='padding:10px;background:#f0f4ff;border-radius:8px;font-family:sans-serif'>
          <b>Phase Progress:</b>
          <span style='margin-left:12px'>⬜ P1</span>
          <span style='margin-left:8px'>⬜ P2</span>
          <span style='margin-left:8px'>⬜ P3</span>
          <span style='margin-left:8px'>⬜ P4</span>
          <span style='margin-left:8px'>⬜ P5</span>
          <span style='margin-left:8px'>⬜ P5.5</span>
          <span style='margin-left:8px'>⬜ P6</span>
        </div>
        """,
        label="Phase Tracker"
    )

    with gr.Group():
        gr.Markdown("## 📁 Section 1: Upload Scopus CSV")
        csv_upload = gr.File(
            label="Upload Scopus CSV",
            file_types=[".csv"],
            type="filepath"
        )
        upload_status = gr.Textbox(label="Upload Status", interactive=False)

        def handle_upload(filepath):
            """Return a status line and the path to store for the uploaded file."""
            if filepath is None:
                return "No file uploaded.", None
            return f"✅ File loaded: {filepath}", filepath

        csv_upload.change(
            fn=handle_upload,
            inputs=[csv_upload],
            outputs=[upload_status, uploaded_path_state]
        )

    with gr.Group():
        gr.Markdown("## 💬 Section 2: Agent Chat")
        gr.Markdown(
            "_Start with:_ **'Start Phase 1'** to begin familiarisation, "
            "then follow the agent's instructions phase by phase."
        )
        chatbot = gr.Chatbot(height=420, label="Agent Conversation")
        with gr.Row():
            user_input = gr.Textbox(
                placeholder="Type your message or command here...",
                label="Your Message",
                scale=5
            )
            send_btn = gr.Button("Send ▶", variant="primary", scale=1)

    with gr.Group():
        gr.Markdown("## 📊 Section 3: Results")

        # Review Table
        gr.Markdown("### 🗂️ Topic Review Table")
        gr.Markdown(
            "Edit the **Approve** (True/False), **Rename To**, and **Reasoning** columns, "
            "then click **Submit Review** to proceed."
        )
        review_table = gr.Dataframe(
            headers=["#", "Topic Label", "Top Evidence", "Sentences",
                     "Papers", "Approve", "Rename To", "Reasoning"],
            datatype=["number", "str", "str", "number", "number", "bool", "str", "str"],
            interactive=True,
            label="Review Table",
            wrap=True,
            row_count=(5, "dynamic"),
            column_count=(8, "fixed")
        )
        submit_review_btn = gr.Button("✅ Submit Review", variant="secondary")

        gr.Markdown("### 📈 Topic Charts")
        with gr.Row():
            chart_selector = gr.Dropdown(
                choices=["Topic Distribution", "Similarity Heatmap",
                         "Top Keywords per Topic", "Abstract vs Title Comparison"],
                label="Select Chart",
                value="Topic Distribution"
            )
        chart_display = gr.HTML(label="Chart")

        def load_chart(chart_name):
            """Load a chart HTML file from disk and wrap it in an iframe.

            Returns the iframe markup when the file mapped to ``chart_name``
            exists, otherwise a grey placeholder paragraph.
            """
            import os
            import html as _html
            chart_map = {
                "Topic Distribution":         "outputs/chart_distribution.html",
                "Similarity Heatmap":          "outputs/chart_heatmap.html",
                "Top Keywords per Topic":      "outputs/chart_keywords.html",
                "Abstract vs Title Comparison":"outputs/chart_comparison.html",
            }
            path = chart_map.get(chart_name)
            if path and os.path.exists(path):
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
                # The whole document goes into the iframe's srcdoc attribute,
                # so it must be escaped (quotes included) to stay inside the
                # attribute value.
                srcdoc = _html.escape(content, quote=True)
                iframe = (
                    f"<iframe srcdoc=\"{srcdoc}\" style=\"border:0; width:100%; height:700px;\"></iframe>"
                )
                return iframe
            return "<p style='color:grey'>Chart not yet generated. Complete the relevant phase first.</p>"

        chart_selector.change(fn=load_chart, inputs=[chart_selector], outputs=[chart_display])

        gr.Markdown("### 📥 Download Outputs")
        download_btn = gr.Button("🔄 Refresh Download List")
        download_files = gr.File(label="Available Output Files", file_count="multiple")

        download_btn.click(fn=get_download_files, inputs=[], outputs=[download_files])

    # The Send button and pressing Enter in the textbox run the same handler
    # with the same inputs and outputs.
    _send_inputs = [user_input, chatbot, uploaded_path_state, thread_id_state]
    _send_outputs = [chatbot, user_input, review_table, phase_bar]
    send_btn.click(fn=send_message, inputs=_send_inputs, outputs=_send_outputs)
    user_input.submit(fn=send_message, inputs=_send_inputs, outputs=_send_outputs)

    submit_review_btn.click(
        fn=submit_review,
        inputs=[review_table, chatbot, uploaded_path_state, thread_id_state],
        outputs=[chatbot, review_table, phase_bar]
    )


if __name__ == "__main__":
    # Script entry point: serve the UI on all interfaces at port 7860,
    # without a public share link, using the Soft theme.
    launch_options = {
        "share": False,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "theme": gr.themes.Soft(),
    }
    demo.launch(**launch_options)