File size: 37,165 Bytes
d50fc97
 
05871ef
 
28abf63
 
813f3c0
 
 
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
d33d74b
 
8812eb7
 
 
 
d50fc97
05871ef
 
 
 
 
 
 
d50fc97
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a24abb9
05871ef
 
 
 
a24abb9
 
8812eb7
 
 
 
 
 
 
9047f8a
05871ef
9047f8a
05871ef
617daa2
 
 
 
 
 
 
813f3c0
b52cc70
813f3c0
617daa2
b52cc70
617daa2
b52cc70
617daa2
 
813f3c0
 
 
 
 
 
 
 
 
617daa2
 
b52cc70
617daa2
b52cc70
617daa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b52cc70
 
 
617daa2
b52cc70
 
617daa2
 
 
 
f2c945e
 
 
 
 
 
 
 
 
 
 
813f3c0
f2c945e
 
 
 
 
 
 
813f3c0
f2c945e
 
813f3c0
f2c945e
813f3c0
 
f2c945e
 
 
813f3c0
f2c945e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dd22541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617daa2
813f3c0
 
a24abb9
813f3c0
617daa2
f2c945e
 
 
 
 
 
 
 
 
 
 
 
 
813f3c0
 
 
617daa2
 
813f3c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617daa2
813f3c0
617daa2
 
813f3c0
617daa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9047f8a
05871ef
8812eb7
 
 
 
 
 
9047f8a
05871ef
a24abb9
 
d50fc97
05871ef
 
 
9098a8c
 
 
 
 
 
 
 
 
 
a977462
 
 
 
 
 
 
05871ef
 
a977462
05871ef
 
a977462
 
 
 
 
05871ef
 
 
 
8812eb7
05871ef
 
 
 
 
9098a8c
 
 
 
 
 
 
05871ef
 
d50fc97
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
06825b1
 
8812eb7
06825b1
8812eb7
901e364
 
 
 
 
 
06825b1
 
 
 
 
 
 
 
 
 
 
 
 
 
8812eb7
 
 
 
 
 
 
 
 
 
 
 
06825b1
d33d74b
 
06825b1
 
 
 
d33d74b
 
 
 
 
 
 
 
 
 
8812eb7
 
d33d74b
8812eb7
d33d74b
 
 
 
05871ef
 
 
9098a8c
 
 
d33d74b
9098a8c
 
 
 
 
 
 
d33d74b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d50fc97
 
05871ef
 
8812eb7
05871ef
8812eb7
 
05871ef
 
 
 
a24abb9
8812eb7
 
 
a9d92ec
 
 
 
8812eb7
a9d92ec
 
 
 
 
8812eb7
 
 
 
a9d92ec
8812eb7
 
 
 
 
 
 
 
 
 
d33d74b
 
 
8812eb7
 
 
 
 
d8d53d9
 
 
a9d92ec
 
 
d8d53d9
 
a9d92ec
d8d53d9
8812eb7
a9d92ec
 
 
 
 
d8d53d9
a9d92ec
d8d53d9
 
 
 
a9d92ec
d8d53d9
a9d92ec
 
 
 
 
8812eb7
 
a9d92ec
d8d53d9
 
a9d92ec
 
 
d8d53d9
 
a9d92ec
 
d8d53d9
 
 
 
a9d92ec
8812eb7
d8d53d9
a9d92ec
 
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
d33d74b
 
 
 
 
 
 
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d33d74b
8812eb7
 
 
d33d74b
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
d33d74b
8812eb7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d33d74b
8812eb7
 
05871ef
 
8812eb7
a24abb9
05871ef
d33d74b
05871ef
8812eb7
 
d33d74b
05871ef
d33d74b
05871ef
 
d33d74b
 
05871ef
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
import os
import gradio as gr
import openai

from planner import plan_course
from generators import generate_course_zip
from searcher import web_search, fetch_and_extract, get_youtube_transcript

# Bring in DB helpers to persist resources if needed later
# Import DB helpers. We include list_chats, rename_chat and delete_chat to support
# persistent chat sessions and management actions. The resource helpers allow
# fetching cached resources if needed.
from db import (
    get_resource,
    upsert_resource,
    list_resources,
    new_chat,
    append_message,
    load_chat,
    soft_delete_message,
    list_chats,
    rename_chat,
    delete_chat,
    add_attachment,
    list_attachments,
)

# Import the docx utility to generate Word documents for course outlines.
from docx_utils import outline_to_docx

# Brainstorming instructions; chat() prepends this as the system message when
# it calls the language model.
SYSTEM_PROMPT = " ".join(
    (
        "You are a helpful course planning assistant.",
        "Conduct brainstorming with the user about their course idea.",
        "Offer suggestions, ask clarifying questions, and capture their requirements.",
        "When the user feels ready, they will click 'Finalize Outline' to create a course plan "
        "using all prior conversation and gathered resources.",
    )
)

# Phrases that mark a message as a web-search request. Order matters: when the
# query is derived, the first phrase the message starts with is stripped off.
_SEARCH_TRIGGERS = (
    "search",
    "internet search",
    "web search",
    "find articles",
    "find 5 articles",
    "find five articles",
    "find resources",
)

# Queries too vague to search for on their own; the topic is then taken from
# an earlier user message instead.
_GENERIC_QUERIES = frozenset(
    {
        "",
        "the internet",
        "internet",
        "relevant articles",
        "articles",
        "5 articles",
        "5 relevant articles",
    }
)

# Passed to web_search() as its allowed_domains argument.
_ALLOWED_SEARCH_DOMAINS = (
    ".edu",
    ".org",
    ".gov",
    "arxiv.org",
    "kdnuggets.com",
    "towardsdatascience.com",
    "datacamp.com",
    "medium.com",
)


def _first_url(text):
    """Return the first whitespace-delimited token starting with http(s)://, or None."""
    for token in text.split():
        if token.startswith(("http://", "https://")):
            return token
    return None


def _is_search_request(lowered):
    """Tell whether a lower-cased message asks for a web search.

    Trigger phrases must begin at a word boundary so that words which merely
    contain a trigger (e.g. "research") do not start a search.
    """
    import re

    if any(re.search(r"\b" + re.escape(trig), lowered) for trig in _SEARCH_TRIGGERS):
        return True
    return "find" in lowered and ("article" in lowered or "resource" in lowered)


def _derive_search_query(user_message, chat_history):
    """Reduce a search request to the topic that should be sent to the search API.

    Falls back to the most recent user message that is not itself a search
    request when the remaining query is empty or generic.
    """
    import re

    # Work on the stripped text throughout so prefix lengths line up with the
    # lower-cased copy used for matching.
    stripped = user_message.strip()
    lowered = stripped.lower()
    query = stripped
    for trig in _SEARCH_TRIGGERS:
        if lowered.startswith(trig):
            query = stripped[len(trig):].strip()
            break
    else:
        if lowered.startswith("find"):
            # Drop "find", an optional count and the article/resource noun.
            query = re.sub(
                r"^find\s+(?:\d+\s+)?(?:articles?|resources?)\s*",
                "",
                stripped,
                flags=re.IGNORECASE,
            ).strip()
    # Prefer an explicitly quoted phrase; otherwise the text after "about"/"on".
    quoted = re.search(r"[\"']([^\"']+)[\"']", query)
    if quoted:
        query = quoted.group(1).strip()
    else:
        topic = re.search(r"\b(?:about|on)\s+([^.,;!?]+)", query, flags=re.IGNORECASE)
        if topic:
            query = topic.group(1).strip()
    # Cut trailing instructions such as "provide summaries"; keep the query
    # unchanged if nothing would be left.
    query = re.split(
        r"\b(?:summarize|summaries|provide|examples|use cases|case studies)\b",
        query,
        maxsplit=1,
        flags=re.IGNORECASE,
    )[0].strip() or query
    if query.lower() not in _GENERIC_QUERIES:
        return query
    # Generic request ("search the internet"): reuse the latest user message
    # that was not a search request. The current message is in chat_history
    # too, but it is a search request and is therefore skipped.
    fallback = ""
    for past in reversed(chat_history):
        if past.get("role") != "user":
            continue
        past_text = past.get("content") or ""
        if not _is_search_request(past_text.lower()):
            fallback = past_text.strip()
            break
    return fallback or stripped


def _request_completion(messages, default_max_tokens):
    """Send `messages` to the OpenAI chat API and return the reply text.

    Reads OPENAI_MODEL, TEMPERATURE and MAX_OUTPUT_TOKENS from the environment
    (MAX_OUTPUT_TOKENS defaults to `default_max_tokens`). Works with both the
    client-object SDK (`openai.OpenAI`) and the legacy module-level API.

    Raises:
        ValueError: if neither OPENAI_API_KEY nor COURSECREATOR_API_KEY is set,
            or if TEMPERATURE / MAX_OUTPUT_TOKENS cannot be parsed.
        Exception: whatever the SDK raises when both request attempts fail.
    """
    model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
    temperature = float(os.getenv("TEMPERATURE", "0.7"))
    max_tokens = int(os.getenv("MAX_OUTPUT_TOKENS", str(default_max_tokens)))
    api_key = os.getenv("OPENAI_API_KEY") or os.getenv("COURSECREATOR_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY or COURSECREATOR_API_KEY is not set")
    request = {"model": model, "messages": messages, "temperature": temperature}
    legacy_sdk = not hasattr(openai, "OpenAI")
    if legacy_sdk:
        openai.api_key = api_key
        create = openai.ChatCompletion.create
    else:
        create = openai.OpenAI(api_key=api_key).chat.completions.create
    try:
        response = create(max_tokens=max_tokens, **request)
    except Exception:
        # Some models reject max_tokens; retry once with max_completion_tokens.
        # Any other failure is retried too and then surfaces from this call.
        response = create(max_completion_tokens=max_tokens, **request)
    if legacy_sdk:
        return response["choices"][0]["message"]["content"]
    return response.choices[0].message.content


def _summarise_linked_page(url):
    """Fetch the page (or YouTube transcript) behind `url` and return a reply summarising it."""
    try:
        if "youtube.com" in url or "youtu.be" in url:
            try:
                page_content = get_youtube_transcript(url) or ""
            except Exception:
                page_content = ""
        else:
            record = fetch_and_extract(url)
            page_content = record.get("excerpt", "") if record else ""
    except Exception as e:
        return (
            "An error occurred while extracting the web page. Please ensure your search API key is configured.\n"
            f"(Error: {e})"
        )
    if not page_content:
        return "I couldn't extract content from that page."
    try:
        return _request_completion(
            [
                {
                    "role": "system",
                    "content": "You are a helpful assistant. Summarize the given content in a concise and clear way.",
                },
                # Only the first 8000 characters are sent to the model.
                {"role": "user", "content": page_content[:8000]},
            ],
            512,
        )
    except Exception as e:
        return (
            "An error occurred while summarizing the page content. Please ensure your OpenAI API key is configured.\n"
            f"(Error: {e})"
        )


def _search_and_summarise(query, sources, resource_cache):
    """Search the web for `query`, summarise each new hit and return the reply text.

    Mutates `sources` (appends each fetched hit) and `resource_cache` (stores
    the raw search results under the lower-cased query).
    """
    cache_key = query.lower()
    if cache_key in resource_cache:
        search_results = resource_cache[cache_key]
    else:
        search_results = web_search(
            query, max_results=5, allowed_domains=list(_ALLOWED_SEARCH_DOMAINS)
        )
        resource_cache[cache_key] = search_results
    known_urls = {src.get("url") for src in sources if isinstance(src, dict) and src.get("url")}
    summaries = []
    for item in search_results:
        if not isinstance(item, dict):
            continue
        url = item.get("url")
        title = item.get("title", url)
        if not url or url in known_urls:
            continue
        record = fetch_and_extract(url)
        if not record:
            # Skip results whose content could not be fetched.
            continue
        sources.append({"title": record.get("title", title), "url": record.get("url", url)})
        known_urls.add(url)
        try:
            summary_text = _request_completion(
                [
                    {
                        "role": "system",
                        "content": "You are a helpful assistant. Summarize the following article excerpt in one paragraph.",
                    },
                    {"role": "user", "content": record.get("excerpt", "")[:3000]},
                ],
                256,
            )
        except Exception:
            # If summarisation fails, list the title and URL without a summary.
            summary_text = ""
        line = f"**{title}** ({url})"
        if summary_text:
            line += f"\n{summary_text.strip()}"
        summaries.append(line)
    if summaries:
        return "Here are some articles I found and summarised:\n\n" + "\n\n".join(summaries)
    return "I couldn't fetch or summarise any credible articles for that query."


def chat(user_message, chat_history, chat_pairs, sources, plan, resource_cache, chat_key):
    """
    Handle a user chat message and return updated chat state.

    The message is handled in one of three ways:

    * it contains an http(s) URL: the page (or YouTube transcript) is fetched
      and summarised;
    * it asks for a web search: results are fetched, added to `sources` and
      summarised;
    * otherwise: the conversation is sent to the language model.

    User and assistant messages are persisted with `append_message` when
    `chat_key` is truthy; persistence failures are ignored. Failures of the
    fetch/search/model calls are reported to the user as the assistant reply
    rather than raised.

    Args:
        user_message: The latest user input from the textbox.
        chat_history: List of message dicts representing the full conversation.
        chat_pairs: List of (user, assistant) tuples for display in the Chatbot.
        sources: List of collected resource dicts with 'title' and 'url'.
        plan: The current course plan text (returned unchanged).
        resource_cache: Dictionary caching search results by query.
        chat_key: The unique key identifying the current chat session.

    Returns:
        Tuple of updated (chat_pairs, chat_history, chat_pairs, sources, plan,
        resource_cache).
    """
    # Ensure lists/dicts are initialised
    if chat_history is None:
        chat_history = []
    if chat_pairs is None:
        chat_pairs = []
    if resource_cache is None:
        resource_cache = {}
    if sources is None:
        sources = []
    if user_message is None:
        user_message = ""
    # Persisting is best-effort: the conversation continues without the DB.
    if chat_key:
        try:
            append_message(chat_key, "user", user_message)
        except Exception:
            pass
    chat_history.append({"role": "user", "content": user_message})

    url = _first_url(user_message)
    if url:
        assistant_reply = _summarise_linked_page(url)
    elif _is_search_request(user_message.lower().strip()):
        query = _derive_search_query(user_message, chat_history)
        try:
            assistant_reply = _search_and_summarise(query, sources, resource_cache)
        except Exception as e:
            assistant_reply = (
                "An error occurred during web search and summarisation. Please ensure your API keys are configured.\n"
                f"(Error: {e})"
            )
    else:
        # Regular turn: system prompt followed by the conversation so far.
        messages = [{"role": "system", "content": SYSTEM_PROMPT}] + chat_history
        try:
            assistant_reply = _request_completion(messages, 1024)
        except Exception as e:
            # When the API call fails (e.g. missing API key), reply with the error.
            assistant_reply = (
                "An error occurred while processing your message. "
                "Please ensure your OpenAI API key is configured in the Space secrets.\n"
                f"(Error: {e})"
            )

    chat_history.append({"role": "assistant", "content": assistant_reply})
    if chat_key:
        try:
            append_message(chat_key, "assistant", assistant_reply)
        except Exception:
            pass
    chat_pairs.append((user_message, assistant_reply))
    # chat_pairs is returned twice: once for the Chatbot display, once for state.
    return chat_pairs, chat_history, chat_pairs, sources, plan, resource_cache

def run_search(query, chat_history, chat_pairs, sources, plan, num_results=5, domain_filter=""):
    """Run a web search, add the hits to `sources` and return a text listing.

    Returns:
        Tuple of (summary, chat_history, chat_pairs, sources, plan). `summary`
        holds one "title - url" line per dict result, or an error message when
        the search call raises.
    """
    # NOTE(review): run_web_search is not among the names imported from
    # `searcher` in the visible part of this file - confirm it is defined.
    try:
        raw = run_web_search(query, num_results=num_results, domain_filter=domain_filter)
    except Exception as e:
        # Report search failures (e.g. missing API key) instead of raising.
        failure = (
            "An error occurred during web search. Please ensure your search API key is configured.\n"
            f"(Error: {e})"
        )
        return failure, chat_history, chat_pairs, sources or [], plan

    # The search may hand back a bare list or a dict wrapping it under 'results'.
    if isinstance(raw, dict):
        hits = raw.get("results", [])
    elif isinstance(raw, list):
        hits = raw
    else:
        hits = []

    sources = [] if sources is None else sources
    sources.extend(hits)

    listing = "\n".join(
        f"{hit.get('title', '')} - {hit.get('url', '')}"
        for hit in hits
        if isinstance(hit, dict)
    )
    return listing, chat_history, chat_pairs, sources, plan

def finalize_outline(chat_history, chat_pairs, sources, plan):
    """Build the course outline text from the conversation and collected sources.

    Returns:
        Tuple of (plan_text, chat_history, chat_pairs, sources, plan_text).
        If the planner raises, plan_text is an error message instead.
    """
    chat_history = [] if chat_history is None else chat_history
    sources = [] if sources is None else sources
    try:
        outline = plan_course(chat_history, sources)
    except Exception as e:
        outline = (
            "An error occurred while generating the course outline. Please ensure your API keys are configured.\n"
            f"(Error: {e})"
        )
    # The outline is returned twice: once for display, once as the plan state.
    return outline, chat_history, chat_pairs, sources, outline


def finalize_and_doc(chat_history, chat_pairs, sources, plan, chat_key):
    """
    Generate a course outline, a JSON file and a Word document from the conversation.

    The planner output is written to `course_outline.json` and passed to
    `outline_to_docx`. Files created here live in a fresh temporary directory
    per call, so outlines generated for different chats do not overwrite each
    other. When `chat_key` is truthy the files are recorded as attachments.

    Args:
        chat_history: Conversation messages list.
        chat_pairs: Display pairs list.
        sources: List of collected resources (dictionaries with title and url).
        plan: The existing plan text (ignored here).
        chat_key: The key identifying the current chat; used to record and
            list attachments.

    Returns:
        Tuple of (plan_text, chat_history, chat_pairs, sources, plan_text,
        doc_path, attachments). `doc_path` points at a text file describing
        the error if the Word document could not be created; `attachments` is
        the list of attachment file paths for the chat (empty without a
        chat_key or when listing fails).
    """
    import json
    import tempfile

    if chat_history is None:
        chat_history = []
    if sources is None:
        sources = []
    # Generate the course plan using the planner
    try:
        plan_text = plan_course(chat_history, sources)
    except Exception as e:
        # json.dumps escapes the exception text safely.
        plan_text = json.dumps(
            {
                "error": "An error occurred while generating the course outline.",
                "details": str(e),
            },
            indent=2,
        )
    # A private directory per call keeps the fixed file names while avoiding
    # collisions between chats.
    try:
        out_dir = tempfile.mkdtemp(prefix="course_outline_")
    except OSError:
        out_dir = "/tmp"
    # Write the outline to a file for download (best-effort)
    json_path = os.path.join(out_dir, "course_outline.json")
    try:
        with open(json_path, "w", encoding="utf-8") as jf:
            jf.write(plan_text)
    except Exception:
        json_path = None
    # Create the Word document; the outline text is passed through as-is
    try:
        doc_path = outline_to_docx("Course Outline", plan_text, references=sources)
    except Exception as e:
        # If DOCX generation fails, hand back a text file describing the error
        doc_path = os.path.join(out_dir, "outline_error.txt")
        with open(doc_path, "w", encoding="utf-8") as f:
            f.write(
                "An error occurred while creating the Word document.\n"
                f"(Error: {e})"
            )
    # Record the generated files and fetch the updated attachment list.
    # Both steps are best-effort: a DB failure must not lose the outline.
    attachments = []
    if chat_key:
        try:
            if json_path:
                add_attachment(chat_key, json_path, os.path.basename(json_path))
            if doc_path:
                add_attachment(chat_key, doc_path, os.path.basename(doc_path))
        except Exception:
            pass
        try:
            attachments = [
                att.get("file_path")
                for att in list_attachments(chat_key)
                if att.get("file_path")
            ]
        except Exception:
            attachments = []
    return plan_text, chat_history, chat_pairs, sources, plan_text, doc_path, attachments

def generate_package(plan, sources, chat_key):
    """Generate the final course package and record it as a chat attachment.

    Args:
        plan: Course plan handed to ``generate_course_zip``. A falsy value is
            replaced with a short placeholder string.
        sources: Sources handed to ``generate_course_zip``. ``None`` is
            treated as an empty list.
        chat_key: Key of the chat the result is attached to. When falsy,
            nothing is recorded and the returned attachment list is empty.

    Returns:
        Tuple ``(file_path, attachments)``. ``file_path`` is whatever
        ``generate_course_zip`` returned or, if it raised, the path of a text
        file describing the error. ``attachments`` is the list of non-empty
        ``file_path`` values reported by ``list_attachments`` for the chat.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import tempfile

    log = logging.getLogger(__name__)

    # Fallback: create a minimal plan if none exists
    if not plan:
        plan = "Course plan is empty."
    if sources is None:
        sources = []

    try:
        zip_path = generate_course_zip(plan, sources)
    except Exception as e:
        log.exception("Course package generation failed")
        err_msg = (
            "An error occurred while generating the course package. Please check your API keys or input.\n"
            f"(Error: {e})"
        )
        # A unique file per failure avoids concurrent requests overwriting one
        # shared path; explicit UTF-8 keeps non-ASCII error text from making
        # the error handler itself fail.
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            prefix="course_package_error_",
            suffix=".txt",
            delete=False,
        ) as f:
            f.write(err_msg)
        zip_path = f.name

    attachments = []
    if chat_key:
        # Recording and listing attachments are best-effort: a failure here
        # must not hide the generated file from the user, but it is logged.
        try:
            add_attachment(chat_key, zip_path, os.path.basename(zip_path))
        except Exception:
            log.exception("Could not record attachment for chat %s", chat_key)
        try:
            records = list_attachments(chat_key)
            attachments = [
                path for att in records if (path := att.get("file_path"))
            ]
        except Exception:
            log.exception("Could not list attachments for chat %s", chat_key)
            attachments = []
    return zip_path, attachments

with gr.Blocks() as demo:
    gr.Markdown(
        """# Course Creator Agent
Chat with the assistant to brainstorm your course idea. Use the panel on the left to manage multiple chat sessions (create, rename, delete). You can ask the assistant to search the internet directly in the chat. When you're ready, click **Finalize Outline** to generate a course plan and a Word document. Then generate the final course package (ZIP)."""
    )
    # State holders passed between the event handlers below.
    # new_chat() is evaluated once, here, while the UI is being built; its
    # result becomes the initial value of the chat-key state.
    state_chat_key = gr.State(new_chat())
    state_chat_history = gr.State([])
    state_chat_pairs = gr.State([])
    state_sources = gr.State([])
    state_plan = gr.State("")
    state_resource_cache = gr.State({})
    
    # Define layout with a sidebar for sessions and a main panel for chat
    with gr.Row():
        # Sidebar: manage chat sessions. Labels are kept separate from the
        # underlying chat keys: each option displays the chat title with a
        # short portion of its key, but the actual value is the chat key
        # itself. This makes renaming and deleting sessions more reliable.
        with gr.Column(scale=1, min_width=220):
            # Session selection uses a Radio component. Each choice is a tuple
            # (label, value) where label is the human-readable title and value
            # is the underlying chat key. The choices start empty and are
            # filled in by refresh_sessions once the page loads.
            session_picker = gr.Radio(label="Your Chats", choices=[], value=None, interactive=True)
            new_chat_btn = gr.Button("New Chat")
            rename_input = gr.Textbox(label="Rename chat", placeholder="New title", lines=1)
            rename_btn = gr.Button("Rename")
            delete_btn = gr.Button("Delete Chat")
        # Main panel: chat, outline and generation actions
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Conversation", height=400)
            msg_input = gr.Textbox(
                label="Your message",
                placeholder="Type your message and press Enter",
                lines=1,
            )
            finalize_btn = gr.Button("Finalize Outline")
            # Read-only view of the outline text produced by "Finalize Outline".
            plan_output = gr.Textbox(label="Course outline", interactive=False)
            doc_output = gr.File(label="Course outline (Word)")
            # Display any files generated during the chat session. This component
            # will show multiple attachments and allow downloading them.
            attachments_output = gr.File(label="Attachments", file_count="multiple")
            generate_btn = gr.Button("Generate Course Package")
            # Receives the path returned by generate_package.
            file_output = gr.File(label="course.zip")

    # Helper to refresh the sidebar session list. Returns an update for session_picker.
    def refresh_sessions():
        """Build the sidebar choices from the stored chats.

        Every chat returned by ``list_chats()`` becomes one ``(label, value)``
        choice: the label is the chat title (``"Untitled"`` when missing or
        empty) followed by the first eight characters of the chat key in
        parentheses, and the value is the full chat key. The key held in
        ``state_chat_key.value`` is pre-selected when it appears among the
        chats; otherwise the first chat is selected, or nothing when there
        are no chats at all.

        Returns:
            The ``gr.update(...)`` carrying the new ``choices`` and ``value``
            for the ``session_picker`` Radio.
        """
        stored_chats = list_chats()
        active_key = state_chat_key.value
        choices: list[tuple[str, str]] = []
        for chat_record in stored_chats:
            display_title = chat_record.get("title") or "Untitled"
            chat_id = chat_record.get("key")
            choices.append((f"{display_title} ({chat_id[:8]})", chat_id))
        # Keep the active chat selected if it still exists in the list.
        matches = [value for _, value in choices if value == active_key]
        chosen = matches[-1] if matches else None
        # Active chat gone (e.g. after deletion): fall back to the first entry.
        if choices and not chosen:
            chosen = choices[0][1]
        return gr.update(choices=choices, value=chosen)

    # Load the selected chat into memory and return display pairs/history
    def load_session(selected_key):
        """Load a chat session when selected in the sidebar.

        Fetches the stored messages with ``load_chat`` and rebuilds both the
        role/content history and the (user, assistant) pairs shown in the
        chatbot. Sources, plan and resource cache are reset to empty values,
        and the chat's attachment paths are fetched with ``list_attachments``.

        Args:
            selected_key: The chat key to load. If falsy, empty values are
                returned and no chat is loaded.

        Returns:
            Tuple of (chatbot_pairs, chat_history, chat_pairs, sources, plan,
            resource_cache, attachments): one value for each of the seven
            output components this handler is wired to.
        """
        if not selected_key:
            # Every binding of this handler lists seven outputs, so the empty
            # case must also return seven values (the last one being the
            # attachment list).
            return [], [], [], [], "", {}, []
        # Update the global state for the current chat key
        # NOTE(review): this mutates the State component's stored value rather
        # than returning a new value through an output; confirm that handlers
        # receiving state_chat_key as an input actually see this change.
        state_chat_key.value = selected_key
        msgs = load_chat(selected_key)
        history = []
        pairs = []
        pending = []
        for msg in msgs:
            role = msg["role"]
            content = msg["content"]
            history.append({"role": role, "content": content})
            if role == "user":
                # A user message opens a new pair awaiting its reply.
                pending = [content, ""]
            elif pending:
                # The next non-user message closes the open pair. A user
                # message that never gets a reply is not added to pairs.
                pending[1] = content
                pairs.append(tuple(pending))
                pending = []
        # Load any previously generated attachments for this chat
        try:
            attachment_records = list_attachments(selected_key)
            attachments = [att.get("file_path") for att in attachment_records if att.get("file_path")]
        except Exception:
            # Best-effort: a failing attachment lookup must not block loading
            # the conversation itself.
            attachments = []
        return pairs, history, pairs, [], "", {}, attachments

    # Create a new chat session and return the new key
    def handle_new_chat():
        """Create a new chat session via ``new_chat`` and return its key."""
        return new_chat()

    # Rename the current chat session
    def handle_rename(chat_key, new_title):
        """Rename a chat and return the value that clears the rename box.

        ``rename_chat`` is only called when both the chat key and the new
        title are truthy. The return value is always an empty string, which
        the rename button binding writes back into ``rename_input``.
        """
        cleared_text = ""
        if not (chat_key and new_title):
            return cleared_text
        rename_chat(chat_key, new_title)
        return cleared_text

    # Delete the current chat session and return a new key to switch to
    def handle_delete(chat_key):
        """Delete a chat and return the key of the chat to switch to.

        ``delete_chat`` is called only for a truthy key. Afterwards the first
        chat reported by ``list_chats`` is chosen; when no chats remain, a new
        one is created with ``new_chat`` and its key is returned instead.
        """
        if chat_key:
            delete_chat(chat_key)
        remaining = list_chats()
        return remaining[0]["key"] if remaining else new_chat()

    # Initialize session list on load
    demo.load(
        lambda: refresh_sessions(),
        None,
        [session_picker],
    )
    # When a session is selected, load it along with its attachments.
    # NOTE(review): state_chat_key is not among the outputs here; the selected
    # key is only stored by load_session assigning state_chat_key.value.
    # Confirm that later handlers taking state_chat_key as input pick it up.
    session_picker.change(
        load_session,
        inputs=session_picker,
        outputs=[chatbot, state_chat_history, state_chat_pairs, state_sources, state_plan, state_resource_cache, attachments_output],
    )
    # New chat button: create the chat, rebuild the sidebar, then load the
    # (empty) new chat into the main panel.
    new_chat_btn.click(
        handle_new_chat,
        inputs=None,
        outputs=state_chat_key,
    ).then(
        lambda: refresh_sessions(),
        None,
        [session_picker],
    ).then(
        load_session,
        inputs=state_chat_key,
        outputs=[chatbot, state_chat_history, state_chat_pairs, state_sources, state_plan, state_resource_cache, attachments_output],
    )
    # Rename button: handle_rename returns "" which clears the rename box,
    # then the sidebar is rebuilt so the new title shows up.
    rename_btn.click(
        handle_rename,
        inputs=[state_chat_key, rename_input],
        outputs=rename_input,
    ).then(
        lambda: refresh_sessions(),
        None,
        [session_picker],
    )
    # Delete button: handle_delete returns the key to switch to, the sidebar
    # is rebuilt, and that chat is loaded into the main panel.
    delete_btn.click(
        handle_delete,
        inputs=state_chat_key,
        outputs=state_chat_key,
    ).then(
        lambda: refresh_sessions(),
        None,
        [session_picker],
    ).then(
        load_session,
        inputs=state_chat_key,
        outputs=[chatbot, state_chat_history, state_chat_pairs, state_sources, state_plan, state_resource_cache, attachments_output],
    )
    # Chat submission: include chat_key for persistence
    msg_input.submit(
        chat,
        inputs=[msg_input, state_chat_history, state_chat_pairs, state_sources, state_plan, state_resource_cache, state_chat_key],
        outputs=[chatbot, state_chat_history, state_chat_pairs, state_sources, state_plan, state_resource_cache],
    )
    # Finalise outline and produce Word doc, recording the doc as an attachment
    finalize_btn.click(
        finalize_and_doc,
        inputs=[state_chat_history, state_chat_pairs, state_sources, state_plan, state_chat_key],
        outputs=[plan_output, state_chat_history, state_chat_pairs, state_sources, state_plan, doc_output, attachments_output],
    )
    # Generate course package (zip) and record it as an attachment
    generate_btn.click(
        generate_package,
        inputs=[state_plan, state_sources, state_chat_key],
        outputs=[file_output, attachments_output],
    )
    # Start the app.
    # NOTE(review): launch() is called inside the `with gr.Blocks()` body and
    # without a __main__ guard; confirm both are intended.
    demo.launch()