"""Gradio front end for the Course Creator agent.

The app lets a user brainstorm a course idea in a chat, pull in web resources
(by pasting a URL or asking the assistant to search), finalise a course
outline, and generate a downloadable course package. Chat messages and
generated files are persisted through the helpers imported from ``db``.
"""

import json
import logging
import os
import re

import gradio as gr
import openai

# DB helpers. list_chats, rename_chat and delete_chat support persistent chat
# sessions and management actions; the resource helpers allow fetching cached
# resources if needed later.
from db import (
    get_resource,
    upsert_resource,
    list_resources,
    new_chat,
    append_message,
    load_chat,
    soft_delete_message,
    list_chats,
    rename_chat,
    delete_chat,
    add_attachment,
    list_attachments,
)
# Docx utility used to generate Word documents for course outlines.
from docx_utils import outline_to_docx
from generators import generate_course_zip
from planner import plan_course
from searcher import web_search, fetch_and_extract, get_youtube_transcript

logger = logging.getLogger(__name__)

# System prompt guiding the assistant's behaviour during brainstorming
SYSTEM_PROMPT = (
    "You are a helpful course planning assistant. Conduct brainstorming with the user "
    "about their course idea. Offer suggestions, ask clarifying questions, and capture their requirements. "
    "When the user feels ready, they will click 'Finalize Outline' to create a course plan using all prior "
    "conversation and gathered resources."
)

# Phrases that turn a chat message into a web-search request. Order matters:
# the first phrase that prefixes the message is stripped to form the query.
SEARCH_TRIGGERS = (
    "search",
    "internet search",
    "web search",
    "find articles",
    "find 5 articles",
    "find five articles",
    "find resources",
)

# Triggers must start at a word boundary so that e.g. "research" does not
# count as a request to "search".
_SEARCH_TRIGGER_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(trigger) for trigger in SEARCH_TRIGGERS) + r")"
)

# Queries that carry no topic of their own; the topic is then taken from the
# most recent non-search user message.
GENERIC_QUERIES = frozenset(
    {
        "",
        "the internet",
        "internet",
        "relevant articles",
        "articles",
        "5 articles",
        "5 relevant articles",
    }
)

# Domains passed to web_search so that reputable sources are preferred.
ALLOWED_DOMAINS = (
    ".edu",
    ".org",
    ".gov",
    "arxiv.org",
    "kdnuggets.com",
    "towardsdatascience.com",
    "datacamp.com",
    "medium.com",
)


def _chat_completion(messages, default_max_tokens):
    """Send ``messages`` to the OpenAI chat API and return the reply text.

    Model, temperature and token limit are read from the ``OPENAI_MODEL``,
    ``TEMPERATURE`` and ``MAX_OUTPUT_TOKENS`` environment variables. Both the
    >=1.0 SDK (``openai.OpenAI``) and the legacy SDK are supported.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        default_max_tokens: Token limit used when ``MAX_OUTPUT_TOKENS`` is unset.

    Returns:
        The content of the first choice (empty string if the API returned none).

    Raises:
        ValueError: If neither ``OPENAI_API_KEY`` nor ``COURSECREATOR_API_KEY``
            is set.
        Exception: Whatever the SDK raises when both request attempts fail.
    """
    model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
    temperature = float(os.getenv("TEMPERATURE", "0.7"))
    max_tokens = int(os.getenv("MAX_OUTPUT_TOKENS", str(default_max_tokens)))
    # COURSECREATOR_API_KEY is accepted as an alternative secret name.
    api_key = os.getenv("OPENAI_API_KEY") or os.getenv("COURSECREATOR_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY or COURSECREATOR_API_KEY is not set")

    request = {"model": model, "messages": messages, "temperature": temperature}
    new_sdk = hasattr(openai, "OpenAI")
    if new_sdk:
        create = openai.OpenAI(api_key=api_key).chat.completions.create
    else:
        openai.api_key = api_key
        create = openai.ChatCompletion.create

    try:
        response = create(max_tokens=max_tokens, **request)
    except Exception:
        # Some newer models reject max_tokens and require max_completion_tokens.
        response = create(max_completion_tokens=max_tokens, **request)

    if new_sdk:
        content = response.choices[0].message.content
    else:
        content = response["choices"][0]["message"]["content"]
    return content or ""


def _persist_message(chat_key, role, content):
    """Best-effort write of one chat message to the database."""
    if not chat_key:
        return
    try:
        append_message(chat_key, role, content)
    except Exception:
        # Persistence is optional; the conversation continues without it.
        logger.warning("Could not persist %s message for chat %s", role, chat_key, exc_info=True)


def _attachment_paths(chat_key):
    """Return the file paths of all attachments recorded for ``chat_key``."""
    if not chat_key:
        return []
    try:
        records = list_attachments(chat_key)
        return [att.get("file_path") for att in records if att.get("file_path")]
    except Exception:
        logger.warning("Could not list attachments for chat %s", chat_key, exc_info=True)
        return []


def _first_url(text):
    """Return the first whitespace-delimited http(s) URL in ``text``, or None."""
    for part in text.split():
        if part.startswith(("http://", "https://")):
            return part
    return None


def _summarise_url(url):
    """Fetch ``url`` (or its YouTube transcript) and return a summary reply."""
    try:
        if "youtube.com" in url or "youtu.be" in url:
            try:
                page_content = get_youtube_transcript(url) or ""
            except Exception:
                page_content = ""
        else:
            record = fetch_and_extract(url)
            page_content = record.get("excerpt", "") if record else ""

        if not page_content:
            return "I couldn't extract content from that page."

        try:
            summary_messages = [
                {
                    "role": "system",
                    "content": "You are a helpful assistant. Summarize the given content in a concise and clear way.",
                },
                # Cap the prompt size; only the first 8000 characters are summarised.
                {"role": "user", "content": page_content[:8000]},
            ]
            return _chat_completion(summary_messages, default_max_tokens=512)
        except Exception as e:
            return (
                "An error occurred while summarizing the page content. Please ensure your OpenAI API key is configured.\n"
                f"(Error: {e})"
            )
    except Exception as e:
        return (
            "An error occurred while extracting the web page. Please ensure your search API key is configured.\n"
            f"(Error: {e})"
        )


def _mentions_search_trigger(lower_text):
    """Return True if ``lower_text`` contains one of SEARCH_TRIGGERS at a word start."""
    return _SEARCH_TRIGGER_RE.search(lower_text) is not None


def _is_search_request(lower_msg):
    """Return True if the (lower-cased) message asks for a web search."""
    if _mentions_search_trigger(lower_msg):
        return True
    return "find" in lower_msg and ("article" in lower_msg or "resource" in lower_msg)


def _build_search_query(user_message, chat_history):
    """Derive the search query from a search request.

    The leading trigger phrase is removed, then a quoted phrase or the text
    after "about"/"on" is preferred, and trailing instructions such as
    "provide summaries" are dropped. If nothing topical remains, the most
    recent user message that was not itself a search request is used.
    """
    stripped = user_message.strip()
    lower_msg = stripped.lower()

    query = stripped
    for trigger in SEARCH_TRIGGERS:
        if lower_msg.startswith(trigger):
            query = stripped[len(trigger):].strip()
            break
    else:
        if lower_msg.startswith("find"):
            # Handles forms like "find 3 articles ..." not listed as triggers.
            pattern = r"^find\s+(?:\d+\s+)?(?:articles?|resources?)\s*"
            query = re.sub(pattern, "", stripped, flags=re.IGNORECASE).strip()

    # Quotes must sit on word boundaries so apostrophes ("python's") are not
    # mistaken for quotation marks.
    quoted = re.search(r"(?<!\w)([\"'])(.+?)\1(?!\w)", query)
    if quoted:
        query = quoted.group(2).strip()
    else:
        topic = re.search(r"\b(?:about|on)\s+([^.,;!?]+)", query, flags=re.IGNORECASE)
        if topic:
            query = topic.group(1).strip()

    # Discard anything after a directive word; keep the query if nothing is left.
    query = (
        re.split(
            r"\b(?:summarize|summaries|provide|examples|use cases|case studies)\b",
            query,
            maxsplit=1,
            flags=re.IGNORECASE,
        )[0].strip()
        or query
    )

    if query.lower() not in GENERIC_QUERIES:
        return query

    # Generic request ("search the internet"): reuse the conversation topic.
    for past in reversed(chat_history):
        if past.get("role") != "user":
            continue
        past_text = past.get("content", "")
        if not _mentions_search_trigger(past_text.lower()) and past_text.strip():
            return past_text.strip()
    return stripped


def _search_and_summarise(query, sources, resource_cache):
    """Run a web search for ``query`` and return a reply listing summarised hits.

    ``sources`` gains one ``{"title", "url"}`` entry per newly fetched page and
    ``resource_cache`` gains the raw search results keyed by the lower-cased
    query; both are mutated in place.
    """
    try:
        query_key = query.lower()
        if query_key in resource_cache:
            search_results = resource_cache[query_key]
        else:
            search_results = web_search(query, max_results=5, allowed_domains=list(ALLOWED_DOMAINS))
            resource_cache[query_key] = search_results

        summaries = []
        existing_urls = {src.get("url") for src in sources if isinstance(src, dict) and src.get("url")}
        # Each result is expected to be a dict with 'url' and 'title'; anything else is skipped.
        for item in search_results or []:
            if not isinstance(item, dict):
                continue
            url = item.get("url")
            title = item.get("title", url)
            if not url or url in existing_urls:
                continue
            record = fetch_and_extract(url)
            if not record:
                continue
            sources.append({"title": record.get("title", title), "url": record.get("url", url)})
            existing_urls.add(url)

            try:
                summary_messages = [
                    {
                        "role": "system",
                        "content": "You are a helpful assistant. Summarize the following article excerpt in one paragraph.",
                    },
                    {"role": "user", "content": record.get("excerpt", "")[:3000]},
                ]
                summary_text = _chat_completion(summary_messages, default_max_tokens=256)
            except Exception:
                # If summarisation fails, list the title and URL without a summary.
                summary_text = ""

            line = f"**{title}** ({url})"
            if summary_text:
                line += f"\n{summary_text.strip()}"
            summaries.append(line)

        if summaries:
            return "Here are some articles I found and summarised:\n\n" + "\n\n".join(summaries)
        return "I couldn't fetch or summarise any credible articles for that query."
    except Exception as e:
        return (
            "An error occurred during web search and summarisation. Please ensure your API keys are configured.\n"
            f"(Error: {e})"
        )


def _brainstorm_reply(chat_history):
    """Return the assistant's reply to the conversation so far."""
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + chat_history
    try:
        return _chat_completion(messages, default_max_tokens=1024)
    except Exception as e:
        # When the API call fails (e.g. missing API key), reply with the error.
        return (
            "An error occurred while processing your message. "
            "Please ensure your OpenAI API key is configured in the Space secrets.\n"
            f"(Error: {e})"
        )


def chat(user_message, chat_history, chat_pairs, sources, plan, resource_cache, chat_key):
    """Handle a user chat message and return updated chat state.

    The message is answered in one of three ways: if it contains a URL the
    page (or YouTube transcript) is summarised; if it asks for a search a web
    search is run and the hits are summarised; otherwise the language model
    replies. User and assistant messages are persisted under ``chat_key``.

    Args:
        user_message: The latest user input from the textbox.
        chat_history: List of message dicts representing the full conversation.
        chat_pairs: List of (user, assistant) tuples for display in the Chatbot.
        sources: List of collected resource dicts with 'title' and 'url'.
        plan: The current course plan text (passed through unchanged).
        resource_cache: Dictionary caching search results by query.
        chat_key: The unique key identifying the current chat session.

    Returns:
        Tuple of updated (chat_pairs, chat_history, chat_pairs, sources, plan,
        resource_cache).
    """
    chat_history = [] if chat_history is None else chat_history
    chat_pairs = [] if chat_pairs is None else chat_pairs
    sources = [] if sources is None else sources
    resource_cache = {} if resource_cache is None else resource_cache

    # Nothing to do for an empty submission.
    if not user_message or not user_message.strip():
        return chat_pairs, chat_history, chat_pairs, sources, plan, resource_cache

    _persist_message(chat_key, "user", user_message)
    chat_history.append({"role": "user", "content": user_message})

    url = _first_url(user_message)
    if url:
        assistant_reply = _summarise_url(url)
    elif _is_search_request(user_message.strip().lower()):
        query = _build_search_query(user_message, chat_history)
        assistant_reply = _search_and_summarise(query, sources, resource_cache)
    else:
        assistant_reply = _brainstorm_reply(chat_history)

    chat_history.append({"role": "assistant", "content": assistant_reply})
    _persist_message(chat_key, "assistant", assistant_reply)
    chat_pairs.append((user_message, assistant_reply))

    # The Chatbot displays (user, assistant) tuples, so chat_pairs is the first output.
    return chat_pairs, chat_history, chat_pairs, sources, plan, resource_cache


def run_search(query, chat_history, chat_pairs, sources, plan, num_results=5, domain_filter=""):
    """Execute a web search and add the results to the sources list.

    Args:
        query: Search query text.
        chat_history: Conversation messages (passed through unchanged).
        chat_pairs: Display pairs (passed through unchanged).
        sources: List of collected resources; results are appended to it.
        plan: Current plan text (passed through unchanged).
        num_results: Maximum number of results to request.
        domain_filter: Optional domain restriction. NOTE(review): treated here
            as a comma-separated list of domains - confirm against callers.

    Returns:
        Tuple of (summary, chat_history, chat_pairs, sources, plan) where
        ``summary`` has one "title - url" line per result, or an error message.
    """
    search_kwargs = {"max_results": num_results}
    allowed = [d.strip() for d in (domain_filter or "").split(",") if d.strip()]
    if allowed:
        search_kwargs["allowed_domains"] = allowed

    try:
        results = web_search(query, **search_kwargs)
    except Exception as e:
        # Search errors (e.g. missing API key) are reported to the user.
        summary = (
            "An error occurred during web search. Please ensure your search API key is configured.\n"
            f"(Error: {e})"
        )
        return summary, chat_history, chat_pairs, sources or [], plan

    # Normalise the results: the backend may return a dict with a 'results' list.
    if isinstance(results, dict):
        normalized_results = results.get("results", [])
    elif isinstance(results, list):
        normalized_results = results
    else:
        normalized_results = []

    if sources is None:
        sources = []
    sources.extend(normalized_results)

    summary = "\n".join(
        f"{r.get('title', '')} - {r.get('url', '')}"
        for r in normalized_results
        if isinstance(r, dict)
    )
    return summary, chat_history, chat_pairs, sources, plan


def finalize_outline(chat_history, chat_pairs, sources, plan):
    """Generate a course outline from the conversation and sources (text only).

    Returns:
        Tuple of (plan_text, chat_history, chat_pairs, sources, plan_text). On
        planner failure ``plan_text`` is an error message.
    """
    if chat_history is None:
        chat_history = []
    if sources is None:
        sources = []
    try:
        plan_text = plan_course(chat_history, sources)
    except Exception as e:
        plan_text = (
            "An error occurred while generating the course outline. Please ensure your API keys are configured.\n"
            f"(Error: {e})"
        )
    return plan_text, chat_history, chat_pairs, sources, plan_text


def finalize_and_doc(chat_history, chat_pairs, sources, plan, chat_key):
    """Generate a course outline plus JSON and Word files for download.

    The planner output is written to ``/tmp/course_outline.json`` and embedded
    as text in a Word document via ``outline_to_docx``. Both files are recorded
    as attachments of ``chat_key`` when a key is given.

    Args:
        chat_history: Conversation messages list.
        chat_pairs: Display pairs list.
        sources: List of collected resources (dictionaries with title and url).
        plan: The existing plan text (ignored; replaced by the new plan).
        chat_key: The key identifying the current chat, used for attachments.

    Returns:
        Tuple of (plan_text, chat_history, chat_pairs, sources, plan_text,
        doc_path, attachments).
    """
    if chat_history is None:
        chat_history = []
    if sources is None:
        sources = []

    try:
        plan_text = plan_course(chat_history, sources)
    except Exception as e:
        # json.dumps escapes the error details safely.
        plan_text = json.dumps(
            {
                "error": "An error occurred while generating the course outline.",
                "details": str(e),
            },
            indent=2,
        )

    # Write the outline to a file for download.
    json_path = "/tmp/course_outline.json"
    try:
        with open(json_path, "w") as jf:
            jf.write(plan_text)
    except Exception:
        logger.warning("Could not write %s", json_path, exc_info=True)
        json_path = None

    try:
        doc_path = outline_to_docx("Course Outline", plan_text, references=sources)
    except Exception as e:
        # If DOCX generation fails, hand back a text file describing the error.
        err_msg = (
            "An error occurred while creating the Word document.\n"
            f"(Error: {e})"
        )
        doc_path = "/tmp/outline_error.txt"
        with open(doc_path, "w") as f:
            f.write(err_msg)

    if chat_key:
        try:
            if json_path:
                add_attachment(chat_key, json_path, os.path.basename(json_path))
            if doc_path:
                add_attachment(chat_key, doc_path, os.path.basename(doc_path))
        except Exception:
            logger.warning("Could not record attachments for chat %s", chat_key, exc_info=True)

    attachments = _attachment_paths(chat_key)
    return plan_text, chat_history, chat_pairs, sources, plan_text, doc_path, attachments


def generate_package(plan, sources, chat_key):
    """Generate the final course package zip file and record it as an attachment.

    Returns:
        Tuple of (zip_path, attachments). If generation fails, ``zip_path``
        points to a text file containing the error message instead.
    """
    # Fallback: create a minimal plan if none exists
    if not plan:
        plan = "Course plan is empty."
    if sources is None:
        sources = []
    try:
        zip_path = generate_course_zip(plan, sources)
    except Exception as e:
        err_msg = (
            "An error occurred while generating the course package. Please check your API keys or input.\n"
            f"(Error: {e})"
        )
        zip_path = "/tmp/error.txt"
        with open(zip_path, "w") as f:
            f.write(err_msg)

    if chat_key:
        try:
            add_attachment(chat_key, zip_path, os.path.basename(zip_path))
        except Exception:
            logger.warning("Could not record attachment for chat %s", chat_key, exc_info=True)

    return zip_path, _attachment_paths(chat_key)


with gr.Blocks() as demo:
    gr.Markdown(
        "# Course Creator Agent\n\n"
        "Chat with the assistant to brainstorm your course idea. Use the panel on the left to manage multiple "
        "chat sessions (create, rename, delete). You can ask the assistant to search the internet directly in "
        "the chat. When you're ready, click **Finalize Outline** to generate a course plan and a Word document. "
        "Then generate the final course package (ZIP)."
    )

    # Per-session state. The values given here are only the initial values.
    state_chat_key = gr.State(new_chat())
    state_chat_history = gr.State([])
    state_chat_pairs = gr.State([])
    state_sources = gr.State([])
    state_plan = gr.State("")
    state_resource_cache = gr.State({})

    with gr.Row():
        # Sidebar: manage chat sessions.
        with gr.Column(scale=1, min_width=220):
            # Each choice is a (label, value) tuple: the label is the
            # human-readable title, the value is the underlying chat key.
            session_picker = gr.Radio(label="Your Chats", choices=[], value=None, interactive=True)
            new_chat_btn = gr.Button("New Chat")
            rename_input = gr.Textbox(label="Rename chat", placeholder="New title", lines=1)
            rename_btn = gr.Button("Rename")
            delete_btn = gr.Button("Delete Chat")

        # Main panel: chat, outline and generation actions.
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Conversation", height=400)
            msg_input = gr.Textbox(
                label="Your message",
                placeholder="Type your message and press Enter",
                lines=1,
            )
            finalize_btn = gr.Button("Finalize Outline")
            plan_output = gr.Textbox(label="Course outline", interactive=False)
            doc_output = gr.File(label="Course outline (Word)")
            # Files generated during the chat session, available for download.
            attachments_output = gr.File(label="Attachments", file_count="multiple")
            generate_btn = gr.Button("Generate Course Package")
            file_output = gr.File(label="course.zip")

    def refresh_sessions(current_key=None):
        """Refresh the list of chat sessions shown in the sidebar.

        Each chat is labelled with its title (or "Untitled") followed by the
        first 8 characters of its key; the choice value is the full chat key.

        Args:
            current_key: Key of the active chat, selected when present in the
                list. Defaults to the initial value of ``state_chat_key``.

        Returns:
            A gradio update for the ``session_picker`` component.
        """
        if current_key is None:
            current_key = state_chat_key.value
        options: list[tuple[str, str]] = []
        selected_key = None
        for c in list_chats():
            key = c.get("key")
            if not key:
                continue
            title = c.get("title") or "Untitled"
            options.append((f"{title} ({key[:8]})", key))
            if key == current_key:
                selected_key = key
        # If the current chat is not listed (e.g. after deletion), pick the first one.
        if not selected_key and options:
            selected_key = options[0][1]
        return gr.update(choices=options, value=selected_key)

    def load_session(selected_key):
        """Load a chat session's messages and attachments from the database.

        Sources, plan and resource cache are reset.

        Args:
            selected_key: The chat key to load. If falsy, empty state is returned.

        Returns:
            Tuple of (chatbot_pairs, chat_history, chat_pairs, sources, plan,
            resource_cache, attachments).
        """
        if not selected_key:
            return [], [], [], [], "", {}, []

        history = []
        pairs = []
        pending = []
        for msg in load_chat(selected_key):
            role = msg["role"]
            content = msg["content"]
            history.append({"role": role, "content": content})
            if role == "user":
                pending = [content, ""]
            elif pending:
                # Pair the reply with the user message that preceded it.
                pending[1] = content
                pairs.append(tuple(pending))
                pending = []

        return pairs, history, pairs, [], "", {}, _attachment_paths(selected_key)

    def select_session(selected_key, current_key):
        """Make the picked chat the active one and load it.

        Returns the new active chat key followed by ``load_session``'s outputs.
        The active key is kept unchanged when nothing is selected.
        """
        return (selected_key or current_key, *load_session(selected_key))

    def handle_new_chat():
        """Create a new chat session and return its key."""
        return new_chat()

    def handle_rename(chat_key, new_title):
        """Rename the current chat; returns "" to clear the rename textbox."""
        if chat_key and new_title:
            rename_chat(chat_key, new_title)
        return ""

    def handle_delete(chat_key):
        """Delete the current chat and return the key of the chat to switch to."""
        if chat_key:
            delete_chat(chat_key)
        chats = list_chats()
        if chats:
            return chats[0]["key"]
        return new_chat()

    session_outputs = [
        chatbot,
        state_chat_history,
        state_chat_pairs,
        state_sources,
        state_plan,
        state_resource_cache,
        attachments_output,
    ]

    # Initialize session list on load
    demo.load(refresh_sessions, inputs=[state_chat_key], outputs=[session_picker])

    # When a session is selected, make it active and load it with its attachments
    session_picker.change(
        select_session,
        inputs=[session_picker, state_chat_key],
        outputs=[state_chat_key] + session_outputs,
    )

    # New chat button
    new_chat_btn.click(
        handle_new_chat,
        inputs=None,
        outputs=state_chat_key,
    ).then(
        refresh_sessions,
        inputs=[state_chat_key],
        outputs=[session_picker],
    ).then(
        load_session,
        inputs=state_chat_key,
        outputs=session_outputs,
    )

    # Rename button
    rename_btn.click(
        handle_rename,
        inputs=[state_chat_key, rename_input],
        outputs=rename_input,
    ).then(
        refresh_sessions,
        inputs=[state_chat_key],
        outputs=[session_picker],
    )

    # Delete button
    delete_btn.click(
        handle_delete,
        inputs=state_chat_key,
        outputs=state_chat_key,
    ).then(
        refresh_sessions,
        inputs=[state_chat_key],
        outputs=[session_picker],
    ).then(
        load_session,
        inputs=state_chat_key,
        outputs=session_outputs,
    )

    # Chat submission: include chat_key for persistence
    msg_input.submit(
        chat,
        inputs=[
            msg_input,
            state_chat_history,
            state_chat_pairs,
            state_sources,
            state_plan,
            state_resource_cache,
            state_chat_key,
        ],
        outputs=[
            chatbot,
            state_chat_history,
            state_chat_pairs,
            state_sources,
            state_plan,
            state_resource_cache,
        ],
    )

    # Finalise outline and produce Word doc, recording the doc as an attachment
    finalize_btn.click(
        finalize_and_doc,
        inputs=[state_chat_history, state_chat_pairs, state_sources, state_plan, state_chat_key],
        outputs=[
            plan_output,
            state_chat_history,
            state_chat_pairs,
            state_sources,
            state_plan,
            doc_output,
            attachments_output,
        ],
    )

    # Generate course package (zip) and record it as an attachment
    generate_btn.click(
        generate_package,
        inputs=[state_plan, state_sources, state_chat_key],
        outputs=[file_output, attachments_output],
    )

demo.launch()