# NOTE: extraction artifact removed here — the exported file carried a
# file-size banner, git blame commit hashes, and a dump of display line
# numbers that are not part of the Python source.
# Standard Libraries
import os
import json
import nbformat
import shutil
import sys
import subprocess
import re
from pathlib import Path
from typing import Any, Dict, Optional
#from IPython.display import Image, display
# LangChain and OpenAI related imports
from langchain_openai import ChatOpenAI # LLM for invoking prompts
from langchain.schema import SystemMessage
#from langchain.chat_models import ChatOpenAI
import papermill as pm
from datetime import datetime
from typing import Any, Dict
from typing_extensions import TypedDict
from workflow_utils import (
extract_json_from_response,
get_notebook_blueprint,
is_plot_code,
stage_dependencies,
build_plot_insight_agent_executor)
# Fetch secrets from Hugging Face Spaces.
# NOTE: os.environ[...] raises KeyError at import time if either variable is unset.
api_key = os.environ['AZURE_OPENAI_KEY']
endpoint = os.environ['AZURE_OPENAI_ENDPOINT']
# Setting up LLM
## Create a ChatOpenAI model instance using LangChain, pointed at the endpoint
## above. This module-level `model` is shared by every workflow node below.
model = ChatOpenAI(
    openai_api_base= endpoint,
    openai_api_key= api_key,
    model="gpt-4o",
    streaming=False # Explicitly disabling streaming so .invoke returns one complete message
)
#**Case Study Generator - Prod**
# **Interactive Case Study State**
class InteractiveCaseStudyState(TypedDict):
    """
    Shared state for the interactive, human-in-the-loop Case Study generation workflow.
    This state dictionary is passed and updated across all workflow nodes.
    Each node reads from and writes to this state, and human review/enhancer nodes
    use it to store feedback, approval flags, and intermediate artifacts.
    """
    # ─── User Inputs ────────────────────────────────────────────────────────────
    domain: str
    # Domain of the case study (e.g., "ml", "dl", "nlp", "cv", "rag", "genai", etc.)
    topics_and_subtopics: str
    # User-specified weekly topics and subtopics for notebook content focus.
    problem_statement: str
    # The core business problem or use case the notebook should solve.
    dataset_type: str
    # Type of dataset uploaded. Accepted values: "csv", "pdf", "images", "json".
    # Drives downstream logic for loading, preprocessing, and code generation.
    dataset_file_path: str
    # Automatically populated backend path to the uploaded dataset
    # (CSV, PDF folder, image folder, etc.). Not entered by the user.
    data_dictionary: Dict[str, str]
    # Column→description mapping for structured data.
    # For PDFs or images, may include folder structure or document descriptions.
    additional_instructions: str
    # Optional user guidance—preprocessing steps, desired models, notebook style, etc.
    # If omitted, the LLM uses sensible defaults for the chosen domain.
    # ─── Intermediate Artifacts ────────────────────────────────────────────────
    blueprint: str
    # Text blueprint selected based on `domain`, used to guide Think-Sections.
    plan: Dict[str, Any]
    # Notebook outline (JSON) generated by the Think-Sections node.
    raw_notebook: Dict[str, Any]
    # Notebook skeleton with markdown & code cells but no outputs.
    executed_notebook: Dict[str, Any]
    # Notebook JSON after Papermill execution—includes real outputs.
    # NOTE(review): execute_notebook_node stores the result of nbformat.writes(),
    # which is a string, not a dict — confirm and align this annotation.
    annotated_notebook: Dict[str, Any]
    # Executed notebook interleaved with Observations cells.
    final_notebook: Dict[str, Any]
    # Final notebook JSON (execution + insights), ready for download.
    # ─── Execution Error Capture ──────────────────────────────────────────────
    execution_error: str
    # If execution failed, the captured exception text from Papermill.
    # ─── Human-in-the-Loop Flags & Feedback ─────────────────────────────────────
    _plan_approved: Optional[bool]
    # False by default. Set True when the user approves the plan.
    plan_feedback: str
    # If _plan_approved is False, the user's feedback on how to improve the plan.
    _notebook_approved: Optional[bool]
    # False by default. Set True when the user approves the code skeleton.
    notebook_feedback: str
    # If _notebook_approved is False, the user's feedback on the notebook before execution.
    _execution_approved: Optional[bool]
    # False by default. Set True when the user approves the executed notebook.
    execution_feedback: str
    # If _execution_approved is False, the user's feedback or notes about runtime errors.
## **Think-Sections**
def think_sections_node(state):
    """
    Think‑Sections Node: generate a structured JSON notebook outline.

    Reads from ``state``:
        domain, topics_and_subtopics, problem_statement, dataset_type,
        dataset_file_path, and (optionally) data_dictionary and
        additional_instructions.

    Writes to ``state``:
        blueprint -- domain blueprint text from get_notebook_blueprint().
        plan      -- parsed JSON outline produced by the LLM.

    Returns:
        The updated state dict.
    """
    # (Fix: dropped the redundant function-local `import json` and
    # `from langchain.schema import SystemMessage` — both names are already
    # imported at module level and the local copies merely shadowed them.)
    # 1. Extract inputs from state; optional fields fall back to empty
    #    defaults, and `or ""` guards against a caller storing None.
    domain = state["domain"]
    topics = state["topics_and_subtopics"]
    problem = state["problem_statement"]
    dataset_type = state["dataset_type"]
    dataset_path = state["dataset_file_path"]
    data_dict = state.get("data_dictionary", {})
    instructions = state.get("additional_instructions", "") or ""
    # 2. Get domain-specific blueprint and persist it for downstream nodes.
    blueprint = get_notebook_blueprint(domain)
    state["blueprint"] = blueprint
    # 3. Build the prompt (wording kept verbatim — it drives the LLM's output format).
    prompt = f"""
You are a world-class AI curriculum architect and notebook planner.
## Domain Blueprint
Below is the domain-specific blueprint for **{domain}**:
{blueprint}
---
## Task
Design a structured, beginner-friendly, and pedagogically sound **notebook plan** in JSON format.
This notebook will help users solve a real-world problem using their data and provided topics.
---
## User Inputs
**Domain**: {domain}
**Topics and Subtopics**:
{topics}
**Problem Statement**:
{problem}
**Dataset Type**:
{dataset_type}
**Dataset Location**:
{dataset_path}
**Data Dictionary**:
{json.dumps(data_dict, indent=2)}
**Additional Instructions**:
{instructions if instructions.strip() else "None provided"}
---
## Output Format (JSON Only)
Return a JSON object with a top-level key `"sections"` — an **ordered list** of notebook sections.
Each section must follow the rules below:
### Section (1st-level)
- Represented as a heading with `#` markdown
- Keys:
- `title`: Section heading
- `type`: `"markdown"`
- `subsections`: List of second-level content blocks
### Subsection (2nd-level)
- Represented using `##` markdown
- Keys:
- `title`: Subsection heading
- `type`: `"markdown"` or `"code"`
- For `markdown`:
- `description`: Markdown block with supporting explanation
- For `code`:
- `code_rules`: List of instructions or rules that must be implemented in code
---
## Constraints to Follow
- Use names from `data_dictionary` or inferred data context when referring to columns or fields.
- Always begin with a section for the **Problem Statement** (with subsections for Business Context and Data Dictionary).
- Align section structure with domain blueprint and user context.
- Respect all user-given topics, problem, and instructions.
- Return **only valid JSON**—no extra markdown or commentary outside the JSON object.
---
Now generate the structured JSON plan for the notebook.
""".strip()
    # 4. Invoke LLM and parse the JSON plan out of its reply
    response = model.invoke([SystemMessage(content=prompt)])
    plan = extract_json_from_response(response.content)
    # 5. Update state
    state["plan"] = plan
    # state["_plan_approved"] = False
    # state["plan_feedback"] = ""
    return state
# def think_sections_node(state):
# """
# Think-Sections Node:
# - Reads user inputs including domain, topics, problem, dataset_file_path,
# data_dictionary, and optional additional_instructions.
# - Retrieves a domain blueprint via get_notebook_blueprint().
# - Prompts the LLM to generate a detailed, text-based notebook plan with
# granular code cell operations, aiming for 70-80 total cells.
# """
# import json
# from langchain.schema import SystemMessage
# # 1. Extract inputs from state
# domain = state["domain"]
# topics = state["topics_and_subtopics"]
# problem = state["problem_statement"]
# dataset_type = state["dataset_type"]
# dataset_path = state["dataset_file_path"]
# data_dict = state.get("data_dictionary", {})
# instructions = state.get("additional_instructions", "")
# # 2. Get domain-specific blueprint
# # This function would now return the text-based blueprint (e.g., EDA_blueprint_text, ML_blueprint_text)
# # as described in our previous discussion, NOT the high-level JSON structure.
# # For demonstration, I'm using a placeholder. In a real system, you'd fetch the actual text.
# blueprint = get_notebook_blueprint(domain)
# state["blueprint"] = blueprint # Store the blueprint for later use if needed
# # 3. Build the prompt
# prompt = f"""
# You are a world-class AI curriculum architect and notebook planner. Your task is to generate a highly detailed,
# step-by-step notebook plan for a user, ensuring it's beginner-friendly and pedagogically sound.
# ## Your Goal
# Your primary goal is to **expand the provided domain blueprint into a complete, operational notebook plan**.
# This plan must meticulously detail every single operation, especially within code cells,
# to guide the user through solving their problem. The final plan should aim for a total of **70 to 80 cells**.
# ---
# ## Domain Blueprint for {domain}
# This is the comprehensive guideline for structuring a notebook in the **{domain}** domain. You must strictly
# adhere to its sections, subsections, and overall flow. For each section, and particularly for each code operation,
# you will expand on the general rule to create highly specific, actionable steps.
# {blueprint}
# ---
# ## User Inputs to Contextualize the Plan
# Here are the specific details provided by the user. You must integrate these details to
# make the plan highly relevant and actionable for their specific case.
# **Domain**: {domain}
# **Topics and Subtopics**: {topics}
# * **CRITICAL**: Only include techniques and operations that fall under these specified weekly topics.
# Do NOT go beyond this scope.
# **Problem Statement**: {problem}
# * **CRITICAL**: Every step and explanation in the plan must directly contribute to addressing this business problem.
# **Dataset Type**: {dataset_type}
# **Dataset Location**: {dataset_path}
# **Data Dictionary**: {json.dumps(data_dict, indent=2)}
# * **CRITICAL**: Refer to specific columns/fields from the Data Dictionary and dataset details
# (e.g., shape, columns, types) when describing operations. Tailor operations to the specific
# characteristics of this dataset.
# **Additional Instructions**: {instructions if instructions.strip() else "None provided"}
# * Integrate any additional instructions provided by the user.
# ---
# ## Output Format (Plain Text Notebook Plan)
# Your output must be a single, continuous text document representing the notebook plan.
# Do NOT output JSON. Follow this strict markdown structure:
# - **First-level headings (`#`):** For main sections (e.g., `# 1. Problem Statement`).
# - **Second-level headings (`##`):** For subsections (e.g., `## Business Context`).
# - **Code Cell Details:** For every operation that requires a code cell, describe it using the format:
# `Code Cell X: <Detailed, prescriptive description of the operation>`
# - `X` should be a sequential number within that specific subsection.
# - The description must be **verbose and explicit**, detailing *what* the user should perform in that cell, *why* (linking to problem/data), and *how* (conceptually, not actual code).
# - **Example:** "Code Cell 1: In this cell, we will load the dataset from '{dataset_path}' using the `pandas.read_csv()` function. This is crucial as it brings our raw data into a DataFrame for initial inspection and subsequent analysis, directly addressing the need to analyze '{dataset_type}' data for the '{problem}' problem."
# ---
# ## Key Constraints for Plan Generation
# * **Granularity:** Expand each conceptual step from the blueprint into multiple granular code cells if needed, aiming for the 70-80 cell target.
# * **Specificity:** Refer to specific columns and data types from the `data_dictionary` and inferred dataset characteristics.
# * **Relevance:** Every operation must be justified by its relevance to the `problem_statement` and the `dataset`.
# * **Scope:** **ABSOLUTELY DO NOT INTRODUCE CONCEPTS, LIBRARIES, OR TECHNIQUES THAT ARE NOT EXPLICITLY MENTIONED OR IMPLICITLY COVERED WITHIN THE `Topics and Subtopics` provided.** This is paramount.
# * **Pedagogical Soundness:** The plan should logically progress, making sense for a learner.
# * **No Actual Code:** Provide detailed *instructions* for the code, not the code itself.
# * **No Commentary:** Do not include any conversational filler or extra markdown outside the specified plan structure.
# Now, generate the detailed notebook plan as a plain text document.
# """.strip()
# # 4. Invoke LLM
# # Assuming 'model' is your configured LLM client
# response = model.invoke([SystemMessage(content=prompt)])
# plan_text = response.content
# # 5. Update state
# state["plan"] = plan_text
# # state["_plan_approved"] = False # Reset approval status
# # state["plan_feedback"] = "" # Clear feedback
# return state
# ## **Review Plan**
# def review_plan_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
# """
# Review‑Plan Node (UI‑driven):
# - Expects that the front‑end will render the JSON plan for user review.
# - The front‑end should display two buttons: 'YES' and 'NO'.
# - If 'NO' is clicked, the front‑end should show a text input for feedback.
# - Stores:
# state['_plan_approved'] = True/False
# state['plan_feedback'] = "" or the user’s feedback.
# """
# # 1) Grab the generated plan
# plan = state.get("plan", {})
# # 2) In a console environment, we’d pretty‑print it; in Streamlit, you’d st.json(plan)
# print("\n===== PROPOSED NOTEBOOK PLAN =====")
# print(json.dumps(plan, indent=2))
# print("==================================\n")
# # 3) UI layer responsibility:
# # Render two buttons: YES / NO.
# # If YES: call this node again with user_action="yes"
# # If NO: call this node again with user_action="no" and then collect feedback.
# # For console fallback, we still allow text input:
# ans = input("Do you approve this plan? (YES/NO): ").strip().lower()
# approved = ans in ("yes", "y")
# state["_plan_approved"] = approved
# if not approved:
# # show feedback prompt
# feedback = input("Please provide feedback to improve the plan:\n")
# state["plan_feedback"] = feedback
# else:
# # clear any old feedback
# state["plan_feedback"] = ""
# return state
## **Enhance Plan**
def enhance_plan_node(state):
    """
    Enhance‑Plan Node: revise the notebook plan using the user's feedback.

    Reads from ``state``:
        plan, plan_feedback, domain, topics_and_subtopics, problem_statement,
        dataset_type, dataset_file_path, data_dictionary, additional_instructions.

    Writes to ``state``:
        plan -- the refined JSON outline returned by the LLM.

    Returns:
        The updated state dict.
    """
    # Artifacts the revision is built from.
    current_plan = state["plan"]
    user_feedback = state.get("plan_feedback", "")
    # Contextual inputs that ground the revised plan.
    domain = state["domain"]
    topics = state["topics_and_subtopics"]
    problem = state["problem_statement"]
    dataset_type = state["dataset_type"]
    dataset_path = state["dataset_file_path"]
    data_dictionary = state.get("data_dictionary", {})
    extra_notes = state.get("additional_instructions", "")
    # Compose the enhancement prompt (wording drives the LLM's output format).
    prompt = f"""
You are an expert AI notebook planner. The user has reviewed the following plan
and given feedback on how to improve it:
---
**Original Plan** (JSON):
{json.dumps(current_plan, indent=2)}
**User Feedback**:
{user_feedback}
---
**Context for Revision**:
- Domain: {domain}
- Topics & Subtopics: {topics}
- Problem Statement: {problem}
- Dataset Type: {dataset_type}
- Dataset Location: {dataset_path}
- Data Dictionary:
{json.dumps(data_dictionary, indent=2)}
- Additional Instructions:
{extra_notes if extra_notes.strip() else "None"}
Please revise the original plan to address the user’s feedback.
- Keep the same JSON structure (top‑level "sections", each with "title", "type", "subsections").
- Adjust section titles, ordering, and code_rules as needed to satisfy the feedback.
- Do not add sections beyond the user’s specified topics.
- Return **only** the updated plan JSON — no extra commentary.
""".strip()
    # Ask the LLM for the revision and store the parsed JSON back on the state.
    llm_reply = model.invoke([SystemMessage(content=prompt)])
    state["plan"] = extract_json_from_response(llm_reply.content)
    # state["_plan_approved"] = False
    # state["plan_feedback"] = ""
    return state
## **Write Code**
def write_code_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
    """
    Write‑Code Node:
    - Reads state["plan"] plus user inputs (dataset_type, dataset_file_path,
      data_dictionary, domain, additional_instructions).
    - Prompts the LLM to generate a flattened Jupyter notebook JSON skeleton (nbformat v4)
      where each task becomes its own cell:
        • A Markdown cell for each section heading (# Section)
        • A Markdown cell for each subsection heading (## Subsection)
        • For `type=="markdown"` subsections: one Markdown cell containing the description
        • For `type=="code"` subsections: one code cell per rule, with inline comments
          explaining each line of code
        • A final Markdown cell `# Insights & Recommendations`
      All code cells must have execution_count=null and outputs=[].
    - Stores the result in state["raw_notebook"].
    - Resets notebook review flags for the UI.
    """
    # 1) Extract inputs (optional fields default to empty values)
    plan = state["plan"]
    dataset_type = state["dataset_type"]
    dataset_path = state["dataset_file_path"]
    data_dict = state.get("data_dictionary", {})
    domain = state["domain"]
    additional_instr = state.get("additional_instructions", "")
    # 2) Build the comprehensive prompt. The exact wording drives the LLM's
    #    output format; plan/context are interpolated as pretty-printed JSON.
    prompt = f"""
You are an expert notebook generator. Generate a Jupyter notebook JSON (nbformat v4)
that implements the following plan exactly and by flattening each plan subsection into individual cells.
Do NOT include any outputs—only code & markdown cells.
PLAN:
{json.dumps(plan, indent=2)}
CONTEXT:
- Domain: {domain}
- Dataset type: {dataset_type}
- Dataset location: {dataset_path}
- Data Dictionary:
{json.dumps(data_dict, indent=2)}
- Additional Instructions:
{additional_instr if additional_instr.strip() else "None provided"}
OUTPUT SPEC:
1. Return only a valid notebook JSON with keys: nbformat, nbformat_minor, metadata, cells.
2. For each section in PLAN:
a. Add a **markdown** cell: `# <Section Title>`
b. For each subsection:
i. Add a **markdown** cell: `## <Subsection Title>`
ii. If subsection `type` is "markdown":
- Add a separate markdown cell with the `description` text.
iii. If subsection `type` is "code":
- For each rule in `code_rules`:
• Create one **code** cell.
• Write only the code implementing that rule.
• Ensure each line of code has an appropriate inline comment explaining its purpose.
3. After all sections, append one **markdown** cell:
`# Insights & Recommendations`
4. Each **code** cell must include:
- `"cell_type": "code"`, `"execution_count": null`, `"metadata": `, `"outputs": []`, `"source": […]`
5. Each **markdown** cell must include:
- `"cell_type": "markdown"`, `"metadata": `, `"source": […]`
Return **only** the complete notebook JSON object.
"""
    # 3) Invoke the model with the prompt as a single system message
    response = model.invoke([SystemMessage(content=prompt)])
    # 4) Parse the notebook JSON out of the reply and store the raw skeleton
    raw_nb = extract_json_from_response(response.content)
    state["raw_notebook"] = raw_nb
    # 5) Reset review flags so the UI will prompt the user next
    # state["_notebook_approved"] = False
    # state["notebook_feedback"] = ""
    return state
## **Review Notebook**
# def review_notebook_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
# """
# Review-Notebook Node:
# - Reads state["raw_notebook"] (the skeleton JSON).
# - Expects the UI to render that notebook for the user.
# - Reads two new state flags set by the UI:
# • state["_notebook_approved"] (bool)
# • state["notebook_feedback"] (str, empty if approved)
# - Returns the updated state for downstream branching.
# """
# # 1) Sanity check
# raw_nb = state.get("raw_notebook")
# if raw_nb is None:
# raise ValueError("No raw_notebook found in state")
# # At this point your Streamlit app should:
# # • Render `raw_nb` as a notebook preview
# # • Present two buttons: “Yes” and “No”
# # • If “No” is clicked, show a text_area for feedback
# # 2) Read back user’s choice from the state (set by the UI)
# approved = state.get("_notebook_approved", False)
# feedback = state.get("notebook_feedback", "").strip()
# # 3) Ensure feedback is only set when rejected
# if approved:
# # clear any spurious feedback
# state["notebook_feedback"] = ""
# else:
# # if user hasn’t provided feedback yet, ensure it’s initialized
# state["notebook_feedback"] = feedback
# # 4) State flags remain as set by the UI.
# # Downstream graph will branch on _notebook_approved.
# return state
## **Modify Notebook**
def modify_notebook_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
    """
    Modify-Notebook Node:
    - Reads the existing notebook skeleton from state["raw_notebook"].
    - Reads user feedback from state["notebook_feedback"].
    - Uses additional context (domain, topics, problem, dataset_path, data_dictionary)
      to re-prompt the LLM to refine the notebook skeleton.
    - Overwrites state["raw_notebook"] with the updated JSON.
    - Resets approval flag for the next review cycle.

    Raises:
        ValueError: if there is no skeleton to modify, or no feedback to act on.
    """
    # 1. Extract required fields (context fields default to empty values)
    raw_nb = state.get("raw_notebook")
    feedback = state.get("notebook_feedback", "").strip()
    domain = state.get("domain", "")
    topics = state.get("topics_and_subtopics", "")
    problem = state.get("problem_statement", "")
    dataset_path = state.get("dataset_file_path", "")
    data_dict = state.get("data_dictionary", {})
    # Guard clauses: both a skeleton and actionable feedback are mandatory.
    if raw_nb is None:
        raise ValueError("No raw_notebook in state to modify")
    if not feedback:
        raise ValueError("No notebook_feedback in state to guide modification")
    # 2. Build the prompt with extra context (exact wording drives the LLM's
    #    output; skeleton and data dictionary are interpolated as JSON).
    prompt = f"""
You are an expert AI notebook refiner. The user is working in the **{domain}** domain,
with the following context:
• **Topics & Subtopics:**
{topics}
• **Problem Statement:**
{problem}
• **Dataset Location:**
{dataset_path}
• **Data Dictionary:**
{json.dumps(data_dict, indent=2)}
The user reviewed the current Jupyter notebook skeleton (nbformat v4).
They provided the following feedback on what to change:
---
**User Feedback:**
{feedback}
---
**Current Notebook Skeleton (JSON):**
{json.dumps(raw_nb, indent=2)}
---
Please produce a revised notebook skeleton JSON that:
1. Incorporates the user’s feedback precisely.
2. Maintains valid nbformat v4 structure (keys: nbformat, nbformat_minor, metadata, cells).
3. Preserves existing code and markdown formatting except where edits are requested.
4. Uses domain‑appropriate libraries and patterns (per the domain context above).
5. Outputs **only** the updated JSON object — no extra text or commentary.
"""
    # 3. Invoke the LLM and parse the revised notebook JSON out of the reply
    response = model.invoke([SystemMessage(content=prompt)])
    updated_nb = extract_json_from_response(response.content)
    # 4. Store updated skeleton & reset approval flag so the UI re-reviews it
    state["raw_notebook"] = updated_nb
    state["_notebook_approved"] = False
    return state
# **Execute Notebook**
def execute_notebook_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
    """
    Execute-Notebook Node.

    - Reads state['raw_notebook'] (nbformat v4 JSON, cells with empty outputs).
    - Writes it to a timestamped temp .ipynb and stages the dataset beside it.
    - Runs it via Papermill, capturing outputs.
    - On success: stores the executed notebook JSON string in
      state['executed_notebook'] and clears state['execution_error'].
    - On failure: stores the partially executed notebook (or the skeleton, if
      Papermill produced no output file) in state['executed_notebook'] and the
      exception text in state['execution_error'].

    Returns the (mutated) state dict.
    """
    import sys  # local import: only needed here to locate the running interpreter

    # 1. Extract inputs
    raw_nb = state.get("raw_notebook", {})
    dataset_path = state.get("dataset_file_path", "")

    # 2. Prepare a unique temp directory (timestamped so concurrent runs don't collide)
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    tmp_dir = f"tmp_notebooks/run_{run_id}"
    os.makedirs(tmp_dir, exist_ok=True)
    raw_path = os.path.join(tmp_dir, "raw_notebook.ipynb")
    exec_path = os.path.join(tmp_dir, "executed_notebook.ipynb")

    # 3. Write the skeleton notebook to disk
    nb_node = nbformat.from_dict(raw_nb)
    with open(raw_path, "w", encoding="utf-8") as f:
        nbformat.write(nb_node, f)

    # 4. Stage the dataset next to the notebook so relative paths in cells resolve
    if dataset_path and os.path.exists(dataset_path):
        shutil.copy(dataset_path, tmp_dir)

    # 5. Execute via Papermill
    try:
        # Bug fix: install into the *current* interpreter (sys.executable)
        # instead of whatever "python" happens to resolve to on PATH, which
        # may be a different environment than the one running this process.
        # Best-effort: if pip fails (e.g. offline) we still attempt execution,
        # since the required packages may already be installed.
        try:
            subprocess.run(
                [sys.executable, "-m", "pip", "install", "papermill", "ipykernel", "nbformat"],
                check=True,
            )
        except subprocess.CalledProcessError:
            pass
        pm.execute_notebook(
            input_path=raw_path,
            output_path=exec_path,
            parameters={},
            log_output=True,
            kernel_name="python3"
        )
        # 6a. Success: read back the fully executed notebook
        executed_nb = nbformat.read(exec_path, as_version=4)
        state["executed_notebook"] = nbformat.writes(executed_nb)
        state.pop("execution_error", None)
    except Exception as e:
        # 6b. Failure: keep whatever Papermill managed to write (cells executed
        # before the error retain their outputs), falling back to the skeleton.
        if os.path.exists(exec_path):
            partial_nb = nbformat.read(exec_path, as_version=4)
        else:
            partial_nb = nbformat.read(raw_path, as_version=4)
        state["executed_notebook"] = nbformat.writes(partial_nb)
        state["execution_error"] = str(e)
    return state
# def execute_notebook_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
# """
# Execute-Notebook Node with venv isolation and proper pip-install handling:
# - Reads state['raw_notebook'] (nbformat v4 JSON).
# - Writes it to a timestamped temp folder as raw_notebook.ipynb.
# - Creates a Python venv in that folder.
# - Detects any '!pip install' lines: extracts package names and installs them in venv.
# - Comments out the original '!pip install' lines in the notebook to avoid re-running them.
# - Registers an ipykernel for the venv (if possible).
# - Runs the notebook via Papermill under that venv/kernel, capturing outputs.
# - On success: stores executed notebook JSON in state["executed_notebook"], clears state["execution_error"].
# - On failure: injects an error markdown cell at the top, stores partially executed notebook JSON in state["executed_notebook"], and error text in state["execution_error"].
# """
# raw_nb = state.get("raw_notebook", {})
# dataset_path = state.get("dataset_file_path", "")
# # 1. Prepare unique temp directory
# run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
# tmp_dir = Path(f"tmp_notebooks/run_{run_id}")
# tmp_dir.mkdir(parents=True, exist_ok=True)
# raw_path = tmp_dir / "raw_notebook.ipynb"
# exec_path = tmp_dir / "executed_notebook.ipynb"
# # 2. Write the skeleton notebook to disk
# nb_node = nbformat.from_dict(raw_nb)
# nb_node.metadata.setdefault("kernelspec", {"name": "python3"})
# nb_node.metadata.setdefault("language_info", {"name": "python"})
# with raw_path.open("w", encoding="utf-8") as f:
# nbformat.write(nb_node, f)
# # 3. Stage dataset file if exists
# if dataset_path and os.path.exists(dataset_path):
# try:
# shutil.copy(dataset_path, tmp_dir)
# except Exception:
# pass # ignore if copy fails
# # 4. Helpers to detect and comment out '!pip install' lines
# def extract_pip_packages(ipynb_path: Path) -> list:
# """Return list of package strings from '!pip install ...' lines."""
# pkgs = []
# try:
# notebook = json.loads(ipynb_path.read_text(encoding="utf-8"))
# except Exception:
# return pkgs
# pattern = r'!pip\s+install\s+(.+)'
# for cell in notebook.get('cells', []):
# if cell.get('cell_type') == 'code':
# source = ''.join(cell.get('source', []))
# for line in source.splitlines():
# m = re.match(pattern, line.strip())
# if m:
# rest = m.group(1).strip()
# # Remove inline comments after '#'
# rest = rest.split('#', 1)[0].strip()
# # Split by whitespace to get package tokens
# parts = rest.split()
# for p in parts:
# if p:
# pkgs.append(p)
# return pkgs
# def comment_out_pip_lines(ipynb_path: Path):
# """
# Modify the notebook in-place: prefix any '!pip install...' lines with '# ',
# preserving indentation, so they won't run.
# """
# try:
# nb = nbformat.read(str(ipynb_path), as_version=4)
# except Exception:
# return
# modified = False
# for cell in nb.cells:
# if cell.cell_type == 'code':
# lines = cell.source.splitlines()
# new_lines = []
# for line in lines:
# # Detect leading whitespace
# leading_ws = line[:len(line) - len(line.lstrip())]
# stripped = line.lstrip()
# if stripped.startswith("!pip install"):
# # Comment out, preserving indentation
# new_line = leading_ws + "# " + stripped
# new_lines.append(new_line)
# modified = True
# else:
# new_lines.append(line)
# cell.source = "\n".join(new_lines)
# if modified:
# try:
# nbformat.write(nb, str(ipynb_path))
# except Exception:
# pass
# try:
# # 5. Create and prepare virtual environment
# env_path = tmp_dir / "venv"
# if not env_path.exists():
# subprocess.run([sys.executable, "-m", "venv", str(env_path)], check=True)
# print(os.listdir(tmp_dir/"venv"))
# # Determine python executable in venv
# if os.name == "nt":
# python_exec = env_path / "Scripts" / "python.exe"
# else:
# python_exec = env_path / "bin" / "python"  # POSIX venvs place the interpreter in bin/, not lib/ (lib/pythonX.Y is site-packages)
# # 6. Bootstrap pip in venv (if needed)
# try:
# subprocess.run([str(python_exec), "-m", "ensurepip", "--upgrade"], check=True)
# except Exception:
# pass
# try:
# subprocess.run([str(python_exec), "-m", "pip", "install", "--upgrade", "pip"], check=True)
# except Exception:
# pass
# # 7. Detect packages from '!pip install' lines and install them in venv
# packages = extract_pip_packages(raw_path)
# if packages:
# install_cmd = [str(python_exec), "-m", "pip", "install"] + packages
# try:
# subprocess.run(install_cmd, check=True, capture_output=True, text=True)
# except subprocess.CalledProcessError as e:
# err = e.stderr or e.stdout or str(e)
# # Record installation error in state; execution may still proceed and fail later
# prev = state.get("execution_error", "")
# state["execution_error"] = (prev + "\n" if prev else "") + f"Failed to install packages {packages}: {err}"
# # After installing, comment out original pip lines so notebook won't try again
# comment_out_pip_lines(raw_path)
# # 8. Install papermill, ipykernel, nbformat in venv so we can run papermill under venv
# try:
# subprocess.run(
# [str(python_exec), "-m", "pip", "install", "papermill", "ipykernel", "nbformat"],
# check=True, capture_output=True, text=True
# )
# except Exception as e:
# err = getattr(e, 'stderr', None) or str(e)
# prev = state.get("execution_error", "")
# state["execution_error"] = (prev + "\n" if prev else "") + f"Failed to install papermill/ipykernel: {err}"
# # 9. Register ipykernel for this venv (optional; if fails, Papermill may use default kernel)
# kernel_name = f"venv_{run_id}"
# try:
# subprocess.run(
# [
# str(python_exec), "-m", "ipykernel", "install",
# "--user",
# "--name", kernel_name,
# "--display-name", f"Python ({kernel_name})"
# ],
# check=True, capture_output=True, text=True
# )
# except Exception as e:
# err = getattr(e, 'stderr', None) or str(e)
# prev = state.get("execution_error", "")
# state["execution_error"] = (prev + "\n" if prev else "") + f"Failed to register ipykernel: {err}"
# # 10. Execute via Papermill under venv
# input_nb = raw_path.resolve()
# output_nb = exec_path.resolve()
# cmd = [str(python_exec), "-m", "papermill", str(input_nb), str(output_nb), "-k", kernel_name]  # must match the kernel registered above (venv_<run_id>), not the folder name "venv"
# try:
# result = subprocess.run(
# cmd,
# capture_output=True,
# text=True,
# cwd=str(tmp_dir),
# timeout=60 * 30 # adjust as needed
# )
# stderr = result.stderr or ""
# returncode = result.returncode
# # 11. Read output notebook if created
# if output_nb.exists():
# executed_nb = nbformat.read(str(output_nb), as_version=4)
# else:
# # Fallback to raw or partially executed
# executed_nb = nbformat.read(str(raw_path), as_version=4)
# # 12. Handle return code: if non-zero, inject error cell
# if returncode != 0:
# error_msg = f"Papermill exited with code {returncode}.\nStderr:\n{stderr}"
# err_cell = nbformat.v4.new_markdown_cell(f"**Execution Error:**\n```\n{error_msg}\n```")
# executed_nb.cells.insert(0, err_cell)
# prev = state.get("execution_error", "")
# state["execution_error"] = (prev + "\n" if prev else "") + error_msg
# else:
# # Success: clear any previous execution_error
# state.pop("execution_error", None)
# # 13. Save executed notebook JSON into state
# state["executed_notebook"] = nbformat.writes(executed_nb)
# except subprocess.TimeoutExpired as te:
# # Timeout: capture partial output and inject timeout error
# if exec_path.exists():
# partial_nb = nbformat.read(str(exec_path), as_version=4)
# else:
# partial_nb = nbformat.read(str(raw_path), as_version=4)
# timeout_msg = f"Timeout: notebook execution exceeded limit ({te})."
# err_cell = nbformat.v4.new_markdown_cell(f"**Execution Error:**\n```\n{timeout_msg}\n```")
# partial_nb.cells.insert(0, err_cell)
# state["executed_notebook"] = nbformat.writes(partial_nb)
# state["execution_error"] = timeout_msg
# except Exception as e:
# # General execution error: inject error cell
# if exec_path.exists():
# partial_nb = nbformat.read(str(exec_path), as_version=4)
# else:
# partial_nb = nbformat.read(str(raw_path), as_version=4)
# err = str(e)
# err_cell = nbformat.v4.new_markdown_cell(f"**Execution Error:**\n```\n{err}\n```")
# partial_nb.cells.insert(0, err_cell)
# state["executed_notebook"] = nbformat.writes(partial_nb)
# state["execution_error"] = err
# except Exception as e:
# # Any errors in venv setup or earlier steps: inject error at top of raw notebook
# raw_nb_node = nbformat.from_dict(raw_nb)
# err = str(e)
# err_cell = nbformat.v4.new_markdown_cell(f"**Execution Setup Error:**\n```\n{err}\n```")
# raw_nb_node.cells.insert(0, err_cell)
# state["executed_notebook"] = nbformat.writes(raw_nb_node)
# state["execution_error"] = f"Setup failed: {err}"
# return state
## **Review Execution**
# def review_execution_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
# """
# Review-Execution Node:
# - Reads state['executed_notebook'] (the .ipynb JSON with outputs).
# - Reads state.get('execution_error') to know if something failed.
# - Updates:
# state['_execution_approved']: bool
# state['execution_feedback']: str (if any)
# """
# # 1. Extract executed notebook and error flag
# executed_nb_json = state.get("executed_notebook", "")
# error_msg = state.get("execution_error")
# # 2. Validate JSON
# try:
# nb = json.loads(executed_nb_json)
# except Exception:
# # Malformed notebook JSON → automatic reject
# state["_execution_approved"] = False
# state["execution_feedback"] = "Executed notebook JSON could not be parsed."
# return state
# # 3. Decide success vs failure
# if error_msg:
# # FAILURE scenario
# # In Streamlit you’d render `nb` plus highlight error_msg prominently.
# print("😢 Sadly, an error occurred while executing the notebook:")
# print(f">>> {error_msg}\n")
# # Prompt user: feedback or auto‑handle
# ans = input(
# "Do you have feedback on this error, or should I handle it myself? (feedback/auto): "
# ).strip().lower()
# if ans == "auto":
# # Let the system attempt auto‑correction
# state["_execution_approved"] = False
# state["execution_feedback"] = "" # no user feedback
# else:
# # Collect user feedback
# feedback = input("Please describe how I should fix this:\n").strip()
# state["_execution_approved"] = False
# state["execution_feedback"] = feedback or "No feedback provided."
# else:
# # SUCCESS scenario
# # Streamlit: celebrate with "Notebook executed successfully end‑to‑end!"
# print("🎉 Notebook executed successfully end‑to‑end!")
# # Ask for final proceed approval
# ans = input("Shall I proceed to write the insights? (YES/NO): ").strip().lower()
# state["_execution_approved"] = (ans in ("yes", "y"))
# if not state["_execution_approved"]:
# # If user declines, optionally capture why
# feedback = input("Please share any concerns before writing insights:\n").strip()
# state["execution_feedback"] = feedback or ""
# return state
## **Correct Notebook**
def correct_notebook_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
    """
    Correct-Notebook Node.

    - Inputs:
        • state['raw_notebook']: the notebook skeleton JSON (cells only, no outputs)
        • state['execution_error']: the error string produced by Papermill
        • state['execution_feedback']: optional user feedback from review_execution_node
    - Action:
        • Prompts the LLM to minimally patch the notebook JSON to fix the error.
    - Output:
        • Overwrites state['raw_notebook'] with the corrected notebook JSON.
        • Resets state['_execution_approved'] to False to trigger re-execution.

    Raises:
        ValueError: if 'raw_notebook' is missing or empty.
    """
    # 1. Extract from state. `or ""` guards against an explicit None stored
    # under these keys (.get(key, "") only covers a *missing* key).
    raw_nb = state.get("raw_notebook", {})
    exec_error = (state.get("execution_error") or "").strip()
    feedback = (state.get("execution_feedback") or "").strip()
    if not raw_nb:
        raise ValueError("Missing required key: 'raw_notebook'")

    # 2. Build the LLM prompt.
    # Bug fix: the original prompt announced that correction feedback existed
    # but never embedded the feedback text itself — include it so the LLM can
    # actually act on it.
    if feedback:
        feedback_section = (
            "In addition, the user provided the following correction feedback:\n"
            f"{feedback}"
        )
    else:
        feedback_section = "The user did not provide any manual feedback."
    prompt = f"""
You are a Python notebook repair assistant.
Below is the original notebook code (nbformat v4 JSON):
{json.dumps(raw_nb, indent=2)}
The following error occurred during execution:
{exec_error or '(no error provided)'}
{feedback_section}
Please return a corrected version of the notebook (still in nbformat v4 JSON).
Make only the **minimal** necessary changes to fix the above error and satisfy the feedback if present.
Preserve all other cell content and metadata exactly as they are.
Return only the JSON. Do not include any extra explanation or comments.
"""
    # 3. Send to the LLM
    response = model.invoke([SystemMessage(content=prompt)])
    corrected_nb = extract_json_from_response(response.content)

    # 4. Save the corrected notebook and force re-execution
    state["raw_notebook"] = corrected_nb
    state["_execution_approved"] = False
    return state
## **Write Insights**
# ─────────────── write_insights_node ──────────────────
def _collect_output_text(outputs: list) -> str:
    """Concatenate the plain-text content of a code cell's outputs.

    Gathers 'stream' text and the 'text/plain' payload of 'execute_result'
    outputs; other output types (e.g. images) carry no usable text here.
    """
    text = ""
    for out in outputs:
        otype = out.get("output_type")
        if otype == "stream":
            text += "".join(out.get("text", []))
        elif otype == "execute_result":
            text += "".join(out.get("data", {}).get("text/plain", []))
    return text


def write_insights_node(state: InteractiveCaseStudyState) -> InteractiveCaseStudyState:
    """
    Write-Insights Node.

    - Inputs: state['executed_notebook'] (JSON string), state['problem_statement'].
    - For each executed code cell with outputs (plot cells excluded), calls the
      LLM with the problem statement, prior observations, the cell's code and
      its textual output, and inserts a markdown "Observation" cell after it.
    - Appends a final "# Insights and Conclusion" markdown cell summarizing
      all observations.
    - Sets state['final_notebook'] to the updated notebook (as a Python dict).

    Raises:
        ValueError: if state['executed_notebook'] is missing or empty.
    """
    # 1) Load the executed notebook JSON. Fail fast with a clear message
    # instead of an opaque JSONDecodeError on an empty string.
    executed_nb_str = state.get("executed_notebook", "")
    if not executed_nb_str:
        raise ValueError("Missing required key: 'executed_notebook'")
    executed_nb = json.loads(executed_nb_str)
    problem_stmt = state.get("problem_statement", "")
    cells = executed_nb.get("cells", [])

    # NOTE: plot-REPL staging (dataset copy + PythonREPL agent) is currently
    # disabled, so plot cells simply receive no observation.
    updated_cells = []
    context_history = []  # past observation strings, fed back into each prompt
    for cell in cells:
        updated_cells.append(cell)
        if cell.get("cell_type") != "code" or not cell.get("outputs"):
            continue
        code_src = "".join(cell.get("source", []))
        raw_out = _collect_output_text(cell["outputs"])
        if is_plot_code(code_src):
            obs = ""  # no observation for plot cells (REPL agent disabled)
        else:
            prior = "\n".join(f"- {o}" for o in context_history)
            prompt = f"""
You are an AI assistant writing a single one line bullet‑point observation (no code).
Problem Statement:
{problem_stmt}
Prior Observations:
{prior}
Current Code:
{code_src}
Current Output:
{raw_out}
Write one concise, beginner‑friendly bullet‑point that interprets this output
in context of the problem.
"""
            resp = model.invoke([SystemMessage(content=prompt)])
            obs = resp.content.strip()
        if obs:
            updated_cells.append({
                "cell_type": "markdown",
                "metadata": {},
                "source": [f"**Observation:** {obs}\n"],
            })
            context_history.append(obs)

    # Final summary section, built from all collected observations
    all_obs = "\n".join(f"- {o}" for o in context_history)
    summary_prompt = f"""
You are an AI assistant composing the final **Insights and Conclusion** section.
Problem Statement:
{problem_stmt}
Observations:
{all_obs}
Provide a short set of business‑focused recommendations and a conclusion.
"""
    summary_resp = model.invoke([SystemMessage(content=summary_prompt)])
    summary_text = summary_resp.content.strip()
    updated_cells.append({
        "cell_type": "markdown",
        "metadata": {},
        "source": [f"# Insights and Conclusion\n\n{summary_text}\n"],
    })
    executed_nb["cells"] = updated_cells
    state["final_notebook"] = executed_nb
    return state
# ## **Final Workflow**
# from langgraph.graph import StateGraph, START, END
# from IPython.display import Image, display
# from langgraph.checkpoint.memory import InMemorySaver
# def create_interactive_workflow() -> StateGraph:
# graph = StateGraph(InteractiveCaseStudyState)
# # Core nodes
# graph.add_node("think_sections", think_sections_node)
# graph.add_node("review_plan", review_plan_node)
# graph.add_node("enhance_plan", enhance_plan_node)
# graph.add_node("write_code", write_code_node)
# graph.add_node("review_notebook", review_notebook_node)
# graph.add_node("modify_notebook", modify_notebook_node)
# graph.add_node("execute_notebook",execute_notebook_node)
# graph.add_node("review_execution",review_execution_node)
# graph.add_node("correct_notebook",correct_notebook_node)
# graph.add_node("write_insights", write_insights_node)
# # START → Think‑Sections → Review Plan
# graph.add_edge(START, "think_sections")
# graph.add_edge("think_sections", "review_plan")
# graph.add_conditional_edges(
# "review_plan",
# lambda s: "true" if s.get("_plan_approved", False) else "false",
# {"true": "write_code", "false": "enhance_plan"}
# )
# graph.add_edge("enhance_plan", "review_plan")
# # Write‑Code → Review Notebook
# graph.add_edge("write_code", "review_notebook")
# graph.add_conditional_edges(
# "review_notebook",
# lambda s: "true" if s.get("_notebook_approved", False) else "false",
# {"true": "execute_notebook", "false": "modify_notebook"}
# )
# graph.add_edge("modify_notebook", "review_notebook")
# # Execute Notebook → Review Execution
# graph.add_edge("execute_notebook", "review_execution")
# graph.add_conditional_edges(
# "review_execution",
# lambda s: "true" if s.get("_execution_approved", False) else "false",
# {"true": "write_insights", "false": "correct_notebook"}
# )
# graph.add_edge("correct_notebook", "execute_notebook")
# # Final Insights → END
# graph.add_edge("write_insights", END)
# return graph
# from langgraph.checkpoint.memory import InMemorySaver
# checkpointer = InMemorySaver()
# # ─────────── Compile & Visualize ───────────
# case_study_workflow = create_interactive_workflow().compile(checkpointer=checkpointer)
# # Show the Mermaid diagram of the workflow
# display(Image(case_study_workflow.get_graph().draw_mermaid_png()))
# thread = {"configurable": {"thread_id": "1"}}
# case_study_workflow.get_state(thread).next
# case_study_workflow.get_state({"configurable": {"thread_id": "1"}}).next
|