Kyo-Kai committed on
Commit
6601c5b
·
1 Parent(s): 1de8976

Fixed placeholder bugs and improved prompts

Browse files
agents/explainer/__init__.py CHANGED
@@ -10,9 +10,8 @@ import logging
10
 
11
  from llama_index.core.agent import AgentRunner
12
  from llama_index.llms.litellm import LiteLLM
13
- from llama_index.core.tools import FunctionTool
14
- from services.vector_store import VectorStore # Import VectorStore
15
- from services.llm_factory import _PROVIDER_MAP # Import _PROVIDER_MAP directly
16
 
17
  # Configure logging for explainer agent
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -52,41 +51,27 @@ class ExplainerAgent:
52
  retrieved_context = []
53
  if self.vector_store:
54
  # Use the title and content to query the vector store for relevant chunks
55
- query = f"{title}. {content[:100]}" # Combine title and start of content for query
56
- retrieved_docs = self.vector_store.search(query, k=3) # Retrieve top 3 relevant docs
 
57
  retrieved_context = [doc['content'] for doc in retrieved_docs]
58
  logging.info(f"ExplainerAgent: Retrieved {len(retrieved_context)} context chunks.")
59
 
60
- base_prompt = explain_prompter(title, content, retrieved_context) # Pass retrieved_context
61
 
62
  if explanation_style == "Concise":
63
- style_instruction = ("Keep the explanation concise (max 300 words), "
64
  "focusing on core concepts.")
65
  elif explanation_style == "Detailed":
66
  style_instruction = ("Provide a detailed explanation, elaborating on concepts,"
67
  " examples, and deeper insights to master the topic.")
68
  else:
69
- style_instruction = ("Keep the explanation concise (max 300 words), "
70
  "focusing on core concepts.")
71
 
72
  prompt_message = f"""
73
  {base_prompt}
74
  {style_instruction}
75
-
76
- You can use the `make_figure` tool to generate charts and diagrams.
77
- When using `make_figure`, provide the `chart_type` (e.g., "bar_chart", "line_graph",
78
- "pie_chart", "scatter_plot", "histogram")
79
- and the `data` as a JSON dictionary.
80
- For example:
81
- `make_figure(title="Example Bar Chart", content="Data for bar chart",
82
- chart_type="bar_chart", data={{"labels": ["A", "B"], "values": [10, 20]}})`
83
-
84
- If you decide to generate a figure, ensure the `title` and `content` arguments
85
- passed to `make_figure` are relevant to the current learning unit.
86
- After generating the explanation, if you used the `make_figure` tool, the output
87
- will contain a placeholder like `[FIGURE_PATH: /path/to/figure.png]`.
88
- You MUST include this placeholder directly in your final markdown response where
89
- the figure should appear.
90
  """
91
 
92
  chat_response = self.agent.chat(prompt_message)
@@ -136,50 +121,49 @@ class ExplainerAgent:
136
  forbidden_descs = ["code", "code example", "code snippet", "sample", "example",
137
  "[error: missing or generic code description from llm]"]
138
 
139
- is_generic_desc = False
140
- if raw_llm_desc:
141
- if raw_llm_desc.strip().lower() in forbidden_descs:
142
- is_generic_desc = True
143
- else:
144
- is_generic_desc = True
145
 
146
  if is_generic_desc:
147
  actual_display_desc = f"Python code illustrating '{title}'"
148
  desc_for_generator = (
149
- f"Context: '{title}'. Task: Generate a relevant Python code example. "
150
- f"The LLM failed to provide a specific description (or provided a generic one: "
151
- f"'{raw_llm_desc}') for this code block. "
152
- f"Ensure the generated code is self-contained, includes example usage, "
153
- f"and critically, MUST end with a print() statement to display the main result."
154
  )
155
- if raw_llm_desc and raw_llm_desc.strip().lower() not in forbidden_descs :
156
- logging.warning(f"ExplainerAgent: LLM provided an unusual or generic code "
157
- f"description: '{raw_llm_desc}'. Using fallback title "
158
- f"'{actual_display_desc}'.")
159
- elif raw_llm_desc:
160
- logging.warning(f"ExplainerAgent: LLM provided generic code description: "
161
- f"'{raw_llm_desc}'. Using fallback title '{actual_display_desc}'.")
162
- else:
163
- logging.warning(f"ExplainerAgent: LLM provided no code description with [CODE:]. "
164
- f"Using fallback title '{actual_display_desc}'.")
165
  else:
166
- actual_display_desc = raw_llm_desc
167
  desc_for_generator = (
168
- f"Generate Python code for: '{raw_llm_desc}'. IMPORTANT: The example usage "
169
- f"within this generated code block MUST end with a print() statement to display "
170
- f"the main result or output clearly to the user. "
171
- f"The code should be self-contained with all necessary setup (imports, variables)."
172
  )
173
 
174
- code_snippet = make_code_snippet(title, content, desc_for_generator)
 
 
 
 
 
 
 
175
 
176
  if code_snippet:
177
- code_examples.append(CodeExample(language="python", code=code_snippet,
178
- description=actual_display_desc))
179
- return_value = f'[AGENT_CODE_PLACEHOLDER_{len(code_examples) - 1}]'
180
- logging.info(f"ExplainerAgent: Generated code for title '{actual_display_desc}', "
181
- f"returning placeholder: '{return_value}'")
182
- return return_value
 
 
 
 
 
 
 
 
 
183
  else:
184
  logging.warning(f"ExplainerAgent: make_code_snippet returned empty for description: "
185
  f"'{desc_for_generator}'. Removing placeholder from markdown.")
 
10
 
11
  from llama_index.core.agent import AgentRunner
12
  from llama_index.llms.litellm import LiteLLM
13
+ from services.vector_store import VectorStore
14
+ from services.llm_factory import _PROVIDER_MAP
 
15
 
16
  # Configure logging for explainer agent
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
51
  retrieved_context = []
52
  if self.vector_store:
53
  # Use the title and content to query the vector store for relevant chunks
54
+ # Combine title and start of context for the query and utilize top 3 relevant docs
55
+ query = f"{title}. {content[:100]}"
56
+ retrieved_docs = self.vector_store.search(query, k=3)
57
  retrieved_context = [doc['content'] for doc in retrieved_docs]
58
  logging.info(f"ExplainerAgent: Retrieved {len(retrieved_context)} context chunks.")
59
 
60
+ base_prompt = explain_prompter(title, content, retrieved_context)
61
 
62
  if explanation_style == "Concise":
63
+ style_instruction = ("Keep the explanation concise (max 400 words), "
64
  "focusing on core concepts.")
65
  elif explanation_style == "Detailed":
66
  style_instruction = ("Provide a detailed explanation, elaborating on concepts,"
67
  " examples, and deeper insights to master the topic.")
68
  else:
69
+ style_instruction = ("Keep the explanation concise (max 400 words), "
70
  "focusing on core concepts.")
71
 
72
  prompt_message = f"""
73
  {base_prompt}
74
  {style_instruction}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  """
76
 
77
  chat_response = self.agent.chat(prompt_message)
 
121
  forbidden_descs = ["code", "code example", "code snippet", "sample", "example",
122
  "[error: missing or generic code description from llm]"]
123
 
124
+ is_generic_desc = not raw_llm_desc or raw_llm_desc.strip().lower() in forbidden_descs
 
 
 
 
 
125
 
126
  if is_generic_desc:
127
  actual_display_desc = f"Python code illustrating '{title}'"
128
  desc_for_generator = (
129
+ f"Context: '{title}'. Task: Generate a runnable, self-contained Python code example. "
130
+ f"The LLM provided a generic description: '{raw_llm_desc}'. Your final line of code MUST be a print() statement."
 
 
 
131
  )
132
+ logging.warning(f"ExplainerAgent: LLM provided generic or no code description: "
133
+ f"'{raw_llm_desc}'. Using fallback title.")
 
 
 
 
 
 
 
 
134
  else:
135
+ actual_display_desc = raw_llm_desc.strip()
136
  desc_for_generator = (
137
+ f"Generate a runnable, self-contained Python code snippet for: '{raw_llm_desc}'. "
138
+ f"It must include all necessary imports and initialize all variables. "
139
+ f"Your final line of code MUST be a print() statement to display the result."
 
140
  )
141
 
142
+ code_snippet = make_code_snippet(
143
+ title,
144
+ content,
145
+ desc_for_generator,
146
+ llm_provider=self.provider,
147
+ llm_model_name=self.model_name,
148
+ llm_api_key=self.api_key
149
+ )
150
 
151
  if code_snippet:
152
+ # 1. Create the CodeExample object
153
+ new_code_example = CodeExample(language="python", code=code_snippet,
154
+ description=actual_display_desc)
155
+ # 2. Add it to the list that app.py will use
156
+ code_examples.append(new_code_example)
157
+
158
+ # 3. Get the index of the newly added item
159
+ insertion_index = len(code_examples) - 1
160
+
161
+ # 4. Create the EXACT placeholder your app.py expects
162
+ placeholder = f"[CODE_INSERTION_POINT_{insertion_index}]"
163
+
164
+ logging.info(f"ExplainerAgent: Generated code for '{actual_display_desc}', "
165
+ f"returning placeholder: '{placeholder}'")
166
+ return placeholder
167
  else:
168
  logging.warning(f"ExplainerAgent: make_code_snippet returned empty for description: "
169
  f"'{desc_for_generator}'. Removing placeholder from markdown.")
agents/explainer/explain_prompt.py CHANGED
@@ -13,61 +13,61 @@ The following information has been retrieved from the original document and is h
13
  ---
14
  """
15
  return f"""
16
- You are an expert AI assistant specializing in transforming complex concepts into deeply insightful, structured explanations. Your goal is to produce thoughtful, thorough educational content—avoiding repetition and encouraging deep analytical reasoning.
17
 
18
  **MANDATORY REQUIREMENTS:**
19
 
20
- 1. **Structure and Formatting:**
21
- - Start with a clear, concise introduction to the topic.
22
- - Break the content into logically organized sections using appropriate markdown headings.
23
- - Use **bold** for key terms and bullet points for lists.
24
- - **Use standard MathJax LaTeX for all mathematics:**
25
- - Inline math: `$ E=mc^2 $`
26
- - Display math: `$$ \int_a^b f(x) dx $$`
27
- - End with a summary or key takeaways.
28
- - Only use tools (e.g., visual aids or code placeholders) when they add significant explanatory value.
29
- - **IMPORTANT:** Ensure the final markdown output does NOT end with any trailing backticks (```).
30
 
31
- 2. **Code Examples - CRITICAL:**
32
- - Represent ALL Python code exclusively using this format: `[CODE: specific description of what the code does]`.
33
- - DO NOT use triple backticks for code blocks (e.g., ```python).
34
- - Each description must be unique, precise, and clearly reflect the function or purpose of the code.
35
- - ✅ Examples:
36
- - `[CODE: Python function to calculate acceleration from force and mass]`
37
- - `[CODE: Loading and filtering a CSV file with pandas]`
38
- - ❌ Forbidden descriptions:
39
- - "Code snippet", "Example", "Sample code", "Python Script"
40
- - The code (to be generated by another system) must be self-contained:
41
- - Include all necessary `import` statements.
42
- - Initialize variables with meaningful example values.
43
- - End with a `print()` statement to show the final result/output. This is essential for ensuring output visibility.
44
 
45
- 3. **Visual Aids - CRITICAL:**
46
- - Use the `make_figure` tool **only** when the content includes numerical data or clear categorical comparisons suitable for visualization.
47
- - Insert a placeholder exactly like this where the figure should appear: `[FIGURE_PATH: /path/to/figure.png]`
48
- - DO NOT use `[FIGURE: {{...}}]` or JSON-style placeholders—these will not be processed correctly.
49
- - Ensure the visual aid enhances clarity, provides insight, or enables comparison—not simply decorates the explanation.
50
 
51
- 4. **Content Quality:**
52
- - Provide deep, step-by-step explanations using real-world analogies and relatable examples.
53
- - Clearly define all technical terms.
54
- - Maintain an encouraging, educational tone.
55
- - **Synthesize the 'Retrieved Context' with the 'Raw Content/Context'** to build a document-specific and relevant explanation.
56
- - **Avoid hallucinating** facts not present in either source.
57
- - **Avoid redundancy:** Each section should add new value. Do not restate the same point in different words.
58
- - **Final Review Step:** After composing the explanation, pause and review it. Deepen any shallow sections, remove repetition, and ensure every sentence adds clarity or insight.
59
- - **Enhanced Intelligence Requirements:** Think critically and analytically about the topic. Question assumptions, explore nuances, and provide multi-layered explanations that demonstrate deep understanding rather than surface-level coverage.
60
- - **Factual Precision:** Cross-reference information within the provided context and retrieved documents. If the content is factual/informational, verify consistency and flag any potential contradictions. Prioritize accuracy over speed of response.
61
- - **Adaptive Detail Level:** For creative content, unleash full creative potential with rich imagery, character development, and narrative depth. For document-based content, maintain strict fidelity to source material while expanding explanations using your knowledge base to illuminate complex concepts.
62
 
63
- 5. **INTELLIGENCE AND ACCURACY MANDATE:**
64
- - If this is creative content: Be imaginative, original, and emotionally engaging while maintaining internal consistency.
65
- - If this is document-based information: Treat the source document as authoritative truth. Reference it religiously, quote directly when appropriate, and use your knowledge only to provide additional context that enhances understanding without contradicting the source.
66
- - In all cases: Demonstrate intellectual rigor by exploring implications, connections, and deeper meanings rather than just restating information.
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  **Topic to Explain:** {title}
69
 
70
  **Raw Content/Context:** {content}
71
  {context_section}
72
  **Your Explanation (in Markdown):**
73
- """
 
13
  ---
14
  """
15
  return f"""
16
+ You are an expert AI assistant specializing in transforming complex concepts into deeply insightful, structured explanations. Your goal is to produce thoughtful, thorough educational content.
17
 
18
  **MANDATORY REQUIREMENTS:**
19
 
20
+ **1. Agent Workflow - CRITICAL:**
21
+ - **Think:** Analyze the request and formulate a plan in a `Thought:` block.
22
+ - **Act (Optional):** If a tool is needed, use `Action:` and `Action Input:` to call it.
23
+ - **Observe:** The system will provide an `Observation:` with the tool's result (a success or an error).
24
+ - **Finalize:**
25
+ - If the tool was successful, your NEXT and FINAL step is to generate the complete markdown explanation in an `Answer:` block. **Do not try to use another tool or repeat the `Thought:` process.**
26
+ - If the tool failed, you can try to correct your `Action Input` in a new `Thought:` and `Action:` sequence.
 
 
 
27
 
28
+ **2. Tools: Code and Figures - CRITICAL:**
29
+ Your role is to insert **placeholders** for code and figures. Another system will generate the actual content. You must follow these formats precisely.
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ **A. Code Placeholders:**
32
+ - **Your Task:** When you provide a mathematical, coding, or relevant subject example, you MUST immediately follow it with a `[CODE: ...]` placeholder.
33
+ - **CRITICAL:** The description inside the placeholder must be a **specific, runnable task that solves the exact example you just described.** This makes the code interactive and relevant. The code must be self-contained and print the final result.
34
+ - **Another system will generate the code.** Your job is ONLY to create this hyper-specific placeholder. Do not write any Python code yourself (no ```python).
 
35
 
36
+ - **Correct Example:**
37
+ - ... an explanation of `∫ x*e^x dx`.
38
+ - `[CODE: Python code using SymPy to symbolically solve the integral of x*e^x and print the result]`
 
 
 
 
 
 
 
 
39
 
40
+ - **Incorrect Example:**
41
+ - ... an explanation of `∫ x*e^x dx`.
42
+ - `[CODE: Python code for integration plotting]` (This is too generic and will result in non-runnable template code! ALSO, DO NOT TRY TO PLOT IN A CODE PLACEHOLDER)
 
43
 
44
+ **B. `make_figure` Tool:**
45
+ - **Your Task:** To request a figure, call the `make_figure` tool. It will return a file path.
46
+ - **CRITICAL WORKFLOW:** After the tool returns a path in the `Observation:`, you MUST use that exact path to create the placeholder in your final `Answer:`, like this: `[FIGURE_PATH: <filepath_returned_by_tool>]`.
47
+ - **Tool Definition:** You MUST adhere strictly to this signature. Note that labels go *inside* the `data` dictionary.
48
+ - **Signature:** `make_figure(title: str, content: str, chart_type: str, data: Dict[str, Any])`
49
+ - **Data Structure for "line_graph", "bar_chart", "scatter_plot":**
50
+ - `data`: `{{ "x": [...], "y": [...], "x_label": "...", "y_label": "..." }}`
51
+ - **Data Structure for "pie_chart":**
52
+ - `data`: `{{ "labels": [...], "sizes": [...] }}`
53
+
54
+ **3. General Formatting:**
55
+ - Start with a clear introduction.
56
+ - Use markdown headings (`##`, `###`) to organize content.
57
+ - Use **bold** for key terms and bullet points for lists.
58
+ - Use standard MathJax LaTeX for all mathematics: `$E=mc^2$` (inline) and `$$ ... $$` (display).
59
+ - End with a summary or key takeaways.
60
+
61
+ **4. Content Quality:**
62
+ - Provide deep, step-by-step explanations with real-world analogies.
63
+ - Clearly define all technical terms.
64
+ - Synthesize the 'Retrieved Context' with the 'Raw Content/Context' for a relevant explanation.
65
+ - Avoid hallucination and redundancy.
66
+
67
+ ---
68
  **Topic to Explain:** {title}
69
 
70
  **Raw Content/Context:** {content}
71
  {context_section}
72
  **Your Explanation (in Markdown):**
73
+ """
agents/explainer/tools/code_generator.py CHANGED
@@ -1,8 +1,16 @@
 
1
  import textwrap
 
2
  from services.llm_factory import get_completion_fn
3
- import re # Added this import
4
 
5
- def make_code_snippet(title: str, content: str, suggestion: str) -> str:
 
 
 
 
 
 
 
6
  """Generate a code snippet based on suggestion using LLM."""
7
  if not suggestion.strip():
8
  return textwrap.dedent(
@@ -13,24 +21,34 @@ def make_code_snippet(title: str, content: str, suggestion: str) -> str:
13
  )
14
 
15
  prompt = f"""
16
- Generate a concise and functional Python code snippet based on the following unit and suggestion.
17
- The code should directly illustrate a key concept from the unit.
18
- Do not include excessive comments or explanations within the code itself.
19
- IMPORTANT: Do NOT include `plt.show()` or any other interactive plotting commands. If a plot is suggested,
20
- assume it will be handled by a separate visualization component. Focus solely on the data processing or
21
- algorithmic logic.
22
 
23
- Unit Title: {title}
24
- Unit Content: {content}
25
- Code Suggestion: {suggestion}
 
 
 
 
 
 
26
 
 
 
 
 
 
27
  ```python
28
  # Your code here
29
  ```
30
  """
31
 
32
  try:
33
- llm = get_completion_fn("mistral") # Using mistral for code generation
 
 
 
 
34
  response = llm(prompt)
35
 
36
  # Extract code block, being more flexible with whitespace around backticks
@@ -40,10 +58,12 @@ def make_code_snippet(title: str, content: str, suggestion: str) -> str:
40
 
41
  # Fallback if no code block is found, return the whole response
42
  return response.strip()
43
- except Exception:
 
44
  return textwrap.dedent(
45
  f"""
46
  # Failed to generate code for {title}
47
  # Content preview: {content[:40]}...
 
48
  """
49
- )
 
1
+ import re
2
  import textwrap
3
+ from typing import Optional
4
  from services.llm_factory import get_completion_fn
 
5
 
6
+ def make_code_snippet(
7
+ title: str,
8
+ content: str,
9
+ suggestion: str,
10
+ llm_provider: str,
11
+ llm_model_name: Optional[str] = None,
12
+ llm_api_key: Optional[str] = None
13
+ ) -> str:
14
  """Generate a code snippet based on suggestion using LLM."""
15
  if not suggestion.strip():
16
  return textwrap.dedent(
 
21
  )
22
 
23
  prompt = f"""
24
+ You are an expert Python programmer tasked with generating a single, self-contained, and runnable code snippet.
 
 
 
 
 
25
 
26
+ **Task:**
27
+ Generate a concise Python code snippet that directly implements the following request.
28
+ - The code MUST be self-contained (include all necessary imports).
29
+ - The code MUST end with a `print()` statement to show the final result.
30
+ - For mathematical/scientific tasks, **strongly prefer using libraries like `numpy`, `scipy`, and `sympy`** to ensure the code is correct and robust.
31
+ - Do not include any explanations or comments outside of essential clarifications.
32
+ - Do not use plotting libraries like matplotlib.
33
+
34
+ **Request:** "{suggestion}"
35
 
36
+ **Full Context (for reference):**
37
+ - Unit Title: {title}
38
+ - Unit Content: {content}
39
+
40
+ **Your Python Code Snippet:**
41
  ```python
42
  # Your code here
43
  ```
44
  """
45
 
46
  try:
47
+ llm = get_completion_fn(
48
+ provider=llm_provider,
49
+ model_name=llm_model_name,
50
+ api_key=llm_api_key
51
+ )
52
  response = llm(prompt)
53
 
54
  # Extract code block, being more flexible with whitespace around backticks
 
58
 
59
  # Fallback if no code block is found, return the whole response
60
  return response.strip()
61
+ except Exception as e:
62
+ logging.error(f"Failed to generate code for '{title}' due to LLM error: {e}", exc_info=True)
63
  return textwrap.dedent(
64
  f"""
65
  # Failed to generate code for {title}
66
  # Content preview: {content[:40]}...
67
+ # Error: {e}
68
  """
69
+ )
agents/explainer/tools/figure_generator.py CHANGED
@@ -1,10 +1,7 @@
1
- import json
 
2
  import plotly.graph_objects as go
3
  from llama_index.core.tools import FunctionTool
4
- from typing import Dict, Any
5
- import tempfile
6
- import uuid
7
- import os
8
 
9
  def make_figure(
10
  title: str,
 
1
+ import tempfile
2
+ from typing import Dict, Any
3
  import plotly.graph_objects as go
4
  from llama_index.core.tools import FunctionTool
 
 
 
 
5
 
6
  def make_figure(
7
  title: str,
app.py CHANGED
@@ -119,7 +119,7 @@ def create_app():
119
  gr.Markdown("### ✍️ Paste Content")
120
  text_in = create_text_input(lines=8)
121
  with gr.Row():
122
- input_type = gr.Radio(choices=["PDF", "Text"], value="Text", label="Content Type")
123
  plan_btn = create_primary_button("🚀 Process with AI")
124
  plan_status = create_status_markdown(
125
  "Upload content and click 'Process with AI' to generate learning units."
 
119
  gr.Markdown("### ✍️ Paste Content")
120
  text_in = create_text_input(lines=8)
121
  with gr.Row():
122
+ input_type = gr.Radio(choices=["File", "Text"], value="Text", label="Content Type")
123
  plan_btn = create_primary_button("🚀 Process with AI")
124
  plan_status = create_status_markdown(
125
  "Upload content and click 'Process with AI' to generate learning units."