Spaces:
Sleeping
feat: Enhance persona prompts and RAG evaluation logging
Browse files

This commit significantly improves the "Generation" and evaluation capabilities of the RAG pipeline by fully implementing persona-based prompts and enriching the evaluation data.
The initial implementation had placeholder prompts for the 'parent' and 'principal' personas. Furthermore, the evaluation log did not capture the exact prompt details sent to the LLM, making it difficult to debug or audit the generation step.
This commit addresses these gaps as follows:
- **Fully implements the 'parent' and 'principal' persona prompts** in `prompts.py`. Each prompt is now tailored with specific instructions for tone, content, and formatting to align with the requirements of the performance task brief.
- **Refactors the `generate_recommendation_summary` function** in the RAG pipeline. It no longer returns just the final text, but now also returns a detailed dictionary containing the prompt template, the variables used, and the final formatted prompt text sent to the LLM.
- **Updates the Gradio app (`app.py`)** to log this detailed prompt information under a new `llm_prompt_details` key in the downloadable evaluation JSON, providing complete traceability for each request.
- **Adjusts the console entry point (`main.py`)** to maintain compatibility with the refactored pipeline function.
- app.py +14 -12
- src/fot_recommender/main.py +1 -1
- src/fot_recommender/prompts.py +50 -14
- src/fot_recommender/rag_pipeline.py +24 -4
|
@@ -7,7 +7,10 @@ import numpy as np
|
|
| 7 |
import sys
|
| 8 |
from pathlib import Path
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
| 11 |
FAISS_INDEX_PATH,
|
| 12 |
FINAL_KB_CHUNKS_PATH,
|
| 13 |
CITATIONS_PATH,
|
|
@@ -16,8 +19,8 @@ from fot_recommender.config import (
|
|
| 16 |
SEARCH_RESULT_COUNT_K,
|
| 17 |
MIN_SIMILARITY_SCORE,
|
| 18 |
)
|
| 19 |
-
from fot_recommender.utils import load_citations, format_evidence_for_display
|
| 20 |
-
from fot_recommender.rag_pipeline import (
|
| 21 |
load_knowledge_base,
|
| 22 |
initialize_embedding_model,
|
| 23 |
generate_recommendation_summary,
|
|
@@ -115,7 +118,7 @@ def get_recommendations_api(student_narrative, persona, password):
|
|
| 115 |
return
|
| 116 |
|
| 117 |
# 2. GENERATE
|
| 118 |
-
synthesized_recommendation = generate_recommendation_summary(
|
| 119 |
retrieved_chunks=retrieved_chunks_with_scores,
|
| 120 |
student_narrative=student_narrative,
|
| 121 |
api_key=FOT_GOOGLE_API_KEY,
|
|
@@ -136,8 +139,7 @@ def get_recommendations_api(student_narrative, persona, password):
|
|
| 136 |
evidence_list_str += (
|
| 137 |
f" - **Content Snippet:**\n > {evidence['content_snippet']}\n"
|
| 138 |
)
|
| 139 |
-
|
| 140 |
-
final_output = synthesized_recommendation + evidence_header + evidence_list_str
|
| 141 |
|
| 142 |
# 4. Assemble Evaluation Data
|
| 143 |
evaluation_data = {
|
|
@@ -154,8 +156,11 @@ def get_recommendations_api(student_narrative, persona, password):
|
|
| 154 |
}
|
| 155 |
for chunk, score in retrieved_chunks_with_scores
|
| 156 |
],
|
| 157 |
-
"
|
| 158 |
-
"
|
|
|
|
|
|
|
|
|
|
| 159 |
}
|
| 160 |
|
| 161 |
# 5. Create a temporary file for download
|
|
@@ -166,7 +171,7 @@ def get_recommendations_api(student_narrative, persona, password):
|
|
| 166 |
temp_file_path = f.name
|
| 167 |
|
| 168 |
yield (
|
| 169 |
-
|
| 170 |
gr.update(interactive=True),
|
| 171 |
gr.update(visible=True),
|
| 172 |
evaluation_data,
|
|
@@ -268,7 +273,4 @@ with gr.Blocks(theme=gr.themes.Soft(), css=CUSTOM_CSS) as interface: # type: ig
|
|
| 268 |
|
| 269 |
|
| 270 |
if __name__ == "__main__":
|
| 271 |
-
# Add project src to the sys.path for when running as a script
|
| 272 |
-
APP_ROOT = Path(__file__).parent
|
| 273 |
-
sys.path.insert(0, str(APP_ROOT / "src"))
|
| 274 |
interface.launch()
|
|
|
|
| 7 |
import sys
|
| 8 |
from pathlib import Path
|
| 9 |
|
| 10 |
+
APP_ROOT = Path(__file__).parent
|
| 11 |
+
sys.path.insert(0, str(APP_ROOT / "src"))
|
| 12 |
+
|
| 13 |
+
from fot_recommender.config import ( # noqa: E402
|
| 14 |
FAISS_INDEX_PATH,
|
| 15 |
FINAL_KB_CHUNKS_PATH,
|
| 16 |
CITATIONS_PATH,
|
|
|
|
| 19 |
SEARCH_RESULT_COUNT_K,
|
| 20 |
MIN_SIMILARITY_SCORE,
|
| 21 |
)
|
| 22 |
+
from fot_recommender.utils import load_citations, format_evidence_for_display # noqa: E402
|
| 23 |
+
from fot_recommender.rag_pipeline import ( # noqa: E402
|
| 24 |
load_knowledge_base,
|
| 25 |
initialize_embedding_model,
|
| 26 |
generate_recommendation_summary,
|
|
|
|
| 118 |
return
|
| 119 |
|
| 120 |
# 2. GENERATE
|
| 121 |
+
synthesized_recommendation, llm_prompt_details = generate_recommendation_summary(
|
| 122 |
retrieved_chunks=retrieved_chunks_with_scores,
|
| 123 |
student_narrative=student_narrative,
|
| 124 |
api_key=FOT_GOOGLE_API_KEY,
|
|
|
|
| 139 |
evidence_list_str += (
|
| 140 |
f" - **Content Snippet:**\n > {evidence['content_snippet']}\n"
|
| 141 |
)
|
| 142 |
+
final_ui_output = synthesized_recommendation + evidence_header + evidence_list_str
|
|
|
|
| 143 |
|
| 144 |
# 4. Assemble Evaluation Data
|
| 145 |
evaluation_data = {
|
|
|
|
| 156 |
}
|
| 157 |
for chunk, score in retrieved_chunks_with_scores
|
| 158 |
],
|
| 159 |
+
"llm_prompt_details": llm_prompt_details,
|
| 160 |
+
"outputs": {
|
| 161 |
+
"llm_synthesized_recommendation": synthesized_recommendation,
|
| 162 |
+
"final_formatted_ui_output": final_ui_output,
|
| 163 |
+
},
|
| 164 |
}
|
| 165 |
|
| 166 |
# 5. Create a temporary file for download
|
|
|
|
| 171 |
temp_file_path = f.name
|
| 172 |
|
| 173 |
yield (
|
| 174 |
+
final_ui_output,
|
| 175 |
gr.update(interactive=True),
|
| 176 |
gr.update(visible=True),
|
| 177 |
evaluation_data,
|
|
|
|
| 273 |
|
| 274 |
|
| 275 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
| 276 |
interface.launch()
|
|
@@ -87,7 +87,7 @@ def main():
|
|
| 87 |
if not api_key:
|
| 88 |
return "ERROR: FOT_GOOGLE_API_KEY not found. Please create a .env file and add your key."
|
| 89 |
|
| 90 |
-
synthesized_recommendation = generate_recommendation_summary(
|
| 91 |
top_interventions, student_query, api_key=api_key, persona="teacher"
|
| 92 |
)
|
| 93 |
|
|
|
|
| 87 |
if not api_key:
|
| 88 |
return "ERROR: FOT_GOOGLE_API_KEY not found. Please create a .env file and add your key."
|
| 89 |
|
| 90 |
+
synthesized_recommendation, _ = generate_recommendation_summary(
|
| 91 |
top_interventions, student_query, api_key=api_key, persona="teacher"
|
| 92 |
)
|
| 93 |
|
|
@@ -13,25 +13,61 @@ Based on the student's profile, the following intervention strategies have been
|
|
| 13 |
--- END CONTEXT ---
|
| 14 |
|
| 15 |
**Your Task:**
|
| 16 |
-
Synthesize the provided
|
| 17 |
|
| 18 |
**Instructions:**
|
| 19 |
-
1. Start with a brief summary of the student's key challenges.
|
| 20 |
-
2. Recommend 2-3 concrete, actionable strategies derived *only* from the provided
|
| 21 |
-
3. For each strategy, briefly explain *why* it is relevant to this student, citing the core ideas from the
|
| 22 |
-
4. Do not invent information. Ground your entire response in the provided
|
| 23 |
-
5. Format the output clearly using Markdown for readability.
|
| 24 |
""",
|
| 25 |
"parent": """
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
""",
|
| 31 |
"principal": """
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
""",
|
| 37 |
}
|
|
|
|
| 13 |
--- END CONTEXT ---
|
| 14 |
|
| 15 |
**Your Task:**
|
| 16 |
+
Synthesize the provided evidence to write a concise, actionable recommendation for the student's teacher.
|
| 17 |
|
| 18 |
**Instructions:**
|
| 19 |
+
1. Start with a brief, one-sentence summary of the student's key challenges.
|
| 20 |
+
2. Recommend 2-3 concrete, actionable strategies derived *only* from the provided evidence. These should be steps the teacher can take in their classroom or in collaboration with the school's support team.
|
| 21 |
+
3. For each strategy, briefly explain *why* it is relevant to this student, citing the core ideas from the source evidence.
|
| 22 |
+
4. Do not invent information or suggest strategies not present in the context. Ground your entire response in the provided evidence.
|
| 23 |
+
5. Format the output clearly using Markdown for readability (e.g., headings and bullet points).
|
| 24 |
""",
|
| 25 |
"parent": """
|
| 26 |
+
You are a helpful and empathetic AI guidance assistant designed to support parents. Your tone must be supportive, clear, and reassuring. You must avoid educational jargon, acronyms, and overly technical terms.
|
| 27 |
+
|
| 28 |
+
**Student's Situation:**
|
| 29 |
+
{student_narrative}
|
| 30 |
+
|
| 31 |
+
**Potential Support Strategies (Based on School Resources):**
|
| 32 |
+
Based on your child's situation, we've identified some effective support strategies from our resource library:
|
| 33 |
+
|
| 34 |
+
--- BEGIN CONTEXT ---
|
| 35 |
+
{context}
|
| 36 |
+
--- END CONTEXT ---
|
| 37 |
+
|
| 38 |
+
**Your Task:**
|
| 39 |
+
Synthesize the provided information into a supportive and easy-to-understand message for the student's parent or guardian.
|
| 40 |
+
|
| 41 |
+
**Instructions:**
|
| 42 |
+
1. Start with a warm, reassuring opening that summarizes the situation in simple, positive terms (e.g., "Thank you for being a partner in your child's success. Here are a few ways we can work together to help them get back on track.").
|
| 43 |
+
2. Translate the core ideas from the evidence into 2-3 simple, practical suggestions for how the parent can provide support at home.
|
| 44 |
+
3. For each suggestion, briefly explain in plain language how it can help their child.
|
| 45 |
+
4. Maintain a collaborative and non-judgmental tone throughout.
|
| 46 |
+
5. Conclude with an encouraging statement that emphasizes partnership between home and school.
|
| 47 |
+
6. Do not invent strategies. Base all suggestions on the core ideas presented in the context, but rephrase them for a parent audience.
|
| 48 |
""",
|
| 49 |
"principal": """
|
| 50 |
+
You are an expert AI strategist and administrative partner for a high school principal. Your tone is strategic, data-informed, and focused on resource allocation and system-level thinking.
|
| 51 |
+
|
| 52 |
+
**Student Profile:**
|
| 53 |
+
{student_narrative}
|
| 54 |
+
|
| 55 |
+
**Relevant Intervention Data:**
|
| 56 |
+
The following evidence-based intervention strategies have been retrieved that match the student's profile:
|
| 57 |
+
|
| 58 |
+
--- BEGIN CONTEXT ---
|
| 59 |
+
{context}
|
| 60 |
+
--- END CONTEXT ---
|
| 61 |
+
|
| 62 |
+
**Your Task:**
|
| 63 |
+
Synthesize the retrieved data into a strategic summary for the school principal. The output should be a high-level overview suitable for administrative action and planning.
|
| 64 |
+
|
| 65 |
+
**Instructions:**
|
| 66 |
+
1. Begin with a one-sentence "Executive Summary" of the student's on-track status and primary risk factors (e.g., "Student is off-track due to attendance and core course failure.").
|
| 67 |
+
2. Identify the strategic *type* of intervention needed based on the evidence (e.g., Tier 2 Academic Support, Mentoring, Behavioral Intervention).
|
| 68 |
+
3. Highlight any **resource or staffing implications** suggested by the evidence. This is critical. For example, if the evidence mentions 'Check & Connect', you should note the need for a dedicated staff monitor. If it mentions 'tutoring', note the need for qualified tutors.
|
| 69 |
+
4. Recommend a clear, actionable next step for the principal or their designee (e.g., "Task the Freshman Success Team with creating a BAG report," or "Recommend counselor initiate a Check & Connect protocol.").
|
| 70 |
+
5. Ground all recommendations in the provided context. Do not invent information.
|
| 71 |
+
6. Keep the entire summary concise and formatted for quick reading.
|
| 72 |
""",
|
| 73 |
}
|
|
@@ -109,14 +109,21 @@ def generate_recommendation_summary(
|
|
| 109 |
api_key: str,
|
| 110 |
persona: str = "teacher",
|
| 111 |
model_name: str = GENERATIVE_MODEL_NAME,
|
| 112 |
-
) -> str:
|
| 113 |
"""
|
| 114 |
Generates a synthesized recommendation using the Google Gemini API.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
"""
|
| 116 |
genai.configure(api_key=api_key) # type: ignore
|
| 117 |
|
|
|
|
| 118 |
if persona not in PROMPT_TEMPLATES:
|
| 119 |
-
|
|
|
|
| 120 |
|
| 121 |
context = ""
|
| 122 |
for i, (chunk, _) in enumerate(retrieved_chunks):
|
|
@@ -130,6 +137,18 @@ def generate_recommendation_summary(
|
|
| 130 |
student_narrative=student_narrative, context=context
|
| 131 |
)
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
try:
|
| 134 |
print(
|
| 135 |
f"\nSynthesizing recommendation for persona: '{persona}' using {model_name}..."
|
|
@@ -137,6 +156,7 @@ def generate_recommendation_summary(
|
|
| 137 |
model = genai.GenerativeModel(model_name) # type: ignore
|
| 138 |
response = model.generate_content(prompt)
|
| 139 |
print("Synthesis complete.")
|
| 140 |
-
return response.text
|
| 141 |
except Exception as e:
|
| 142 |
-
|
|
|
|
|
|
| 109 |
api_key: str,
|
| 110 |
persona: str = "teacher",
|
| 111 |
model_name: str = GENERATIVE_MODEL_NAME,
|
| 112 |
+
) -> Tuple[str, Dict[str, Any]]: # Return text and a details dictionary
|
| 113 |
"""
|
| 114 |
Generates a synthesized recommendation using the Google Gemini API.
|
| 115 |
+
|
| 116 |
+
Returns:
|
| 117 |
+
A tuple containing:
|
| 118 |
+
- The synthesized recommendation text (str).
|
| 119 |
+
- A dictionary with detailed prompt information for logging (Dict).
|
| 120 |
"""
|
| 121 |
genai.configure(api_key=api_key) # type: ignore
|
| 122 |
|
| 123 |
+
prompt_details = {}
|
| 124 |
if persona not in PROMPT_TEMPLATES:
|
| 125 |
+
error_message = f"ERROR: Persona '{persona}' is not a valid choice."
|
| 126 |
+
return error_message, {"error": error_message}
|
| 127 |
|
| 128 |
context = ""
|
| 129 |
for i, (chunk, _) in enumerate(retrieved_chunks):
|
|
|
|
| 137 |
student_narrative=student_narrative, context=context
|
| 138 |
)
|
| 139 |
|
| 140 |
+
# --- Assemble the prompt dictionary ---
|
| 141 |
+
prompt_details = {
|
| 142 |
+
"persona": persona,
|
| 143 |
+
"llm_model_used": model_name,
|
| 144 |
+
"prompt_template": prompt_template,
|
| 145 |
+
"prompt_variables": {
|
| 146 |
+
"student_narrative": student_narrative,
|
| 147 |
+
"context": context,
|
| 148 |
+
},
|
| 149 |
+
"final_prompt_text": prompt,
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
try:
|
| 153 |
print(
|
| 154 |
f"\nSynthesizing recommendation for persona: '{persona}' using {model_name}..."
|
|
|
|
| 156 |
model = genai.GenerativeModel(model_name) # type: ignore
|
| 157 |
response = model.generate_content(prompt)
|
| 158 |
print("Synthesis complete.")
|
| 159 |
+
return response.text, prompt_details
|
| 160 |
except Exception as e:
|
| 161 |
+
error_message = f"An error occurred while calling the Gemini API: {e}"
|
| 162 |
+
return error_message, prompt_details
|