# chuckfinca's picture
# Refactor: Polish project for review
# d9d86e9
import os
from dotenv.main import load_dotenv
from fot_recommender.config import PROCESSED_DATA_DIR
from fot_recommender.rag_pipeline import (
load_knowledge_base,
initialize_embedding_model,
create_embeddings,
create_vector_db,
search_interventions,
generate_recommendation_summary,
)
# --- Sample Student Profile from Project Description ---
# Demo input for the pipeline. main() reads only the
# "narrative_summary_for_embedding" value, which it uses as the retrieval query.
sample_student_profile = {
    "student_id": "9th_Grader_A",
    "data_context": "End of First Semester",
    # Structured indicators that the narrative below restates in prose.
    "indicators": {
        "credits_earned": 2.5,
        "core_course_failures": 1,
        "attendance_percentage": 88,
        "behavioral_flags": 1,
    },
    # One sentence per group of literals; adjacent literals are concatenated.
    "narrative_summary_for_embedding": (
        "This student is struggling to keep up with coursework, having failed one "
        "core class and earning only 2.5 credits out of 4 credits expected for the "
        "semester. "
        "Attendance is becoming a concern at 88% for an average annual target of "
        "90%, and they have had one behavioral incident. "
        "The student needs targeted academic and attendance support to get back on "
        "track for graduation."
    ),
}
def main():
    """
    Run the FOT Intervention Recommender end to end for the sample student.

    Steps, in order:
    1. Load the chunked knowledge base from PROCESSED_DATA_DIR.
    2. Initialize the embedding model and embed every chunk.
    3. Build the vector database over those embeddings.
    4. Search for interventions matching the sample student's narrative.
    5. Generate a synthesized recommendation for the 'teacher' persona and
       print it together with the sources it was retrieved from.

    Returns:
        None when the run completes, or when it halts early because no
        knowledge base chunks were loaded or no interventions were found
        (a message is printed in both cases). An error message string when
        the FOT_GOOGLE_API_KEY environment variable is not set; that message
        is returned, not printed.
    """
    rule = "-" * 50
    banner = "=" * 50

    print("--- FOT Intervention Recommender ---")

    # --- 1. Knowledge base ---
    kb_path = PROCESSED_DATA_DIR / "knowledge_base_final_chunks.json"
    chunks = load_knowledge_base(str(kb_path))
    if not chunks:
        print("Halting execution due to missing knowledge base.")
        return
    print(f"Successfully loaded {len(chunks)} processed chunks.")
    print(rule)

    # --- 2. Embedding model, chunk embeddings, vector database ---
    encoder = initialize_embedding_model()
    chunk_vectors = create_embeddings(chunks, encoder)
    index = create_vector_db(chunk_vectors)
    print(rule)

    # --- 3. Retrieval for the sample student ---
    # The profile's narrative text is the query; k=3 asks for the top 3
    # interventions, filtered by a minimum similarity score of 0.4.
    narrative = sample_student_profile["narrative_summary_for_embedding"]
    matches = search_interventions(
        query=narrative,
        model=encoder,
        index=index,
        knowledge_base=chunks,
        min_similarity_score=0.4,
        k=3,
    )
    if not matches:
        print("Could not find relevant interventions for the student.")
        return

    # --- 4. Synthesized recommendation (for 'teacher' persona) ---
    # load_dotenv() runs before the environment lookup so a key defined in a
    # .env file can be picked up.
    load_dotenv()
    api_key = os.getenv("FOT_GOOGLE_API_KEY")
    if not api_key:
        # NOTE: this path hands the message back to the caller instead of
        # printing it, unlike the two early exits above.
        return "ERROR: FOT_GOOGLE_API_KEY is not set. Create a .env file with FOT_GOOGLE_API_KEY='YOUR_KEY_HERE'. Get key: https://aistudio.google.com/apikey"
    # Only the first element of the returned pair is used here.
    summary, _ = generate_recommendation_summary(
        matches, narrative, api_key=api_key, persona="teacher"
    )

    # --- 5. Final output ---
    print(f"\n{banner}")
    print(" FINAL SYNTHESIZED RECOMMENDATION FOR EDUCATOR")
    print(f"{banner}\n")
    print(summary)
    print(f"\n{rule}")
    print("Evidence retrieved from the following sources:")
    for doc, relevance in matches:
        print(
            f"- {doc['title']} (Source: {doc['source_document']}, Relevance: {relevance:.2f})"
        )
    print("\n\n✅ Full RAG process complete!")
if __name__ == "__main__":
    # main() returns an error message string when FOT_GOOGLE_API_KEY is not
    # set, and None otherwise. Passing that value to SystemExit surfaces it:
    # a string is printed to stderr and the process exits with status 1,
    # while None exits with status 0. Calling main() bare would discard the
    # message and make a missing key look like a silent success.
    raise SystemExit(main())