|
|
""" |
|
|
Initialize the vector database with sample educational resources. |
|
|
This provides some starter content for the Learning Path Generator. |
|
|
""" |
|
|
import os |
|
|
import json |
|
|
from pathlib import Path |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
# Load variables from the project's .env file into the process environment
# before the API-key check below reads them.
load_dotenv()

# Fail fast: stop here, before the project modules below are imported, when
# no OpenAI API key is configured.
if not os.getenv("OPENAI_API_KEY"):
    print("ERROR: OPENAI_API_KEY not set in environment variables")
    print("Please update your .env file with your API key")
    # The bare `exit()` helper is injected by the `site` module for
    # interactive sessions and is missing under `python -S` or in frozen
    # builds; raising SystemExit gives the same exit status everywhere.
    raise SystemExit(1)
|
|
|
|
|
|
|
|
from src.data.document_store import DocumentStore |
|
|
from src.data.resources import ResourceManager |
|
|
from langchain.schema.document import Document |
|
|
|
|
|
def load_sample_resources():
    """Return the sample resources, creating the JSON file on first use.

    The file lives at ``samples/sample_resources.json``, resolved against the
    current working directory. When it already exists its parsed contents are
    returned unchanged. Otherwise the built-in starter resources are written
    to that path (creating the parent directory if needed) and returned.

    Returns:
        The decoded JSON content of the file; for the built-in data this is a
        list of dicts with the keys ``title``, ``type``, ``description``,
        ``difficulty``, ``time_estimate``, ``url``, ``topic`` and
        ``learning_styles``.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
        OSError: If the file or its directory cannot be read or written.
    """
    resources_path = Path("samples/sample_resources.json")

    # Existing file wins: never overwrite resources a user may have edited.
    if resources_path.exists():
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(resources_path, "r", encoding="utf-8") as f:
            return json.load(f)

    resources_path.parent.mkdir(exist_ok=True, parents=True)

    # "topic" holds a comma-separated string; consumers split it themselves.
    sample_resources = [
        {
            "title": "Introduction to Machine Learning",
            "type": "course",
            "description": "A comprehensive beginner's course covering ML fundamentals",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/intro-ml",
            "topic": "machine learning",
            "learning_styles": ["visual", "reading"],
        },
        {
            "title": "Python for Data Science Handbook",
            "type": "book",
            "description": "Essential guide to using Python for data analysis and ML",
            "difficulty": "intermediate",
            "time_estimate": "20 hours",
            "url": "https://jakevdp.github.io/PythonDataScienceHandbook/",
            "topic": "python,data science",
            "learning_styles": ["reading"],
        },
        {
            "title": "Web Development Bootcamp",
            "type": "course",
            "description": "Full stack web development from scratch",
            "difficulty": "beginner",
            "time_estimate": "40 hours",
            "url": "https://example.com/web-dev-bootcamp",
            "topic": "web development",
            "learning_styles": ["visual", "kinesthetic"],
        },
        {
            "title": "Advanced JavaScript Patterns",
            "type": "video",
            "description": "Deep dive into advanced JS design patterns",
            "difficulty": "advanced",
            "time_estimate": "3 hours",
            "url": "https://example.com/js-patterns",
            "topic": "javascript",
            "learning_styles": ["visual", "auditory"],
        },
        {
            "title": "Spanish Learning Podcast",
            "type": "podcast",
            "description": "Learn Spanish through immersive audio lessons",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/spanish-podcast",
            "topic": "spanish,language learning",
            "learning_styles": ["auditory"],
        },
    ]

    with open(resources_path, "w", encoding="utf-8") as f:
        json.dump(sample_resources, f, indent=2)

    print(f"Created sample resources file at {resources_path}")
    return sample_resources
|
|
|
|
|
def _resource_to_document(resource):
    """Convert one resource dict into a ``Document`` with searchable text."""
    raw_topics = resource.get("topic", "")

    # Assemble the text line by line so no source-code indentation ends up
    # inside the embedded content; the leading/trailing "" entries keep the
    # surrounding newlines of the original triple-quoted layout.
    content = "\n".join(
        [
            "",
            f"Title: {resource['title']}",
            f"Description: {resource['description']}",
            f"Type: {resource['type']}",
            f"Difficulty: {resource['difficulty']}",
            f"Topics: {raw_topics}",
            "",
        ]
    )

    # Strip each topic and drop empties: a plain "".split(",") would produce
    # [""] for a resource without topics.
    # NOTE(review): list-valued metadata is not accepted by every vector
    # store backend - confirm DocumentStore handles it.
    metadata = {
        "title": resource["title"],
        "type": resource["type"],
        "difficulty": resource["difficulty"],
        "url": resource["url"],
        "topic": [t.strip() for t in raw_topics.split(",") if t.strip()],
    }

    # learning_styles is optional and only copied when the resource has it.
    if "learning_styles" in resource:
        metadata["learning_styles"] = resource["learning_styles"]

    return Document(page_content=content, metadata=metadata)


def initialize_database():
    """Initialize the vector database with sample resources.

    Creates a ``DocumentStore``, converts every resource returned by
    ``load_sample_resources()`` into a ``Document``, adds them to the store,
    and then runs one sample query ("machine learning beginner", top 2),
    printing the titles and relevance scores of the hits as a smoke test.

    Raises:
        KeyError: If a resource lacks ``title``, ``description``, ``type``,
            ``difficulty`` or ``url``.
    """
    print("Initializing vector database...")

    document_store = DocumentStore()
    resources = load_sample_resources()

    documents = [_resource_to_document(resource) for resource in resources]

    document_store.add_documents(documents)
    print(f"Added {len(documents)} sample resources to vector database")

    # Smoke test: run one query so a broken store shows up immediately.
    print("\nTesting search functionality...")
    results = document_store.search_documents("machine learning beginner", top_k=2)
    print(f"Found {len(results)} results for 'machine learning beginner'")
    for result in results:
        # relevance_score is read from the result metadata; 0 is shown when
        # it is absent.
        print(f"- {result.metadata.get('title')} (Relevance: {result.metadata.get('relevance_score', 0):.2f})")

    print("\nDatabase initialization complete!")
|
|
|
|
|
# Run the initialization only when this file is executed as a script, not
# when it is imported.
if __name__ == "__main__":
    initialize_database()
|
|
|