# Source: Hugging Face Spaces file view (file size: 5,540 bytes; commit 7644eac).
"""
Initialize the vector database with sample educational resources.
This provides some starter content for the Learning Path Generator.
"""
import os
import json
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables (python-dotenv; the messages below point the
# user at their .env file).
load_dotenv()

# Ensure the OpenAI API key is set before going any further: the project
# imports that follow are deliberately placed after this check.
if not os.getenv("OPENAI_API_KEY"):
    print("ERROR: OPENAI_API_KEY not set in environment variables")
    print("Please update your .env file with your API key")
    # `exit` is a helper injected by the `site` module for interactive
    # sessions and is missing when Python runs with -S; raising SystemExit
    # always works and yields the same exit status of 1.
    raise SystemExit(1)
# Import after checking API key
from src.data.document_store import DocumentStore
from src.data.resources import ResourceManager
from langchain.schema.document import Document
def load_sample_resources():
    """Return the sample educational resources, creating the file on first use.

    Looks for ``samples/sample_resources.json`` relative to the current
    working directory. If the file exists, its parsed JSON content is
    returned unchanged. Otherwise the parent directory is created as needed,
    a built-in starter list of five resources is written to the file
    (two-space indent), a confirmation line is printed, and that list is
    returned.

    Files are read and written as UTF-8 explicitly so the result does not
    depend on the platform's locale encoding.

    Returns:
        The parsed JSON content of the resources file. For the generated
        file this is a list of dicts with the keys ``title``, ``type``,
        ``description``, ``difficulty``, ``time_estimate``, ``url``,
        ``topic`` and ``learning_styles``.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
        OSError: If the file or its directory cannot be read or created.
    """
    resources_path = Path("samples/sample_resources.json")

    # Fast path: a resources file is already present, so just load it.
    if resources_path.exists():
        with open(resources_path, "r", encoding="utf-8") as f:
            return json.load(f)

    # First run: create the directory and seed the file with starter content.
    resources_path.parent.mkdir(exist_ok=True, parents=True)
    sample_resources = _default_sample_resources()
    with open(resources_path, "w", encoding="utf-8") as f:
        json.dump(sample_resources, f, indent=2)
    print(f"Created sample resources file at {resources_path}")
    return sample_resources


def _default_sample_resources():
    """Return a fresh copy of the built-in starter resources."""
    return [
        {
            "title": "Introduction to Machine Learning",
            "type": "course",
            "description": "A comprehensive beginner's course covering ML fundamentals",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/intro-ml",
            "topic": "machine learning",
            "learning_styles": ["visual", "reading"],
        },
        {
            "title": "Python for Data Science Handbook",
            "type": "book",
            "description": "Essential guide to using Python for data analysis and ML",
            "difficulty": "intermediate",
            "time_estimate": "20 hours",
            "url": "https://jakevdp.github.io/PythonDataScienceHandbook/",
            # Multiple topics are stored as one comma-separated string.
            "topic": "python,data science",
            "learning_styles": ["reading"],
        },
        {
            "title": "Web Development Bootcamp",
            "type": "course",
            "description": "Full stack web development from scratch",
            "difficulty": "beginner",
            "time_estimate": "40 hours",
            "url": "https://example.com/web-dev-bootcamp",
            "topic": "web development",
            "learning_styles": ["visual", "kinesthetic"],
        },
        {
            "title": "Advanced JavaScript Patterns",
            "type": "video",
            "description": "Deep dive into advanced JS design patterns",
            "difficulty": "advanced",
            "time_estimate": "3 hours",
            "url": "https://example.com/js-patterns",
            "topic": "javascript",
            "learning_styles": ["visual", "auditory"],
        },
        {
            "title": "Spanish Learning Podcast",
            "type": "podcast",
            "description": "Learn Spanish through immersive audio lessons",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/spanish-podcast",
            "topic": "spanish,language learning",
            "learning_styles": ["auditory"],
        },
    ]
def initialize_database():
    """Fill the vector database with the sample resources and smoke-test it.

    Creates a DocumentStore, loads the sample resources, turns each one into
    a Document, adds them all to the store, then runs one example query
    ("machine learning beginner", top 2) and prints the titles and relevance
    scores of the results. Progress is reported on stdout; nothing is
    returned.
    """
    print("Initializing vector database...")

    # The store is created before the resources are loaded.
    store = DocumentStore()
    sample_entries = load_sample_resources()

    # One Document per resource, in file order.
    documents = [_resource_to_document(entry) for entry in sample_entries]

    store.add_documents(documents)
    print(f"Added {len(documents)} sample resources to vector database")

    # Quick check that the freshly added documents can be searched.
    print("\nTesting search functionality...")
    hits = store.search_documents("machine learning beginner", top_k=2)
    print(f"Found {len(hits)} results for 'machine learning beginner'")
    for hit in hits:
        title = hit.metadata.get('title')
        # A result without a relevance score is shown as 0.00.
        score = hit.metadata.get('relevance_score', 0)
        print(f"- {title} (Relevance: {score:.2f})")

    print("\nDatabase initialization complete!")


def _resource_to_document(resource):
    """Build the Document (text plus metadata) for a single resource dict."""
    # Text layout: a leading newline, one "Label: value" pair per line, and a
    # trailing newline.
    content = (
        "\n"
        f"Title: {resource['title']}\n"
        f"Description: {resource['description']}\n"
        f"Type: {resource['type']}\n"
        f"Difficulty: {resource['difficulty']}\n"
        f"Topics: {resource.get('topic', '')}\n"
    )

    metadata = {
        "title": resource["title"],
        "type": resource["type"],
        "difficulty": resource["difficulty"],
        "url": resource["url"],
        # Comma-separated topic string -> list; a missing topic gives [""].
        "topic": resource.get("topic", "").split(","),
    }

    # Learning styles are optional and copied through only when present.
    if "learning_styles" in resource:
        metadata["learning_styles"] = resource["learning_styles"]

    return Document(page_content=content, metadata=metadata)
if __name__ == "__main__":
    # Seed the vector database only when this file is run as a script.
    initialize_database()
|