“shubhamdhamal” committed on
Commit
7644eac
·
1 Parent(s): d25847c

Deploy Flask app with Docker

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +22 -0
  2. Dockerfile +47 -0
  3. README.md +19 -6
  4. backend/Dockerfile +20 -0
  5. backend/Procfile +1 -0
  6. backend/__init__.py +4 -0
  7. backend/app.py +44 -0
  8. backend/requirements.txt +7 -0
  9. backend/routes.py +228 -0
  10. clear_cache.py +56 -0
  11. config.py +37 -0
  12. fix_colors.py +154 -0
  13. fix_learning_path_indentation.py +86 -0
  14. init_db.py +21 -0
  15. init_render_db.py +87 -0
  16. initialize_db.py +149 -0
  17. migrations/README +1 -0
  18. migrations/add_chatbot_tables.py +129 -0
  19. migrations/add_conversation_memory.sql +19 -0
  20. migrations/add_resource_progress.py +39 -0
  21. migrations/alembic.ini +50 -0
  22. migrations/env.py +113 -0
  23. migrations/script.py.mako +24 -0
  24. migrations/versions/12d5dfb6fd16_sync_users_table.py +111 -0
  25. migrations/versions/39d22a91999a_initial_migration.py +75 -0
  26. migrations/versions/6b20f44f6a00_make_oauth_user_id_nullable.py +36 -0
  27. migrations/versions/9f32f1920608_add_oauth_table_for_flask_dance.py +46 -0
  28. migrations/versions/a1b2c3d4e5f6_add_progress_tracking_tables.py +86 -0
  29. minimal_test.py +38 -0
  30. requirements.txt +76 -0
  31. run.py +79 -0
  32. run_flask.py +23 -0
  33. setup.py +28 -0
  34. src/__init__.py +1 -0
  35. src/agent.py +577 -0
  36. src/agents/__init__.py +9 -0
  37. src/agents/base_agent.py +234 -0
  38. src/agents/research_agent.py +323 -0
  39. src/agents/teaching_agent.py +356 -0
  40. src/data/bm25_retriever.py +173 -0
  41. src/data/document_store.py +973 -0
  42. src/data/resources.py +202 -0
  43. src/data/skills_database.py +999 -0
  44. src/data/vector_store.py +173 -0
  45. src/direct_openai.py +107 -0
  46. src/learning_path.py +916 -0
  47. src/ml/context_compressor.py +182 -0
  48. src/ml/embeddings.py +130 -0
  49. src/ml/job_market.py +177 -0
  50. src/ml/model_orchestrator.py +1187 -0
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Environment
7
+ .env
8
+ .venv
9
+ venv/
10
+ ENV/
11
+
12
+ # IDE
13
+ .vscode/
14
+ .idea/
15
+
16
+ # Cache
17
+ cache/
18
+ *.cache
19
+
20
+ # Local data
21
+ *.sqlite3
22
+ *.db
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces Dockerfile
2
+ # Reference: https://huggingface.co/docs/hub/spaces-sdks-docker
3
+
4
+ FROM python:3.11-slim
5
+
6
+ # Set working directory
7
+ WORKDIR /app
8
+
9
+ # Install system dependencies
10
+ RUN apt-get update && apt-get install -y \
11
+ gcc \
12
+ g++ \
13
+ libmagic1 \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Create a non-root user (required by Hugging Face Spaces)
17
+ RUN useradd -m -u 1000 user
18
+ USER user
19
+ ENV HOME=/home/user \
20
+ PATH=/home/user/.local/bin:$PATH
21
+
22
+ # Set working directory for user
23
+ WORKDIR $HOME/app
24
+
25
+ # Copy requirements first for caching
26
+ COPY --chown=user requirements.txt .
27
+
28
+ # Install Python dependencies
29
+ RUN pip install --no-cache-dir --upgrade pip && \
30
+ pip install --no-cache-dir -r requirements.txt
31
+
32
+ # Copy application code
33
+ COPY --chown=user . .
34
+
35
+ # Create necessary directories with proper permissions
36
+ RUN mkdir -p vector_db cache learning_paths
37
+
38
+ # Hugging Face Spaces requires port 7860
39
+ EXPOSE 7860
40
+
41
+ # Set environment variables for Hugging Face
42
+ ENV PORT=7860
43
+ ENV FLASK_ENV=production
44
+ ENV PYTHONUNBUFFERED=1
45
+
46
+ # Run the Flask app with gunicorn
47
+ CMD ["gunicorn", "run:app", "--bind", "0.0.0.0:7860", "--workers", "2", "--timeout", "120"]
README.md CHANGED
@@ -1,12 +1,25 @@
1
  ---
2
- title: Ai Learning Path Generator
3
- emoji: 📊
4
- colorFrom: red
5
- colorTo: green
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
- short_description: LLM Based ai-learning-path-generator
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: AI Learning Path Generator
3
+ emoji: 🎓
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ app_port: 7860
10
+ short_description: LLM Based AI Learning Path Generator
11
  ---
12
 
13
+ # AI Learning Path Generator
14
+
15
+ Generate personalized AI-powered learning paths for any topic.
16
+
17
+ ## Features
18
+ - 🎯 Personalized learning path generation
19
+ - 🤖 AI-powered content curation
20
+ - 📚 Structured curriculum creation
21
+
22
+ ## Setup
23
+ Configure these secrets in your Space settings:
24
+ - `OPENAI_API_KEY` - Your OpenAI API key
25
+ - `SECRET_KEY` - Flask secret key
backend/Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Copy backend requirements
6
+ COPY backend/requirements.txt /app/backend/requirements.txt
7
+
8
+ # Install dependencies
9
+ RUN pip install --no-cache-dir -r backend/requirements.txt
10
+
11
+ # Copy backend code
12
+ COPY backend/ /app/backend/
13
+
14
+ # Copy .env if exists
15
+ COPY .env* /app/
16
+
17
+ EXPOSE 5000
18
+
19
+ # Use shell form so $PORT expands on Render
20
+ CMD ["sh", "-c", "gunicorn backend.app:app --bind 0.0.0.0:${PORT:-5000} --workers 2 --timeout 30"]
backend/Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn backend.app:app --bind 0.0.0.0:$PORT --workers 2 --timeout 30
backend/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ Backend API for AI Learning Path Generator
3
+ Lightweight Flask API that queues tasks and returns status
4
+ """
backend/app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unified Flask + React Application
3
+ Serves React frontend at root, Flask API routes, and OAuth
4
+ """
5
+ from backend.routes import api_bp
6
+ from web_app import create_app
7
+ import os
8
+ from flask import jsonify
9
+ from flask_cors import CORS
10
+ from dotenv import load_dotenv
11
+
12
+ # Load environment variables
13
+ load_dotenv()
14
+
15
+ # Create the main app using the existing web_app factory (includes DB, OAuth, routes)
16
+ app = create_app()
17
+
18
+ # Register the lightweight API blueprint for RQ task orchestration under /api
19
+ app.register_blueprint(api_bp, url_prefix='/api')
20
+
21
+ # Enable CORS for the React frontend, mobile app, and allow cookies for auth
22
+ frontend_origin = os.getenv('FRONTEND_ORIGIN', 'http://localhost:3000')
23
+ allowed_origins = [
24
+ frontend_origin,
25
+ "http://localhost:3000",
26
+ "http://localhost:8081", # Expo mobile app
27
+ "http://127.0.0.1:8081",
28
+ "http://localhost:19006", # Expo web
29
+ ]
30
+ CORS(
31
+ app,
32
+ resources={r"/*": {"origins": allowed_origins}},
33
+ supports_credentials=True,
34
+ )
35
+
36
+
37
+ @app.route('/health')
38
+ def health():
39
+ return jsonify({"status": "healthy", "service": "api+web"}), 200
40
+
41
+
42
+ if __name__ == '__main__':
43
+ port = int(os.getenv('PORT', 5000))
44
+ app.run(host='0.0.0.0', port=port, debug=False)
backend/requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Backend API requirements (lightweight)
2
+ Flask>=2.0.1
3
+ flask-cors>=4.0.0
4
+ python-dotenv==1.0.1
5
+ redis>=5.0.0
6
+ rq==1.16.1
7
+ gunicorn>=21.2.0
backend/routes.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API Routes for task management
3
+ """
4
+ import os
5
+ import uuid
6
+ import json
7
+ from flask import Blueprint, request, jsonify
8
+ from datetime import datetime
9
+
10
+ api_bp = Blueprint('rq_api', __name__)
11
+
12
# Redis connection - optional for local development
# Note: decode_responses=False is required for RQ (job results are pickled bytes, not strings)
redis_client = None
REDIS_URL = os.getenv('REDIS_URL')


def get_redis_client():
    """Lazily create and memoize the module-level Redis client.

    Returns the shared client on success, or None when the redis package is
    missing or the server is unreachable — callers fall back to synchronous
    execution in that case.
    """
    global redis_client
    if redis_client is not None:
        return redis_client

    try:
        import redis
        url = REDIS_URL
        if url and url.startswith(('redis://', 'rediss://')):
            # TLS URLs additionally disable certificate verification
            # (managed Redis providers often use self-signed certs).
            kwargs = {'decode_responses': False}
            if url.startswith('rediss://'):
                kwargs['ssl_cert_reqs'] = None
            redis_client = redis.from_url(url, **kwargs)
        else:
            redis_client = redis.Redis(
                host=os.getenv('REDIS_HOST', 'localhost'),
                port=int(os.getenv('REDIS_PORT', 6379)),
                db=int(os.getenv('REDIS_DB', 0)),
                decode_responses=False,
            )
        # Fail fast if the server is not actually reachable.
        redis_client.ping()
        return redis_client
    except Exception as e:
        print(f"Redis not available: {e}")
        redis_client = None
        return None
+
48
+
49
# In-memory storage for synchronous task results (for local dev without Redis)
sync_task_results = {}


@api_bp.route('/generate', methods=['POST'])
def generate_path():
    """
    Generate a learning path. Uses RQ queue if Redis is available,
    otherwise runs synchronously for local development.
    Returns the job ID immediately (async) or result directly (sync).

    Responses:
        202 - job queued (async); body carries task_id
        200 - generated synchronously; body carries task_id and result
        400 - missing/invalid JSON body or missing required field
        500 - generation failed
    """
    try:
        # silent=True makes a missing or malformed body yield None instead of
        # raising; the original code's `field not in data` would then raise
        # TypeError and surface as an opaque 500. Reject it explicitly.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        # Validate required fields
        required_fields = ['topic', 'expertise_level',
                           'duration_weeks', 'time_commitment']
        for field in required_fields:
            if field not in data:
                return jsonify({"error": f"Missing required field: {field}"}), 400

        # Try to use Redis/RQ for async processing
        redis_conn = get_redis_client()
        if redis_conn:
            try:
                from rq import Queue
                q = Queue('learning-paths', connection=redis_conn)
                job = q.enqueue(
                    'worker.tasks.generate_learning_path_for_worker', data)

                return jsonify({
                    "task_id": job.id,
                    "status": "queued",
                    "message": "Learning path generation started"
                }), 202
            except Exception as rq_error:
                print(f"RQ error, falling back to sync: {rq_error}")

        # Fallback: Run synchronously for local development
        task_id = str(uuid.uuid4())
        sync_task_results[task_id] = {"status": "processing"}

        try:
            # Import lazily so the module loads even when the heavy
            # generation stack is unavailable.
            from src.learning_path import LearningPathGenerator

            generator = LearningPathGenerator()

            # Normalize goals: accept a list, a non-empty string, or nothing.
            goals_raw = data.get('goals')
            if isinstance(goals_raw, list):
                goals = goals_raw
            elif isinstance(goals_raw, str) and goals_raw.strip():
                goals = [goals_raw.strip()]
            else:
                goals = None

            learning_path = generator.generate_path(
                topic=data['topic'],
                expertise_level=data['expertise_level'],
                learning_style=None,
                time_commitment=data.get('time_commitment', '5-10 hours/week'),
                duration_weeks=int(data['duration_weeks']),
                goals=goals,
                ai_provider=data.get('ai_provider', 'openrouter'),
                ai_model=data.get('ai_model')
            )

            # Pydantic models expose .dict(); plain dicts pass through.
            result = learning_path.dict() if hasattr(
                learning_path, 'dict') else learning_path

            sync_task_results[task_id] = {
                "status": "finished",
                "result": result
            }

            return jsonify({
                "task_id": task_id,
                "status": "finished",
                "message": "Learning path generated successfully",
                "result": result
            }), 200

        except Exception as gen_error:
            sync_task_results[task_id] = {
                "status": "failed",
                "error": str(gen_error)
            }
            return jsonify({
                "task_id": task_id,
                "status": "failed",
                "error": str(gen_error)
            }), 500

    except Exception as e:
        return jsonify({"error": str(e)}), 500
+
146
+
147
@api_bp.route('/status/<task_id>', methods=['GET'])
def get_status(task_id):
    """
    Get the current status of a task (RQ job or sync task)
    """
    try:
        # Synchronous (no-Redis) tasks live in the in-process dict.
        record = sync_task_results.get(task_id)
        if record is not None:
            payload = {
                "task_id": task_id,
                "status": record["status"],
            }
            if record["status"] == "finished":
                payload["result"] = record.get("result")
            if record["status"] == "failed":
                payload["error"] = record.get("error")
            return jsonify(payload), 200

        # Otherwise look the job up in RQ, if Redis is reachable.
        conn = get_redis_client()
        if conn:
            from rq import Queue
            job = Queue('learning-paths', connection=conn).fetch_job(task_id)
            if job is None:
                return jsonify({"error": "Task not found"}), 404

            payload = {
                "task_id": job.id,
                "status": job.get_status(),
            }
            if job.is_finished:
                payload["result"] = job.result
            if job.is_failed:
                payload["error"] = str(job.exc_info)
            return jsonify(payload), 200

        return jsonify({"error": "Task not found"}), 404
    except Exception as e:
        return jsonify({"error": str(e)}), 500
188
+
189
+
190
@api_bp.route('/result/<task_id>', methods=['GET'])
def get_result(task_id):
    """
    Get the final result of a task (RQ job or sync task)
    """
    try:
        # Synchronous (no-Redis) tasks live in the in-process dict.
        record = sync_task_results.get(task_id)
        if record is not None:
            status = record["status"]
            if status == "finished":
                return jsonify(record.get("result", {})), 200
            if status == "failed":
                return jsonify({"error": record.get("error")}), 500
            return jsonify({
                "error": "Task not yet complete",
                "status": status
            }), 202

        # Otherwise look the job up in RQ, if Redis is reachable.
        conn = get_redis_client()
        if conn:
            from rq import Queue
            job = Queue('learning-paths', connection=conn).fetch_job(task_id)
            if job is None:
                return jsonify({"error": "Task not found"}), 404
            if not job.is_finished:
                return jsonify({
                    "error": "Task not yet complete",
                    "status": job.get_status()
                }), 202
            return jsonify(job.result), 200

        return jsonify({"error": "Task not found"}), 404
    except Exception as e:
        return jsonify({"error": str(e)}), 500
clear_cache.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple script to clear the Redis cache.
3
+ Run this when you need to reset all cached learning paths.
4
+ """
5
+ import redis
6
+ import os
7
+ from dotenv import load_dotenv
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
+ REDIS_HOST = os.getenv('REDIS_HOST', 'localhost')
13
+ REDIS_PORT = int(os.getenv('REDIS_PORT', 6379))
14
+ REDIS_PASSWORD = os.getenv('REDIS_PASSWORD', '').strip() # Strip whitespace
15
+ REDIS_DB = int(os.getenv('REDIS_DB', 0))
16
+
17
+ print(f"🔍 Connecting to Redis at {REDIS_HOST}:{REDIS_PORT} (password: {'set' if REDIS_PASSWORD else 'none'})")
18
+
19
+ try:
20
+ # Build Redis connection params
21
+ redis_params = {
22
+ 'host': REDIS_HOST,
23
+ 'port': REDIS_PORT,
24
+ 'db': REDIS_DB,
25
+ 'decode_responses': True
26
+ }
27
+ # Only add password if it's not empty
28
+ if REDIS_PASSWORD:
29
+ redis_params['password'] = REDIS_PASSWORD
30
+ print("🔐 Using password authentication")
31
+
32
+ redis_client = redis.Redis(**redis_params)
33
+
34
+ # Get all cache keys
35
+ path_keys = list(redis_client.scan_iter(match="path_cache:*"))
36
+ semantic_keys = list(redis_client.scan_iter(match="semantic_cache:*"))
37
+
38
+ total_keys = len(path_keys) + len(semantic_keys)
39
+
40
+ if total_keys == 0:
41
+ print("✅ Cache is already empty!")
42
+ else:
43
+ # Delete all cache keys
44
+ if path_keys:
45
+ redis_client.delete(*path_keys)
46
+ print(f"🗑️ Deleted {len(path_keys)} learning path cache entries")
47
+
48
+ if semantic_keys:
49
+ redis_client.delete(*semantic_keys)
50
+ print(f"🗑️ Deleted {len(semantic_keys)} semantic cache entries")
51
+
52
+ print(f"✅ Successfully cleared {total_keys} total cache entries!")
53
+
54
+ except Exception as e:
55
+ print(f"❌ Error clearing cache: {e}")
56
+ print("Make sure Redis is running and your .env file is configured correctly.")
config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dotenv import load_dotenv

# Project root (the directory containing this file).
basedir = os.path.abspath(os.path.dirname(__file__))

# On Render the environment is injected by the platform; only read a local
# .env file during development.
if not os.environ.get('RENDER'):
    load_dotenv(os.path.join(basedir, '.env'))

# Make `flask db upgrade` work without an explicit FLASK_APP export.
os.environ.setdefault('FLASK_APP', 'run.py')


class Config:
    """Central Flask configuration object (read once at import time)."""

    SECRET_KEY = os.environ.get('FLASK_SECRET_KEY') or 'dev-secret-key-change-in-production-2024'
    SQLALCHEMY_DATABASE_URI = (
        os.environ.get('DATABASE_URL')
        or 'sqlite:///' + os.path.join(basedir, 'app.db')
    )
    SQLALCHEMY_TRACK_MODIFICATIONS = False

    # Session cookie behaviour
    SESSION_COOKIE_HTTPONLY = True
    SESSION_COOKIE_SAMESITE = 'Lax'  # default for local development
    PERMANENT_SESSION_LIFETIME = 7200  # seconds (2 hours)
    SESSION_REFRESH_EACH_REQUEST = True  # keep sessions alive while active
    SESSION_USE_SIGNER = True  # sign session cookies
    SESSION_COOKIE_NAME = 'learning_path_session'

    if os.environ.get('RENDER'):
        # Production: HTTPS-only cookies, and SameSite=None so the cookie
        # survives the cross-site OAuth redirect.
        SESSION_COOKIE_SECURE = True
        SESSION_COOKIE_SAMESITE = 'None'
        REMEMBER_COOKIE_SECURE = True
        REMEMBER_COOKIE_SAMESITE = 'None'
    else:
        # Local development over plain HTTP.
        SESSION_COOKIE_SECURE = False
        REMEMBER_COOKIE_SECURE = False

    LOG_TO_STDOUT = os.environ.get('LOG_TO_STDOUT')
fix_colors.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Color Fix Script for AI Learning Path Generator
4
+ Automatically replaces white backgrounds and bright colors with dark glassmorphic theme
5
+ """
6
+
7
+ import os
8
+ import shutil
9
+ from pathlib import Path
10
+
11
def backup_file(filepath):
    """Copy *filepath* to a sibling '<name>.backup' file and return that path."""
    duplicate = f"{filepath}.backup"
    # copy2 preserves metadata (mtime/permissions) along with the contents.
    shutil.copy2(filepath, duplicate)
    print(f"✅ Backup created: {duplicate}")
    return duplicate
17
+
18
def fix_colors(filepath):
    """Rewrite light-theme utility classes in *filepath* to the dark glassmorphic palette.

    Runs a fixed sequence of literal find/replace passes over the file
    (backgrounds, text colors, borders, two specific headings, Chart.js
    colors), writes the file back in place, and returns the total number
    of substitutions performed.
    """
    print(f"\n🎨 Fixing colors in: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as fh:
        markup = fh.read()

    total = 0

    # 1. Replace white backgrounds with glass-card
    surface_swaps = [
        ('bg-white rounded-xl shadow-xl', 'glass-card'),
        ('bg-white rounded-lg shadow-xl', 'glass-card'),
        ('bg-white rounded-lg shadow-md', 'glass-card'),
        ('bg-white rounded-lg shadow', 'glass-card'),
        ('bg-white p-4 rounded-lg shadow', 'glass-card p-4'),
        ('bg-white p-8 rounded-xl', 'glass-card p-8'),
        ('bg-gray-100', 'glass-card'),
        ('bg-gray-50', 'glass-card'),
        ('bg-gray-200', 'glass-card'),
    ]

    # 2. Replace text colors
    text_swaps = [
        ('text-gray-900', 'text-white'),
        ('text-gray-800', 'text-white'),
        ('text-gray-700', 'text-secondary'),
        ('text-gray-600', 'text-secondary'),
        ('text-gray-500', 'text-muted'),
        ('text-magenta', 'text-neon-purple'),
    ]

    border_swaps = [
        ('border-gray-200', 'border-transparent'),
        ('border-gray-300', 'border-glass'),
    ]

    # The three groups apply sequentially in this exact order, so a single
    # loop over the concatenated list is equivalent to the three originals.
    for before, after in surface_swaps + text_swaps + border_swaps:
        hits = markup.count(before)
        if hits > 0:
            markup = markup.replace(before, after)
            total += hits
            print(f" ✓ Replaced '{before}' → '{after}' ({hits} times)")

    # 3. Fix specific sections
    specific_fixes = [
        # Learning Journey title
        ('<h3 class="text-2xl font-bold text-white mb-6">Your Learning Journey</h3>',
         '<h3 class="text-2xl font-bold text-white mb-6">Your Learning <span class="text-neon-cyan">Journey</span></h3>'),

        # Milestones title (identical pair — skipped by the before != after guard)
        ('<h3 class="text-3xl font-bold text-white mb-8 text-center">Your Learning <span class="text-neon-purple">Milestones</span></h3>',
         '<h3 class="text-3xl font-bold text-white mb-8 text-center">Your Learning <span class="text-neon-purple">Milestones</span></h3>'),
    ]

    for before, after in specific_fixes:
        if before in markup and before != after:
            markup = markup.replace(before, after)
            total += 1
            print(f" ✓ Fixed specific section")

    # 4. Fix Chart.js colors (if present)
    chart_fixes = [
        # Pink to Neon Cyan
        ("'rgba(255, 99, 132, 0.5)'", "'rgba(74, 216, 255, 0.3)'"),
        ("'rgba(255, 99, 132, 1)'", "'rgba(74, 216, 255, 1)'"),

        # Yellow to Neon Purple
        ("'rgba(255, 206, 86, 1)'", "'rgba(179, 125, 255, 1)'"),
        ("'rgba(255, 206, 86, 0.5)'", "'rgba(179, 125, 255, 0.3)'"),
    ]

    for before, after in chart_fixes:
        if before in markup:
            markup = markup.replace(before, after)
            total += 1
            print(f" ✓ Fixed chart color")

    with open(filepath, 'w', encoding='utf-8') as fh:
        fh.write(markup)

    print(f"\n✅ Applied {total} color fixes to {filepath}")
    return total
118
+
119
def main():
    """Back up and recolor every known template file, then print a summary."""
    print("🎨 AI Learning Path Generator - Color Fix Script")
    print("=" * 60)

    # Templates known to still use the light palette.
    template_dir = Path("web_app/templates")
    targets = [
        template_dir / "result.html",
        template_dir / "index.html",
        template_dir / "dashboard.html",
    ]

    total_changes = 0

    for target in targets:
        if not target.exists():
            print(f"⚠️ File not found: {target}")
            continue
        # Always snapshot before mutating the file in place.
        backup_file(target)
        total_changes += fix_colors(target)

    print("\n" + "=" * 60)
    print(f"🎉 Color fix complete! Total changes: {total_changes}")
    print("\n📋 Next steps:")
    print("1. Review the changes in your IDE")
    print("2. Test the application")
    print("3. If issues occur, restore from .backup files")
    print("\n💡 Tip: Clear browser cache (Ctrl+Shift+R) to see changes")

if __name__ == "__main__":
    main()
fix_learning_path_indentation.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Script to fix the indentation in src/learning_path.py
3
+ This adds proper try-except structure for observability tracking.
4
+ """
5
+
6
+ import re
7
+
8
+ # Read the file
9
+ with open('src/learning_path.py', 'r', encoding='utf-8') as f:
10
+ content = f.read()
11
+
12
+ # Find the generate_path method and fix indentation
13
+ # The issue is that code after line 323 needs to be indented under the try block
14
+
15
+ # Pattern: Find from "relevant_docs = " to the end of generate_path method (before "def save_path")
16
+ # We need to indent everything between the try block and the except block
17
+
18
+ lines = content.split('\n')
19
+ fixed_lines = []
20
+ in_try_block = False
21
+ try_start_line = None
22
+ indent_needed = False
23
+
24
+ for i, line in enumerate(lines):
25
+ # Detect the start of the try block in generate_path
26
+ if 'try:' in line and i > 280 and i < 310: # Around line 300
27
+ in_try_block = True
28
+ try_start_line = i
29
+ fixed_lines.append(line)
30
+ continue
31
+
32
+ # Detect where indentation is missing (after the validation checks)
33
+ if in_try_block and line.strip().startswith('relevant_docs = '):
34
+ indent_needed = True
35
+
36
+ # Stop indenting at the except block or next method
37
+ if indent_needed and (line.strip().startswith('except Exception') or line.strip().startswith('def save_path')):
38
+ indent_needed = False
39
+ in_try_block = False
40
+
41
+ # Add the except block before this line if it's "def save_path"
42
+ if line.strip().startswith('def save_path'):
43
+ # Add proper except block
44
+ fixed_lines.append('')
45
+ fixed_lines.append(' except Exception as e:')
46
+ fixed_lines.append(' # Mark as failed')
47
+ fixed_lines.append(' error_message = str(e)')
48
+ fixed_lines.append(' ')
49
+ fixed_lines.append(' # Log failure metrics')
50
+ fixed_lines.append(' generation_time_ms = (time.time() - generation_start_time) * 1000')
51
+ fixed_lines.append(' self.obs_manager.log_metric("path_generation_success", 0.0, {')
52
+ fixed_lines.append(' "topic": topic,')
53
+ fixed_lines.append(' "expertise_level": expertise_level,')
54
+ fixed_lines.append(' "error": error_message,')
55
+ fixed_lines.append(' "duration_ms": generation_time_ms,')
56
+ fixed_lines.append(' "user_id": user_id')
57
+ fixed_lines.append(' })')
58
+ fixed_lines.append(' ')
59
+ fixed_lines.append(' self.obs_manager.log_event("path_generation_failed", {')
60
+ fixed_lines.append(' "topic": topic,')
61
+ fixed_lines.append(' "expertise_level": expertise_level,')
62
+ fixed_lines.append(' "error": error_message,')
63
+ fixed_lines.append(' "generation_time_ms": generation_time_ms,')
64
+ fixed_lines.append(' "user_id": user_id')
65
+ fixed_lines.append(' })')
66
+ fixed_lines.append(' ')
67
+ fixed_lines.append(' # Re-raise the exception')
68
+ fixed_lines.append(' raise')
69
+ fixed_lines.append('')
70
+
71
+ # Add indentation if needed
72
+ if indent_needed and line and not line.startswith(' '):
73
+ # Add 4 more spaces of indentation
74
+ if line.startswith(' '):
75
+ fixed_lines.append(' ' + line)
76
+ else:
77
+ fixed_lines.append(line)
78
+ else:
79
+ fixed_lines.append(line)
80
+
81
+ # Write back
82
+ with open('src/learning_path.py', 'w', encoding='utf-8') as f:
83
+ f.write('\n'.join(fixed_lines))
84
+
85
+ print("✅ Fixed indentation in src/learning_path.py")
86
+ print("⚠️ Please review the changes manually to ensure correctness")
init_db.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Initialize database tables for production deployment."""
import os
from web_app import create_app, db

# Build the application so its SQLAlchemy metadata is registered.
app = create_app()

with app.app_context():
    print("Creating database tables...")
    try:
        db.create_all()
        print("✅ Database tables created successfully!")
    except Exception as e:
        # Re-running against an initialized database raises "already exists"
        # errors; those are expected and treated as success.
        if "already exists" not in str(e).lower():
            print(f"❌ Error creating tables: {e}")
            raise
        print("⚠️ Some tables/constraints already exist - continuing...")
        print("✅ Database is ready!")
init_render_db.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Initialize PostgreSQL database on Render.
This script runs database migrations to create all required tables.
"""
import os
import sys
from flask_migrate import upgrade
from web_app import create_app, db

def init_database():
    """Create/upgrade the Render PostgreSQL schema and verify it.

    Steps: check DATABASE_URL is configured, test connectivity, run Alembic
    migrations (falling back to ``db.create_all()``), then verify the users
    table responds to a query. Exits the process with status 1 on any
    unrecoverable failure.
    """
    print("=" * 60)
    print("🔧 Initializing PostgreSQL Database on Render")
    print("=" * 60)

    # Check if DATABASE_URL is set
    database_url = os.environ.get('DATABASE_URL')
    if not database_url:
        print("❌ ERROR: DATABASE_URL environment variable not set!")
        print("Please configure PostgreSQL in Render dashboard.")
        sys.exit(1)

    # Only echo a prefix — the URL embeds credentials.
    print(f"✅ Database URL found: {database_url[:30]}...")

    # Create Flask app
    print("\n📦 Creating Flask application...")
    app = create_app()

    with app.app_context():
        print("\n🔍 Checking database connection...")
        try:
            # Test database connection. Use a context manager so the probe
            # connection is returned to the pool (the original leaked it).
            with db.engine.connect():
                pass
            print("✅ Database connection successful!")
        except Exception as e:
            print(f"❌ Database connection failed: {e}")
            sys.exit(1)

        print("\n🚀 Running database migrations...")
        try:
            # Run all migrations
            upgrade()
            print("✅ Database migrations completed successfully!")
        except Exception as e:
            print(f"⚠️ Migration warning: {e}")
            print("\nAttempting to create missing tables...")
            try:
                # Create tables if they don't exist (ignores existing ones)
                from sqlalchemy import inspect
                inspector = inspect(db.engine)
                existing_tables = inspector.get_table_names()
                print(f"📋 Existing tables: {', '.join(existing_tables)}")

                # Only create tables that don't exist
                db.create_all()
                print("✅ Database schema verified/updated!")
            except Exception as e2:
                # If it fails due to existing constraints, that's actually OK
                if "already exists" in str(e2).lower():
                    print("⚠️ Some tables/constraints already exist - this is OK!")
                    print("✅ Database schema is ready!")
                else:
                    print(f"❌ Failed to create tables: {e2}")
                    sys.exit(1)

        print("\n🔍 Verifying tables...")
        try:
            # Check if users table exists (any query touching it suffices)
            from web_app.models import User
            user_count = User.query.count()
            print(f"✅ Users table exists (current count: {user_count})")
        except Exception as e:
            print(f"❌ Users table verification failed: {e}")
            sys.exit(1)

    print("\n" + "=" * 60)
    print("✅ Database initialization complete!")
    print("=" * 60)
    print("\nYour database is ready to use. You can now:")
    print("1. Register new users")
    print("2. Login with Google OAuth")
    print("3. Create learning paths")
    print("\n")

if __name__ == "__main__":
    init_database()
initialize_db.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Initialize the vector database with sample educational resources.
3
+ This provides some starter content for the Learning Path Generator.
4
+ """
5
+ import os
6
+ import json
7
+ from pathlib import Path
8
+ from dotenv import load_dotenv
9
+
10
+ # Load environment variables
11
+ load_dotenv()
12
+
13
+ # Ensure OPENAI API key is set
14
+ if not os.getenv("OPENAI_API_KEY"):
15
+ print("ERROR: OPENAI_API_KEY not set in environment variables")
16
+ print("Please update your .env file with your API key")
17
+ exit(1)
18
+
19
+ # Import after checking API key
20
+ from src.data.document_store import DocumentStore
21
+ from src.data.resources import ResourceManager
22
+ from langchain.schema.document import Document
23
+
24
def load_sample_resources():
    """Return the sample resource catalogue, seeding the JSON file on first run.

    If ``samples/sample_resources.json`` already exists it is parsed and
    returned; otherwise the file is created with a small starter set of
    resources and that starter set is returned.
    """
    resources_path = Path("samples/sample_resources.json")

    # Fast path: the catalogue already exists on disk — just load it.
    if resources_path.exists():
        with open(resources_path, "r") as f:
            return json.load(f)

    # First run: make sure the parent directory is present before writing.
    resources_path.parent.mkdir(exist_ok=True, parents=True)

    # Starter catalogue covering a few topics, formats and difficulties.
    sample_resources = [
        {
            "title": "Introduction to Machine Learning",
            "type": "course",
            "description": "A comprehensive beginner's course covering ML fundamentals",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/intro-ml",
            "topic": "machine learning",
            "learning_styles": ["visual", "reading"]
        },
        {
            "title": "Python for Data Science Handbook",
            "type": "book",
            "description": "Essential guide to using Python for data analysis and ML",
            "difficulty": "intermediate",
            "time_estimate": "20 hours",
            "url": "https://jakevdp.github.io/PythonDataScienceHandbook/",
            "topic": "python,data science",
            "learning_styles": ["reading"]
        },
        {
            "title": "Web Development Bootcamp",
            "type": "course",
            "description": "Full stack web development from scratch",
            "difficulty": "beginner",
            "time_estimate": "40 hours",
            "url": "https://example.com/web-dev-bootcamp",
            "topic": "web development",
            "learning_styles": ["visual", "kinesthetic"]
        },
        {
            "title": "Advanced JavaScript Patterns",
            "type": "video",
            "description": "Deep dive into advanced JS design patterns",
            "difficulty": "advanced",
            "time_estimate": "3 hours",
            "url": "https://example.com/js-patterns",
            "topic": "javascript",
            "learning_styles": ["visual", "auditory"]
        },
        {
            "title": "Spanish Learning Podcast",
            "type": "podcast",
            "description": "Learn Spanish through immersive audio lessons",
            "difficulty": "beginner",
            "time_estimate": "10 hours",
            "url": "https://example.com/spanish-podcast",
            "topic": "spanish,language learning",
            "learning_styles": ["auditory"]
        }
    ]

    with open(resources_path, "w") as f:
        json.dump(sample_resources, f, indent=2)

    print(f"Created sample resources file at {resources_path}")
    return sample_resources
95
+
96
def initialize_database():
    """Seed the vector store with the bundled sample resources and smoke-test search."""
    print("Initializing vector database...")

    # Vector-backed document store (requires a configured OPENAI_API_KEY).
    store = DocumentStore()

    # Catalogue entries from samples/sample_resources.json (created on demand).
    entries = load_sample_resources()

    docs = []
    for entry in entries:
        # Searchable text blob assembled from the resource fields.
        text = f"""
        Title: {entry['title']}
        Description: {entry['description']}
        Type: {entry['type']}
        Difficulty: {entry['difficulty']}
        Topics: {entry.get('topic', '')}
        """

        # Structured metadata stored alongside the embedding.
        meta = {
            "title": entry["title"],
            "type": entry["type"],
            "difficulty": entry["difficulty"],
            "url": entry["url"],
            "topic": entry.get("topic", "").split(",")
        }
        if "learning_styles" in entry:
            meta["learning_styles"] = entry["learning_styles"]

        docs.append(Document(page_content=text, metadata=meta))

    store.add_documents(docs)
    print(f"Added {len(docs)} sample resources to vector database")

    # Quick sanity check that retrieval works against the seeded data.
    print("\nTesting search functionality...")
    hits = store.search_documents("machine learning beginner", top_k=2)
    print(f"Found {len(hits)} results for 'machine learning beginner'")
    for hit in hits:
        print(f"- {hit.metadata.get('title')} (Relevance: {hit.metadata.get('relevance_score', 0):.2f})")

    print("\nDatabase initialization complete!")


if __name__ == "__main__":
    initialize_database()
migrations/README ADDED
@@ -0,0 +1 @@
 
 
1
+ Single-database configuration for Flask.
migrations/add_chatbot_tables.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Migration: Add Conversational Chatbot Tables
3
+
4
+ Run this to create the new tables for:
5
+ - ChatMessage (conversation history)
6
+ - PathModification (modification tracking)
7
+ - ConversationSession (session management)
8
+
9
+ Usage:
10
+ python -m migrations.add_chatbot_tables
11
+ """
12
+
13
+ import sys
14
+ import os
15
+
16
+ # Add project root to Python path
17
+ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
18
+ sys.path.insert(0, project_root)
19
+
20
def run_migration():
    """Create the chatbot tables via raw DDL.

    Builds a minimal throwaway Flask app (so heavy project modules are not
    imported) and issues ``CREATE TABLE IF NOT EXISTS`` statements, making
    the script safe to re-run. Exits with status 1 on any failure.
    """
    print("Initializing database migration...")

    try:
        # Import only what we need to avoid loading heavy dependencies
        from flask import Flask
        from flask_sqlalchemy import SQLAlchemy
        from dotenv import load_dotenv

        # Load environment variables
        env_path = os.path.join(project_root, '.env')
        load_dotenv(env_path)

        # Create minimal Flask app
        app = Flask(__name__)

        # Get database URL from environment
        database_url = os.getenv('DATABASE_URL', 'sqlite:///learning_path.db')

        # Fix SQLite path if needed: a relative sqlite:/// path would resolve
        # against the current working directory, so anchor it to the project
        # root (sqlite://// with four slashes is already absolute).
        if database_url.startswith('sqlite:///') and not database_url.startswith('sqlite:////'):
            db_path = database_url.replace('sqlite:///', '')
            if not os.path.isabs(db_path):
                db_path = os.path.join(project_root, db_path)
            database_url = f'sqlite:///{db_path}'

        app.config['SQLALCHEMY_DATABASE_URI'] = database_url
        app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

        # Initialize SQLAlchemy
        db = SQLAlchemy(app)

        # Define the models directly here to avoid import issues.
        # NOTE(review): the DDL below is SQLite-flavored (AUTOINCREMENT,
        # boolean defaults 0/1) — confirm before running against PostgreSQL.
        with app.app_context():
            print(f"Using database: {database_url}")
            print("\nCreating chatbot tables...")

            # Execute raw SQL to create tables
            db.session.execute(db.text("""
                CREATE TABLE IF NOT EXISTS conversation_sessions (
                    id VARCHAR(36) PRIMARY KEY,
                    user_id INTEGER NOT NULL,
                    learning_path_id VARCHAR(36),
                    started_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                    last_activity_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                    ended_at DATETIME,
                    summary TEXT,
                    message_count INTEGER DEFAULT 0,
                    total_tokens_used INTEGER DEFAULT 0,
                    is_active BOOLEAN DEFAULT 1,
                    FOREIGN KEY (user_id) REFERENCES users(id),
                    FOREIGN KEY (learning_path_id) REFERENCES user_learning_paths(id)
                )
            """))

            db.session.execute(db.text("""
                CREATE TABLE IF NOT EXISTS chat_messages (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id INTEGER NOT NULL,
                    learning_path_id VARCHAR(36),
                    message TEXT NOT NULL,
                    role VARCHAR(20) NOT NULL,
                    intent VARCHAR(50),
                    entities TEXT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    tokens_used INTEGER DEFAULT 0,
                    response_time_ms INTEGER,
                    session_id VARCHAR(36),
                    FOREIGN KEY (user_id) REFERENCES users(id),
                    FOREIGN KEY (learning_path_id) REFERENCES user_learning_paths(id),
                    FOREIGN KEY (session_id) REFERENCES conversation_sessions(id)
                )
            """))

            db.session.execute(db.text("""
                CREATE TABLE IF NOT EXISTS path_modifications (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    learning_path_id VARCHAR(36) NOT NULL,
                    user_id INTEGER NOT NULL,
                    chat_message_id INTEGER,
                    modification_type VARCHAR(50) NOT NULL,
                    target_path VARCHAR(200),
                    change_description TEXT NOT NULL,
                    old_value TEXT,
                    new_value TEXT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    is_reverted BOOLEAN DEFAULT 0,
                    FOREIGN KEY (learning_path_id) REFERENCES user_learning_paths(id),
                    FOREIGN KEY (user_id) REFERENCES users(id),
                    FOREIGN KEY (chat_message_id) REFERENCES chat_messages(id)
                )
            """))

            db.session.commit()

            print("✅ Successfully created chatbot tables:")
            print("   - conversation_sessions")
            print("   - chat_messages")
            print("   - path_modifications")
            print("\n🎉 Your database is ready for the enhanced chatbot!")

    except Exception as e:
        print(f"❌ Error creating tables: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    run_migration()
migrations/add_conversation_memory.sql ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- Migration: Add Conversation Memory to ChatMessage Model
-- Date: 2025-01-02
-- Description: Adds conversation_id and context fields for memory-enabled chatbot
--
-- NOTE(review): COMMENT ON is PostgreSQL-only syntax, and the ALTER TABLE /
-- CREATE INDEX statements are not guarded with IF NOT EXISTS, so this script
-- is not safe to re-run. Confirm the target database engine before applying.

-- Add conversation_id column (groups related messages)
ALTER TABLE chat_messages ADD COLUMN conversation_id VARCHAR(36);

-- Add context column (stores learning path context as JSON)
ALTER TABLE chat_messages ADD COLUMN context JSON;

-- Create index on conversation_id for fast queries
CREATE INDEX idx_chat_messages_conversation_id ON chat_messages(conversation_id);

-- Update existing records to use session_id as conversation_id (backward compatibility)
UPDATE chat_messages SET conversation_id = session_id WHERE session_id IS NOT NULL;

-- Add comments
COMMENT ON COLUMN chat_messages.conversation_id IS 'Groups related messages in a conversation';
COMMENT ON COLUMN chat_messages.context IS 'Stores learning path state, progress, and milestone data';
migrations/add_resource_progress.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Migration script to add ResourceProgress table for persistent resource tracking.
3
+ Run this after updating models.py
4
+ """
5
+ import sys
6
+ import os
7
+
8
+ # Add parent directory to path
9
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+
11
+ from web_app import create_app, db
12
+ from web_app.models import ResourceProgress
13
+
14
def migrate():
    """Create the resource_progress table via db.create_all() and report the schema."""
    app = create_app()

    with app.app_context():
        print("Creating resource_progress table...")

        # create_all only adds tables that are missing, so this is re-runnable.
        db.create_all()

        print("✅ ResourceProgress table created successfully!")
        print("\nTable structure:")
        for column_line in (
            "- id (Primary Key)",
            "- user_id (Foreign Key -> users.id)",
            "- learning_path_id (Foreign Key -> user_learning_paths.id)",
            "- milestone_index (Integer)",
            "- resource_index (Integer)",
            "- resource_url (String)",
            "- completed (Boolean)",
            "- completed_at (DateTime)",
            "- created_at (DateTime)",
            "- updated_at (DateTime)",
        ):
            print(column_line)
        print("\n✨ Users can now track resource completion persistently!")


if __name__ == "__main__":
    migrate()
migrations/alembic.ini ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A generic, single database configuration.
2
+
3
+ [alembic]
4
+ # template used to generate migration files
5
+ # file_template = %%(rev)s_%%(slug)s
6
+
7
+ # set to 'true' to run the environment during
8
+ # the 'revision' command, regardless of autogenerate
9
+ # revision_environment = false
10
+
11
+
12
+ # Logging configuration
13
+ [loggers]
14
+ keys = root,sqlalchemy,alembic,flask_migrate
15
+
16
+ [handlers]
17
+ keys = console
18
+
19
+ [formatters]
20
+ keys = generic
21
+
22
+ [logger_root]
23
+ level = WARN
24
+ handlers = console
25
+ qualname =
26
+
27
+ [logger_sqlalchemy]
28
+ level = WARN
29
+ handlers =
30
+ qualname = sqlalchemy.engine
31
+
32
+ [logger_alembic]
33
+ level = INFO
34
+ handlers =
35
+ qualname = alembic
36
+
37
+ [logger_flask_migrate]
38
+ level = INFO
39
+ handlers =
40
+ qualname = flask_migrate
41
+
42
+ [handler_console]
43
+ class = StreamHandler
44
+ args = (sys.stderr,)
45
+ level = NOTSET
46
+ formatter = generic
47
+
48
+ [formatter_generic]
49
+ format = %(levelname)-5.5s [%(name)s] %(message)s
50
+ datefmt = %H:%M:%S
migrations/env.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
from logging.config import fileConfig

from flask import current_app

from alembic import context

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging: installs the handlers and
# formatters declared in alembic.ini.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')
16
+
17
+
18
def get_engine():
    """Return the SQLAlchemy engine registered with the Flask-Migrate extension."""
    migrate_db = current_app.extensions['migrate'].db
    try:
        # Flask-SQLAlchemy < 3 (and Alchemical) expose get_engine().
        return migrate_db.get_engine()
    except (TypeError, AttributeError):
        # Flask-SQLAlchemy >= 3 exposes the engine as a plain attribute.
        return migrate_db.engine
25
+
26
+
27
def get_engine_url():
    """Return the database URL with '%' doubled so configparser accepts it."""
    url = get_engine().url
    try:
        rendered = url.render_as_string(hide_password=False)
    except AttributeError:
        # Older SQLAlchemy URL objects lack render_as_string().
        rendered = str(url)
    return rendered.replace('%', '%%')
33
+
34
+
35
# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
# Point Alembic at the Flask app's live database URL ('%' is escaped for
# configparser by get_engine_url()).
config.set_main_option('sqlalchemy.url', get_engine_url())
# The Flask-SQLAlchemy db object registered by Flask-Migrate.
target_db = current_app.extensions['migrate'].db

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
46
+
47
+
48
def get_metadata():
    """Return the MetaData that autogenerate should compare against."""
    if not hasattr(target_db, 'metadatas'):
        return target_db.metadata
    # Flask-SQLAlchemy 3.x keeps per-bind metadata; None is the default bind.
    return target_db.metadatas[None]
52
+
53
+
54
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    Configures the context with just a URL rather than an Engine, so no
    DBAPI needs to be available; context.execute() emits the SQL to the
    script output instead of touching a database.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=get_metadata(),
        literal_binds=True,
    )

    with context.begin_transaction():
        context.run_migrations()
73
+
74
+
75
def run_migrations_online():
    """Run migrations in 'online' mode.

    Creates an Engine and binds a live connection to the Alembic context.
    """

    def process_revision_directives(context, revision, directives):
        # Prevent an auto-migration from being generated when autogenerate
        # detects no schema changes (avoids empty revision files).
        # reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
        if getattr(config.cmd_opts, 'autogenerate', False):
            script = directives[0]
            if script.upgrade_ops.is_empty():
                directives[:] = []
                logger.info('No changes in schema detected.')

    conf_args = current_app.extensions['migrate'].configure_args
    # Install the callback only when the app has not supplied its own.
    if conf_args.get("process_revision_directives") is None:
        conf_args["process_revision_directives"] = process_revision_directives

    with get_engine().connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=get_metadata(),
            **conf_args
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
migrations/script.py.mako ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ ${imports if imports else ""}
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = ${repr(up_revision)}
14
+ down_revision = ${repr(down_revision)}
15
+ branch_labels = ${repr(branch_labels)}
16
+ depends_on = ${repr(depends_on)}
17
+
18
+
19
+ def upgrade():
20
+ ${upgrades if upgrades else "pass"}
21
+
22
+
23
+ def downgrade():
24
+ ${downgrades if downgrades else "pass"}
migrations/versions/12d5dfb6fd16_sync_users_table.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """sync users table
2
+
3
+ Revision ID: 12d5dfb6fd16
4
+ Revises: 39d22a91999a
5
+ Create Date: 2025-10-01 21:25:39.871657
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = '12d5dfb6fd16'
14
+ down_revision = '39d22a91999a'
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
+ def upgrade():
20
+ # ### commands auto generated by Alembic - please adjust! ###
21
+ op.create_table('chat_messages',
22
+ sa.Column('id', sa.Integer(), nullable=False),
23
+ sa.Column('user_id', sa.Integer(), nullable=False),
24
+ sa.Column('learning_path_id', sa.String(length=36), nullable=True),
25
+ sa.Column('message', sa.Text(), nullable=False),
26
+ sa.Column('role', sa.String(length=20), nullable=False),
27
+ sa.Column('intent', sa.String(length=50), nullable=True),
28
+ sa.Column('entities', sa.JSON(), nullable=True),
29
+ sa.Column('timestamp', sa.DateTime(), nullable=True),
30
+ sa.Column('tokens_used', sa.Integer(), nullable=True),
31
+ sa.Column('response_time_ms', sa.Integer(), nullable=True),
32
+ sa.Column('session_id', sa.String(length=36), nullable=True),
33
+ sa.ForeignKeyConstraint(['learning_path_id'], ['user_learning_paths.id'], ),
34
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
35
+ sa.PrimaryKeyConstraint('id')
36
+ )
37
+ with op.batch_alter_table('chat_messages', schema=None) as batch_op:
38
+ batch_op.create_index(batch_op.f('ix_chat_messages_session_id'), ['session_id'], unique=False)
39
+ batch_op.create_index(batch_op.f('ix_chat_messages_timestamp'), ['timestamp'], unique=False)
40
+
41
+ op.create_table('conversation_sessions',
42
+ sa.Column('id', sa.String(length=36), nullable=False),
43
+ sa.Column('user_id', sa.Integer(), nullable=False),
44
+ sa.Column('learning_path_id', sa.String(length=36), nullable=True),
45
+ sa.Column('started_at', sa.DateTime(), nullable=True),
46
+ sa.Column('last_activity_at', sa.DateTime(), nullable=True),
47
+ sa.Column('ended_at', sa.DateTime(), nullable=True),
48
+ sa.Column('summary', sa.Text(), nullable=True),
49
+ sa.Column('message_count', sa.Integer(), nullable=True),
50
+ sa.Column('total_tokens_used', sa.Integer(), nullable=True),
51
+ sa.Column('is_active', sa.Boolean(), nullable=True),
52
+ sa.ForeignKeyConstraint(['learning_path_id'], ['user_learning_paths.id'], ),
53
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
54
+ sa.PrimaryKeyConstraint('id')
55
+ )
56
+ with op.batch_alter_table('conversation_sessions', schema=None) as batch_op:
57
+ batch_op.create_index(batch_op.f('ix_conversation_sessions_started_at'), ['started_at'], unique=False)
58
+
59
+ op.create_table('path_modifications',
60
+ sa.Column('id', sa.Integer(), nullable=False),
61
+ sa.Column('learning_path_id', sa.String(length=36), nullable=False),
62
+ sa.Column('user_id', sa.Integer(), nullable=False),
63
+ sa.Column('chat_message_id', sa.Integer(), nullable=True),
64
+ sa.Column('modification_type', sa.String(length=50), nullable=False),
65
+ sa.Column('target_path', sa.String(length=200), nullable=True),
66
+ sa.Column('change_description', sa.Text(), nullable=False),
67
+ sa.Column('old_value', sa.JSON(), nullable=True),
68
+ sa.Column('new_value', sa.JSON(), nullable=True),
69
+ sa.Column('timestamp', sa.DateTime(), nullable=True),
70
+ sa.Column('is_reverted', sa.Boolean(), nullable=True),
71
+ sa.ForeignKeyConstraint(['chat_message_id'], ['chat_messages.id'], ),
72
+ sa.ForeignKeyConstraint(['learning_path_id'], ['user_learning_paths.id'], ),
73
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
74
+ sa.PrimaryKeyConstraint('id')
75
+ )
76
+ with op.batch_alter_table('path_modifications', schema=None) as batch_op:
77
+ batch_op.create_index(batch_op.f('ix_path_modifications_timestamp'), ['timestamp'], unique=False)
78
+
79
+ with op.batch_alter_table('users', schema=None) as batch_op:
80
+ batch_op.add_column(sa.Column('last_seen', sa.DateTime(), nullable=True))
81
+ batch_op.add_column(sa.Column('registration_source', sa.String(length=20), nullable=True))
82
+ batch_op.add_column(sa.Column('login_count', sa.Integer(), nullable=True))
83
+ batch_op.add_column(sa.Column('display_name', sa.String(length=100), nullable=True))
84
+ batch_op.add_column(sa.Column('bio', sa.Text(), nullable=True))
85
+
86
+ # ### end Alembic commands ###
87
+
88
+
89
+ def downgrade():
90
+ # ### commands auto generated by Alembic - please adjust! ###
91
+ with op.batch_alter_table('users', schema=None) as batch_op:
92
+ batch_op.drop_column('bio')
93
+ batch_op.drop_column('display_name')
94
+ batch_op.drop_column('login_count')
95
+ batch_op.drop_column('registration_source')
96
+ batch_op.drop_column('last_seen')
97
+
98
+ with op.batch_alter_table('path_modifications', schema=None) as batch_op:
99
+ batch_op.drop_index(batch_op.f('ix_path_modifications_timestamp'))
100
+
101
+ op.drop_table('path_modifications')
102
+ with op.batch_alter_table('conversation_sessions', schema=None) as batch_op:
103
+ batch_op.drop_index(batch_op.f('ix_conversation_sessions_started_at'))
104
+
105
+ op.drop_table('conversation_sessions')
106
+ with op.batch_alter_table('chat_messages', schema=None) as batch_op:
107
+ batch_op.drop_index(batch_op.f('ix_chat_messages_timestamp'))
108
+ batch_op.drop_index(batch_op.f('ix_chat_messages_session_id'))
109
+
110
+ op.drop_table('chat_messages')
111
+ # ### end Alembic commands ###
migrations/versions/39d22a91999a_initial_migration.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Initial migration
2
+
3
+ Revision ID: 39d22a91999a
4
+ Revises:
5
+ Create Date: 2025-06-03 11:10:55.881578
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = '39d22a91999a'
14
+ down_revision = None
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
+ def upgrade():
20
+ # ### commands auto generated by Alembic - please adjust! ###
21
+ op.create_table('users',
22
+ sa.Column('id', sa.Integer(), nullable=False),
23
+ sa.Column('username', sa.String(length=64), nullable=False),
24
+ sa.Column('email', sa.String(length=120), nullable=False),
25
+ sa.Column('password_hash', sa.String(length=256), nullable=True),
26
+ sa.Column('created_at', sa.DateTime(), nullable=True),
27
+ sa.PrimaryKeyConstraint('id')
28
+ )
29
+ with op.batch_alter_table('users', schema=None) as batch_op:
30
+ batch_op.create_index(batch_op.f('ix_users_email'), ['email'], unique=True)
31
+ batch_op.create_index(batch_op.f('ix_users_username'), ['username'], unique=True)
32
+
33
+ op.create_table('user_learning_paths',
34
+ sa.Column('id', sa.String(length=36), nullable=False),
35
+ sa.Column('user_id', sa.Integer(), nullable=False),
36
+ sa.Column('path_data_json', sa.JSON(), nullable=False),
37
+ sa.Column('title', sa.String(length=200), nullable=True),
38
+ sa.Column('topic', sa.String(length=100), nullable=True),
39
+ sa.Column('created_at', sa.DateTime(), nullable=True),
40
+ sa.Column('last_accessed_at', sa.DateTime(), nullable=True),
41
+ sa.Column('is_archived', sa.Boolean(), nullable=True),
42
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
43
+ sa.PrimaryKeyConstraint('id')
44
+ )
45
+ with op.batch_alter_table('user_learning_paths', schema=None) as batch_op:
46
+ batch_op.create_index(batch_op.f('ix_user_learning_paths_created_at'), ['created_at'], unique=False)
47
+
48
+ op.create_table('learning_progress',
49
+ sa.Column('id', sa.Integer(), nullable=False),
50
+ sa.Column('user_learning_path_id', sa.String(length=36), nullable=False),
51
+ sa.Column('milestone_identifier', sa.String(length=200), nullable=False),
52
+ sa.Column('status', sa.String(length=50), nullable=True),
53
+ sa.Column('started_at', sa.DateTime(), nullable=True),
54
+ sa.Column('completed_at', sa.DateTime(), nullable=True),
55
+ sa.Column('notes', sa.Text(), nullable=True),
56
+ sa.ForeignKeyConstraint(['user_learning_path_id'], ['user_learning_paths.id'], ),
57
+ sa.PrimaryKeyConstraint('id'),
58
+ sa.UniqueConstraint('user_learning_path_id', 'milestone_identifier', name='_user_path_milestone_uc')
59
+ )
60
+ # ### end Alembic commands ###
61
+
62
+
63
+ def downgrade():
64
+ # ### commands auto generated by Alembic - please adjust! ###
65
+ op.drop_table('learning_progress')
66
+ with op.batch_alter_table('user_learning_paths', schema=None) as batch_op:
67
+ batch_op.drop_index(batch_op.f('ix_user_learning_paths_created_at'))
68
+
69
+ op.drop_table('user_learning_paths')
70
+ with op.batch_alter_table('users', schema=None) as batch_op:
71
+ batch_op.drop_index(batch_op.f('ix_users_username'))
72
+ batch_op.drop_index(batch_op.f('ix_users_email'))
73
+
74
+ op.drop_table('users')
75
+ # ### end Alembic commands ###
migrations/versions/6b20f44f6a00_make_oauth_user_id_nullable.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Make OAuth user_id nullable
2
+
3
+ Revision ID: 6b20f44f6a00
4
+ Revises: 9f32f1920608
5
+ Create Date: 2025-10-05 13:02:17.393003
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = '6b20f44f6a00'
14
+ down_revision = '9f32f1920608'
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
+ def upgrade():
20
+ # ### commands auto generated by Alembic - please adjust! ###
21
+ with op.batch_alter_table('flask_dance_oauth', schema=None) as batch_op:
22
+ batch_op.alter_column('user_id',
23
+ existing_type=sa.INTEGER(),
24
+ nullable=True)
25
+
26
+ # ### end Alembic commands ###
27
+
28
+
29
+ def downgrade():
30
+ # ### commands auto generated by Alembic - please adjust! ###
31
+ with op.batch_alter_table('flask_dance_oauth', schema=None) as batch_op:
32
+ batch_op.alter_column('user_id',
33
+ existing_type=sa.INTEGER(),
34
+ nullable=False)
35
+
36
+ # ### end Alembic commands ###
migrations/versions/9f32f1920608_add_oauth_table_for_flask_dance.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Add OAuth table for Flask-Dance
2
+
3
+ Revision ID: 9f32f1920608
4
+ Revises: 12d5dfb6fd16
5
+ Create Date: 2025-10-05 12:30:20.870839
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+
11
+
12
+ # revision identifiers, used by Alembic.
13
+ revision = '9f32f1920608'
14
+ down_revision = '12d5dfb6fd16'
15
+ branch_labels = None
16
+ depends_on = None
17
+
18
+
19
+ def upgrade():
20
+ # ### commands auto generated by Alembic - please adjust! ###
21
+ op.create_table('flask_dance_oauth',
22
+ sa.Column('user_id', sa.Integer(), nullable=False),
23
+ sa.Column('id', sa.Integer(), nullable=False),
24
+ sa.Column('provider', sa.String(length=50), nullable=False),
25
+ sa.Column('created_at', sa.DateTime(), nullable=False),
26
+ sa.Column('token', sa.JSON(), nullable=False),
27
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
28
+ sa.PrimaryKeyConstraint('id')
29
+ )
30
+ with op.batch_alter_table('chat_messages', schema=None) as batch_op:
31
+ batch_op.add_column(sa.Column('conversation_id', sa.String(length=36), nullable=True))
32
+ batch_op.add_column(sa.Column('context', sa.JSON(), nullable=True))
33
+ batch_op.create_index(batch_op.f('ix_chat_messages_conversation_id'), ['conversation_id'], unique=False)
34
+
35
+ # ### end Alembic commands ###
36
+
37
+
38
+ def downgrade():
39
+ # ### commands auto generated by Alembic - please adjust! ###
40
+ with op.batch_alter_table('chat_messages', schema=None) as batch_op:
41
+ batch_op.drop_index(batch_op.f('ix_chat_messages_conversation_id'))
42
+ batch_op.drop_column('context')
43
+ batch_op.drop_column('conversation_id')
44
+
45
+ op.drop_table('flask_dance_oauth')
46
+ # ### end Alembic commands ###
migrations/versions/a1b2c3d4e5f6_add_progress_tracking_tables.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """add progress tracking tables
2
+
3
+ Revision ID: a1b2c3d4e5f6
4
+ Revises: 6b20f44f6a00
5
+ Create Date: 2025-10-14 01:30:00.000000
6
+
7
+ """
8
+ from alembic import op
9
+ import sqlalchemy as sa
10
+ from sqlalchemy import inspect
11
+
12
+
13
+ # revision identifiers, used by Alembic.
14
+ revision = 'a1b2c3d4e5f6'
15
+ down_revision = '6b20f44f6a00'
16
+ branch_labels = None
17
+ depends_on = None
18
+
19
+
20
+ def upgrade():
21
+ # ### commands auto generated by Alembic - please adjust! ###
22
+
23
+ # Get the database connection to check for existing columns
24
+ bind = op.get_bind()
25
+ inspector = inspect(bind)
26
+ columns = [col['name'] for col in inspector.get_columns('chat_messages')]
27
+
28
+ # Add missing fields to chat_messages table only if they don't exist
29
+ with op.batch_alter_table('chat_messages', schema=None) as batch_op:
30
+ if 'conversation_id' not in columns:
31
+ batch_op.add_column(sa.Column('conversation_id', sa.String(length=36), nullable=True))
32
+ batch_op.create_index(batch_op.f('ix_chat_messages_conversation_id'), ['conversation_id'], unique=False)
33
+
34
+ if 'context' not in columns:
35
+ batch_op.add_column(sa.Column('context', sa.JSON(), nullable=True))
36
+
37
+ # Create resource_progress table
38
+ op.create_table('resource_progress',
39
+ sa.Column('id', sa.Integer(), nullable=False),
40
+ sa.Column('user_id', sa.Integer(), nullable=False),
41
+ sa.Column('learning_path_id', sa.String(length=36), nullable=False),
42
+ sa.Column('milestone_index', sa.Integer(), nullable=False),
43
+ sa.Column('resource_index', sa.Integer(), nullable=False),
44
+ sa.Column('resource_url', sa.String(length=500), nullable=False),
45
+ sa.Column('completed', sa.Boolean(), nullable=True),
46
+ sa.Column('completed_at', sa.DateTime(), nullable=True),
47
+ sa.Column('created_at', sa.DateTime(), nullable=True),
48
+ sa.Column('updated_at', sa.DateTime(), nullable=True),
49
+ sa.ForeignKeyConstraint(['learning_path_id'], ['user_learning_paths.id'], ),
50
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
51
+ sa.PrimaryKeyConstraint('id'),
52
+ sa.UniqueConstraint('user_id', 'learning_path_id', 'milestone_index', 'resource_index',
53
+ name='_user_path_milestone_resource_uc')
54
+ )
55
+
56
+ # Create milestone_progress table
57
+ op.create_table('milestone_progress',
58
+ sa.Column('id', sa.Integer(), nullable=False),
59
+ sa.Column('user_id', sa.Integer(), nullable=False),
60
+ sa.Column('learning_path_id', sa.String(length=36), nullable=False),
61
+ sa.Column('milestone_index', sa.Integer(), nullable=False),
62
+ sa.Column('completed', sa.Boolean(), nullable=True),
63
+ sa.Column('completed_at', sa.DateTime(), nullable=True),
64
+ sa.Column('created_at', sa.DateTime(), nullable=True),
65
+ sa.Column('updated_at', sa.DateTime(), nullable=True),
66
+ sa.ForeignKeyConstraint(['learning_path_id'], ['user_learning_paths.id'], ),
67
+ sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
68
+ sa.PrimaryKeyConstraint('id'),
69
+ sa.UniqueConstraint('user_id', 'learning_path_id', 'milestone_index',
70
+ name='_user_path_milestone_uc')
71
+ )
72
+
73
+ # ### end Alembic commands ###
74
+
75
+
76
+ def downgrade():
77
+ # ### commands auto generated by Alembic - please adjust! ###
78
+ op.drop_table('milestone_progress')
79
+ op.drop_table('resource_progress')
80
+
81
+ # Remove added fields from chat_messages table
82
+ with op.batch_alter_table('chat_messages', schema=None) as batch_op:
83
+ batch_op.drop_index(batch_op.f('ix_chat_messages_conversation_id'))
84
+ batch_op.drop_column('context')
85
+ batch_op.drop_column('conversation_id')
86
+ # ### end Alembic commands ###
minimal_test.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Minimal environment sanity check: Python setup, env vars and key imports."""
import os
import sys
from dotenv import load_dotenv

# Add the project root to sys.path so project modules are importable
# when this script is run directly.
# (Fixed: os.path.join with a single argument was a no-op wrapper.)
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, PROJECT_ROOT)

# Load environment variables from .env, if present.
load_dotenv()

print("=== Starting Minimal Test ===")

# Test basic Python environment
print("Python version:", sys.version)
print("Current working directory:", os.getcwd())
print("Project root:", PROJECT_ROOT)

# Test environment variables (never print the full key — only a prefix).
api_key = os.getenv("OPENAI_API_KEY")
print("OPENAI_API_KEY exists:", bool(api_key))
if api_key:
    print("API key starts with:", api_key[:5] + "...")

# Test basic imports; each failure is reported but does not stop the script.
try:
    import pydantic
    print(f"Pydantic version: {pydantic.__version__}")
except ImportError as e:
    print(f"Pydantic import error: {e}")

try:
    from langchain_openai import OpenAI
    print("Successfully imported langchain_openai")
except ImportError as e:
    print(f"langchain_openai import error: {e}")

print("=== Test Completed ===")
requirements.txt ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ gunicorn>=21.2.0
3
+ python-dotenv==1.0.1
4
+ Flask>=2.0.1
5
+ Flask-Cors>=4.0.0
6
+ requests>=2.31.0
7
+
8
+ # Pydantic v1 - MUST be installed first and locked
9
+ pydantic==1.10.18
10
+ email-validator==2.1.0.post1
11
+
12
+ # LangChain - using older versions compatible with Pydantic v1
13
+ langchain==0.0.267
14
+ openai>=1.0.0 # Using new OpenAI API
15
+ tiktoken>=0.5.0 # Token counting for cost optimization
16
+ Flask-SQLAlchemy==3.1.1
17
+ psycopg2-binary>=2.9.9 # Postgres driver for production
18
+ Flask-Login==0.6.3
19
+ Flask-WTF==1.2.1
20
+ Flask-Migrate==4.0.7
21
+
22
+ # Document processing
23
+ unstructured==0.10.30 # Using base package without all-docs to avoid complex deps
24
+ pypandoc>=1.11
25
+ python-magic>=0.4.27; sys_platform != 'win32'
26
+ onnxruntime>=1.20.0 # Explicitly specify a compatible version
27
+
28
+ # RAG and embeddings
29
+ faiss-cpu>=1.7.4
30
+
31
+ # Vector database
32
+ chromadb==0.3.29 # Last version compatible with Pydantic v1
33
+
34
+ # ML & NLP
35
+ sentence-transformers>=2.2.2
36
+ scikit-learn>=1.2.2
37
+ numpy>=1.24.0
38
+ pandas>=2.0.0
39
+
40
+ Flask-Dance[google]==7.1.0
41
+
42
+ # Web UI (note: Flask-WTF is already pinned to 1.2.1 above, so the looser flask-wtf spec below is redundant)
43
+ flask-wtf>=1.0.0
44
+ WTForms>=3.1.0
45
+ Jinja2>=3.0.1
46
+ werkzeug>=2.0.1
47
+
48
+ # HTTP client for async resource validation
49
+ aiohttp>=3.9.0
50
+
51
+ # Caching
52
+ redis>=4.6.0,<5.0.0
53
+ rq==1.16.1
54
+ celery>=5.3.0
55
+
56
+ # Search & Reranking
57
+ rank-bm25>=0.2.2
58
+ cohere>=4.0.0 # Optional: for Cohere reranking
59
+
60
+ # Observability & Monitoring
61
+ langsmith<0.1.0,>=0.0.21 # LLM tracing and debugging (compatible with langchain 0.0.267)
62
+ wandb>=0.16.0 # Experiment tracking and metrics
63
+
64
+ # Development
65
+ pytest>=7.0.0
66
+ pytest-asyncio>=0.21.0 # For async test support
67
+
68
+ # Platform-specific dependencies
69
+ --find-links https://download.pytorch.org/whl/torch_stable.html
70
+
71
+ # Build tools
72
+ setuptools>=65.0.0
73
+
74
+ google-auth>=2.0.0
75
+ google-auth-httplib2>=0.2.0
76
+ google-auth-oauthlib>=0.4.6
run.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This script handles the setup and execution of the web application.
3
+ """
4
+ from pathlib import Path
5
+ import shutil
6
+ from dotenv import load_dotenv
7
+ from web_app import create_app
8
+ from backend.routes import api_bp
9
+ import os
10
+ # Fix protobuf compatibility issue with transformers
11
+ os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
12
+
13
+ print("--- run.py started ---")
14
+
15
+
16
+ # Load environment variables
17
+ env_path = Path('.env')
18
+ env_example_path = Path('.env.example')
19
+
20
+ # If .env doesn't exist, create it from example
21
+ if not env_path.exists() and env_example_path.exists():
22
+ shutil.copy(env_example_path, env_path)
23
+ print("Created .env file from .env.example. Please update your API keys before proceeding.")
24
+
25
+ # Load environment vars
26
+ load_dotenv()
27
+ print("--- dotenv loaded ---")
28
+
29
+ # Check if required API keys are set based on provider
30
+ provider = os.getenv("DEFAULT_PROVIDER", "openai").lower()
31
+ if provider == "openai" and not os.getenv("OPENAI_API_KEY"):
32
+ print("WARNING: OPENAI_API_KEY not found in environment variables.")
33
+ print("Please set your API key in the .env file before running the application.")
34
+ exit(1)
35
+ elif provider == "deepseek" and not os.getenv("DEEPSEEK_API_KEY"):
36
+ print("WARNING: DEEPSEEK_API_KEY not found in environment variables.")
37
+ print("Please set your API key in the .env file before running the application.")
38
+ exit(1)
39
+ elif provider == "openrouter":
40
+ print("✅ Using OpenRouter with free models (no API key required)")
41
+
42
+ # Create necessary directories
43
+ os.makedirs("vector_db", exist_ok=True)
44
+ os.makedirs("learning_paths", exist_ok=True)
45
+ print("--- API key checked and dirs created ---")
46
+
47
+ # Import and run Flask app
48
+
49
+ app = create_app()
50
+
51
+ # Register the API blueprint for RQ task orchestration under /api
52
+ app.register_blueprint(api_bp, url_prefix='/api')
53
+
54
+ print("--- Flask app created via factory ---")
55
+
56
# Pre-warm the model orchestrator to avoid cold start delays
def prewarm_models():
    """Pre-initialize models to avoid cold start on first request."""
    try:
        print("🔥 Pre-warming AI models (this may take a moment on first run)...")
        from src.ml.model_orchestrator import ModelOrchestrator
        # Constructing the orchestrator *is* the warm-up: the instance is
        # intentionally discarded after initialization loads the models.
        orchestrator = ModelOrchestrator()
        # Make a simple test call to ensure the model is fully loaded
        print("✅ AI models pre-warmed successfully!")
    except Exception as e:
        # Non-fatal: models will lazily initialize on the first request instead.
        print(f"⚠️ Model pre-warming failed (will initialize on first request): {e}")
67
+
68
+ if __name__ == "__main__":
69
+ port = int(os.getenv("PORT", 5000))
70
+ # Disable debug mode to prevent auto-reloading issues
71
+ debug = False
72
+
73
+ # Pre-warm models before starting server
74
+ prewarm_models()
75
+
76
+ print(f"Starting AI Learning Path Generator on port {port}")
77
+ print("Visit http://localhost:5000 in your browser")
78
+
79
+ app.run(host="0.0.0.0", port=port, debug=debug, use_reloader=False)
run_flask.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Development entry point: creates the Flask app and runs it with the
debugger enabled on port 5000."""
import os
import sys

# Add the project root to sys.path so `web_app` resolves when run directly.
# (Fixed: duplicate `import os` removed; single-arg os.path.join simplified.)
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
sys.path.insert(0, PROJECT_ROOT)

# Import the app factory
from web_app import create_app

# Set DEV_MODE=True in .env to bypass API key checks
DEV_MODE = os.environ.get('DEV_MODE', 'False').lower() == 'true'

if DEV_MODE:
    print("\033[93m⚠️ Running in DEV_MODE - API calls will be stubbed!\033[0m")
    os.environ['FLASK_ENV'] = 'development'

app = create_app()

if __name__ == "__main__":
    print("Starting Flask application...")
    app.run(debug=True, port=5000)
setup.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Setup script for the AI Learning Path Generator.
3
+ """
4
+ from setuptools import setup, find_packages
5
+
6
+ setup(
7
+ name="ai-learning-path-generator",
8
+ version="1.0.0",
9
+ description="An intelligent system that generates personalized learning paths using AI",
10
+ author="AI Learning Path Generator Team",
11
+ packages=find_packages(),
12
+ install_requires=[
13
+ "python-dotenv>=1.0.0",
14
+ "Flask>=2.0.1",
15
+ "langchain>=0.0.267",
16
+ "langchain-openai>=0.0.1",
17
+ "openai>=1.0.0",
18
+ "chromadb>=0.4.13",
19
+ "sentence-transformers>=2.2.2",
20
+ "scikit-learn>=1.2.2",
21
+ "numpy>=1.24.0",
22
+ "pandas>=2.0.0",
23
+ "flask-wtf>=1.0.0",
24
+ "Jinja2>=3.0.1",
25
+ "werkzeug>=2.0.1",
26
+ ],
27
+ python_requires=">=3.8",
28
+ )
src/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
src/agent.py ADDED
@@ -0,0 +1,577 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ AI agent implementation for the Learning Path Generator.
3
+ Handles complex interactions and orchestrates the learning path generation process.
4
+ """
5
+ print("--- src/agent.py execution started ---")
6
+ from typing import Dict, List, Any, Optional, Tuple
7
+ import json
8
+ import datetime
9
+ from pathlib import Path
10
+ print("--- src/agent.py initial imports done ---")
11
+ from src.learning_path import LearningPathGenerator, LearningPath
12
+ print("--- src/agent.py learning_path imported ---")
13
+ from src.ml.model_orchestrator import ModelOrchestrator
14
+ from src.data.vector_store import VectorStore
15
+ from src.data.document_store import DocumentStore
16
+ from src.utils.config import (
17
+ LEARNING_STYLES,
18
+ EXPERTISE_LEVELS,
19
+ TIME_COMMITMENTS,
20
+ )
21
class LearningAgent:
    """
    AI agent that orchestrates the learning path generation process.

    Wraps a LearningPathGenerator, a ModelOrchestrator and RAG components
    (vector store + document store) behind a single ``process_request``
    entry point, and keeps lightweight per-session state (current path,
    user profile, interaction history, accumulated context).
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the learning agent with RAG capabilities.

        Args:
            api_key: Optional OpenAI API key
        """
        print("--- LearningAgent.__init__ started ---")
        self.api_key = api_key
        self.path_generator = LearningPathGenerator(api_key)
        self.model_orchestrator = ModelOrchestrator(api_key)
        self.document_store = DocumentStore()
        self.vector_store = VectorStore(api_key)
        print("--- LearningAgent.__init__: All components initialized ---")

        # Track agent state
        self.current_path = None       # most recently generated/modified path
        self.user_profile = {}         # preferences inferred from requests/context
        self.session_history = []      # interaction log, capped at 100 entries
        self.context = []              # accumulated RAG context strings
        self.goal = None               # current high-level goal
        self.planning_enabled = True   # run _plan_next_steps on each request

        # Load initial documents for RAG
        print("--- LearningAgent.__init__: Calling _load_initial_knowledge ---")
        self._load_initial_knowledge()
        print("--- LearningAgent.__init__ finished ---")

    def _load_initial_knowledge(self):
        """
        Load initial knowledge documents into the vector store, creating the
        vector_db directory structure and a seed document when needed.
        """
        print("--- LearningAgent._load_initial_knowledge started ---")
        # Create vector store directory if it doesn't exist
        vector_db_path = Path("vector_db")
        documents_dir = vector_db_path / "documents"

        if not vector_db_path.exists():
            vector_db_path.mkdir(parents=True)
            print(f"Created vector store directory at {vector_db_path}")

        if not documents_dir.exists():
            documents_dir.mkdir()
            print(f"Created documents directory at {documents_dir}")

        # Load documents if they exist; failures are non-fatal.
        if documents_dir.exists():
            try:
                print(f"Loading documents from {documents_dir}...")
                self.vector_store.load_documents(str(documents_dir))
                print(f"Documents loaded successfully from {documents_dir}.")
            except Exception as e:
                print(f"Warning: Failed to load documents: {str(e)}")
        else:
            print(f"Warning: Documents directory not found at {documents_dir}")

        # Initialize vector store if it doesn't exist (no FAISS index yet).
        if not (vector_db_path / "index.faiss").exists():
            try:
                # Create a dummy document to initialize the vector store
                with open(documents_dir / "dummy.txt", "w") as f:
                    f.write("This is a dummy document to initialize the vector store.")
                self.vector_store.load_documents(str(documents_dir))
                print("Vector store initialized successfully")
            except Exception as e:
                print(f"Warning: Failed to initialize vector store: {str(e)}")

    def process_request(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a user request and generate an appropriate response with RAG
        and agentic behavior.

        Dispatches on ``request["type"]`` to the matching handler
        (generate_path / modify_path / ask_question / get_resources).

        Args:
            request: Dictionary containing user request data

        Returns:
            Response dictionary with ``success``, ``message`` and ``data``.
        """
        # Get the AI provider from the request (if specified)
        ai_provider = request.get("ai_provider")

        # Create provider-specific instances if provider is specified;
        # otherwise reuse the shared components.
        if ai_provider:
            provider_orchestrator = ModelOrchestrator(self.api_key, provider=ai_provider)
            provider_path_generator = LearningPathGenerator(self.api_key)
            provider_path_generator.model_orchestrator = provider_orchestrator

            use_orchestrator = provider_orchestrator
            use_path_generator = provider_path_generator
        else:
            use_orchestrator = self.model_orchestrator
            use_path_generator = self.path_generator

        # Get relevant context using RAG
        query = request.get("query", "")
        if query:
            relevant_docs = self.vector_store.search(query)
            context = [doc["content"] for doc in relevant_docs]
            self.context.extend(context)

            # Update user profile with preferences from context
            self._update_user_profile(context)

        # Plan if planning is enabled
        if self.planning_enabled:
            self._plan_next_steps(request)

        # Process the request based on its type
        request_type = request.get("type", "generate_path")

        # Add accumulated context to the request so handlers can use it
        request["context"] = self.context

        if request_type == "generate_path":
            return self._handle_path_generation(request, use_path_generator)
        elif request_type == "modify_path":
            return self._handle_path_modification(request, use_path_generator)
        elif request_type == "ask_question":
            return self._handle_question(request, use_orchestrator)
        elif request_type == "get_resources":
            return self._handle_resource_request(request, use_orchestrator)
        else:
            return {
                "success": False,
                "message": f"Unknown request type: {request_type}",
                "data": None
            }

    def _handle_path_generation(self, request: Dict[str, Any], path_generator=None) -> Dict[str, Any]:
        """
        Handle a request to generate a new learning path with RAG and agentic
        behavior.

        Args:
            request: Dictionary with path generation parameters
            path_generator: Optional custom path generator

        Returns:
            Response with the generated path or error
        """
        try:
            # Extract request parameters
            topic = request.get("topic")
            expertise_level = request.get("expertise_level", "beginner")
            learning_style = request.get("learning_style", "visual")
            time_commitment = request.get("time_commitment", "moderate")
            goals = request.get("goals", [])
            additional_info = request.get("additional_info")

            # Validate required parameters
            if not topic:
                return {
                    "success": False,
                    "message": "Topic is required",
                    "data": None
                }

            # Use the provided path generator or fall back to the default
            current_generator = path_generator or self.path_generator

            # Get relevant context using RAG
            relevant_docs = self.vector_store.search(topic)
            context = [doc["content"] for doc in relevant_docs] if relevant_docs else []

            # Add any context from the request
            if request.get("context"):
                context.extend(request.get("context"))

            # Generate the learning path with context
            learning_path = current_generator.generate_path(
                topic=topic,
                expertise_level=expertise_level,
                learning_style=learning_style,
                time_commitment=time_commitment,
                goals=goals,
                additional_info=additional_info,
                context=context
            )

            # Save the generated path (on by default)
            if request.get("save_path", True):
                path_file = current_generator.save_path(learning_path)

            # Update agent state
            self.current_path = learning_path
            self.user_profile.update({
                "last_topic": topic,
                "expertise_level": expertise_level,
                "learning_style": learning_style,
                "time_commitment": time_commitment
            })

            # Log the interaction
            self._log_interaction("generate_path", request, {"path_id": learning_path.id})

            return {
                "success": True,
                "message": f"Successfully generated learning path for {topic}",
                "data": learning_path.dict()
            }

        except ValueError as e:
            return {
                "success": False,
                "message": str(e),
                "data": None
            }
        except Exception as e:
            return {
                "success": False,
                "message": f"Error generating learning path: {str(e)}",
                "data": None
            }

    def _handle_path_modification(self, request: Dict[str, Any], path_generator=None) -> Dict[str, Any]:
        """
        Handle a request to modify an existing learning path.

        Args:
            request: Dictionary with modification parameters
            path_generator: Optional custom path generator

        Returns:
            Response with the modified path or error
        """
        try:
            # Extract request parameters
            path_id = request.get("path_id")
            modifications = request.get("modifications", {})

            # Validate required parameters
            if not path_id:
                return {
                    "success": False,
                    "message": "Path ID is required",
                    "data": None
                }

            if not modifications:
                return {
                    "success": False,
                    "message": "No modifications specified",
                    "data": None
                }

            # Use the provided path generator or fall back to the default
            current_generator = path_generator or self.path_generator

            # Load the existing path
            learning_path = current_generator.load_path(path_id)
            if not learning_path:
                return {
                    "success": False,
                    "message": f"Learning path with ID {path_id} not found",
                    "data": None
                }

            # Apply modifications; id and created_at are immutable.
            path_data = learning_path.dict()
            for key, value in modifications.items():
                if key in path_data and key not in ["id", "created_at"]:
                    path_data[key] = value

            # Create a new path with the modifications
            modified_path = LearningPath(**path_data)

            # Save the modified path
            if request.get("save_path", True):
                path_file = current_generator.save_path(modified_path)

            # Update agent state
            self.current_path = modified_path

            # Log the interaction
            self._log_interaction("modify_path", request, {"path_id": modified_path.id})

            return {
                "success": True,
                "message": f"Successfully modified learning path {path_id}",
                "data": modified_path.dict()
            }

        except Exception as e:
            return {
                "success": False,
                "message": f"Error modifying learning path: {str(e)}",
                "data": None
            }

    def _handle_question(self, request: Dict[str, Any], orchestrator=None) -> Dict[str, Any]:
        """
        Handle a question with RAG and agentic behavior.

        Args:
            request: Dictionary containing question data
            orchestrator: Optional custom model orchestrator

        Returns:
            Response with the answer or error
        """
        try:
            # Extract request parameters
            question = request.get("question")

            # Handle context properly (could be a list or dict)
            context = request.get("context", [])
            path_id = None
            # Fixed: initialize learning_path up front instead of probing
            # `'learning_path' in locals()` further down.
            learning_path = None

            # If context is a dictionary, extract path_id
            if isinstance(context, dict):
                path_id = context.get("path_id")
            # If it's a list or other type, just use it as context data

            # Validate required parameters
            if not question:
                return {
                    "success": False,
                    "message": "Question is required",
                    "data": None
                }

            # Prepare context for the model
            context_data = []

            # Add learning path context if available
            if path_id:
                learning_path = self.path_generator.load_path(path_id)
                if learning_path:
                    context_data.append(f"Learning Path: {learning_path.title}")
                    context_data.append(f"Description: {learning_path.description}")
                    context_data.append(f"Topic: {learning_path.topic}")
                    context_data.append(f"Expertise Level: {learning_path.expertise_level}")

                    # Add milestone information
                    for i, milestone in enumerate(learning_path.milestones):
                        context_data.append(f"Milestone {i+1}: {milestone.title}")
                        context_data.append(f"  Description: {milestone.description}")

            # Search for relevant documents; prefer an explicit topic from
            # the context dict, else fall back to the loaded path's topic.
            topic = None
            if isinstance(context, dict):
                topic = context.get("topic")
            elif learning_path:
                topic = learning_path.topic
            if topic:
                docs = self.document_store.search_documents(
                    query=question,
                    filters={"topic": topic} if topic else None,
                    top_k=3
                )
                for doc in docs:
                    context_data.append(doc.page_content)

            # Get relevant context using RAG (best-effort).
            try:
                relevant_docs = self.vector_store.search(question)
                if relevant_docs:
                    for doc in relevant_docs:
                        context_data.append(doc["content"])
            except Exception as e:
                print(f"Warning: Error searching vector store: {str(e)}")

            # Use the provided model orchestrator or fall back to the default
            current_orchestrator = orchestrator or self.model_orchestrator

            # Generate the answer with RAG context
            answer = current_orchestrator.generate_answer(
                question=question,
                context=context_data if context_data else None
            )

            # Log the interaction
            self._log_interaction("ask_question", request, {"answer_length": len(answer)})

            return {
                "success": True,
                "message": "Successfully answered question",
                "data": {
                    "question": question,
                    "answer": answer
                }
            }

        except Exception as e:
            return {
                "success": False,
                "message": f"Error answering question: {str(e)}",
                "data": None
            }

    def _plan_next_steps(self, request: Dict[str, Any]) -> None:
        """
        Plan the next steps based on the current request and agent state.

        Sets a session goal if none exists and records topic/preference
        strings in the running context.

        Args:
            request: The current request being processed
        """
        request_type = request.get("type", "generate_path")
        topic = request.get("topic", "")

        # Set a goal if none exists
        if not self.goal:
            if request_type == "generate_path":
                self.goal = f"Create a comprehensive learning path for {topic}"
            elif request_type == "modify_path":
                self.goal = "Refine the learning path based on user feedback"
            elif request_type == "ask_question":
                self.goal = f"Answer the user's question about {topic}"
            else:
                self.goal = "Assist the user with their learning journey"

        # Update context with relevant information
        if topic and topic not in self.context:
            self.context.append(f"Current topic: {topic}")

        # Track user preferences
        expertise_level = request.get("expertise_level")
        if expertise_level:
            self.context.append(f"User expertise level: {expertise_level}")

        learning_style = request.get("learning_style")
        if learning_style:
            self.context.append(f"User learning style: {learning_style}")

    def _update_user_profile(self, context: List[str]) -> None:
        """
        Update the user profile based on context.

        Parses "key: value" strings produced by _plan_next_steps and stores
        the values under expertise_level / learning_style / interests.

        Args:
            context: List of context strings
        """
        # Extract preferences from context
        for item in context:
            if "expertise level" in item.lower():
                parts = item.split(":", 1)
                if len(parts) > 1:
                    self.user_profile["expertise_level"] = parts[1].strip()
            elif "learning style" in item.lower():
                parts = item.split(":", 1)
                if len(parts) > 1:
                    self.user_profile["learning_style"] = parts[1].strip()
            elif "topic" in item.lower():
                parts = item.split(":", 1)
                if len(parts) > 1:
                    self.user_profile["interests"] = self.user_profile.get("interests", []) + [parts[1].strip()]

    def _handle_resource_request(self, request: Dict[str, Any], orchestrator=None) -> Dict[str, Any]:
        """
        Handle a request for learning resources.

        Args:
            request: Dictionary with resource request parameters
            orchestrator: Optional custom model orchestrator

        Returns:
            Response with resources or error
        """
        try:
            # Extract request parameters
            topic = request.get("topic")
            learning_style = request.get("learning_style", "visual")
            expertise_level = request.get("expertise_level", "beginner")
            count = int(request.get("count", 5))

            # Validate required parameters
            if not topic:
                return {
                    "success": False,
                    "message": "Topic is required",
                    "data": None
                }

            # Use the provided model orchestrator or fall back to the default.
            # Fixed: this previously read the undefined name `model_orchestrator`
            # (the parameter is `orchestrator`), raising NameError on every call
            # that was then swallowed by the except below.
            current_orchestrator = orchestrator or self.model_orchestrator

            # Generate recommendations using the model orchestrator
            resources = current_orchestrator.generate_resource_recommendations(
                topic=topic,
                learning_style=learning_style,
                expertise_level=expertise_level,
                count=count
            )

            # Log the interaction
            self._log_interaction("get_resources", request, {"resource_count": len(resources)})

            return {
                "success": True,
                "message": f"Successfully found {len(resources)} resources for {topic}",
                "data": {
                    "topic": topic,
                    "resources": resources
                }
            }

        except Exception as e:
            return {
                "success": False,
                "message": f"Error finding resources: {str(e)}",
                "data": None
            }

    def _log_interaction(
        self,
        interaction_type: str,
        request: Dict[str, Any],
        result: Dict[str, Any]
    ) -> None:
        """
        Log an interaction with the agent.

        Args:
            interaction_type: Type of interaction
            request: The request data
            result: The result data
        """
        # Create an interaction log
        log_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "type": interaction_type,
            "request": request,
            "result": result
        }

        # Add to session history
        self.session_history.append(log_entry)

        # Limit history size to the 100 most recent entries
        if len(self.session_history) > 100:
            self.session_history = self.session_history[-100:]

    def get_learning_styles(self) -> Dict[str, str]:
        """
        Get available learning styles.

        Returns:
            Dictionary of learning styles and descriptions
        """
        return LEARNING_STYLES

    def get_expertise_levels(self) -> Dict[str, str]:
        """
        Get available expertise levels.

        Returns:
            Dictionary of expertise levels and descriptions
        """
        return EXPERTISE_LEVELS

    def get_time_commitments(self) -> Dict[str, str]:
        """
        Get available time commitment options.

        Returns:
            Dictionary of time commitment options and descriptions
        """
        return TIME_COMMITMENTS
src/agents/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Autonomous Learning Agents module
3
+ Contains specialized agents for different learning tasks
4
+ """
5
+ from .base_agent import BaseAgent
6
+ from .research_agent import ResearchAgent
7
+ from .teaching_agent import TeachingAgent
8
+
9
+ __all__ = ['BaseAgent', 'ResearchAgent', 'TeachingAgent']
src/agents/base_agent.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base class for all autonomous learning agents
3
+ """
4
+ from typing import List, Dict, Any, Optional
5
+ import abc
6
+ from datetime import datetime
7
+ import json
8
+
9
+ from src.utils.config import OPENAI_API_KEY
10
+ from src.ml.model_orchestrator import ModelOrchestrator
11
+ from src.data.vector_store import VectorStore
12
+
13
+ class BaseAgent(abc.ABC):
14
+ """
15
+ Base class for all autonomous learning agents
16
+ Provides common functionality for all agents
17
+ """
18
+ def __init__(self, api_key: Optional[str] = None):
19
+ """
20
+ Initialize the base agent
21
+
22
+ Args:
23
+ api_key: Optional API key for language models
24
+ """
25
+ try:
26
+ self.api_key = api_key or OPENAI_API_KEY
27
+ if not self.api_key:
28
+ print("Warning: No API key provided. Some features may not work correctly.")
29
+
30
+ # Initialize model orchestrator
31
+ self.model_orchestrator = ModelOrchestrator(api_key=self.api_key)
32
+
33
+ # Initialize vector store with error handling
34
+ self.vector_store = VectorStore(api_key=self.api_key)
35
+ try:
36
+ # Try to load documents from the default directory
37
+ docs_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'vector_db', 'documents')
38
+ self.vector_store.load_documents(docs_dir)
39
+ except Exception as e:
40
+ print(f"Warning: Could not load documents: {str(e)}")
41
+ # Fall back to minimal vector store
42
+ self.vector_store._create_minimal_vector_store()
43
+
44
+ self.memory = []
45
+ self.goals = []
46
+ self.current_task = None
47
+ self.last_action = None
48
+
49
+ except Exception as e:
50
+ print(f"Error initializing agent: {str(e)}")
51
+ # Try to continue with minimal functionality
52
+ self.api_key = api_key or OPENAI_API_KEY
53
+ self.memory = []
54
+ self.goals = []
55
+ self.current_task = None
56
+ self.last_action = None
57
+
58
+ # Try to create a minimal vector store
59
+ try:
60
+ self.vector_store = VectorStore(api_key=self.api_key)
61
+ self.vector_store._create_minimal_vector_store()
62
+ except:
63
+ print("Warning: Could not initialize vector store. Some features may not work.")
64
+ self.vector_store = None
65
+
66
    @abc.abstractmethod
    def execute_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute a specific task. Abstract: each concrete agent must implement
        its own task execution strategy.

        Args:
            task: Task description and parameters

        Returns:
            Task execution results
        """
        pass
78
+
79
+ def add_to_memory(self, content: str) -> None:
80
+ """
81
+ Add content to agent's memory
82
+
83
+ Args:
84
+ content: Content to remember
85
+ """
86
+ timestamp = datetime.now().isoformat()
87
+ memory_item = {
88
+ "timestamp": timestamp,
89
+ "content": content
90
+ }
91
+ self.memory.append(memory_item)
92
+
93
+ # Keep memory size manageable
94
+ if len(self.memory) > 100:
95
+ self.memory = self.memory[-100:]
96
+
97
+ def get_relevant_memory(self, query: str) -> List[Dict[str, Any]]:
98
+ """
99
+ Get relevant memories based on a query
100
+
101
+ Args:
102
+ query: Query to find relevant memories
103
+
104
+ Returns:
105
+ List of relevant memory items
106
+ """
107
+ if not self.memory:
108
+ return []
109
+
110
+ try:
111
+ # Convert memory to text format
112
+ memory_texts = [f"{item['timestamp']}: {item['content']}" for item in self.memory]
113
+
114
+ # If vector store is not available, do a simple text search
115
+ if not hasattr(self, 'vector_store') or self.vector_store is None:
116
+ # Simple text-based search as fallback
117
+ query = query.lower()
118
+ return [
119
+ item for item in self.memory
120
+ if query in item['content'].lower()
121
+ ][:5] # Limit to top 5 matches
122
+
123
+ # Use vector store to find most relevant memories
124
+ relevant_memories = self.vector_store.search(query, documents=memory_texts)
125
+
126
+ # Convert back to memory format
127
+ relevant_items = []
128
+ for memory in self.memory:
129
+ memory_text = f"{memory['timestamp']}: {memory['content']}"
130
+ if any(memory_text in item for item in relevant_memories):
131
+ relevant_items.append(memory)
132
+
133
+ return relevant_items
134
+
135
+ except Exception as e:
136
+ print(f"Error in get_relevant_memory: {str(e)}")
137
+ # Fallback to simple text search
138
+ query = query.lower()
139
+ return [
140
+ item for item in self.memory
141
+ if query in item['content'].lower()
142
+ ][:5] # Limit to top 5 matches
143
+
144
+ def plan_next_action(self, current_state: Dict[str, Any]) -> Dict[str, Any]:
145
+ """
146
+ Plan the next action based on current state
147
+
148
+ Args:
149
+ current_state: Current state information
150
+
151
+ Returns:
152
+ Planned action
153
+ """
154
+ # Get relevant memories
155
+ relevant_memories = self.get_relevant_memory("next action plan")
156
+
157
+ # Create planning prompt
158
+ memory_summary = "\n".join(item["content"] for item in relevant_memories)
159
+ prompt = f"""
160
+ You are a specialized learning agent. Plan your next action based on:
161
+
162
+ Current State:
163
+ {json.dumps(current_state, indent=2)}
164
+
165
+ Relevant Past Actions:
166
+ {memory_summary}
167
+
168
+ Goals:
169
+ {json.dumps(self.goals, indent=2)}
170
+
171
+ Propose a specific, actionable next step.
172
+ Format your response as JSON with these fields:
173
+ - action: string (what to do)
174
+ - parameters: object (any parameters needed)
175
+ - reason: string (why this action)
176
+ """
177
+
178
+ # Generate plan
179
+ plan = json.loads(self.model_orchestrator.generate_structured_response(
180
+ prompt=prompt,
181
+ output_schema="""
182
+ {
183
+ "action": "string",
184
+ "parameters": "object",
185
+ "reason": "string"
186
+ }
187
+ """
188
+ ))
189
+
190
+ # Store the plan
191
+ self.last_action = plan
192
+ self.add_to_memory(f"Planned action: {json.dumps(plan)}")
193
+
194
+ return plan
195
+
196
+ def self_improve(self) -> None:
197
+ """
198
+ Analyze past performance and improve agent's capabilities
199
+ """
200
+ # Analyze recent actions
201
+ recent_actions = self.memory[-10:]
202
+
203
+ # Get feedback on performance
204
+ prompt = f"""
205
+ Analyze these recent actions and suggest improvements:
206
+ {json.dumps(recent_actions, indent=2)}
207
+
208
+ Suggest specific improvements for:
209
+ 1. Task execution efficiency
210
+ 2. Memory management
211
+ 3. Goal achievement
212
+ 4. Resource utilization
213
+
214
+ Format your response as JSON with specific suggestions.
215
+ """
216
+
217
+ # Get improvement suggestions
218
+ improvements = json.loads(self.model_orchestrator.generate_structured_response(
219
+ prompt=prompt,
220
+ output_schema="""
221
+ {
222
+ "improvements": [
223
+ {
224
+ "area": "string",
225
+ "suggestion": "string",
226
+ "implementation": "string"
227
+ }
228
+ ]
229
+ }
230
+ """
231
+ ))
232
+
233
+ # Store improvements for future reference
234
+ self.add_to_memory(f"Self-improvement suggestions: {json.dumps(improvements)}")
src/agents/research_agent.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Research Agent for autonomous learning
3
+ Handles research tasks and knowledge acquisition
4
+ """
5
+ from typing import List, Dict, Any, Optional
6
+ from datetime import datetime
7
+ import json
8
+
9
+ from .base_agent import BaseAgent
10
+
11
class ResearchAgent(BaseAgent):
    """Specialized agent for conducting research and acquiring knowledge."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the research agent.

        Args:
            api_key: Optional API key forwarded to the base agent.
        """
        super().__init__(api_key)
        # Topics being tracked, a log of completed runs, and the active focus.
        self.research_topics = []
        self.research_history = []
        self.current_research_focus = None
20
+
21
+ def execute_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
22
+ """
23
+ Execute a research task
24
+
25
+ Args:
26
+ task: Task description and parameters
27
+
28
+ Returns:
29
+ Research results
30
+ """
31
+ task_type = task.get("type", "research")
32
+
33
+ if task_type == "research":
34
+ return self.conduct_research(task)
35
+ elif task_type == "update_knowledge":
36
+ return self.update_knowledge(task)
37
+ elif task_type == "analyze_trends":
38
+ return self.analyze_trends(task)
39
+ else:
40
+ return {
41
+ "success": False,
42
+ "message": f"Unknown task type: {task_type}"
43
+ }
44
+
45
+ def conduct_research(self, task: Dict[str, Any]) -> Dict[str, Any]:
46
+ """
47
+ Conduct research on a specific topic
48
+
49
+ Args:
50
+ task: Research task parameters
51
+
52
+ Returns:
53
+ Research findings
54
+ """
55
+ topic = task.get("topic")
56
+ depth = task.get("depth", "medium")
57
+ context = task.get("context", [])
58
+
59
+ if not topic:
60
+ return {
61
+ "success": False,
62
+ "message": "Topic is required for research"
63
+ }
64
+
65
+ # Create a more intelligent research prompt that can handle any type of query
66
+ context_str = '\n'.join(context) if context else ''
67
+
68
+ # Enhanced prompt with better instruction and flexibility for ANY topic
69
+ prompt = f"""
70
+ I want you to act as an expert AI educational assistant with comprehensive knowledge in all fields of study, technologies, skills, and courses. The user has requested information about: "{topic}"
71
+
72
+ This could be a request for:
73
+ 1. A learning path on this topic or skill
74
+ 2. Specific research or information on this subject
75
+ 3. How to accomplish a task or learn a technique
76
+ 4. Explanations or definitions of concepts
77
+ 5. Course recommendations for a particular field
78
+ 6. Career advice related to skills or technologies
79
+ 7. Comparisons between different technologies, methods, or approaches
80
+
81
+ YOUR GOAL: Provide the most helpful, accurate, and comprehensive information possible about ANY educational topic the user asks about. You should be able to address questions about programming languages, data science, machine learning, web development, mobile development, cloud computing, cybersecurity, design, business, humanities, sciences, mathematics, or any other educational subject.
82
+
83
+ If it seems like they want a learning path:
84
+ - Provide a step-by-step progression from basics to advanced
85
+ - Include estimated time commitments for each stage
86
+ - Recommend specific resources (books, courses, tutorials) for each step
87
+
88
+ If it seems like they want specific information:
89
+ - Provide detailed, technically accurate information
90
+ - Include practical applications and examples
91
+ - Balance theoretical knowledge with practical insights
92
+
93
+ Your response should be thorough, accurate, helpful for any level of expertise, and include both theoretical understanding and practical application.
94
+
95
+ Additional context:
96
+ {context_str}
97
+
98
+ Provide your findings in this JSON format:
99
+ {{
100
+ "summary": "A clear 2-3 paragraph summary answering the query directly, with specific details and actionable insights",
101
+ "key_concepts": ["List of 4-6 key concepts relevant to the query, with brief explanations"],
102
+ "learning_path": ["Detailed steps for learning this topic in a logical order, from beginner to advanced"],
103
+ "resources": ["Specific recommended resources including books, courses, tutorials, documentation, and communities"],
104
+ "code_examples": ["Relevant code examples or practical exercises that demonstrate key concepts"],
105
+ "advanced_topics": ["More advanced topics to explore after mastering basics, with brief explanations of why they matter"],
106
+ "career_applications": ["How these skills apply to real-world jobs and career paths"],
107
+ "curiosity_trails": ["A list of 3-5 intriguing follow-up questions or related sub-topics to explore further, designed to spark curiosity and deeper learning."]
108
+ }}
109
+
110
+ For the "curiosity_trails", think about what someone who has just learned the main topic might wonder next, or what fascinating related areas they could branch into.
111
+
112
+ Be extremely thorough, accurate, and helpful. Don't just provide general advice - give specific, actionable information that would genuinely help someone learn this topic or skill.
113
+ """
114
+
115
+ # Generate research findings with error handling
116
+ findings_json = self.model_orchestrator.generate_structured_response(
117
+ prompt=prompt,
118
+ output_schema="""
119
+ {
120
+ "summary": "string",
121
+ "key_concepts": ["string"],
122
+ "learning_path": ["string"],
123
+ "resources": ["string"],
124
+ "code_examples": ["string"],
125
+ "advanced_topics": ["string"],
126
+ "career_applications": ["string"],
127
+ "curiosity_trails": ["string"]
128
+ }
129
+ """
130
+ )
131
+ if not findings_json:
132
+ return {
133
+ "success": False,
134
+ "message": "AI provider did not return a valid response. Please try again later."
135
+ }
136
+ try:
137
+ findings = json.loads(findings_json)
138
+ except Exception as e:
139
+ return {
140
+ "success": False,
141
+ "message": f"Failed to parse AI response: {str(e)}",
142
+ "raw_response": findings_json
143
+ }
144
+
145
+ # Store findings
146
+ self.add_to_memory(f"Research findings on {topic}: {json.dumps(findings)}")
147
+ self.research_history.append({
148
+ "topic": topic,
149
+ "timestamp": datetime.now().isoformat(),
150
+ "depth": depth,
151
+ "findings": findings
152
+ })
153
+
154
+ return {
155
+ "success": True,
156
+ "findings": findings,
157
+ "message": f"Successfully completed research on {topic}"
158
+ }
159
+
160
+ def update_knowledge(self, task: Dict[str, Any]) -> Dict[str, Any]:
161
+ """
162
+ Update knowledge based on new information
163
+
164
+ Args:
165
+ task: Update task parameters
166
+
167
+ Returns:
168
+ Update results
169
+ """
170
+ new_info = task.get("new_information")
171
+ related_topics = task.get("related_topics", [])
172
+
173
+ if not new_info:
174
+ return {
175
+ "success": False,
176
+ "message": "New information is required for knowledge update"
177
+ }
178
+
179
+ # Analyze new information
180
+ prompt = f"""
181
+ Analyze this new information and update existing knowledge:
182
+ {new_info}
183
+ """
184
+
185
+ # Include related topics
186
+ related_topics = self._find_related_topics(new_info)
187
+ if related_topics:
188
+ related_topics_str = '\n'.join(related_topics)
189
+ prompt += f"\n\nRelated topics to consider:\n{related_topics_str}"
190
+
191
+ prompt += f"""
192
+ Identify:
193
+ 1. What new knowledge should be added
194
+ 2. What existing knowledge should be updated
195
+ 3. What knowledge should be deprecated
196
+ """
197
+
198
+ analysis = json.loads(self.model_orchestrator.generate_structured_response(
199
+ prompt=prompt,
200
+ output_schema="""
201
+ {
202
+ "new_knowledge": ["string"],
203
+ "updated_knowledge": ["string"],
204
+ "deprecated_knowledge": ["string"]
205
+ }
206
+ """
207
+ ))
208
+
209
+ # Update knowledge base
210
+ self.add_to_memory(f"Knowledge update: {json.dumps(analysis)}")
211
+
212
+ return {
213
+ "success": True,
214
+ "analysis": analysis,
215
+ "message": "Knowledge base updated successfully"
216
+ }
217
+
218
+ def analyze_trends(self, task: Dict[str, Any]) -> Dict[str, Any]:
219
+ """
220
+ Analyze trends in a specific area
221
+
222
+ Args:
223
+ task: Trend analysis parameters
224
+
225
+ Returns:
226
+ Trend analysis results
227
+ """
228
+ area = task.get("area")
229
+ timeframe = task.get("timeframe", "recent")
230
+ context = task.get("context", [])
231
+
232
+ if not area:
233
+ return {
234
+ "success": False,
235
+ "message": "Area is required for trend analysis"
236
+ }
237
+
238
+ # Create analysis prompt
239
+ prompt = f"""
240
+ Analyze current trends in: {area}
241
+
242
+ Timeframe: {timeframe}
243
+ """
244
+
245
+ # Add context if available
246
+ if context:
247
+ context_str = '\n'.join(context)
248
+ prompt += f"\n\nContext:\n{context_str}"
249
+
250
+ prompt += f"""
251
+ Provide analysis in JSON format with:
252
+ - Current trends
253
+ - Emerging patterns
254
+ - Predicted developments
255
+ - Impact assessment
256
+ """
257
+
258
+ # Generate trend analysis
259
+ analysis = json.loads(self.model_orchestrator.generate_structured_response(
260
+ prompt=prompt,
261
+ output_schema="""
262
+ {
263
+ "current_trends": ["string"],
264
+ "emerging_patterns": ["string"],
265
+ "predicted_developments": ["string"],
266
+ "impact": ["string"]
267
+ }
268
+ """
269
+ ))
270
+
271
+ # Store analysis
272
+ self.add_to_memory(f"Trend analysis for {area}: {json.dumps(analysis)}")
273
+
274
+ return {
275
+ "success": True,
276
+ "analysis": analysis,
277
+ "message": f"Successfully analyzed trends in {area}"
278
+ }
279
+
280
+ def plan_next_research(self) -> Dict[str, Any]:
281
+ """
282
+ Plan next research task based on current knowledge
283
+
284
+ Returns:
285
+ Next research plan
286
+ """
287
+ # Get current knowledge gaps
288
+ relevant_memories = self.get_relevant_memory("knowledge gaps")
289
+
290
+ # Create planning prompt
291
+ memory_summary = "\n".join(item["content"] for item in relevant_memories)
292
+ prompt = f"""
293
+ Based on current knowledge:
294
+ {memory_summary}
295
+
296
+ Identify:
297
+ 1. Most important knowledge gaps
298
+ 2. Areas requiring deeper research
299
+ 3. Emerging topics to explore
300
+
301
+ Propose next research task with:
302
+ - Topic
303
+ - Research depth
304
+ - Related topics
305
+ """
306
+
307
+ # Generate research plan
308
+ plan = json.loads(self.model_orchestrator.generate_structured_response(
309
+ prompt=prompt,
310
+ output_schema="""
311
+ {
312
+ "topic": "string",
313
+ "depth": "string",
314
+ "related_topics": ["string"],
315
+ "reason": "string"
316
+ }
317
+ """
318
+ ))
319
+
320
+ # Store plan
321
+ self.add_to_memory(f"Next research plan: {json.dumps(plan)}")
322
+
323
+ return plan
src/agents/teaching_agent.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Teaching Agent for autonomous learning
3
+ Handles teaching and learning path creation
4
+ """
5
+ from typing import List, Dict, Any, Optional
6
+ from datetime import datetime
7
+ import json
8
+
9
+ from .base_agent import BaseAgent
10
+ from .research_agent import ResearchAgent
11
+
12
class TeachingAgent(BaseAgent):
    """Specialized agent for teaching and learning path creation."""

    def __init__(self, api_key: Optional[str] = None):
        """Initialize the teaching agent.

        Args:
            api_key: Optional API key forwarded to the base agent.
        """
        super().__init__(api_key)
        self.learning_paths = []          # created paths with metadata
        self.teaching_style = "adaptive"  # default pedagogy mode
        self.current_lesson = None        # lesson currently in progress
21
+
22
+ def execute_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
23
+ """
24
+ Execute a teaching task
25
+
26
+ Args:
27
+ task: Task description and parameters
28
+
29
+ Returns:
30
+ Teaching results
31
+ """
32
+ task_type = task.get("type", "create_path")
33
+
34
+ if task_type == "create_path":
35
+ return self.create_learning_path(task)
36
+ elif task_type == "adapt_path":
37
+ return self.adapt_learning_path(task)
38
+ elif task_type == "generate_lesson":
39
+ return self.generate_lesson(task)
40
+ else:
41
+ return {
42
+ "success": False,
43
+ "message": f"Unknown task type: {task_type}"
44
+ }
45
+
46
+ def create_learning_path(self, task: Dict[str, Any]) -> Dict[str, Any]:
47
+ """
48
+ Create a personalized learning path
49
+
50
+ Args:
51
+ task: Learning path creation parameters
52
+
53
+ Returns:
54
+ Created learning path
55
+ """
56
+ topic = task.get("topic")
57
+ expertise_level = task.get("expertise_level", "beginner")
58
+ learning_style = task.get("learning_style", "visual")
59
+ time_commitment = task.get("time_commitment", "moderate")
60
+
61
+ if not topic:
62
+ return {
63
+ "success": False,
64
+ "message": "Topic is required for learning path creation"
65
+ }
66
+
67
+ # Get relevant research
68
+ research_result = {
69
+ "success": True,
70
+ "findings": ["Sample research finding 1", "Sample research finding 2"]
71
+ }
72
+
73
+ # Temporarily disabled actual research to fix circular import
74
+ # research_agent = ResearchAgent(self.api_key)
75
+ # research_result = research_agent.conduct_research({
76
+ # "topic": topic,
77
+ # "depth": "deep"
78
+ # })
79
+ #
80
+ # if not research_result["success"]:
81
+ # return research_result
82
+
83
+ # Create teaching prompt
84
+ prompt = f"""
85
+ Create a personalized learning path for: {topic}
86
+
87
+ User preferences:
88
+ - Expertise level: {expertise_level}
89
+ - Learning style: {learning_style}
90
+ - Time commitment: {time_commitment}
91
+
92
+ Research findings:
93
+ {json.dumps(research_result["findings"])}
94
+
95
+ Create a structured learning path with:
96
+ 1. Learning objectives
97
+ 2. Milestones
98
+ 3. Resources
99
+ 4. Assessment points
100
+ 5. Adaptation points
101
+ """
102
+
103
+ # Generate learning path
104
+ path = json.loads(self.model_orchestrator.generate_structured_response(
105
+ prompt=prompt,
106
+ output_schema="""
107
+ {
108
+ "title": "string",
109
+ "description": "string",
110
+ "objectives": ["string"],
111
+ "milestones": [
112
+ {
113
+ "title": "string",
114
+ "description": "string",
115
+ "resources": ["string"],
116
+ "assessment": "string",
117
+ "adaptation_points": ["string"]
118
+ }
119
+ ],
120
+ "total_duration": "string",
121
+ "prerequisites": ["string"]
122
+ }
123
+ """
124
+ ))
125
+
126
+ # Store learning path
127
+ self.learning_paths.append({
128
+ "path": path,
129
+ "created_at": datetime.now().isoformat(),
130
+ "topic": topic,
131
+ "expertise_level": expertise_level
132
+ })
133
+
134
+ # Add to memory
135
+ self.add_to_memory(f"Created learning path for {topic}: {json.dumps(path)}")
136
+
137
+ return {
138
+ "success": True,
139
+ "learning_path": path,
140
+ "message": f"Successfully created learning path for {topic}"
141
+ }
142
+
143
+ def adapt_learning_path(self, task: Dict[str, Any]) -> Dict[str, Any]:
144
+ """
145
+ Adapt an existing learning path based on user progress
146
+
147
+ Args:
148
+ task: Adaptation parameters
149
+
150
+ Returns:
151
+ Adapted learning path
152
+ """
153
+ path_id = task.get("path_id")
154
+ user_progress = task.get("user_progress")
155
+ feedback = task.get("feedback", [])
156
+
157
+ if not path_id or not user_progress:
158
+ return {
159
+ "success": False,
160
+ "message": "Path ID and user progress are required for adaptation"
161
+ }
162
+
163
+ # Find the learning path
164
+ path = None
165
+ for p in self.learning_paths:
166
+ if p.get("id") == path_id:
167
+ path = p["path"]
168
+ break
169
+
170
+ if not path:
171
+ return {
172
+ "success": False,
173
+ "message": f"Learning path with ID {path_id} not found"
174
+ }
175
+
176
+ # Prepare feedback string
177
+ feedback_str = '\n'.join(feedback) if feedback else 'No feedback provided'
178
+
179
+ # Create adaptation prompt
180
+ prompt = f"""
181
+ Adapt this learning path based on user progress and feedback:
182
+ {json.dumps(path)}
183
+
184
+ User progress:
185
+ {json.dumps(user_progress)}
186
+
187
+ Feedback:
188
+ {feedback_str}
189
+
190
+ Suggest specific adaptations for:
191
+ 1. Content difficulty
192
+ 2. Resource types
193
+ 3. Assessment methods
194
+ 4. Learning pace
195
+ """
196
+
197
+ # Generate adaptations
198
+ adaptations = json.loads(self.model_orchestrator.generate_structured_response(
199
+ prompt=prompt,
200
+ output_schema="""
201
+ {
202
+ "content_changes": ["string"],
203
+ "resource_changes": ["string"],
204
+ "assessment_changes": ["string"],
205
+ "pace_changes": ["string"]
206
+ }
207
+ """
208
+ ))
209
+
210
+ # Apply adaptations
211
+ for change in adaptations["content_changes"]:
212
+ self._apply_change(path, change)
213
+
214
+ # Store adaptation
215
+ self.add_to_memory(f"Adapted learning path {path_id}: {json.dumps(adaptations)}")
216
+
217
+ return {
218
+ "success": True,
219
+ "adaptations": adaptations,
220
+ "updated_path": path,
221
+ "message": f"Successfully adapted learning path {path_id}"
222
+ }
223
+
224
+ def generate_lesson(self, task: Dict[str, Any]) -> Dict[str, Any]:
225
+ """
226
+ Generate a specific lesson for a topic
227
+
228
+ Args:
229
+ task: Lesson generation parameters
230
+
231
+ Returns:
232
+ Generated lesson
233
+ """
234
+ topic = task.get("topic")
235
+ lesson_type = task.get("type", "introductory")
236
+ duration = task.get("duration", "60 minutes")
237
+
238
+ if not topic:
239
+ return {
240
+ "success": False,
241
+ "message": "Topic is required for lesson generation"
242
+ }
243
+
244
+ # Create lesson prompt
245
+ prompt = f"""
246
+ Generate a {lesson_type} lesson on: {topic}
247
+
248
+ Duration: {duration}
249
+
250
+ Include:
251
+ 1. Key concepts
252
+ 2. Practical examples
253
+ 3. Interactive elements
254
+ 4. Assessment questions
255
+ 5. Additional resources
256
+
257
+ Format as JSON with clear structure
258
+ """
259
+
260
+ # Generate lesson
261
+ lesson = json.loads(self.model_orchestrator.generate_structured_response(
262
+ prompt=prompt,
263
+ output_schema="""
264
+ {
265
+ "title": "string",
266
+ "description": "string",
267
+ "sections": [
268
+ {
269
+ "title": "string",
270
+ "content": "string",
271
+ "examples": ["string"],
272
+ "questions": ["string"]
273
+ }
274
+ ],
275
+ "interactive_elements": ["string"],
276
+ "resources": ["string"]
277
+ }
278
+ """
279
+ ))
280
+
281
+ # Add to memory
282
+ self.add_to_memory(f"Generated lesson for {topic}: {json.dumps(lesson)}")
283
+
284
+ return {
285
+ "success": True,
286
+ "lesson": lesson,
287
+ "message": f"Successfully generated lesson for {topic}"
288
+ }
289
+
290
+ def _apply_change(self, path: Dict[str, Any], change: str) -> None:
291
+ """
292
+ Apply a specific change to the learning path
293
+
294
+ Args:
295
+ path: Learning path to modify
296
+ change: Change description
297
+ """
298
+ # Parse change description
299
+ try:
300
+ change_type, details = change.split(":", 1)
301
+ details = details.strip()
302
+
303
+ if change_type == "difficulty":
304
+ self._adjust_difficulty(path, details)
305
+ elif change_type == "resources":
306
+ self._update_resources(path, details)
307
+ elif change_type == "assessment":
308
+ self._modify_assessment(path, details)
309
+ elif change_type == "pace":
310
+ self._adjust_pace(path, details)
311
+ except Exception as e:
312
+ self.add_to_memory(f"Failed to apply change: {str(e)}")
313
+
314
+ def _adjust_difficulty(self, path: Dict[str, Any], details: str) -> None:
315
+ """
316
+ Adjust content difficulty
317
+
318
+ Args:
319
+ path: Learning path
320
+ details: Difficulty adjustment details
321
+ """
322
+ # Implementation of difficulty adjustment
323
+ pass
324
+
325
+ def _update_resources(self, path: Dict[str, Any], details: str) -> None:
326
+ """
327
+ Update learning resources
328
+
329
+ Args:
330
+ path: Learning path
331
+ details: Resource update details
332
+ """
333
+ # Implementation of resource updates
334
+ pass
335
+
336
+ def _modify_assessment(self, path: Dict[str, Any], details: str) -> None:
337
+ """
338
+ Modify assessment methods
339
+
340
+ Args:
341
+ path: Learning path
342
+ details: Assessment modification details
343
+ """
344
+ # Implementation of assessment modifications
345
+ pass
346
+
347
+ def _adjust_pace(self, path: Dict[str, Any], details: str) -> None:
348
+ """
349
+ Adjust learning pace
350
+
351
+ Args:
352
+ path: Learning path
353
+ details: Pace adjustment details
354
+ """
355
+ # Implementation of pace adjustments
356
+ pass
src/data/bm25_retriever.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BM25 Retriever for keyword-based document search.
3
+
4
+ BM25 (Best Matching 25) is a probabilistic ranking function used for keyword-based
5
+ document retrieval. It's particularly effective for exact keyword matches that
6
+ semantic search might miss.
7
+ """
8
+ from typing import List, Dict, Any, Optional
9
+ import numpy as np
10
+ from rank_bm25 import BM25Okapi
11
+ from langchain.schema import Document
12
+
13
+
14
+ class BM25Retriever:
15
+ """
16
+ BM25-based keyword retriever for hybrid search.
17
+
18
+ BM25 uses term frequency (TF) and inverse document frequency (IDF) to rank
19
+ documents based on keyword relevance.
20
+ """
21
+
22
+ def __init__(self, k1: float = 1.5, b: float = 0.75):
23
+ """
24
+ Initialize BM25 retriever.
25
+
26
+ Args:
27
+ k1: Term frequency saturation parameter (default: 1.5)
28
+ Higher values give more weight to term frequency
29
+ b: Length normalization parameter (default: 0.75)
30
+ 0 = no normalization, 1 = full normalization
31
+ """
32
+ self.k1 = k1
33
+ self.b = b
34
+ self.bm25 = None
35
+ self.documents = []
36
+ self.tokenized_corpus = []
37
+
38
+ def index_documents(self, documents: List[Document]) -> None:
39
+ """
40
+ Index documents for BM25 search.
41
+
42
+ Args:
43
+ documents: List of Document objects to index
44
+ """
45
+ if not documents:
46
+ return
47
+
48
+ self.documents = documents
49
+
50
+ # Tokenize documents (simple whitespace tokenization)
51
+ self.tokenized_corpus = [
52
+ doc.page_content.lower().split()
53
+ for doc in documents
54
+ ]
55
+
56
+ # Create BM25 index
57
+ self.bm25 = BM25Okapi(self.tokenized_corpus, k1=self.k1, b=self.b)
58
+
59
+ print(f"✅ BM25 index created with {len(documents)} documents")
60
+
61
+ def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
62
+ """
63
+ Search documents using BM25 keyword matching.
64
+
65
+ Args:
66
+ query: Search query
67
+ top_k: Number of top results to return
68
+
69
+ Returns:
70
+ List of dictionaries with 'document' and 'score' keys
71
+ """
72
+ if not self.bm25 or not self.documents:
73
+ return []
74
+
75
+ # Tokenize query
76
+ tokenized_query = query.lower().split()
77
+
78
+ # Get BM25 scores
79
+ scores = self.bm25.get_scores(tokenized_query)
80
+
81
+ # Get top-k indices
82
+ top_indices = np.argsort(scores)[::-1][:top_k]
83
+
84
+ # Build results
85
+ results = []
86
+ for idx in top_indices:
87
+ if scores[idx] > 0: # Only include documents with non-zero scores
88
+ results.append({
89
+ 'document': self.documents[idx],
90
+ 'score': float(scores[idx]),
91
+ 'rank': len(results) + 1
92
+ })
93
+
94
+ return results
95
+
96
+ def get_stats(self) -> Dict[str, Any]:
97
+ """
98
+ Get statistics about the indexed corpus.
99
+
100
+ Returns:
101
+ Dictionary with corpus statistics
102
+ """
103
+ if not self.bm25:
104
+ return {"indexed": False}
105
+
106
+ return {
107
+ "indexed": True,
108
+ "document_count": len(self.documents),
109
+ "avg_doc_length": np.mean([len(doc) for doc in self.tokenized_corpus]),
110
+ "k1": self.k1,
111
+ "b": self.b
112
+ }
113
+
114
+
115
def reciprocal_rank_fusion(
    results_list: List[List[Dict[str, Any]]],
    k: int = 60
) -> List[Dict[str, Any]]:
    """
    Fuse several ranked result lists with Reciprocal Rank Fusion (RRF).

    RRF_score(d) = Σ over lists of 1 / (k + rank(d)). Documents that
    appear in multiple lists, and/or near the top of a list, score higher.

    Args:
        results_list: Result lists from different retrievers; each item is
            a dict carrying a 'document' and a 'rank' (or 'score') key.
        k: Smoothing constant preventing division by zero (default: 60).

    Returns:
        Fused results sorted by descending RRF score, re-ranked from 1.
    """
    scores: Dict[int, float] = {}
    by_key: Dict[int, Any] = {}

    for ranked in results_list:
        for hit in ranked:
            doc = hit['document']
            # Fall back to 'score' (then 1) when no explicit rank is given.
            position = hit.get('rank', hit.get('score', 1))

            # Deduplicate by content: identical text counts as one document.
            key = hash(doc.page_content)
            scores[key] = scores.get(key, 0.0) + 1.0 / (k + position)
            by_key.setdefault(key, doc)

    ordered = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)

    return [
        {'document': by_key[key], 'score': score, 'rank': i + 1}
        for i, (key, score) in enumerate(ordered)
    ]
src/data/document_store.py ADDED
@@ -0,0 +1,973 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vector database interface for the AI Learning Path Generator.
3
+ Handles document storage, retrieval, and semantic search.
4
+
5
+ Optimizations:
6
+ - Singleton pattern for connection pooling
7
+ - Batch operations for efficiency
8
+ - Query optimization and caching
9
+ - Relevance score filtering (>0.7)
10
+ - Performance logging
11
+ """
12
+ import os
13
+ import time
14
+ import hashlib
15
+ import sqlite3
16
+ import json
17
+ from typing import List, Dict, Any, Optional
18
+ from pathlib import Path
19
+ import threading
20
+
21
+ import chromadb
22
+ from chromadb.config import Settings
23
+ from chromadb.utils import embedding_functions
24
+ from langchain.schema import Document
25
+
26
+ from src.utils.config import (
27
+ VECTOR_DB_PATH,
28
+ OPENAI_API_KEY,
29
+ EMBEDDING_MODEL,
30
+ # Advanced RAG config
31
+ ENABLE_SEMANTIC_CACHE,
32
+ QUERY_REWRITE_ENABLED,
33
+ RERANK_ENABLED,
34
+ CONTEXTUAL_COMPRESSION_ENABLED,
35
+ USE_LOCAL_RERANKER,
36
+ COHERE_API_KEY,
37
+ COHERE_RERANK_MODEL,
38
+ LOCAL_RERANKER_MODEL,
39
+ QUERY_REWRITE_MODEL,
40
+ QUERY_REWRITE_MAX_TOKENS,
41
+ COMPRESSION_MODEL,
42
+ COMPRESSION_MAX_TOKENS,
43
+ RERANK_TOP_K,
44
+ HYBRID_TOP_K,
45
+ BM25_K1,
46
+ BM25_B,
47
+ REDIS_URL,
48
+ REDIS_HOST,
49
+ REDIS_PORT,
50
+ REDIS_PASSWORD,
51
+ REDIS_DB,
52
+ SEMANTIC_CACHE_TTL,
53
+ SEMANTIC_CACHE_THRESHOLD
54
+ )
55
+ from src.utils.cache import cache
56
+
57
+
58
# Singleton instance and lock for thread-safe initialization
_instance = None  # the sole DocumentStore instance, created lazily by __new__
_lock = threading.Lock()  # guards first-time creation in DocumentStore.__new__
61
+
62
+
63
class DocumentStore:
    """
    Enhanced document retrieval using ChromaDB vector database with connection pooling.

    Features:
    - Singleton pattern for connection reuse
    - Batch operations for efficiency
    - Query optimization and caching
    - Relevance score filtering (>0.7)
    - Performance logging
    """

    # Class-level client for connection pooling: shared by every instance
    # (there is only one, via the singleton) and reused across requests.
    _shared_client = None
    _shared_embedding_function = None

    def __new__(cls, db_path: Optional[str] = None):
        """Singleton pattern: ensure only one instance exists.

        Uses double-checked locking on the module-level ``_instance``:
        the unlocked check is a fast path, the locked re-check prevents
        two threads from both creating an instance.

        Args:
            db_path: Accepted for signature compatibility with __init__;
                not used here (a second call with a different path still
                returns the first instance).
        """
        global _instance
        if _instance is None:
            with _lock:
                if _instance is None:
                    _instance = super(DocumentStore, cls).__new__(cls)
                    # __init__ checks this flag so re-initialization of the
                    # shared instance is a no-op.
                    _instance._initialized = False
        return _instance
88
+
89
    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize the document store with connection pooling.

        Runs fully only once per process: __new__ sets ``_initialized`` to
        False on the single instance and this method flips it to True, so
        every later ``DocumentStore()`` call returns immediately.

        Args:
            db_path: Optional path to the vector database
                (falls back to VECTOR_DB_PATH).

        Raises:
            Exception: re-raises any failure from ChromaDB client or
                embedding-function initialization.
        """
        # Skip if already initialized (singleton pattern)
        if self._initialized:
            return

        print(f"--- DocumentStore.__init__ started (db_path: {db_path or VECTOR_DB_PATH}) ---")
        self.db_path = db_path or VECTOR_DB_PATH

        # Performance tracking (reported by get_collection_stats)
        self.search_count = 0
        self.cache_hits = 0

        # Ensure the directory exists
        os.makedirs(self.db_path, exist_ok=True)
        print(f"--- DocumentStore.__init__: Ensured directory exists: {self.db_path} ---")

        # Initialize shared client (connection pooling); created once and
        # cached on the class so restarts of __init__ reuse it.
        if DocumentStore._shared_client is None:
            print("--- DocumentStore.__init__: Initializing shared chromadb.Client ---")
            try:
                # NOTE(review): chroma_db_impl="duckdb+parquet" is the legacy
                # Chroma settings style — confirm against the pinned chromadb
                # version in requirements.
                DocumentStore._shared_client = chromadb.Client(
                    Settings(
                        chroma_db_impl="duckdb+parquet",
                        persist_directory=self.db_path,
                        anonymized_telemetry=False,
                        allow_reset=True
                    )
                )
                print("✅ Shared ChromaDB client initialized (connection pooling active)")
            except Exception as e:
                print(f"⚠️ Failed to initialize ChromaDB client: {e}")
                raise

        self.client = DocumentStore._shared_client

        # Initialize shared embedding function (reuse across requests)
        if DocumentStore._shared_embedding_function is None:
            print(f"--- DocumentStore.__init__: Initializing custom embedding function ---")
            try:
                # Use free local embedding function if OpenAI API key not available
                if OPENAI_API_KEY:
                    # Create custom embedding function compatible with OpenAI v1.x
                    from openai import OpenAI

                    class CustomOpenAIEmbedding:
                        # Minimal callable wrapper so Chroma can embed with
                        # the OpenAI v1 client interface.
                        def __init__(self, api_key, model_name="text-embedding-ada-002"):
                            self.client = OpenAI(api_key=api_key)
                            self.model_name = model_name

                        def __call__(self, texts):
                            """Generate embeddings for a list of texts."""
                            # Chroma may pass a single string; normalize to a list.
                            if isinstance(texts, str):
                                texts = [texts]

                            response = self.client.embeddings.create(
                                input=texts,
                                model=self.model_name
                            )
                            return [item.embedding for item in response.data]

                    DocumentStore._shared_embedding_function = CustomOpenAIEmbedding(
                        api_key=OPENAI_API_KEY,
                        model_name=EMBEDDING_MODEL
                    )
                    print("✅ Shared embedding function initialized (OpenAI)")
                else:
                    # Use free sentence-transformers embedding (no API key needed)
                    print("Using free local embeddings (sentence-transformers)...")
                    DocumentStore._shared_embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
                        model_name="all-MiniLM-L6-v2"
                    )
                    print("✅ Shared embedding function initialized (Local SentenceTransformer)")
            except Exception as e:
                print(f"⚠️ Failed to initialize embedding function: {e}")
                raise

        self.embedding_function = DocumentStore._shared_embedding_function

        # Create or get the collections used throughout this class.
        print("--- DocumentStore.__init__: Getting/creating 'learning_resources' collection ---")
        self.resources_collection = self._initialize_collection(
            name="learning_resources",
            metadata={"description": "Educational resources and materials"}
        )
        print("--- DocumentStore.__init__: 'learning_resources' collection obtained ---")

        print("--- DocumentStore.__init__: Getting/creating 'learning_paths' collection ---")
        self.paths_collection = self._initialize_collection(
            name="learning_paths",
            metadata={"description": "Generated learning paths"}
        )
        print("--- DocumentStore.__init__: 'learning_paths' collection obtained ---")

        # Mark as initialized so subsequent constructions are no-ops.
        self._initialized = True
        print("--- DocumentStore.__init__ finished ---")
191
+
192
+ def add_document(
193
+ self,
194
+ content: str,
195
+ metadata: Dict[str, Any],
196
+ collection_name: str = "learning_resources",
197
+ document_id: Optional[str] = None
198
+ ) -> str:
199
+ """
200
+ Add a document to the vector database.
201
+
202
+ Args:
203
+ content: Document content
204
+ metadata: Document metadata
205
+ collection_name: Name of the collection to add to
206
+ document_id: Optional ID for the document
207
+
208
+ Returns:
209
+ ID of the added document
210
+ """
211
+ # Generate a document ID if not provided
212
+ doc_id = document_id or f"doc_{len(content) % 10000}_{hash(content) % 1000000}"
213
+
214
+ # Get the appropriate collection
215
+ collection = self._initialize_collection(name=collection_name)
216
+
217
+ # Add the document
218
+ collection.add(
219
+ documents=[content],
220
+ metadatas=[metadata],
221
+ ids=[doc_id]
222
+ )
223
+
224
+ return doc_id
225
+
226
+ def add_documents(
227
+ self,
228
+ documents: List[Document],
229
+ collection_name: str = "learning_resources"
230
+ ) -> List[str]:
231
+ """
232
+ Add multiple documents to the vector database.
233
+
234
+ Args:
235
+ documents: List of Document objects
236
+ collection_name: Name of the collection to add to
237
+
238
+ Returns:
239
+ List of document IDs
240
+ """
241
+ if not documents:
242
+ return []
243
+
244
+ # Get the appropriate collection
245
+ collection = self._initialize_collection(name=collection_name)
246
+
247
+ # Prepare document data
248
+ contents = [doc.page_content for doc in documents]
249
+ metadatas = [doc.metadata for doc in documents]
250
+ ids = [f"doc_{i}_{hash(doc.page_content) % 1000000}" for i, doc in enumerate(documents)]
251
+
252
+ # Add documents in batches (ChromaDB has limits)
253
+ batch_size = 100
254
+ for i in range(0, len(documents), batch_size):
255
+ batch_end = min(i + batch_size, len(documents))
256
+ collection.add(
257
+ documents=contents[i:batch_end],
258
+ metadatas=metadatas[i:batch_end],
259
+ ids=ids[i:batch_end]
260
+ )
261
+
262
+ return ids
263
+
264
    def search_documents(
        self,
        query: str,
        collection_name: str = "learning_resources",
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 5,
        offset: int = 0
    ) -> List[Document]:
        """
        Search for documents using semantic similarity with pagination.

        On a query failure this attempts a one-shot SQLite schema repair
        (see _try_repair_collection_schema) and retries once; any further
        failure yields an empty list rather than raising.

        Args:
            query: Search query
            collection_name: Collection to search in
            filters: Optional metadata filters (list values become $in clauses)
            top_k: Number of results to return (default: 5)
            offset: Number of results to skip for pagination (default: 0)

        Returns:
            List of relevant Document objects, each annotated with a
            'relevance_score' in its metadata; empty on failure.
        """
        # Get the collection
        try:
            collection = self._initialize_collection(name=collection_name)
        except Exception:
            # Collection doesn't exist (or could not be created/repaired)
            return []

        # Prepare filter if provided
        where = {}
        if filters:
            for key, value in filters.items():
                if isinstance(value, list):
                    # For list values, we need to use the $in operator
                    where[key] = {"$in": value}
                else:
                    where[key] = value

        # Execute the search (get more results for pagination)
        try:
            result = collection.query(
                query_texts=[query],
                n_results=top_k + offset,  # Get enough results for pagination
                where=where if where else None
            )
        except Exception as e:
            print(f"⚠️ Search failed: {e}")
            print(f"🔧 Attempting schema repair for error: {type(e).__name__}")
            # Try to repair schema and retry once
            if self._try_repair_collection_schema(e):
                print(f"🔄 Schema repaired, retrying query...")
                try:
                    result = collection.query(
                        query_texts=[query],
                        n_results=top_k + offset,
                        where=where if where else None
                    )
                    print(f"✅ Query retry successful after schema repair")
                except Exception as retry_error:
                    print(f"⚠️ Search retry failed: {retry_error}")
                    return []
            else:
                print(f"❌ Schema repair not applicable for this error")
                return []

        # Convert results to Document objects
        documents = []
        if result and result.get("documents"):
            # Apply offset for pagination (results were over-fetched above)
            start_idx = offset
            end_idx = offset + top_k

            for i in range(start_idx, min(end_idx, len(result["documents"][0]))):
                content = result["documents"][0][i]
                metadata = result["metadatas"][0][i] if result.get("metadatas") and result["metadatas"][0] else {}
                distance = result["distances"][0][i] if result.get("distances") and result["distances"][0] else 1.0

                # Add relevance score to metadata
                # NOTE(review): this mapping assumes distances lie in [0, 2]
                # (e.g. cosine distance) — confirm the collection's metric.
                metadata["relevance_score"] = 1.0 - (distance / 2.0)  # Convert distance to relevance (0-1)

                documents.append(Document(
                    page_content=content,
                    metadata=metadata
                ))

        return documents
350
+
351
    def hybrid_search(
        self,
        query: str,
        collection_name: str = "learning_resources",
        filters: Optional[Dict[str, Any]] = None,
        top_k: int = 5,
        min_relevance: float = 0.7,
        use_cache: bool = True
    ) -> List[Document]:
        """
        Perform optimized hybrid search with caching and relevance filtering.

        Combines semantic search with a naive term-overlap keyword pass over
        the filtered collection, de-duplicates by content hash, sorts by
        'relevance_score', filters by min_relevance and caches the outcome.

        Optimizations:
        - Query truncation to 500 chars
        - Stop word removal
        - Result caching (1 hour)
        - Relevance score filtering (>0.7)
        - Performance logging

        Args:
            query: Search query
            collection_name: Collection to search in
            filters: Optional metadata filters
            top_k: Number of results to return (default: 5)
            min_relevance: Minimum relevance score (default: 0.7)
            use_cache: Whether to use cached results (default: True)

        Returns:
            List of relevant Document objects
        """
        start_time = time.time()
        self.search_count += 1

        # Optimize query: truncate to 500 chars
        optimized_query = query[:500] if len(query) > 500 else query

        # Remove common stop words to focus on meaningful keywords
        stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
        query_words = optimized_query.lower().split()
        filtered_words = [w for w in query_words if w not in stop_words]
        optimized_query = ' '.join(filtered_words) if filtered_words else optimized_query

        # Check cache first.  cache_key is only defined when use_cache is
        # True; the final cache.set below is gated on the same flag.
        if use_cache:
            cache_key = cache.cache_key(
                "hybrid_search",
                optimized_query,
                collection_name,
                str(filters),
                top_k,
                min_relevance
            )

            cached_results = cache.get(cache_key)
            if cached_results:
                self.cache_hits += 1
                elapsed = time.time() - start_time
                print(f"💰 Cache hit! Search completed in {elapsed*1000:.1f}ms (saved API call)")
                return cached_results

        # Perform semantic search
        semantic_results = self.search_documents(
            query=optimized_query,
            collection_name=collection_name,
            filters=filters,
            top_k=top_k * 2  # Get more results for reranking
        )

        # Prepare keyword results for simple matching
        keyword_docs = []
        try:
            # Get all documents matching the filters
            collection = self._initialize_collection(name=collection_name)

            # Prepare filter for keyword search (raw filters, no $in rewrite)
            where = {}
            if filters:
                where.update(filters)

            # Get documents matching the filter
            result = collection.get(where=where if where else None)

            if result and result.get("documents"):
                # Simple keyword matching against the ORIGINAL (unoptimized) query
                query_terms = set(query.lower().split())

                for i, content in enumerate(result["documents"]):
                    # Count matching terms in content
                    content_lower = content.lower()
                    match_count = sum(1 for term in query_terms if term in content_lower)

                    if match_count > 0:
                        metadata = result["metadatas"][i] if result.get("metadatas") else {}
                        # Score based on ratio of matching terms
                        metadata["relevance_score"] = match_count / len(query_terms)

                        keyword_docs.append(Document(
                            page_content=content,
                            metadata=metadata
                        ))
        except Exception:
            # Keyword search failed, continue with semantic results only
            pass

        # Combine results, removing duplicates (keyed on content hash;
        # semantic hits win over keyword hits for the same content)
        all_docs = {}

        # Add semantic results
        for doc in semantic_results:
            doc_key = hash(doc.page_content)
            all_docs[doc_key] = doc

        # Add keyword results that don't duplicate semantic results
        for doc in keyword_docs:
            doc_key = hash(doc.page_content)
            if doc_key not in all_docs:
                all_docs[doc_key] = doc

        # Sort by relevance score
        sorted_docs = sorted(
            all_docs.values(),
            key=lambda x: x.metadata.get("relevance_score", 0),
            reverse=True
        )

        # Filter by minimum relevance score
        filtered_docs = [
            doc for doc in sorted_docs
            if doc.metadata.get("relevance_score", 0) >= min_relevance
        ]

        # Take top_k results
        results = filtered_docs[:top_k]

        # Performance logging
        elapsed = time.time() - start_time
        print(f"🔍 Search completed in {elapsed*1000:.1f}ms - Found {len(results)}/{len(sorted_docs)} results (min_relevance={min_relevance})")

        # Cache the results for 1 hour (only non-empty result sets)
        if use_cache and results:
            cache.set(cache_key, results, ttl=3600)

        return results
494
+
495
+ def delete_document(
496
+ self,
497
+ document_id: str,
498
+ collection_name: str = "learning_resources"
499
+ ) -> bool:
500
+ """
501
+ Delete a document from the vector database.
502
+
503
+ Args:
504
+ document_id: ID of the document to delete
505
+ collection_name: Collection to delete from
506
+
507
+ Returns:
508
+ Success status
509
+ """
510
+ try:
511
+ collection = self._initialize_collection(name=collection_name)
512
+
513
+ collection.delete(ids=[document_id])
514
+ return True
515
+ except Exception:
516
+ return False
517
+
518
+ def clear_collection(self, collection_name: str) -> bool:
519
+ """
520
+ Clear all documents from a collection.
521
+
522
+ Args:
523
+ collection_name: Collection to clear
524
+
525
+ Returns:
526
+ Success status
527
+ """
528
+ try:
529
+ self.client.delete_collection(collection_name)
530
+ self._initialize_collection(name=collection_name)
531
+ return True
532
+ except Exception:
533
+ return False
534
+
535
    def add_documents_batch(
        self,
        documents: List[Document],
        collection_name: str = "learning_resources",
        batch_size: int = 100
    ) -> List[str]:
        """
        Add documents in batches to avoid memory issues.

        Unlike add_documents, this logs per-batch progress and throughput,
        and returns [] (not a partial id list) if any batch fails.

        Args:
            documents: List of Document objects
            collection_name: Collection to add to
            batch_size: Number of documents per batch (default: 100)

        Returns:
            List of document IDs, or [] on empty input or failure.
        """
        if not documents:
            return []
        print(f"📦 Adding {len(documents)} documents in batches of {batch_size}")
        start_time = time.time()

        try:
            collection = self._initialize_collection(name=collection_name)

            all_ids = []

            for i in range(0, len(documents), batch_size):
                batch_end = min(i + batch_size, len(documents))
                batch = documents[i:batch_end]

                # Prepare batch data; i+j is the document's global index, so
                # ids stay unique across batches for distinct content.
                contents = [doc.page_content for doc in batch]
                metadatas = [doc.metadata for doc in batch]
                ids = [f"doc_{i+j}_{hash(doc.page_content) % 1000000}" for j, doc in enumerate(batch)]

                # Add batch
                collection.add(
                    documents=contents,
                    metadatas=metadatas,
                    ids=ids
                )

                all_ids.extend(ids)
                print(f" ✅ Batch {i//batch_size + 1}/{(len(documents)-1)//batch_size + 1} added ({len(batch)} docs)")

            elapsed = time.time() - start_time
            print(f"✅ Added {len(documents)} documents in {elapsed:.2f}s ({len(documents)/elapsed:.1f} docs/sec)")

            return all_ids

        except Exception as e:
            print(f"⚠️ Batch add failed: {e}")
            return []
589
+
590
    def get_collection_stats(self, collection_name: str = "learning_resources") -> Dict[str, Any]:
        """
        Get statistics about a collection.

        Size figures are estimated from a 10-document sample; search and
        cache counters come from this instance's hybrid_search bookkeeping.

        Args:
            collection_name: Collection to get stats for

        Returns:
            Dictionary with collection statistics, or {"error": ...} on failure.
        """
        try:
            collection = self._initialize_collection(name=collection_name)

            # Get collection count
            count = collection.count()

            # Get sample documents to estimate size
            sample = collection.get(limit=10)
            avg_doc_size = 0
            if sample and sample.get("documents"):
                total_size = sum(len(doc) for doc in sample["documents"])
                avg_doc_size = total_size / len(sample["documents"])

            return {
                "collection_name": collection_name,
                "document_count": count,
                "avg_document_size_bytes": avg_doc_size,
                "estimated_total_size_kb": (count * avg_doc_size) / 1024,
                "search_count": self.search_count,
                "cache_hits": self.cache_hits,
                "cache_hit_rate": f"{(self.cache_hits / self.search_count * 100):.1f}%" if self.search_count > 0 else "0%"
            }
        except Exception as e:
            print(f"⚠️ Failed to get collection stats: {e}")
            return {"error": str(e)}
625
+
626
+ def cleanup_old_embeddings(
627
+ self,
628
+ collection_name: str = "learning_resources",
629
+ days_old: int = 30
630
+ ) -> int:
631
+ """
632
+ Clean up old or unused embeddings to save space.
633
+
634
+ Args:
635
+ collection_name: Collection to clean up
636
+ days_old: Delete documents older than this many days
637
+
638
+ Returns:
639
+ Number of documents deleted
640
+ """
641
+ try:
642
+ collection = self._initialize_collection(name=collection_name)
643
+
644
+ # Get all documents
645
+ result = collection.get()
646
+
647
+ if not result or not result.get("metadatas"):
648
+ return 0
649
+
650
+ # Find old documents
651
+ import datetime
652
+ cutoff_time = time.time() - (days_old * 24 * 60 * 60)
653
+ old_ids = []
654
+
655
+ for i, metadata in enumerate(result["metadatas"]):
656
+ created_at = metadata.get("created_at", time.time())
657
+ if created_at < cutoff_time:
658
+ old_ids.append(result["ids"][i])
659
+
660
+ # Delete old documents
661
+ if old_ids:
662
+ collection.delete(ids=old_ids)
663
+ print(f"🗑️ Cleaned up {len(old_ids)} old documents from {collection_name}")
664
+
665
+ return len(old_ids)
666
+
667
+ except Exception as e:
668
+ print(f"⚠️ Cleanup failed: {e}")
669
+ return 0
670
+
671
+ def advanced_rag_search(
672
+ self,
673
+ query: str,
674
+ collection_name: str = "learning_resources",
675
+ filters: Optional[Dict[str, Any]] = None,
676
+ top_k: int = 5,
677
+ use_cache: bool = True
678
+ ) -> List[Document]:
679
+ """
680
+ Advanced RAG pipeline with all optimizations.
681
+
682
+ Pipeline:
683
+ 1. Semantic cache check (Redis)
684
+ 2. Query rewriting (LLM)
685
+ 3. Hybrid retrieval (BM25 + Semantic)
686
+ 4. Reciprocal rank fusion
687
+ 5. Reranking (Cohere/Cross-encoder)
688
+ 6. Contextual compression (LLM)
689
+
690
+ Args:
691
+ query: Search query
692
+ collection_name: Collection to search
693
+ filters: Optional metadata filters
694
+ top_k: Final number of results
695
+ use_cache: Whether to use semantic caching
696
+
697
+ Returns:
698
+ Optimized, relevant documents
699
+ """
700
+ print(f"\n🚀 Advanced RAG Pipeline Started")
701
+ print(f"Query: '{query}'")
702
+
703
+ # Step 1: Check semantic cache
704
+ cached_result = None
705
+ if ENABLE_SEMANTIC_CACHE and use_cache:
706
+ try:
707
+ from src.utils.semantic_cache import SemanticCache
708
+ cache_client = SemanticCache(
709
+ redis_url=REDIS_URL,
710
+ redis_host=REDIS_HOST,
711
+ redis_port=REDIS_PORT,
712
+ redis_password=REDIS_PASSWORD,
713
+ redis_db=REDIS_DB,
714
+ ttl=SEMANTIC_CACHE_TTL,
715
+ similarity_threshold=SEMANTIC_CACHE_THRESHOLD
716
+ )
717
+ cached_result = cache_client.get(query)
718
+ if cached_result:
719
+ print("💰 Cache hit! Returning cached results")
720
+ return cached_result
721
+ except Exception as e:
722
+ print(f"⚠️ Semantic cache check failed: {e}")
723
+
724
+ # Step 2: Query rewriting
725
+ original_query = query
726
+ if QUERY_REWRITE_ENABLED:
727
+ try:
728
+ from src.ml.query_rewriter import QueryRewriter
729
+ rewriter = QueryRewriter(
730
+ model=QUERY_REWRITE_MODEL,
731
+ max_tokens=QUERY_REWRITE_MAX_TOKENS
732
+ )
733
+ query = rewriter.rewrite_if_needed(query)
734
+ except Exception as e:
735
+ print(f"⚠️ Query rewriting failed: {e}")
736
+
737
+ # Step 3: Hybrid retrieval
738
+ try:
739
+ from src.data.bm25_retriever import BM25Retriever, reciprocal_rank_fusion
740
+
741
+ # Get all documents for BM25 indexing
742
+ try:
743
+ collection = self.client.get_collection(
744
+ name=collection_name,
745
+ embedding_function=self.embedding_function
746
+ )
747
+ all_docs_result = collection.get()
748
+
749
+ if all_docs_result and all_docs_result.get("documents"):
750
+ all_documents = [
751
+ Document(
752
+ page_content=doc,
753
+ metadata=all_docs_result["metadatas"][i] if all_docs_result.get("metadatas") else {}
754
+ )
755
+ for i, doc in enumerate(all_docs_result["documents"])
756
+ ]
757
+ else:
758
+ all_documents = []
759
+ except Exception:
760
+ all_documents = []
761
+
762
+ # BM25 search
763
+ bm25_results = []
764
+ if all_documents:
765
+ bm25 = BM25Retriever(k1=BM25_K1, b=BM25_B)
766
+ bm25.index_documents(all_documents)
767
+ bm25_results = bm25.search(query, top_k=HYBRID_TOP_K)
768
+
769
+ # Semantic search
770
+ semantic_docs = self.search_documents(
771
+ query=query,
772
+ collection_name=collection_name,
773
+ filters=filters,
774
+ top_k=HYBRID_TOP_K
775
+ )
776
+ semantic_results = [
777
+ {
778
+ 'document': doc,
779
+ 'score': doc.metadata.get('relevance_score', 0.5),
780
+ 'rank': i + 1
781
+ }
782
+ for i, doc in enumerate(semantic_docs)
783
+ ]
784
+
785
+ # Fusion
786
+ if bm25_results and semantic_results:
787
+ fused_results = reciprocal_rank_fusion([bm25_results, semantic_results])
788
+ print(f"🔀 Fused {len(bm25_results)} BM25 + {len(semantic_results)} semantic results")
789
+ elif bm25_results:
790
+ fused_results = bm25_results
791
+ else:
792
+ fused_results = semantic_results
793
+
794
+ # Extract documents from fused results
795
+ candidate_docs = [r['document'] for r in fused_results[:HYBRID_TOP_K]]
796
+
797
+ except Exception as e:
798
+ print(f"⚠️ Hybrid retrieval failed: {e}. Falling back to semantic only.")
799
+ candidate_docs = self.search_documents(
800
+ query=query,
801
+ collection_name=collection_name,
802
+ filters=filters,
803
+ top_k=HYBRID_TOP_K
804
+ )
805
+
806
+ # Step 4: Reranking
807
+ if RERANK_ENABLED and candidate_docs:
808
+ try:
809
+ from src.ml.reranker import Reranker
810
+ reranker = Reranker(
811
+ use_local=USE_LOCAL_RERANKER,
812
+ cohere_api_key=COHERE_API_KEY,
813
+ cohere_model=COHERE_RERANK_MODEL,
814
+ local_model=LOCAL_RERANKER_MODEL
815
+ )
816
+ reranked_results = reranker.rerank(query, candidate_docs, top_k=RERANK_TOP_K)
817
+ candidate_docs = [r['document'] for r in reranked_results]
818
+ except Exception as e:
819
+ print(f"⚠️ Reranking failed: {e}")
820
+ candidate_docs = candidate_docs[:RERANK_TOP_K]
821
+ else:
822
+ candidate_docs = candidate_docs[:top_k]
823
+
824
+ # Step 5: Contextual compression
825
+ final_docs = candidate_docs
826
+ if CONTEXTUAL_COMPRESSION_ENABLED and candidate_docs:
827
+ try:
828
+ from src.ml.context_compressor import ContextCompressor
829
+ compressor = ContextCompressor(
830
+ model=COMPRESSION_MODEL,
831
+ max_tokens=COMPRESSION_MAX_TOKENS
832
+ )
833
+ final_docs = compressor.compress(query, candidate_docs)
834
+ except Exception as e:
835
+ print(f"⚠️ Compression failed: {e}")
836
+
837
+ # Cache the results
838
+ if ENABLE_SEMANTIC_CACHE and use_cache and final_docs:
839
+ try:
840
+ cache_client.set(original_query, final_docs)
841
+ except Exception as e:
842
+ print(f"⚠️ Cache set failed: {e}")
843
+
844
+ print(f"✅ Advanced RAG Complete: {len(final_docs)} optimized documents\n")
845
+ return final_docs
846
+
847
+ def _initialize_collection(self, name: str, metadata: Optional[Dict[str, Any]] = None):
848
+ """Safely get or create a Chroma collection, repairing schema if needed."""
849
+ try:
850
+ return self.client.get_or_create_collection(
851
+ name=name,
852
+ embedding_function=self.embedding_function,
853
+ metadata=metadata
854
+ )
855
+ except Exception as exc:
856
+ if self._try_repair_collection_schema(exc):
857
+ return self.client.get_or_create_collection(
858
+ name=name,
859
+ embedding_function=self.embedding_function,
860
+ metadata=metadata
861
+ )
862
+ raise
863
+
864
+ def _try_repair_collection_schema(self, error: Exception) -> bool:
865
+ """Attempt to repair missing columns in any Chroma table."""
866
+ message = str(error)
867
+ missing_prefix = "no such column: "
868
+ if missing_prefix not in message:
869
+ return False
870
+
871
+ # Extract table name and column name from error message
872
+ # Format: "no such column: table_name.column_name"
873
+ try:
874
+ parts = message.split(missing_prefix, 1)[1].split()[0].strip('"`[]')
875
+ if '.' not in parts:
876
+ return False
877
+ table_name, column_name = parts.split('.', 1)
878
+ except (IndexError, ValueError):
879
+ return False
880
+
881
+ # Validate table and column names (only alphanumeric and underscore)
882
+ safe_table = ''.join(ch for ch in table_name if ch.isalnum() or ch == '_')
883
+ safe_column = ''.join(ch for ch in column_name if ch.isalnum() or ch == '_')
884
+ if safe_table != table_name or safe_column != column_name:
885
+ return False
886
+
887
+ db_file = Path(self.db_path) / "chroma.sqlite3"
888
+ if not db_file.exists():
889
+ return False
890
+
891
+ try:
892
+ with sqlite3.connect(str(db_file)) as conn:
893
+ conn.execute(f"ALTER TABLE {safe_table} ADD COLUMN {safe_column} TEXT")
894
+ conn.commit()
895
+ print(f"✅ Added missing '{safe_table}.{safe_column}' column to Chroma DB")
896
+ return True
897
+ except sqlite3.OperationalError as alter_err:
898
+ print(f"⚠️ Failed to add column {safe_table}.{safe_column}: {alter_err}")
899
+ return False
900
+
901
+ def get_cached_path(self, key: str) -> Optional[Dict[str, Any]]:
902
+ """Get a cached learning path from Redis."""
903
+ try:
904
+ import redis
905
+ # Use REDIS_URL if available and valid (for Upstash, Render, etc.)
906
+ if REDIS_URL and REDIS_URL.strip() and REDIS_URL.startswith(('redis://', 'rediss://', 'unix://')):
907
+ redis_client = redis.from_url(
908
+ REDIS_URL,
909
+ decode_responses=True,
910
+ ssl_cert_reqs=None
911
+ )
912
+ else:
913
+ # Build Redis connection params
914
+ redis_params = {
915
+ 'host': REDIS_HOST,
916
+ 'port': REDIS_PORT,
917
+ 'db': REDIS_DB,
918
+ 'decode_responses': True
919
+ }
920
+ # Only add password if it's not empty (strip whitespace)
921
+ password = (REDIS_PASSWORD or '').strip()
922
+ if password:
923
+ redis_params['password'] = password
924
+
925
+ redis_client = redis.Redis(**redis_params)
926
+
927
+ cached_data = redis_client.get(f"path_cache:{key}")
928
+ if cached_data:
929
+ return json.loads(cached_data)
930
+ return None
931
+ except Exception as e:
932
+ print(f"⚠️ Path cache GET failed: {e}")
933
+ return None
934
+
935
+ def cache_path(self, key: str, path: Dict[str, Any], ttl: int = 3600):
936
+ """Cache a learning path in Redis."""
937
+ try:
938
+ import redis
939
+ # Use REDIS_URL if available and valid (for Upstash, Render, etc.)
940
+ if REDIS_URL and REDIS_URL.strip() and REDIS_URL.startswith(('redis://', 'rediss://', 'unix://')):
941
+ redis_client = redis.from_url(
942
+ REDIS_URL,
943
+ decode_responses=True,
944
+ ssl_cert_reqs=None
945
+ )
946
+ else:
947
+ # Build Redis connection params
948
+ redis_params = {
949
+ 'host': REDIS_HOST,
950
+ 'port': REDIS_PORT,
951
+ 'db': REDIS_DB,
952
+ 'decode_responses': True
953
+ }
954
+ # Only add password if it's not empty (strip whitespace)
955
+ password = (REDIS_PASSWORD or '').strip()
956
+ if password:
957
+ redis_params['password'] = password
958
+
959
+ redis_client = redis.Redis(**redis_params)
960
+
961
+ redis_client.setex(f"path_cache:{key}", ttl, json.dumps(path))
962
+ print(f"💾 Cached learning path: {key[:8]}... (TTL: {ttl}s)")
963
+ except Exception as e:
964
+ print(f"⚠️ Path cache SET failed: {e}")
965
+
966
+ @classmethod
967
+ def shutdown(cls):
968
+ """Gracefully shutdown the shared client connection."""
969
+ if cls._shared_client is not None:
970
+ print("🔌 Shutting down ChromaDB connection...")
971
+ cls._shared_client = None
972
+ cls._shared_embedding_function = None
973
+ print("✅ Connection closed")
src/data/resources.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Educational resource handling for the AI Learning Path Generator.
3
+ Manages resource recommendation and categorization.
4
+ """
5
+ from typing import List, Dict, Any, Optional
6
+ import json
7
+ from pathlib import Path
8
+
9
+ from src.ml.model_orchestrator import ModelOrchestrator
10
+ from src.utils.helpers import difficulty_to_score
11
+ from src.utils.config import RESOURCE_TYPES, LEARNING_STYLES
12
+
13
class ResourceManager:
    """
    Manages educational resources and recommendations.

    Model-generated recommendations are memoized in-process per
    (topic, learning_style, expertise_level, resource_type) so repeated
    queries do not re-invoke the underlying model.
    """
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the resource manager.

        Args:
            api_key: Optional OpenAI API key, forwarded to the orchestrator
        """
        self.model_orchestrator = ModelOrchestrator(api_key)
        # Cache: "{topic}_{style}_{level}_{type}" -> list of resource dicts.
        self.cached_resources = {}

    def recommend_resources(
        self,
        topic: str,
        learning_style: str,
        expertise_level: str,
        count: int = 5,
        resource_type: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """
        Recommend educational resources for a topic.

        Args:
            topic: The topic to find resources for
            learning_style: Preferred learning style
            expertise_level: User's expertise level
            count: Number of resources to recommend
            resource_type: Optional specific resource type

        Returns:
            List of resource recommendations (may be shorter than ``count``
            when a ``resource_type`` filter is applied)
        """
        cache_key = f"{topic}_{learning_style}_{expertise_level}_{resource_type}"

        # Serve from cache only when it can satisfy the requested count.
        # (Previously any hit was sliced, so an earlier small request
        # would permanently cap later, larger requests.)
        cached = self.cached_resources.get(cache_key)
        if cached and len(cached) >= count:
            return cached[:count]

        # Generate resources using the model
        resources = self.model_orchestrator.generate_resource_recommendations(
            topic=topic,
            learning_style=learning_style,
            expertise_level=expertise_level,
            count=count
        )

        # Filter by resource type if specified
        if resource_type and resources:
            resources = [r for r in resources if r.get("type") == resource_type]

        # Cache the (possibly filtered) results for subsequent calls
        self.cached_resources[cache_key] = resources

        return resources

    def categorize_by_learning_style(
        self,
        resources: List[Dict[str, Any]]
    ) -> Dict[str, List[Dict[str, Any]]]:
        """
        Categorize resources by most suitable learning style.

        Args:
            resources: List of resource dictionaries

        Returns:
            Dictionary of resources grouped by learning style
        """
        result = {style: [] for style in LEARNING_STYLES}

        for resource in resources:
            resource_type = resource.get("type", "article")

            # Find the learning style with the highest score for this type.
            # Strict '>' keeps the default when all scores are <= 0 and picks
            # the first style attaining the maximum otherwise.
            best_style = "reading"  # Default
            best_score = 0

            if resource_type in RESOURCE_TYPES:
                for style, score in RESOURCE_TYPES[resource_type].items():
                    if score > best_score:
                        best_score = score
                        best_style = style

            # setdefault guards against a best style (e.g. the "reading"
            # fallback) that is missing from LEARNING_STYLES.
            result.setdefault(best_style, []).append(resource)

        return result

    def load_curated_resources(
        self,
        file_path: str = "data/curated_resources.json"
    ) -> List[Dict[str, Any]]:
        """
        Load curated resources from a JSON file.

        Args:
            file_path: Path to the JSON file

        Returns:
            List of resource dictionaries; empty list when the file is
            missing or contains invalid JSON
        """
        try:
            # Explicit UTF-8: the platform default encoding (e.g. cp1252 on
            # Windows) would corrupt non-ASCII resource titles.
            with open(file_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return []

    def save_curated_resources(
        self,
        resources: List[Dict[str, Any]],
        file_path: str = "data/curated_resources.json"
    ) -> bool:
        """
        Save curated resources to a JSON file.

        Args:
            resources: List of resource dictionaries
            file_path: Path to save to

        Returns:
            Success status (False on any I/O or serialization error)
        """
        try:
            # Ensure directory exists
            Path(file_path).parent.mkdir(exist_ok=True, parents=True)

            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(resources, f, indent=2)
            return True
        except Exception:
            return False

    def analyze_difficulty(self, resource: Dict[str, Any]) -> float:
        """
        Analyze the difficulty level of a resource.

        Args:
            resource: Resource dictionary with description

        Returns:
            Difficulty score between 0 and 1 (0.5 when nothing to analyze)
        """
        # Prefer an explicit difficulty label when present
        if "difficulty" in resource:
            return difficulty_to_score(resource["difficulty"])

        # Otherwise let the model estimate from the description
        description = resource.get("description", "")
        if description:
            return self.model_orchestrator.analyze_difficulty(description)

        # Default to medium difficulty
        return 0.5

    def filter_by_difficulty(
        self,
        resources: List[Dict[str, Any]],
        max_difficulty: float = 1.0,
        min_difficulty: float = 0.0
    ) -> List[Dict[str, Any]]:
        """
        Filter resources by difficulty level.

        Args:
            resources: List of resource dictionaries
            max_difficulty: Maximum difficulty score (0-1)
            min_difficulty: Minimum difficulty score (0-1)

        Returns:
            Filtered list of resources (inclusive bounds)
        """
        result = []

        for resource in resources:
            # Use a precomputed score when available; otherwise derive one
            # from the textual difficulty label (default: intermediate).
            if "difficulty_score" in resource:
                score = float(resource["difficulty_score"])
            else:
                score = difficulty_to_score(resource.get("difficulty", "intermediate"))

            if min_difficulty <= score <= max_difficulty:
                result.append(resource)

        return result
src/data/skills_database.py ADDED
@@ -0,0 +1,999 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Comprehensive Skills Database
3
+ Contains salary ranges, curated resources, and market information for all supported skills.
4
+ Last Updated: 2025-01-05
5
+ """
6
+
7
+ SKILLS_DATABASE = {
8
+ # ===== CLOUD & DEVOPS =====
9
+ "AWS": {
10
+ "category": "Cloud & DevOps",
11
+ "salary_range": "$110,000 - $160,000",
12
+ "salary_min": 110000,
13
+ "salary_max": 160000,
14
+ "market_info": {
15
+ "demand": "Very High",
16
+ "growth_rate": "+30%",
17
+ "open_positions": "60,000+",
18
+ "top_employers": ["Amazon", "Netflix", "Airbnb", "Capital One", "GE", "NASA"],
19
+ "related_roles": ["Cloud Engineer", "Solutions Architect", "DevOps Engineer", "Cloud Consultant"]
20
+ },
21
+ "resources": {
22
+ "beginner": {
23
+ "youtube": ["freeCodeCamp.org", "AWS Online Tech Talks", "Simplilearn"],
24
+ "websites": ["Aws.amazon.com/training", "Aws.amazon.com/getting-started", "Coursera"]
25
+ },
26
+ "intermediate": {
27
+ "youtube": ["Adrian Cantrill", "Stephane Maarek", "A Cloud Guru"],
28
+ "websites": ["Re:Invent.aws", "Udemy", "Linux Academy"]
29
+ },
30
+ "advanced": {
31
+ "youtube": ["AWS re:Invent", "AWS Summits", "AWS This Week"],
32
+ "websites": ["AWS Well-Architected", "AWS Whitepapers", "AWS Architecture Center"]
33
+ }
34
+ }
35
+ },
36
+ # ===== DATA SCIENCE & AI =====
37
+ "Machine Learning": {
38
+ "category": "Data Science & AI",
39
+ "salary_range": "$100,000 - $180,000",
40
+ "salary_min": 100000,
41
+ "salary_max": 180000,
42
+ "market_info": {
43
+ "demand": "Very High",
44
+ "growth_rate": "+35%",
45
+ "open_positions": "50,000+",
46
+ "top_employers": ["Google", "Meta", "Amazon", "Microsoft", "OpenAI", "Tesla"],
47
+ "related_roles": ["ML Engineer", "Data Scientist", "AI Research Scientist", "MLOps Engineer"]
48
+ },
49
+ "resources": {
50
+ "beginner": {
51
+ "youtube": ["3Blue1Brown", "Sentdex", "freeCodeCamp.org"],
52
+ "websites": ["Coursera", "Kaggle", "Datacamp"]
53
+ },
54
+ "intermediate": {
55
+ "youtube": ["DeepLearningAI", "Andrej Karpathy", "StatQuest with Josh Starmer"],
56
+ "websites": ["Fast.ai", "MachineLearningMastery", "TowardsDataScience"]
57
+ },
58
+ "advanced": {
59
+ "youtube": ["Two Minute Papers", "Andrej Karpathy", "DeepLearningAI"],
60
+ "websites": ["ArXiv.org", "Papers with Code", "Distill.pub"]
61
+ }
62
+ }
63
+ },
64
+
65
+ "Deep Learning": {
66
+ "category": "Data Science & AI",
67
+ "salary_range": "$130,000 - $200,000",
68
+ "salary_min": 130000,
69
+ "salary_max": 200000,
70
+ "market_info": {
71
+ "demand": "Very High",
72
+ "growth_rate": "+40%",
73
+ "open_positions": "35,000+",
74
+ "top_employers": ["OpenAI", "Google DeepMind", "Meta AI", "NVIDIA", "Tesla", "Amazon"],
75
+ "related_roles": ["Deep Learning Engineer", "AI Researcher", "Computer Vision Engineer", "NLP Engineer"]
76
+ },
77
+ "resources": {
78
+ "beginner": {
79
+ "youtube": ["DeepLearningAI", "Sentdex", "3Blue1Brown"],
80
+ "websites": ["Coursera", "Fast.ai", "TensorFlow.org"]
81
+ },
82
+ "intermediate": {
83
+ "youtube": ["Andrej Karpathy", "Siraj Raval", "DeepLearningAI"],
84
+ "websites": ["PyTorch.org", "TowardsDataScience", "Papers with Code"]
85
+ },
86
+ "advanced": {
87
+ "youtube": ["Two Minute Papers", "Yannic Kilcher", "AI Coffee Break"],
88
+ "websites": ["ArXiv.org", "Distill.pub", "OpenAI Research"]
89
+ }
90
+ }
91
+ },
92
+
93
+ "Data Analysis": {
94
+ "category": "Data Science & AI",
95
+ "salary_range": "$90,000 - $130,000",
96
+ "salary_min": 90000,
97
+ "salary_max": 130000,
98
+ "market_info": {
99
+ "demand": "High",
100
+ "growth_rate": "+25%",
101
+ "open_positions": "80,000+",
102
+ "top_employers": ["Google", "Amazon", "Microsoft", "Meta", "Netflix", "Uber"],
103
+ "related_roles": ["Data Analyst", "Business Analyst", "Data Scientist", "Analytics Engineer"]
104
+ },
105
+ "resources": {
106
+ "beginner": {
107
+ "youtube": ["freeCodeCamp.org", "Data School", "Corey Schafer"],
108
+ "websites": ["Datacamp", "Kaggle", "Mode Analytics"]
109
+ },
110
+ "intermediate": {
111
+ "youtube": ["StatQuest with Josh Starmer", "Brandon Foltz", "Ken Jee"],
112
+ "websites": ["TowardsDataScience", "AnalyticsVidhya", "Tableau Public"]
113
+ },
114
+ "advanced": {
115
+ "youtube": ["StatQuest with Josh Starmer", "Data Science Dojo"],
116
+ "websites": ["KDnuggets", "Analytics Vidhya", "Kaggle Competitions"]
117
+ }
118
+ }
119
+ },
120
+
121
+ "Natural Language Processing": {
122
+ "category": "Data Science & AI",
123
+ "salary_range": "$100,000 - $150,000",
124
+ "salary_min": 100000,
125
+ "salary_max": 150000,
126
+ "market_info": {
127
+ "demand": "Very High",
128
+ "growth_rate": "+38%",
129
+ "open_positions": "25,000+",
130
+ "top_employers": ["OpenAI", "Google", "Meta", "Amazon", "Microsoft", "Anthropic"],
131
+ "related_roles": ["NLP Engineer", "Computational Linguist", "ML Engineer", "AI Researcher"]
132
+ },
133
+ "resources": {
134
+ "beginner": {
135
+ "youtube": ["Sentdex", "freeCodeCamp.org", "Krish Naik"],
136
+ "websites": ["Coursera", "NLTK.org", "Spacy.io"]
137
+ },
138
+ "intermediate": {
139
+ "youtube": ["DeepLearningAI", "Stanford NLP", "Jay Alammar"],
140
+ "websites": ["HuggingFace.co", "TowardsDataScience", "Papers with Code"]
141
+ },
142
+ "advanced": {
143
+ "youtube": ["Yannic Kilcher", "AI Coffee Break", "Stanford CS224N"],
144
+ "websites": ["ArXiv.org", "ACL Anthology", "OpenAI Research"]
145
+ }
146
+ }
147
+ },
148
+
149
+ "Computer Vision": {
150
+ "category": "Data Science & AI",
151
+ "salary_range": "$80,000 - $170,000",
152
+ "salary_min": 80000,
153
+ "salary_max": 170000,
154
+ "market_info": {
155
+ "demand": "Very High",
156
+ "growth_rate": "+36%",
157
+ "open_positions": "30,000+",
158
+ "top_employers": ["Tesla", "NVIDIA", "Meta", "Google", "Amazon", "Apple"],
159
+ "related_roles": ["Computer Vision Engineer", "ML Engineer", "Robotics Engineer", "AI Researcher"]
160
+ },
161
+ "resources": {
162
+ "beginner": {
163
+ "youtube": ["freeCodeCamp.org", "Sentdex", "OpenCV"],
164
+ "websites": ["OpenCV.org", "PyImageSearch", "Coursera"]
165
+ },
166
+ "intermediate": {
167
+ "youtube": ["DeepLearningAI", "Two Minute Papers", "First Principles of Computer Vision"],
168
+ "websites": ["PyTorch.org/vision", "TowardsDataScience", "Papers with Code"]
169
+ },
170
+ "advanced": {
171
+ "youtube": ["Yannic Kilcher", "AI Coffee Break", "CVPR Talks"],
172
+ "websites": ["ArXiv.org", "CVPR Conference", "ECCV Conference"]
173
+ }
174
+ }
175
+ },
176
+
177
+ "Data Engineering": {
178
+ "category": "Data Science & AI",
179
+ "salary_range": "$120,000 - $180,000",
180
+ "salary_min": 120000,
181
+ "salary_max": 180000,
182
+ "market_info": {
183
+ "demand": "Very High",
184
+ "growth_rate": "+30%",
185
+ "open_positions": "45,000+",
186
+ "top_employers": ["Amazon", "Google", "Microsoft", "Meta", "Uber", "Airbnb"],
187
+ "related_roles": ["Data Engineer", "Analytics Engineer", "ETL Developer", "Big Data Engineer"]
188
+ },
189
+ "resources": {
190
+ "beginner": {
191
+ "youtube": ["freeCodeCamp.org", "Corey Schafer", "Tech With Tim"],
192
+ "websites": ["Datacamp", "Coursera", "Mode Analytics"]
193
+ },
194
+ "intermediate": {
195
+ "youtube": ["Databricks", "Apache Spark", "Seattle Data Guy"],
196
+ "websites": ["Databricks.com", "Apache.org/Spark", "TowardsDataScience"]
197
+ },
198
+ "advanced": {
199
+ "youtube": ["Data Engineering Podcast", "Advancing Analytics"],
200
+ "websites": ["Databricks University", "Confluent.io", "DataEngineeringPodcast"]
201
+ }
202
+ }
203
+ },
204
+
205
+ "Big Data": {
206
+ "category": "Data Science & AI",
207
+ "salary_range": "$110,000 - $160,000",
208
+ "salary_min": 110000,
209
+ "salary_max": 160000,
210
+ "market_info": {
211
+ "demand": "High",
212
+ "growth_rate": "+28%",
213
+ "open_positions": "40,000+",
214
+ "top_employers": ["Amazon", "Google", "Microsoft", "IBM", "Oracle", "Cloudera"],
215
+ "related_roles": ["Big Data Engineer", "Data Architect", "Hadoop Developer", "Data Platform Engineer"]
216
+ },
217
+ "resources": {
218
+ "beginner": {
219
+ "youtube": ["Simplilearn", "Edureka", "freeCodeCamp.org"],
220
+ "websites": ["Hadoop.apache.org", "Cloudera.com", "Coursera"]
221
+ },
222
+ "intermediate": {
223
+ "youtube": ["Hadoop Illuminated", "Databricks", "Apache Spark"],
224
+ "websites": ["Apache.org/Spark", "Databricks.com", "KDnuggets"]
225
+ },
226
+ "advanced": {
227
+ "youtube": ["Data Engineering Podcast", "Confluent"],
228
+ "websites": ["Confluent.io", "Apache Kafka", "BigDataUniversity"]
229
+ }
230
+ }
231
+ },
232
+
233
+ "AI Ethics": {
234
+ "category": "Data Science & AI",
235
+ "salary_range": "$120,000 - $170,000",
236
+ "salary_min": 120000,
237
+ "salary_max": 170000,
238
+ "market_info": {
239
+ "demand": "Growing",
240
+ "growth_rate": "+45%",
241
+ "open_positions": "5,000+",
242
+ "top_employers": ["OpenAI", "Google", "Meta", "Microsoft", "Anthropic", "Partnership on AI"],
243
+ "related_roles": ["AI Ethics Researcher", "Responsible AI Lead", "AI Policy Analyst", "ML Fairness Engineer"]
244
+ },
245
+ "resources": {
246
+ "beginner": {
247
+ "youtube": ["TED-Ed", "Computerphile", "CrashCourse"],
248
+ "websites": ["AIethicsguidelines.global", "Coursera", "Ethics.ai"]
249
+ },
250
+ "intermediate": {
251
+ "youtube": ["DeepLearningAI", "Stanford HAI", "Montreal AI Ethics Institute"],
252
+ "websites": ["Futureoflife.org", "AIindex.stanford.edu", "Partnership on AI"]
253
+ },
254
+ "advanced": {
255
+ "youtube": ["Timnit Gebru", "Kate Crawford", "Joy Buolamwini"],
256
+ "websites": ["FAccT Conference", "AIES Conference", "ArXiv.org"]
257
+ }
258
+ }
259
+ },
260
+
261
+ # ===== WEB DEVELOPMENT =====
262
+ "Frontend (React, Vue, Angular)": {
263
+ "category": "Web Development",
264
+ "salary_range": "$100,000 - $140,000",
265
+ "salary_min": 100000,
266
+ "salary_max": 140000,
267
+ "market_info": {
268
+ "demand": "Very High",
269
+ "growth_rate": "+22%",
270
+ "open_positions": "100,000+",
271
+ "top_employers": ["Meta", "Google", "Amazon", "Netflix", "Airbnb", "Uber"],
272
+ "related_roles": ["Frontend Developer", "UI Engineer", "React Developer", "Web Developer"]
273
+ },
274
+ "resources": {
275
+ "beginner": {
276
+ "youtube": ["Traversy Media", "freeCodeCamp.org", "The Net Ninja"],
277
+ "websites": ["Reactjs.org", "MDN Web Docs", "FreeCodeCamp"]
278
+ },
279
+ "intermediate": {
280
+ "youtube": ["Academind", "Fireship", "Web Dev Simplified"],
281
+ "websites": ["Vuejs.org", "Angular.io", "FrontendMasters"]
282
+ },
283
+ "advanced": {
284
+ "youtube": ["Jack Herrington", "Theo - t3.gg", "UI.dev"],
285
+ "websites": ["React Advanced", "Patterns.dev", "Web.dev"]
286
+ }
287
+ }
288
+ },
289
+
290
+ "Backend (Node.js, Django, Flask)": {
291
+ "category": "Web Development",
292
+ "salary_range": "$110,000 - $150,000",
293
+ "salary_min": 110000,
294
+ "salary_max": 150000,
295
+ "market_info": {
296
+ "demand": "Very High",
297
+ "growth_rate": "+25%",
298
+ "open_positions": "90,000+",
299
+ "top_employers": ["Amazon", "Google", "Microsoft", "Meta", "Netflix", "Uber"],
300
+ "related_roles": ["Backend Developer", "API Developer", "Software Engineer", "Full Stack Developer"]
301
+ },
302
+ "resources": {
303
+ "beginner": {
304
+ "youtube": ["Traversy Media", "freeCodeCamp.org", "Corey Schafer"],
305
+ "websites": ["Nodejs.org", "Djangoproject.com", "Flask.palletsprojects.com"]
306
+ },
307
+ "intermediate": {
308
+ "youtube": ["Programming with Mosh", "The Net Ninja", "Tech With Tim"],
309
+ "websites": ["Expressjs.com", "FastAPI.tiangolo.com", "RealPython"]
310
+ },
311
+ "advanced": {
312
+ "youtube": ["Hussein Nasser", "CodeOpinion", "ArjanCodes"],
313
+ "websites": ["System Design Primer", "Microservices.io", "Martin Fowler"]
314
+ }
315
+ }
316
+ },
317
+
318
+ "Full Stack": {
319
+ "category": "Web Development",
320
+ "salary_range": "$110,000 - $170,000",
321
+ "salary_min": 110000,
322
+ "salary_max": 170000,
323
+ "market_info": {
324
+ "demand": "Very High",
325
+ "growth_rate": "+27%",
326
+ "open_positions": "120,000+",
327
+ "top_employers": ["Amazon", "Google", "Meta", "Microsoft", "Shopify", "Stripe"],
328
+ "related_roles": ["Full Stack Developer", "Software Engineer", "Web Developer", "Application Developer"]
329
+ },
330
+ "resources": {
331
+ "beginner": {
332
+ "youtube": ["freeCodeCamp.org", "Traversy Media", "The Net Ninja"],
333
+ "websites": ["Fullstackopen.com", "FreeCodeCamp", "Codecademy"]
334
+ },
335
+ "intermediate": {
336
+ "youtube": ["Academind", "Web Dev Simplified", "Fireship"],
337
+ "websites": ["Udemy", "Coursera", "Dev.to"]
338
+ },
339
+ "advanced": {
340
+ "youtube": ["Theo - t3.gg", "Jack Herrington", "Hussein Nasser"],
341
+ "websites": ["System Design", "Microservices Patterns", "Web.dev"]
342
+ }
343
+ }
344
+ },
345
+
346
+ "JavaScript": {
347
+ "category": "Web Development",
348
+ "salary_range": "$100,000 - $140,000",
349
+ "salary_min": 100000,
350
+ "salary_max": 140000,
351
+ "market_info": {
352
+ "demand": "Very High",
353
+ "growth_rate": "+20%",
354
+ "open_positions": "150,000+",
355
+ "top_employers": ["Google", "Meta", "Amazon", "Microsoft", "Netflix", "Airbnb"],
356
+ "related_roles": ["JavaScript Developer", "Frontend Developer", "Full Stack Developer", "Web Developer"]
357
+ },
358
+ "resources": {
359
+ "beginner": {
360
+ "youtube": ["Traversy Media", "freeCodeCamp.org", "Programming with Mosh"],
361
+ "websites": ["Javascript.info", "MDN Web Docs", "FreeCodeCamp"]
362
+ },
363
+ "intermediate": {
364
+ "youtube": ["The Net Ninja", "Web Dev Simplified", "Fireship"],
365
+ "websites": ["Eloquentjavascript.net", "JavaScript30", "Frontend Masters"]
366
+ },
367
+ "advanced": {
368
+ "youtube": ["Fun Fun Function", "MPJ", "Theo - t3.gg"],
369
+ "websites": ["You Don't Know JS", "JavaScript Weekly", "TC39 Proposals"]
370
+ }
371
+ }
372
+ },
373
+
374
+ "TypeScript": {
375
+ "category": "Web Development",
376
+ "salary_range": "$110,000 - $150,000",
377
+ "salary_min": 110000,
378
+ "salary_max": 150000,
379
+ "market_info": {
380
+ "demand": "Very High",
381
+ "growth_rate": "+35%",
382
+ "open_positions": "80,000+",
383
+ "top_employers": ["Microsoft", "Google", "Meta", "Amazon", "Airbnb", "Stripe"],
384
+ "related_roles": ["TypeScript Developer", "Frontend Engineer", "Full Stack Developer", "Software Engineer"]
385
+ },
386
+ "resources": {
387
+ "beginner": {
388
+ "youtube": ["Traversy Media", "freeCodeCamp.org", "The Net Ninja"],
389
+ "websites": ["Typescriptlang.org", "TypeScript Handbook", "Execute Program"]
390
+ },
391
+ "intermediate": {
392
+ "youtube": ["Academind", "Matt Pocock", "Jack Herrington"],
393
+ "websites": ["Basarat.gitbook.io/typescript", "Total TypeScript", "Frontend Masters"]
394
+ },
395
+ "advanced": {
396
+ "youtube": ["Matt Pocock", "Theo - t3.gg", "Jack Herrington"],
397
+ "websites": ["Type Challenges", "Advanced TypeScript", "TypeScript Deep Dive"]
398
+ }
399
+ }
400
+ },
401
+
402
+ "Web Performance": {
403
+ "category": "Web Development",
404
+ "salary_range": "$110,000 - $150,000",
405
+ "salary_min": 110000,
406
+ "salary_max": 150000,
407
+ "market_info": {
408
+ "demand": "High",
409
+ "growth_rate": "+30%",
410
+ "open_positions": "15,000+",
411
+ "top_employers": ["Google", "Meta", "Amazon", "Netflix", "Cloudflare", "Vercel"],
412
+ "related_roles": ["Performance Engineer", "Frontend Engineer", "Web Developer", "Site Reliability Engineer"]
413
+ },
414
+ "resources": {
415
+ "beginner": {
416
+ "youtube": ["Google Chrome Developers", "Web Dev Simplified", "Fireship"],
417
+ "websites": ["Web.dev", "MDN Performance", "PageSpeed Insights"]
418
+ },
419
+ "intermediate": {
420
+ "youtube": ["Harry Roberts", "Addy Osmani", "Paul Irish"],
421
+ "websites": ["Developers.google.com/web", "Smashingmagazine.com", "Perf.rocks"]
422
+ },
423
+ "advanced": {
424
+ "youtube": ["Chrome Dev Summit", "Performance.now()"],
425
+ "websites": ["WebPageTest", "Lighthouse CI", "Web Vitals"]
426
+ }
427
+ }
428
+ },
429
+
430
+ "Web Security": {
431
+ "category": "Web Development",
432
+ "salary_range": "$110,000 - $150,000",
433
+ "salary_min": 110000,
434
+ "salary_max": 150000,
435
+ "market_info": {
436
+ "demand": "Very High",
437
+ "growth_rate": "+32%",
438
+ "open_positions": "25,000+",
439
+ "top_employers": ["Google", "Meta", "Amazon", "Microsoft", "Cloudflare", "Auth0"],
440
+ "related_roles": ["Security Engineer", "Application Security Engineer", "Web Developer", "DevSecOps Engineer"]
441
+ },
442
+ "resources": {
443
+ "beginner": {
444
+ "youtube": ["freeCodeCamp.org", "Traversy Media", "OWASP"],
445
+ "websites": ["Owasp.org", "Web.dev/security", "MDN Security"]
446
+ },
447
+ "intermediate": {
448
+ "youtube": ["LiveOverflow", "The Cyber Mentor", "PwnFunction"],
449
+ "websites": ["Portswigger.net", "HackerOne", "BugBountyHunter"]
450
+ },
451
+ "advanced": {
452
+ "youtube": ["LiveOverflow", "IppSec", "John Hammond"],
453
+ "websites": ["OWASP Top 10", "Web Security Academy", "HackerOne Reports"]
454
+ }
455
+ }
456
+ },
457
+
458
+ "Progressive Web Apps": {
459
+ "category": "Web Development",
460
+ "salary_range": "$110,000 - $150,000",
461
+ "salary_min": 110000,
462
+ "salary_max": 150000,
463
+ "market_info": {
464
+ "demand": "Growing",
465
+ "growth_rate": "+28%",
466
+ "open_positions": "20,000+",
467
+ "top_employers": ["Google", "Microsoft", "Twitter", "Starbucks", "Uber", "Pinterest"],
468
+ "related_roles": ["PWA Developer", "Frontend Developer", "Mobile Web Developer", "Web Developer"]
469
+ },
470
+ "resources": {
471
+ "beginner": {
472
+ "youtube": ["Google Chrome Developers", "Traversy Media", "freeCodeCamp.org"],
473
+ "websites": ["Web.dev/progressive-web-apps", "PWA Builder", "MDN PWA"]
474
+ },
475
+ "intermediate": {
476
+ "youtube": ["Academind", "Maximilian Schwarzmüller", "Fireship"],
477
+ "websites": ["Developers.google.com/web/pwa", "Workboxjs.org", "PWA Stats"]
478
+ },
479
+ "advanced": {
480
+ "youtube": ["Chrome Dev Summit", "Google I/O"],
481
+ "websites": ["Service Worker Cookbook", "PWA Directory", "Web Capabilities"]
482
+ }
483
+ }
484
+ },
485
+
486
+ # ===== MOBILE DEVELOPMENT =====
487
+ "iOS Development": {
488
+ "category": "Mobile Development",
489
+ "salary_range": "$120,000 - $160,000",
490
+ "salary_min": 120000,
491
+ "salary_max": 160000,
492
+ "market_info": {
493
+ "demand": "High",
494
+ "growth_rate": "+18%",
495
+ "open_positions": "40,000+",
496
+ "top_employers": ["Apple", "Meta", "Amazon", "Uber", "Airbnb", "Netflix"],
497
+ "related_roles": ["iOS Engineer", "Swift Developer", "Mobile Developer", "App Developer"]
498
+ },
499
+ "resources": {
500
+ "beginner": {
501
+ "youtube": ["CodeWithChris", "Sean Allen", "iOS Academy"],
502
+ "websites": ["Developer.apple.com", "Hackingwithswift.com", "Raywenderlich.com"]
503
+ },
504
+ "intermediate": {
505
+ "youtube": ["Lets Build That App", "Kavsoft", "SwiftUI Lab"],
506
+ "websites": ["Swift.org", "Apple Developer Tutorials", "Udemy"]
507
+ },
508
+ "advanced": {
509
+ "youtube": ["WWDC Videos", "Point-Free", "Swift by Sundell"],
510
+ "websites": ["Swift Forums", "NSHipster", "objc.io"]
511
+ }
512
+ }
513
+ },
514
+
515
+ "Android Development": {
516
+ "category": "Mobile Development",
517
+ "salary_range": "$100,000 - $140,000",
518
+ "salary_min": 100000,
519
+ "salary_max": 140000,
520
+ "market_info": {
521
+ "demand": "High",
522
+ "growth_rate": "+20%",
523
+ "open_positions": "50,000+",
524
+ "top_employers": ["Google", "Meta", "Amazon", "Uber", "Netflix", "Spotify"],
525
+ "related_roles": ["Android Engineer", "Kotlin Developer", "Mobile Developer", "App Developer"]
526
+ },
527
+ "resources": {
528
+ "beginner": {
529
+ "youtube": ["Philipp Lackner", "freeCodeCamp.org", "Android Developers"],
530
+ "websites": ["Developer.android.com", "Kotlinlang.org", "Udacity"]
531
+ },
532
+ "intermediate": {
533
+ "youtube": ["Coding in Flow", "Reso Coder", "Stevdza-San"],
534
+ "websites": ["Raywenderlich.com", "Vogella", "Android Weekly"]
535
+ },
536
+ "advanced": {
537
+ "youtube": ["Android Developers", "Philipp Lackner Advanced", "Coding with Mitch"],
538
+ "websites": ["Android Dev Summit", "ProAndroidDev", "Android Arsenal"]
539
+ }
540
+ }
541
+ },
542
+
543
+ "React Native": {
544
+ "category": "Mobile Development",
545
+ "salary_range": "$100,000 - $150,000",
546
+ "salary_min": 100000,
547
+ "salary_max": 150000,
548
+ "market_info": {
549
+ "demand": "High",
550
+ "growth_rate": "+25%",
551
+ "open_positions": "35,000+",
552
+ "top_employers": ["Meta", "Microsoft", "Tesla", "Shopify", "Discord", "Coinbase"],
553
+ "related_roles": ["React Native Developer", "Mobile Developer", "Cross-Platform Developer", "JavaScript Developer"]
554
+ },
555
+ "resources": {
556
+ "beginner": {
557
+ "youtube": ["freeCodeCamp.org", "The Net Ninja", "Programming with Mosh"],
558
+ "websites": ["Reactnative.dev", "Expo.dev", "React Native School"]
559
+ },
560
+ "intermediate": {
561
+ "youtube": ["Academind", "Maximilian Schwarzmüller", "Not Just Dev"],
562
+ "websites": ["Udemy", "Coursera", "React Native Directory"]
563
+ },
564
+ "advanced": {
565
+ "youtube": ["William Candillon", "Catalin Miron", "Infinite Red"],
566
+ "websites": ["React Native EU", "Chain React", "React Native Radio"]
567
+ }
568
+ }
569
+ },
570
+
571
+ "Flutter": {
572
+ "category": "Mobile Development",
573
+ "salary_range": "$100,000 - $140,000",
574
+ "salary_min": 100000,
575
+ "salary_max": 140000,
576
+ "market_info": {
577
+ "demand": "Growing",
578
+ "growth_rate": "+30%",
579
+ "open_positions": "30,000+",
580
+ "top_employers": ["Google", "Alibaba", "BMW", "eBay", "Groupon", "Philips"],
581
+ "related_roles": ["Flutter Developer", "Mobile Developer", "Dart Developer", "Cross-Platform Developer"]
582
+ },
583
+ "resources": {
584
+ "beginner": {
585
+ "youtube": ["The Net Ninja", "freeCodeCamp.org", "Flutter"],
586
+ "websites": ["Flutter.dev", "Dart.dev", "Flutter Codelabs"]
587
+ },
588
+ "intermediate": {
589
+ "youtube": ["Reso Coder", "Academind", "Robert Brunhage"],
590
+ "websites": ["Udemy", "Coursera", "Flutter Awesome"]
591
+ },
592
+ "advanced": {
593
+ "youtube": ["Flutter Europe", "Filledstacks", "Reso Coder Advanced"],
594
+ "websites": ["Flutter Engage", "DartPad", "Pub.dev"]
595
+ }
596
+ }
597
+ },
598
+
599
+ "Mobile UI/UX": {
600
+ "category": "Mobile Development",
601
+ "salary_range": "$90,000 - $130,000",
602
+ "salary_min": 90000,
603
+ "salary_max": 130000,
604
+ "market_info": {
605
+ "demand": "High",
606
+ "growth_rate": "+22%",
607
+ "open_positions": "25,000+",
608
+ "top_employers": ["Apple", "Google", "Meta", "Airbnb", "Uber", "Netflix"],
609
+ "related_roles": ["Mobile UI Designer", "UX Designer", "Product Designer", "Interaction Designer"]
610
+ },
611
+ "resources": {
612
+ "beginner": {
613
+ "youtube": ["DesignCourse", "Flux Academy", "Jesse Showalter"],
614
+ "websites": ["Material.io", "Humaninterface.apple.com", "Uxdesign.cc"]
615
+ },
616
+ "intermediate": {
617
+ "youtube": ["ChunBuns", "Mizko", "Malewicz"],
618
+ "websites": ["Interaction-design.org", "Adobe.com/xd", "Figma.com"]
619
+ },
620
+ "advanced": {
621
+ "youtube": ["Config by Figma", "Apple Design Resources"],
622
+ "websites": ["WWDC Design Sessions", "Material Design Awards", "Mobbin"]
623
+ }
624
+ }
625
+ },
626
+
627
+ "Cross-Platform": {
628
+ "category": "Mobile Development",
629
+ "salary_range": "$100,000 - $140,000",
630
+ "salary_min": 100000,
631
+ "salary_max": 140000,
632
+ "market_info": {
633
+ "demand": "High",
634
+ "growth_rate": "+28%",
635
+ "open_positions": "40,000+",
636
+ "top_employers": ["Microsoft", "Google", "Meta", "Shopify", "Adobe", "SAP"],
637
+ "related_roles": ["Cross-Platform Developer", "Mobile Developer", "Hybrid App Developer", "Multi-Platform Engineer"]
638
+ },
639
+ "resources": {
640
+ "beginner": {
641
+ "youtube": ["freeCodeCamp.org", "Academind", "The Net Ninja"],
642
+ "websites": ["Reactnative.dev", "Flutter.dev", "Ionicframework.com"]
643
+ },
644
+ "intermediate": {
645
+ "youtube": ["Simon Grimm", "Fireship", "Traversy Media"],
646
+ "websites": ["Xamarin.com", "Capacitorjs.com", "Udemy"]
647
+ },
648
+ "advanced": {
649
+ "youtube": ["React Native EU", "Flutter Engage", "Ionic Conf"],
650
+ "websites": ["Native Script", "Kotlin Multiplatform", "Tauri"]
651
+ }
652
+ }
653
+ },
654
+
655
+ "Mobile Games": {
656
+ "category": "Mobile Development",
657
+ "salary_range": "$90,000 - $140,000",
658
+ "salary_min": 90000,
659
+ "salary_max": 140000,
660
+ "market_info": {
661
+ "demand": "Moderate",
662
+ "growth_rate": "+15%",
663
+ "open_positions": "20,000+",
664
+ "top_employers": ["King", "Supercell", "Rovio", "Zynga", "Electronic Arts", "Activision"],
665
+ "related_roles": ["Mobile Game Developer", "Unity Developer", "Game Programmer", "Gameplay Engineer"]
666
+ },
667
+ "resources": {
668
+ "beginner": {
669
+ "youtube": ["Brackeys", "Blackthornprod", "Unity"],
670
+ "websites": ["Unity.com/learn", "Gamedev.tv", "Itch.io"]
671
+ },
672
+ "intermediate": {
673
+ "youtube": ["Code Monkey", "Jonas Tyroller", "Dani"],
674
+ "websites": ["Udemy", "Coursera", "Gamedev.net"]
675
+ },
676
+ "advanced": {
677
+ "youtube": ["GDC", "Unite Conference", "Game Maker's Toolkit"],
678
+ "websites": ["Gamasutra", "Unity Asset Store", "Unreal Marketplace"]
679
+ }
680
+ }
681
+ },
682
+
683
+ "Mobile Security": {
684
+ "category": "Mobile Development",
685
+ "salary_range": "$100,000 - $150,000",
686
+ "salary_min": 100000,
687
+ "salary_max": 150000,
688
+ "market_info": {
689
+ "demand": "High",
690
+ "growth_rate": "+35%",
691
+ "open_positions": "15,000+",
692
+ "top_employers": ["Apple", "Google", "Meta", "Amazon", "Microsoft", "Zimperium"],
693
+ "related_roles": ["Mobile Security Engineer", "Application Security Engineer", "Security Researcher", "Penetration Tester"]
694
+ },
695
+ "resources": {
696
+ "beginner": {
697
+ "youtube": ["The Cyber Mentor", "NetworkChuck", "freeCodeCamp.org"],
698
+ "websites": ["Owasp.org/mobile", "Developer.android.com/security", "Developer.apple.com/security"]
699
+ },
700
+ "intermediate": {
701
+ "youtube": ["LiveOverflow", "John Hammond", "David Bombal"],
702
+ "websites": ["HackerOne", "Bugcrowd", "Mobile Security Testing Guide"]
703
+ },
704
+ "advanced": {
705
+ "youtube": ["Black Hat", "DEF CON", "OWASP Mobile"],
706
+ "websites": ["OWASP MSTG", "Mobile Security Framework", "Frida"]
707
+ }
708
+ }
709
+ },
710
+
711
+ # ===== EMERGING AI ROLES 2025 =====
712
+ "Prompt Engineering": {
713
+ "category": "Emerging AI Roles",
714
+ "salary_range": "$140,000 - $220,000",
715
+ "salary_min": 140000,
716
+ "salary_max": 220000,
717
+ "market_info": {
718
+ "demand": "Very High",
719
+ "growth_rate": "+60%",
720
+ "open_positions": "12,000+",
721
+ "top_employers": ["OpenAI", "Anthropic", "Google", "Microsoft", "Meta", "Startups"],
722
+ "related_roles": ["Prompt Engineer", "AI Product Manager", "LLM Specialist", "Conversational AI Designer"]
723
+ },
724
+ "description": "A specialist who crafts precise inputs (prompts) for generative AI models to optimize outputs, bridging human intent and AI capabilities.",
725
+ "key_responsibilities": [
726
+ "Designing and testing prompts for optimal AI outputs",
727
+ "Iterating on AI responses for accuracy and relevance",
728
+ "Collaborating with developers to refine models",
729
+ "Training teams on effective prompting techniques",
730
+ "A/B testing different prompt strategies"
731
+ ],
732
+ "resources": {
733
+ "beginner": {
734
+ "youtube": ["OpenAI", "AI Explained", "Matt Wolfe"],
735
+ "websites": ["Learn Prompting", "PromptingGuide.ai", "OpenAI Cookbook"]
736
+ },
737
+ "intermediate": {
738
+ "youtube": ["DeepLearningAI", "Prompt Engineering Guide", "AI Jason"],
739
+ "websites": ["Anthropic Docs", "LangChain Docs", "PromptBase"]
740
+ },
741
+ "advanced": {
742
+ "youtube": ["Andrej Karpathy", "Yannic Kilcher", "AI Coffee Break"],
743
+ "websites": ["ArXiv.org", "Papers with Code", "HuggingFace Research"]
744
+ }
745
+ }
746
+ },
747
+
748
+ "AI Ethics & Governance": {
749
+ "category": "Emerging AI Roles",
750
+ "salary_range": "$150,000 - $230,000",
751
+ "salary_min": 150000,
752
+ "salary_max": 230000,
753
+ "market_info": {
754
+ "demand": "Very High",
755
+ "growth_rate": "+55%",
756
+ "open_positions": "8,000+",
757
+ "top_employers": ["OpenAI", "Google", "Meta", "Microsoft", "Anthropic", "Partnership on AI"],
758
+ "related_roles": ["AI Ethics Officer", "Responsible AI Lead", "AI Policy Analyst", "AI Governance Specialist"]
759
+ },
760
+ "description": "An expert focused on ensuring AI systems are fair, transparent, and unbiased, addressing regulatory and societal concerns in AI deployment.",
761
+ "key_responsibilities": [
762
+ "Auditing AI systems for bias and fairness",
763
+ "Developing ethical guidelines and frameworks",
764
+ "Conducting AI impact assessments",
765
+ "Advising on compliance with AI regulations (EU AI Act, etc.)",
766
+ "Stakeholder communication on AI ethics"
767
+ ],
768
+ "resources": {
769
+ "beginner": {
770
+ "youtube": ["TED-Ed", "Computerphile", "CrashCourse AI Ethics"],
771
+ "websites": ["AI Ethics Guidelines", "Ethics.ai", "Partnership on AI"]
772
+ },
773
+ "intermediate": {
774
+ "youtube": ["Stanford HAI", "Montreal AI Ethics Institute", "DeepLearningAI"],
775
+ "websites": ["Futureoflife.org", "AI Index Stanford", "FAccT Conference"]
776
+ },
777
+ "advanced": {
778
+ "youtube": ["Timnit Gebru", "Kate Crawford", "Joy Buolamwini"],
779
+ "websites": ["FAccT Conference", "AIES Conference", "ArXiv AI Ethics"]
780
+ }
781
+ }
782
+ },
783
+
784
+ "AI Auditing": {
785
+ "category": "Emerging AI Roles",
786
+ "salary_range": "$130,000 - $200,000",
787
+ "salary_min": 130000,
788
+ "salary_max": 200000,
789
+ "market_info": {
790
+ "demand": "High",
791
+ "growth_rate": "+50%",
792
+ "open_positions": "6,000+",
793
+ "top_employers": ["Deloitte", "PwC", "KPMG", "EY", "Tech Companies", "Financial Institutions"],
794
+ "related_roles": ["AI Auditor", "ML Compliance Specialist", "AI Risk Analyst", "Algorithm Auditor"]
795
+ },
796
+ "description": "A role involving the inspection of AI systems for accuracy, security, and explainability, similar to financial auditing but for algorithms.",
797
+ "key_responsibilities": [
798
+ "Performing AI risk assessments",
799
+ "Documenting AI decision processes",
800
+ "Verifying model performance and accuracy",
801
+ "Reporting on vulnerabilities or errors",
802
+ "Ensuring regulatory compliance"
803
+ ],
804
+ "resources": {
805
+ "beginner": {
806
+ "youtube": ["AI Auditing Basics", "Computerphile", "freeCodeCamp.org"],
807
+ "websites": ["ISO AI Standards", "NIST AI Framework", "Coursera"]
808
+ },
809
+ "intermediate": {
810
+ "youtube": ["DeepLearningAI", "Stanford AI Audit", "AI Explained"],
811
+ "websites": ["SHAP Documentation", "LIME Tutorials", "TowardsDataScience"]
812
+ },
813
+ "advanced": {
814
+ "youtube": ["NeurIPS Talks", "ICML Tutorials", "AI Audit Research"],
815
+ "websites": ["ArXiv.org", "AI Audit Tools", "Explainable AI Research"]
816
+ }
817
+ }
818
+ },
819
+
820
+ "Generative AI Engineering": {
821
+ "category": "Emerging AI Roles",
822
+ "salary_range": "$160,000 - $250,000",
823
+ "salary_min": 160000,
824
+ "salary_max": 250000,
825
+ "market_info": {
826
+ "demand": "Very High",
827
+ "growth_rate": "+70%",
828
+ "open_positions": "15,000+",
829
+ "top_employers": ["OpenAI", "Stability AI", "Midjourney", "Google", "Meta", "Adobe"],
830
+ "related_roles": ["Generative AI Engineer", "GenAI Developer", "Diffusion Model Specialist", "Creative AI Engineer"]
831
+ },
832
+ "description": "A developer specializing in building and deploying generative models for content creation (text, images, video), fueled by tools like DALL-E and Stable Diffusion.",
833
+ "key_responsibilities": [
834
+ "Integrating generative AI into applications",
835
+ "Fine-tuning models for specific use cases",
836
+ "Optimizing for scalability and performance",
837
+ "Ensuring output quality and safety",
838
+ "Building APIs for generative models"
839
+ ],
840
+ "resources": {
841
+ "beginner": {
842
+ "youtube": ["Sentdex", "freeCodeCamp.org", "AI Explained"],
843
+ "websites": ["HuggingFace.co", "Stability AI Docs", "OpenAI Platform"]
844
+ },
845
+ "intermediate": {
846
+ "youtube": ["DeepLearningAI", "Andrej Karpathy", "Two Minute Papers"],
847
+ "websites": ["PyTorch.org", "TensorFlow.org", "Papers with Code"]
848
+ },
849
+ "advanced": {
850
+ "youtube": ["Yannic Kilcher", "AI Coffee Break", "CVPR Talks"],
851
+ "websites": ["ArXiv.org", "Distill.pub", "NeurIPS Papers"]
852
+ }
853
+ }
854
+ },
855
+
856
+ "Human-AI Collaboration": {
857
+ "category": "Emerging AI Roles",
858
+ "salary_range": "$120,000 - $190,000",
859
+ "salary_min": 120000,
860
+ "salary_max": 190000,
861
+ "market_info": {
862
+ "demand": "High",
863
+ "growth_rate": "+45%",
864
+ "open_positions": "7,000+",
865
+ "top_employers": ["Microsoft", "Google", "Salesforce", "Adobe", "Notion", "Figma"],
866
+ "related_roles": ["Human-AI Collaboration Specialist", "AI UX Designer", "Augmented Intelligence Designer", "AI Product Designer"]
867
+ },
868
+ "description": "A professional designing workflows where humans and AI augment each other, focusing on productivity tools and interface optimization.",
869
+ "key_responsibilities": [
870
+ "Creating collaborative AI interfaces",
871
+ "Training users on AI tools",
872
+ "Measuring human-AI performance metrics",
873
+ "Iterating on feedback loops",
874
+ "Designing AI-augmented workflows"
875
+ ],
876
+ "resources": {
877
+ "beginner": {
878
+ "youtube": ["DesignCourse", "Flux Academy", "Google Design"],
879
+ "websites": ["Interaction-design.org", "Nielsen Norman Group", "UX Collective"]
880
+ },
881
+ "intermediate": {
882
+ "youtube": ["Adobe MAX", "Figma Config", "Microsoft Design"],
883
+ "websites": ["Human-AI Interaction", "ACM CHI", "UX Research Methods"]
884
+ },
885
+ "advanced": {
886
+ "youtube": ["CHI Conference", "CSCW Talks", "HCI Research"],
887
+ "websites": ["ArXiv HCI", "ACM Digital Library", "Human-AI Research"]
888
+ }
889
+ }
890
+ },
891
+
892
+ "AI Agent Architecture": {
893
+ "category": "Emerging AI Roles",
894
+ "salary_range": "$170,000 - $260,000",
895
+ "salary_min": 170000,
896
+ "salary_max": 260000,
897
+ "market_info": {
898
+ "demand": "Very High",
899
+ "growth_rate": "+65%",
900
+ "open_positions": "10,000+",
901
+ "top_employers": ["OpenAI", "Anthropic", "Google DeepMind", "Salesforce", "Microsoft", "Startups"],
902
+ "related_roles": ["AI Agent Architect", "Agentic AI Engineer", "Multi-Agent Systems Developer", "Autonomous AI Engineer"]
903
+ },
904
+ "description": "An engineer who designs autonomous AI agents capable of multi-step tasks (planning, decision-making), rising with agentic AI advancements like Salesforce's Agentforce.",
905
+ "key_responsibilities": [
906
+ "Architecting agent frameworks and systems",
907
+ "Handling multi-agent coordination",
908
+ "Ensuring reliability and error-handling",
909
+ "Scaling for enterprise use",
910
+ "Implementing ethical AI safeguards"
911
+ ],
912
+ "resources": {
913
+ "beginner": {
914
+ "youtube": ["DeepLearningAI", "Sentdex", "AI Explained"],
915
+ "websites": ["LangChain Docs", "AutoGPT", "AgentGPT"]
916
+ },
917
+ "intermediate": {
918
+ "youtube": ["Andrej Karpathy", "Two Minute Papers", "AI Agent Tutorials"],
919
+ "websites": ["LangGraph", "CrewAI", "Multi-Agent Systems"]
920
+ },
921
+ "advanced": {
922
+ "youtube": ["Yannic Kilcher", "AI Coffee Break", "NeurIPS Talks"],
923
+ "websites": ["ArXiv.org", "Reinforcement Learning", "Agent Research Papers"]
924
+ }
925
+ }
926
+ },
927
+ }
928
+
929
+
930
def get_skill_info(skill_name: str, expertise_level: str = "intermediate") -> dict:
    """
    Get skill information including salary and resources filtered by expertise level.

    Args:
        skill_name: Name of the skill (case-insensitive)
        expertise_level: User's expertise level (beginner, intermediate, advanced);
            anything else falls back to "intermediate"

    Returns:
        Dictionary with skill information including filtered resources.
        If the skill is unknown, a generic default record is returned instead.
    """
    # Case-insensitive lookup of the canonical database key.
    wanted = skill_name.lower()
    skill_key = next(
        (key for key in SKILLS_DATABASE if key.lower() == wanted),
        None,
    )

    if not skill_key:
        # Return default data if skill not found
        return {
            "salary_range": "$80,000 - $150,000",
            "market_info": {
                "demand": "Moderate",
                "growth_rate": "+20%",
                "open_positions": "10,000+",
                "top_employers": ["Tech Companies", "Startups", "Enterprises"],
                "related_roles": ["Software Engineer", "Developer", "Technical Specialist"]
            },
            "resources": {
                "youtube": ["freeCodeCamp.org", "Traversy Media", "The Net Ninja"],
                "websites": ["Coursera", "Udemy", "FreeCodeCamp"]
            }
        }

    skill_data = SKILLS_DATABASE[skill_key].copy()

    # Normalize the expertise level; unknown values fall back to intermediate.
    expertise_level = expertise_level.lower()
    if expertise_level not in ("beginner", "intermediate", "advanced"):
        expertise_level = "intermediate"

    if "resources" in skill_data:
        levels = skill_data["resources"]
        if expertise_level in levels:
            selected = levels[expertise_level]
        else:
            # Fallback to intermediate if level not found
            selected = levels.get("intermediate", {
                "youtube": ["freeCodeCamp.org", "Traversy Media"],
                "websites": ["Coursera", "Udemy"]
            })
        # Copy the selected level dict: `.copy()` above is shallow, so without
        # this, callers mutating the result would corrupt SKILLS_DATABASE.
        skill_data["resources"] = dict(selected)

    return skill_data
983
+
984
+
985
def get_all_categories() -> list:
    """Return the sorted list of distinct skill categories in the database."""
    unique_categories = {entry["category"] for entry in SKILLS_DATABASE.values()}
    return sorted(unique_categories)
991
+
992
+
993
def get_skills_by_category(category: str) -> list:
    """Return the alphabetically sorted names of all skills in *category*."""
    return sorted(
        name
        for name, entry in SKILLS_DATABASE.items()
        if entry["category"] == category
    )
src/data/vector_store.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vector store implementation for RAG capabilities.
3
+ """
4
+ from typing import List, Dict, Any, Optional
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.document_loaders import DirectoryLoader
11
+
12
class VectorStore:
    """
    Manages vector storage for RAG capabilities.

    Wraps a FAISS index persisted under ``vector_db/`` together with an
    embedding backend: free HuggingFace sentence-transformers when that
    package is importable, otherwise OpenAI embeddings (requires an API
    key). When no index is available, :meth:`search` degrades to a plain
    case-insensitive substring match over caller-supplied documents.
    """
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the vector store.

        Args:
            api_key: Optional OpenAI API key (only needed as a fallback
                when HuggingFace embeddings are unavailable)

        Raises:
            ValueError: if HuggingFace embeddings cannot be imported and
                no OpenAI API key was provided.
        """
        self.api_key = api_key

        # Use free sentence-transformers embeddings (no API key needed)
        try:
            from langchain.embeddings import HuggingFaceEmbeddings
            self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
            print("✅ Using free HuggingFace embeddings")
        except ImportError:
            # Fallback to OpenAI if HuggingFace not available
            if api_key:
                from langchain.embeddings import OpenAIEmbeddings
                self.embeddings = OpenAIEmbeddings(api_key=api_key)
                print("✅ Using OpenAI embeddings")
            else:
                raise ValueError("HuggingFace not available and no OpenAI API key provided")

        # Index files live in ./vector_db relative to the working directory.
        self.vector_store_path = Path("vector_db")
        self.vector_store_path.mkdir(exist_ok=True)
        # Populated lazily by load_documents(); None means "no index yet".
        self.vector_store = None

    def load_documents(self, directory: str = None) -> None:
        """
        Load documents from a directory and create embeddings.
        If no directory is provided, creates a minimal default vector store.

        Args:
            directory: Optional path to directory containing documents
        """
        try:
            # If no directory provided, create a minimal vector store
            if directory is None:
                self._create_minimal_vector_store()
                return

            # Check if directory exists
            if not os.path.exists(directory):
                print(f"Warning: Document directory {directory} not found. Creating minimal vector store.")
                self._create_minimal_vector_store()
                return

            # Try to load documents
            loader = DirectoryLoader(directory)
            documents = loader.load()

            if not documents:
                print("Warning: No documents found in directory. Creating minimal vector store.")
                self._create_minimal_vector_store()
                return

            # Process documents: chunk them so each embedding covers a
            # bounded span of text, with overlap to preserve context.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000,
                chunk_overlap=200,
            )

            texts = text_splitter.split_documents(documents)

            # Create or update vector store: append to an existing on-disk
            # index if one is present, otherwise build a fresh one.
            if os.path.exists(self.vector_store_path / "index.faiss"):
                self.vector_store = FAISS.load_local(
                    str(self.vector_store_path),
                    self.embeddings
                )
                self.vector_store.add_documents(texts)
            else:
                self.vector_store = FAISS.from_documents(
                    texts,
                    self.embeddings
                )
            self.vector_store.save_local(str(self.vector_store_path))

        except Exception as e:
            # Best-effort: any failure degrades to the minimal default store
            # rather than leaving the object without an index.
            print(f"Error loading documents: {str(e)}")
            self._create_minimal_vector_store()

    def _create_minimal_vector_store(self) -> None:
        """Create a minimal vector store with default content."""
        try:
            default_texts = [
                "This is a default document. The vector store was initialized with minimal content.",
                "You can add your own documents to the vector store by placing them in the vector_db/documents directory.",
                "The application will automatically load and index any text files found in that directory."
            ]

            # Prefer reloading an existing index over overwriting it with
            # the default texts.
            if os.path.exists(self.vector_store_path / "index.faiss"):
                self.vector_store = FAISS.load_local(
                    str(self.vector_store_path),
                    self.embeddings
                )
            else:
                self.vector_store = FAISS.from_texts(
                    default_texts,
                    self.embeddings
                )
            self.vector_store.save_local(str(self.vector_store_path))

        except Exception as e:
            print(f"Error creating minimal vector store: {str(e)}")
            # Create an empty FAISS index as a last resort (in-memory only;
            # note this is not persisted to disk).
            self.vector_store = FAISS.from_texts(
                ["Default document"],
                self.embeddings
            )

    def search(self, query: str, k: int = 4, documents: List[str] = None) -> List[Dict[str, Any]]:
        """
        Search for relevant documents based on query.

        Args:
            query: Search query
            k: Number of results to return
            documents: Optional list of documents to search through (fallback)

        Returns:
            List of relevant documents with scores, each shaped as
            ``{"content": str, "metadata": dict, "score": float}``.
            Returns an empty list when no index exists and no fallback
            documents were supplied.
        """
        # If vector store is not available, fall back to simple text search
        if not self.vector_store:
            if not documents:
                return []

            # Simple text-based search as fallback: case-insensitive
            # substring containment, fixed score of 1.0.
            query = query.lower()
            return [
                {"content": doc, "score": 1.0, "metadata": {}}
                for doc in documents
                if query in doc.lower()
            ][:k]

        try:
            results = self.vector_store.similarity_search_with_score(query, k=k)
            formatted_results = []
            for doc, score in results:
                formatted_results.append({
                    "content": doc.page_content,
                    "metadata": getattr(doc, 'metadata', {}),
                    # FAISS scores may be numpy scalars; coerce to float,
                    # defaulting to 0.0 for non-numeric score objects.
                    "score": float(score) if hasattr(score, '__float__') else 0.0
                })
            return formatted_results

        except Exception as e:
            print(f"Error in vector store search: {str(e)}")
            # Fall back to simple text search if available
            if documents:
                query = query.lower()
                return [
                    {"content": doc, "score": 1.0, "metadata": {}}
                    for doc in documents
                    if query in doc.lower()
                ][:k]
            return []
src/direct_openai.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Direct OpenAI API handler to bypass any potential middleware issues.
3
+ """
4
+ import os
5
+ import json
6
+ import requests
7
+ from typing import Dict, Any, List, Optional
8
+ from langsmith import traceable as langsmith_traceable
9
+
10
@langsmith_traceable(name="OpenAI_Direct_Call")
def generate_completion(
    prompt: str,
    system_message: str = "You are an expert educational AI assistant that specializes in creating personalized learning paths.",
    model: str = "gpt-3.5-turbo",
    temperature: float = 0.7,
    max_tokens: int = 1000,
    timeout: int = 120
) -> str:
    """
    Generate a completion using direct HTTP requests to OpenAI API.

    Args:
        prompt: The user prompt
        system_message: Optional system message
        model: The OpenAI model to use
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate
        timeout: Request timeout in seconds

    Returns:
        The generated text

    Raises:
        ValueError: if no API key can be found, the response has an
            unexpected shape, or the HTTP request fails.
    """
    # Get API key from environment or directly from file if needed
    api_key = os.environ.get("OPENAI_API_KEY")

    # Fallback to direct read if environment variable isn't working
    if not api_key or len(api_key) < 20:
        try:
            with open('.env', 'r') as f:
                for line in f:
                    if line.startswith('OPENAI_API_KEY='):
                        # Tolerate optional surrounding quotes, which are
                        # common in .env files.
                        api_key = line.strip().split('=', 1)[1].strip('"\'')
                        break
        except Exception as e:
            # Best-effort fallback only; a missing/unreadable .env is
            # reported below via the api_key check.
            print(f"Error reading API key from file: {e}")

    if not api_key:
        raise ValueError("OpenAI API key not found in environment variables or .env file")

    print(f"Using API key starting with: {api_key[:10]}...")

    # API endpoint
    url = "https://api.openai.com/v1/chat/completions"

    # Request headers
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # Request payload
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": prompt}
        ],
        "temperature": temperature,
        "max_tokens": max_tokens
    }

    print("Making direct API request to OpenAI...")

    # Make the request
    try:
        response = requests.post(
            url,
            headers=headers,
            json=payload,
            timeout=timeout
        )

        # Check if request was successful
        response.raise_for_status()

        # Parse response
        result = response.json()
        print("Received response from OpenAI API")

        # Extract and return the generated text
        if "choices" in result and len(result["choices"]) > 0:
            return result["choices"][0]["message"]["content"]
        else:
            raise ValueError(f"Unexpected API response: {json.dumps(result)}")

    except requests.exceptions.RequestException as e:
        print(f"API request failed: {str(e)}")
        if hasattr(e, "response") and e.response is not None:
            status_code = e.response.status_code
            try:
                error_data = e.response.json()
                error_message = f"Error code: {status_code} - {json.dumps(error_data)}"
            except ValueError:
                # Body was not valid JSON (ValueError covers
                # json.JSONDecodeError); fall back to the raw text.
                # NOTE: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit.
                error_message = f"Error code: {status_code} - {e.response.text}"
        else:
            error_message = str(e)

        raise ValueError(f"OpenAI API request failed: {error_message}")
src/learning_path.py ADDED
@@ -0,0 +1,916 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning path generation logic for the AI Learning Path Generator.
3
+ This module handles the creation and management of personalized learning paths.
4
+ """
5
+ import datetime
6
+ import json
7
+ import os
8
+ import uuid
9
+ import hashlib
10
+ from pathlib import Path
11
+ import time
12
+ from typing import Any, Dict, List, Optional, Type
13
+ from concurrent.futures import ThreadPoolExecutor, as_completed
14
+
15
+ from langchain.output_parsers import PydanticOutputParser
16
+ from pydantic import BaseModel, Field, ValidationError, validator
17
+
18
+ from src.data.document_store import DocumentStore
19
+ from src.data.skills_database import get_skill_info
20
+ from src.ml.model_orchestrator import ModelOrchestrator
21
+ from src.ml.job_market import get_job_market_stats
22
+ from src.utils.config import (
23
+ DEFAULT_REGION,
24
+ EXPERTISE_LEVELS,
25
+ LEARNING_STYLES,
26
+ TIME_COMMITMENTS,
27
+ )
28
+ from src.utils.helpers import (
29
+ calculate_study_schedule,
30
+ difficulty_to_score,
31
+ match_resources_to_learning_style,
32
+ )
33
+ from src.utils.observability import get_observability_manager, traceable
34
+ from src.utils.semantic_cache import SemanticCache
35
+ # Import for OpenAI-powered resource search
36
+ from src.ml.resource_search import search_resources
37
+
38
+
39
class ResourceItem(BaseModel):
    """A single learning resource.

    NOTE: the ``Field(description=...)`` strings are consumed at runtime
    (the module builds a PydanticOutputParser over these models), so they
    are behavior, not documentation.
    """

    # Resource kind label, e.g. "article", "video", "book"
    type: str = Field(description="Type of the resource (e.g., article, video, book)")
    # Link to the resource
    url: str = Field(description="URL of the resource")
    # One-line human-readable summary
    description: str = Field(description="Brief description of the resource")
45
+
46
+
47
class JobMarketData(BaseModel):
    """Job market data for a skill or role.

    All fields are optional with safe defaults so a path can still be
    produced when market stats are unavailable; ``error`` carries the
    failure reason in that case.
    """

    open_positions: Optional[str] = Field(
        description="Estimated number of open positions for this role/skill.",
        default="N/A",
    )
    trending_employers: Optional[List[str]] = Field(
        description="List of companies currently hiring for this role/skill.",
        default_factory=list,
    )
    average_salary: Optional[str] = Field(
        description="Estimated average salary range for this role/skill.", default="N/A"
    )
    related_roles: Optional[List[str]] = Field(
        description="Related job titles or roles for this skill/role.",
        default_factory=list,
    )
    demand_score: Optional[int] = Field(
        description="Demand score (0-100) for how hot this skill is right now", default=0
    )
    region: Optional[str] = Field(
        description="Region for which these stats apply", default=None
    )
    error: Optional[str] = Field(
        description="Error message if data could not be fetched.", default=None
    )
74
+
75
+
76
class Milestone(BaseModel):
    """A milestone in a learning path.

    A milestone is one module/week of study with its resources, the
    skills it teaches, and associated job-market stats.
    """

    title: str = Field(description="Short title for the milestone")
    description: str = Field(description="Detailed description of what will be learned")
    estimated_hours: int = Field(
        description="Estimated hours to complete this milestone"
    )
    resources: List[ResourceItem] = Field(description="Recommended learning resources")
    skills_gained: List[str] = Field(
        description="Skills gained after completing this milestone"
    )
    job_market_data: JobMarketData = Field(
        description="Job market data for the skills gained",
        default_factory=JobMarketData,
    )

    @validator("resources", pre=True, always=True)
    def check_resources_not_empty(cls, v):
        # Deliberately lenient: an empty resource list is replaced with a
        # placeholder instead of failing validation, so a partially
        # generated path still parses.
        if not v:
            # Instead of raising an error, provide a default resource
            return [
                ResourceItem(
                    type="article",
                    url="https://example.com/default-resource",
                    description="Default resource - Please explore additional materials for this milestone",
                )
            ]
        return v
105
+
106
+
107
class LearningPath(BaseModel):
    """Model representation of a learning path.

    Top-level output schema of the generator: metadata about the study
    plan plus its milestones. ``goals`` and ``milestones`` must be
    non-empty (enforced by validators below).
    """

    # Unique identifier; auto-generated per instance
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    title: str = Field(description="Title of the learning path")
    description: str = Field(description="Detailed description of the learning path")
    topic: str = Field(description="Main topic of study")
    expertise_level: str = Field(description="Starting expertise level")
    learning_style: str = Field(description="Preferred learning style")
    time_commitment: str = Field(description="Weekly time commitment")
    duration_weeks: Optional[int] = Field(
        description="Total duration in weeks", default=0
    )
    goals: List[str] = Field(description="Learning goals and objectives")
    milestones: List["Milestone"] = Field(description="Weekly or modular breakdown")
    schedule: Optional[Dict[str, Any]] = Field(
        default=None, description="The calculated study schedule"
    )
    prerequisites: List[str] = Field(description="Prerequisites for this path")
    total_hours: int = Field(description="Total estimated hours")
    # ISO-8601 creation timestamp, captured at instantiation time
    created_at: str = Field(default_factory=lambda: datetime.datetime.now().isoformat())
    job_market_data: JobMarketData = Field(
        description="Aggregated job market data for the main topic",
        default_factory=JobMarketData,
    )

    @validator("goals", pre=True, always=True)
    def check_goals_not_empty(cls, v):
        # Unlike Milestone.resources, goals are strict: fail fast rather
        # than silently substituting defaults.
        if not v:
            raise ValueError("Learning path goals list cannot be empty")
        # Ensure all goals are non-empty strings
        if not all(isinstance(goal, str) and goal.strip() for goal in v):
            raise ValueError("All goals must be non-empty strings")
        return v

    @validator("milestones", pre=True, always=True)
    def check_milestones_not_empty(cls, v):
        if not v:
            raise ValueError("Learning path milestones list cannot be empty")
        return v
147
+
148
+
149
+ class LearningPathGenerator:
150
+ """
151
+ Core class responsible for generating personalized learning paths.
152
+ """
153
+
154
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the learning path generator.

        Wires up the LLM orchestrator, document store, structured-output
        parser (targeting the LearningPath model), observability manager,
        and a Redis-backed semantic cache.

        Args:
            api_key: Optional OpenAI API key (if not provided in environment)
        """
        self.model_orchestrator = ModelOrchestrator(api_key)
        self.document_store = DocumentStore()
        # Parses raw LLM output into the LearningPath pydantic model.
        self.output_parser = PydanticOutputParser(pydantic_object=LearningPath)
        self.obs_manager = get_observability_manager()
        # Pass REDIS_URL from environment to SemanticCache
        self.semantic_cache = SemanticCache(redis_url=os.getenv('REDIS_URL'))
167
+
168
+ def fetch_job_market_data(
169
+ self,
170
+ skill_or_role: str,
171
+ region: Optional[str] = None,
172
+ expertise_level: str = "intermediate",
173
+ ) -> JobMarketData:
174
+ """
175
+ Fetch job market data for a given skill or role from the skills database.
176
+
177
+ Args:
178
+ skill_or_role: The skill or role to query job market data for.
179
+ region: The region to query job market data for (default is DEFAULT_REGION).
180
+ expertise_level: User's expertise level for resource filtering.
181
+
182
+ Returns:
183
+ A JobMarketData object containing job market statistics.
184
+ """
185
+ try:
186
+ # Get skill info from database (includes salary and market info)
187
+ skill_info = get_skill_info(skill_or_role, expertise_level)
188
+
189
+ # Extract market info
190
+ market_info = skill_info.get("market_info", {})
191
+
192
+ # Create JobMarketData object
193
+ return JobMarketData(
194
+ open_positions=market_info.get("open_positions", "10,000+"),
195
+ average_salary=skill_info.get("salary_range", "$80,000 - $150,000"),
196
+ trending_employers=market_info.get("top_employers", ["Tech Companies"]),
197
+ related_roles=market_info.get("related_roles", ["Software Engineer"]),
198
+ region=region or DEFAULT_REGION
199
+ )
200
+ except Exception as e:
201
+ # Fallback to default data
202
+ return JobMarketData(
203
+ open_positions="10,000+",
204
+ average_salary="$80,000 - $150,000",
205
+ trending_employers=["Tech Companies", "Startups", "Enterprises"],
206
+ related_roles=["Software Engineer", "Developer"],
207
+ region=region or DEFAULT_REGION,
208
+ error=str(e)
209
+ )
210
+
211
+ def fetch_related_roles(
212
+ self, skills: List[str], ai_provider: Optional[str] = None, ai_model: Optional[str] = None
213
+ ) -> List[str]:
214
+ """
215
+ Fetch related job roles for a given list of skills using an LLM.
216
+
217
+ Args:
218
+ skills: The list of skills to find related job roles for.
219
+ ai_provider: The AI provider to use (e.g., 'openai').
220
+ ai_model: The specific AI model to use.
221
+
222
+ Returns:
223
+ A list of related job role titles.
224
+ """
225
+ if not skills:
226
+ return []
227
+
228
+ skills_str = ", ".join(skills)
229
+ prompt = f"""
230
+ Based on the following skills: {skills_str}, what are some relevant job titles or roles that utilize these skills?
231
+ Please provide a list of job titles. Return the answer as a JSON array of strings.
232
+ For example: ["Data Scientist", "Machine Learning Engineer", "Business Analyst"]
233
+ """
234
+
235
+ # Select orchestrator based on provider/model overrides
236
+ orchestrator_to_use = self.model_orchestrator
237
+ if ai_provider or ai_model:
238
+ try:
239
+ override_provider = ai_provider or self.model_orchestrator.provider
240
+ orchestrator_to_use = ModelOrchestrator(provider=override_provider)
241
+ orchestrator_to_use.init_language_model(model_name=ai_model)
242
+ except Exception as init_error:
243
+ print(
244
+ f"⚠️ Falling back to default orchestrator for related roles: {init_error}"
245
+ )
246
+ orchestrator_to_use = self.model_orchestrator
247
+
248
+ try:
249
+ # Use the selected orchestrator to get the response
250
+ response_str = orchestrator_to_use.generate_response(
251
+ prompt,
252
+ use_cache=False,
253
+ )
254
+
255
+ # The response is expected to be a JSON string of a list
256
+ roles = json.loads(response_str)
257
+ if isinstance(roles, list):
258
+ return roles
259
+ return []
260
+ except json.JSONDecodeError:
261
+ # Fallback if the response is not valid JSON
262
+ # Attempt to parse a plain list from the string
263
+ if "[" in response_str and "]" in response_str:
264
+ try:
265
+ # Extract content between brackets and split by comma
266
+ roles_str = response_str[response_str.find('[')+1:response_str.rfind(']')]
267
+ return [role.strip().strip('"\'') for role in roles_str.split(',')]
268
+ except Exception:
269
+ return ["Could not parse roles"]
270
+ return ["Could not determine roles"]
271
+ except Exception as e:
272
+ print(f"An unexpected error occurred while fetching related roles: {e}")
273
+ return []
274
+
275
    def generate_path(
        self,
        topic: str,
        expertise_level: str,
        learning_style: str,
        time_commitment: str = "moderate",
        duration_weeks: Optional[int] = None,
        goals: List[str] = None,
        additional_info: Optional[str] = None,
        context: List[str] = None,
        ai_provider: Optional[str] = None,
        ai_model: Optional[str] = None,
        user_id: Optional[str] = None,  # For tracking in observability
    ) -> LearningPath:
        """
        Generate a personalized learning path based on user preferences.

        Args:
            topic: The main topic of study
            expertise_level: Starting level of expertise
            learning_style: Preferred learning style (None falls back to "visual")
            time_commitment: Weekly time commitment (None falls back to "moderate")
            duration_weeks: User-specified duration in weeks (overrides calculated duration)
            goals: List of learning goals
            additional_info: Any additional information or constraints
            context: Optional extra context strings appended to the LLM prompt
            ai_provider: Optional LLM provider override for this call
            ai_model: Optional LLM model override for this call
            user_id: Optional user ID for tracking

        Returns:
            A complete learning path object

        Raises:
            ValueError: For an invalid expertise level, learning style, or
                time commitment.
            RuntimeError: When the LLM fails to produce a schema-valid path
                after 3 attempts.
        """
        # --- High-Level Cache Check ---
        # Create a stable cache key by sorting and stringifying all inputs
        # NOTE(review): learning_style is NOT part of the cache key, so two
        # requests differing only in learning style share one cached path —
        # confirm this is intended.
        goals_str = json.dumps(sorted(goals) if goals else [])
        cache_key_data = {
            "topic": topic.lower().strip(),
            "expertise_level": expertise_level,
            "time_commitment": time_commitment,
            "duration_weeks": duration_weeks,
            "goals": goals_str,
            "additional_info": additional_info or ""
        }
        cache_key_str = json.dumps(cache_key_data, sort_keys=True).encode('utf-8')
        cache_key = hashlib.sha256(cache_key_str).hexdigest()

        cached_path = self.document_store.get_cached_path(cache_key)
        if cached_path:
            print(f"✅ Cache hit for learning path: {cache_key[:16]}... (topic: {topic})")
            # Ensure the cached data is a valid LearningPath object
            try:
                return LearningPath(**cached_path)
            except ValidationError as e:
                print(f"⚠️ Cached path validation failed, regenerating... Error: {e}")
        else:
            print(f"❌ Cache miss for learning path: {cache_key[:16]}... (topic: {topic})")
        # ---------------------------

        # Track generation time for observability
        generation_start_time = time.time()

        # Log the generation attempt
        self.obs_manager.log_event("path_generation_started", {
            "topic": topic,
            "expertise_level": expertise_level,
            "learning_style": learning_style,
            "time_commitment": time_commitment,
            "user_id": user_id
        })

        if goals is None:
            goals = [f"Master {topic}", f"Build practical skills in {topic}"]

        if expertise_level not in EXPERTISE_LEVELS:
            raise ValueError(
                f"Invalid expertise level. Choose from: {', '.join(EXPERTISE_LEVELS.keys())}"
            )

        # Allow None for learning_style and use a default
        if learning_style is None:
            learning_style = "visual"  # Default learning style
        elif learning_style not in LEARNING_STYLES:
            raise ValueError(
                f"Invalid learning style. Choose from: {', '.join(LEARNING_STYLES.keys())}"
            )

        # Allow None for time_commitment and use a default
        if time_commitment is None:
            time_commitment = "moderate"  # Default time commitment
        elif time_commitment not in TIME_COMMITMENTS:
            raise ValueError(
                f"Invalid time commitment. Choose from: {', '.join(TIME_COMMITMENTS.keys())}"
            )

        # RAG: retrieve supporting documents to ground the LLM prompt.
        relevant_docs = self.document_store.search_documents(
            query=topic, filters={"expertise_level": expertise_level}, top_k=10
        )

        hours_map = {"minimal": 2, "moderate": 5, "substantial": 8, "intensive": 15}
        hours_per_week = hours_map.get(time_commitment, 5)

        # Use user-specified duration if provided, otherwise calculate
        if duration_weeks and duration_weeks > 0:
            adjusted_duration = duration_weeks
            print(f"✅ Using user-specified duration: {adjusted_duration} weeks")
        else:
            base_duration = 8
            intensity_factor = {
                "minimal": 2.0,
                "moderate": 1.5,
                "substantial": 1.0,
                "intensive": 0.75,
            }
            complexity_factor = {
                "beginner": 1.0,
                "intermediate": 1.2,
                "advanced": 1.5,
                "expert": 2.0,
            }

            # Fewer weekly hours stretch the plan; harder levels lengthen it.
            adjusted_duration = int(
                base_duration
                * intensity_factor.get(time_commitment, 1.0)
                * complexity_factor.get(expertise_level, 1.0)
            )
            print(f"📊 Calculated duration: {adjusted_duration} weeks")

        # Calculate appropriate number of milestones based on duration
        # Rule: 1 milestone per 1-3 weeks
        if adjusted_duration <= 4:
            target_milestones = 3  # Short paths: 3 milestones
        elif adjusted_duration <= 8:
            target_milestones = 4  # Medium paths: 4 milestones
        elif adjusted_duration <= 12:
            target_milestones = 5  # Standard paths: 5 milestones
        elif adjusted_duration <= 20:
            target_milestones = 6  # Long paths: 6 milestones
        else:
            target_milestones = 7  # Very long paths: 7 milestones

        print(f"🎯 Target milestones for {adjusted_duration} weeks: {target_milestones}")

        # Build semantic cache query signature (captures the high-level intent)
        semantic_signature = json.dumps(
            {
                "topic": topic,
                "expertise_level": expertise_level,
                "time_commitment": time_commitment,
                "duration_weeks": adjusted_duration,
                "target_milestones": target_milestones,
                "goals": goals,
                "additional_info": additional_info,
            },
            sort_keys=True,
        )

        learning_path: Optional[LearningPath] = None
        parsed_successfully = False

        # --- Semantic Cache Check (pre-LLM) ---
        cached_semantic_path = self.semantic_cache.get(semantic_signature)
        if cached_semantic_path:
            try:
                learning_path = LearningPath(**cached_semantic_path)
                parsed_successfully = True
                print("✅ Semantic cache hit for learning path structure")
            except ValidationError as e:
                print(f"⚠️ Semantic cache entry invalid, regenerating. Error: {e}")
                cached_semantic_path = None
        else:
            print("❌ Semantic cache miss for learning path structure")
        # --------------------------------------

        # Few-Shot Prompting: Provide concrete examples to guide the AI
        # This dramatically improves output quality and consistency
        prompt_content = f"""Generate a detailed personalized learning path for the following:

Topic: {topic}
Expertise Level: {expertise_level} - {EXPERTISE_LEVELS[expertise_level]}
Learning Style: {learning_style} - {LEARNING_STYLES[learning_style]}
Time Commitment: {time_commitment} - {TIME_COMMITMENTS[time_commitment]}
Duration: {adjusted_duration} weeks
Target Milestones: {target_milestones} milestones
Learning Goals: {', '.join(goals)}
Additional Information: {additional_info or 'None provided'}

IMPORTANT:
1. Return ONLY valid JSON matching this exact structure.
2. Generate EXACTLY {target_milestones} milestones (no more, no less).
3. Set duration_weeks to EXACTLY {adjusted_duration}.
4. Distribute the milestones evenly across the {adjusted_duration} weeks.

=== EXAMPLE 1: Python Programming (Beginner) ===
{{
"title": "Complete Python Programming Journey",
"description": "A comprehensive learning path designed for absolute beginners to master Python programming through hands-on projects and real-world applications.",
"topic": "Python Programming",
"expertise_level": "beginner",
"learning_style": "visual",
"time_commitment": "moderate",
"duration_weeks": 8,
"goals": ["Master Python basics", "Build real projects", "Prepare for data science"],
"milestones": [
{{
"title": "Python Fundamentals",
"description": "Learn Python syntax, variables, data types, and basic operations",
"estimated_hours": 10,
"resources": [
{{"type": "video", "url": "https://example.com/python-basics", "description": "Python Basics Video Tutorial"}},
{{"type": "interactive", "url": "https://example.com/python-exercises", "description": "Interactive Python Exercises"}}
],
"skills_gained": ["Python syntax", "Data types", "Variables", "Basic operators"]
}},
{{
"title": "Control Flow and Functions",
"description": "Master if statements, loops, and creating reusable functions",
"estimated_hours": 12,
"resources": [
{{"type": "article", "url": "https://example.com/control-flow", "description": "Control Flow Guide"}},
{{"type": "video", "url": "https://example.com/functions", "description": "Functions Deep Dive"}}
],
"skills_gained": ["Conditional logic", "Loops", "Function creation", "Code organization"]
}}
],
"prerequisites": ["Basic computer skills", "Text editor familiarity"],
"total_hours": 40
}}

=== EXAMPLE 2: Machine Learning (Intermediate) ===
{{
"title": "Practical Machine Learning Mastery",
"description": "An intermediate-level path to master machine learning algorithms, model training, and deployment for real-world applications.",
"topic": "Machine Learning",
"expertise_level": "intermediate",
"learning_style": "hands-on",
"time_commitment": "substantial",
"duration_weeks": 12,
"goals": ["Build ML models", "Deploy to production", "Understand ML theory"],
"milestones": [
{{
"title": "Supervised Learning Fundamentals",
"description": "Master regression and classification algorithms with practical implementations",
"estimated_hours": 15,
"resources": [
{{"type": "course", "url": "https://example.com/supervised-learning", "description": "Supervised Learning Course"}},
{{"type": "project", "url": "https://example.com/ml-projects", "description": "Hands-on ML Projects"}}
],
"skills_gained": ["Linear regression", "Logistic regression", "Decision trees", "Model evaluation"]
}}
],
"prerequisites": ["Python programming", "Basic statistics", "Linear algebra basics"],
"total_hours": 60
}}

=== YOUR TASK ===
Now generate a similar learning path for:
Topic: {topic}
Expertise Level: {expertise_level}
Learning Style: {learning_style}
Time Commitment: {time_commitment}
Goals: {', '.join(goals)}

Requirements:
1. Include 3-7 milestones that represent major learning stages
2. Each milestone should have 2-4 resources tailored to the {learning_style} learning style
3. Estimate realistic hours for each milestone
4. List specific skills gained at each milestone
5. Include relevant prerequisites
6. Calculate total_hours as sum of all milestone hours

Return ONLY the JSON object, no markdown formatting or explanation.
"""

        prompt_with_context = prompt_content
        if context:
            context_text = "\n\nAdditional Context:\n" + "\n".join(context)
            prompt_with_context += context_text

        orchestrator_to_use = self.model_orchestrator
        if ai_provider:
            custom_orchestrator = ModelOrchestrator(provider=ai_provider)
            custom_orchestrator.init_language_model(model_name=ai_model)
            orchestrator_to_use = custom_orchestrator

        # Attempt up to 3 times to get a valid LearningPath JSON
        last_error: Optional[Exception] = None
        if not parsed_successfully:
            for attempt in range(3):
                if attempt > 0:
                    print(f"Retrying learning path generation (attempt {attempt+1}) due to previous validation failure…")
                response = orchestrator_to_use.generate_structured_response(
                    prompt=prompt_with_context,
                    output_schema=self.output_parser.get_format_instructions(),
                    relevant_documents=(
                        [doc.page_content for doc in relevant_docs] if relevant_docs else None
                    ),
                    temperature=0.6 + 0.1 * attempt,  # vary temperature slightly on retries
                )
                try:
                    learning_path = self.output_parser.parse(response)
                    parsed_successfully = True
                    # Store the successful structure for future semantic cache hits
                    self.semantic_cache.set(semantic_signature, learning_path.dict())
                    break
                except ValidationError as ve:
                    print("Validation failed when parsing AI response as LearningPath:", ve)
                    print("Offending response:\n", response)
                    last_error = ve
                    # Slightly tweak the prompt for the next attempt
                    prompt_with_context += (
                        "\n\nIMPORTANT: Your last response did NOT match the schema and was therefore rejected. "
                        "You MUST return a COMPLETE JSON object that follows the exact LearningPath schema with ALL required fields."
                    )
                except Exception as e:
                    print("Unexpected error while parsing AI response:", e)
                    print("Offending response:\n", response)
                    last_error = e
                    break  # Unexpected errors – don't retry further

        if not parsed_successfully:
            raise RuntimeError("LearningPath generation failed after 3 attempts") from last_error

        # Fetch job market data ONCE for the main topic (not per milestone)
        # This significantly speeds up generation time
        print(f"📊 Fetching job market data for main topic: {topic}")
        aggregated_job_market = self.fetch_job_market_data(topic, expertise_level=expertise_level)
        learning_path.job_market_data = aggregated_job_market

        # Fetch related roles once for the main topic
        all_skills = []
        for milestone in learning_path.milestones:
            if milestone.skills_gained:
                all_skills.extend(
                    milestone.skills_gained
                    if isinstance(milestone.skills_gained, list)
                    else [milestone.skills_gained]
                )

        if all_skills:
            related_roles = self.fetch_related_roles(
                all_skills[:5],  # Use top 5 skills only
                ai_provider=ai_provider,
                ai_model=ai_model,
            )
            aggregated_job_market.related_roles = related_roles

        # Share the aggregated job market snapshot with each milestone if needed downstream
        for milestone in learning_path.milestones:
            milestone.job_market_data = aggregated_job_market

        # Fetch resources for milestones IN PARALLEL (much faster!)
        print(f"🔍 Fetching resources for {len(learning_path.milestones)} milestones in parallel...")

        def fetch_milestone_resources(milestone_data):
            """Helper function to fetch resources for a single milestone"""
            milestone, index = milestone_data
            try:
                print(f" [{index}/{len(learning_path.milestones)}] Fetching resources for: {milestone.title}")

                # Get trusted sources from the skills database
                skill_info = get_skill_info(topic, expertise_level)
                trusted_sources = skill_info.get("resources", {})

                # Prepare the trusted sources dict for Perplexity
                perplexity_sources = None
                if trusted_sources:
                    perplexity_sources = {
                        'youtube': trusted_sources.get('youtube', []),
                        'websites': trusted_sources.get('websites', [])
                    }
                    print(f" 📚 Using curated sources:")
                    if perplexity_sources.get('youtube'):
                        print(f" YouTube: {', '.join(perplexity_sources['youtube'][:3])}{'...' if len(perplexity_sources['youtube']) > 3 else ''}")
                    if perplexity_sources.get('websites'):
                        print(f" Websites: {', '.join(perplexity_sources['websites'][:3])}{'...' if len(perplexity_sources['websites']) > 3 else ''}")
                else:
                    print(f" ⚠️ No curated sources found for '{topic}' - using general search")

                # Use Perplexity to search within trusted sources
                contextualized_query = f"{topic}: {milestone.title}"
                print(f" 🔍 Searching with Perplexity...")

                perplexity_results = search_resources(
                    contextualized_query,
                    k=5,  # Get more resources for better variety
                    trusted_sources=perplexity_sources
                )

                if perplexity_results and len(perplexity_results) > 0:
                    print(f" ✓ Found {len(perplexity_results)} specific resources from trusted sources")
                    return milestone, [ResourceItem(**r) for r in perplexity_results]
                else:
                    # Fallback to default resources if Perplexity fails
                    print(f" ⚠️ Perplexity search returned no results, using fallback")
                    return milestone, [
                        ResourceItem(
                            type="Video",
                            url=f"https://www.youtube.com/results?search_query={milestone.title.replace(' ', '+')}",
                            description=f"YouTube: {milestone.title}"
                        ),
                        ResourceItem(
                            type="Online Course",
                            url=f"https://www.coursera.org/search?query={milestone.title.replace(' ', '+')}",
                            description=f"Coursera: {milestone.title}"
                        )
                    ]

            except Exception as _err:
                print(f" ⚠️ Resource search failed for {milestone.title}: {_err}")
                # Return default resources
                return milestone, [
                    ResourceItem(
                        type="Video",
                        url=f"https://www.youtube.com/results?search_query={milestone.title.replace(' ', '+')}",
                        description=f"YouTube: {milestone.title}"
                    ),
                    ResourceItem(
                        type="Online Course",
                        url=f"https://www.coursera.org/search?query={milestone.title.replace(' ', '+')}",
                        description=f"Coursera: {milestone.title}"
                    )
                ]

        # Use ThreadPoolExecutor to fetch resources in parallel
        with ThreadPoolExecutor(max_workers=3) as executor:
            # Submit all tasks
            milestone_data = [(m, i+1) for i, m in enumerate(learning_path.milestones)]
            future_to_milestone = {
                executor.submit(fetch_milestone_resources, data): data[0]
                for data in milestone_data
            }

            # Collect results as they complete
            for future in as_completed(future_to_milestone):
                milestone, resources = future.result()
                milestone.resources = resources

        print(f"✅ All resources fetched!")

        # Validate all resources to ensure they're accessible
        print(f"🔍 Validating resource URLs...")
        all_resources_to_validate = []
        for milestone in learning_path.milestones:
            for resource in milestone.resources:
                all_resources_to_validate.append({
                    'url': resource.url,
                    'title': resource.description,
                    'type': resource.type
                })

        # Run validation asynchronously
        try:
            from src.utils.resource_validator import ResourceValidator
            validator = ResourceValidator(cache_ttl_hours=24, max_retries=2)

            # Create event loop for async validation
            # NOTE(review): asyncio.get_event_loop() outside a running loop is
            # deprecated since Python 3.10 — consider always creating a new loop.
            import asyncio
            try:
                loop = asyncio.get_event_loop()
            except RuntimeError:
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)

            validated_resources = loop.run_until_complete(
                validator.validate_resources(all_resources_to_validate)
            )

            # Update milestones with validation results and filter out invalid resources
            # validated_resources is parallel to all_resources_to_validate, so a
            # single running index walks both in step.
            resource_index = 0
            for milestone in learning_path.milestones:
                validated_milestone_resources = []
                for resource in milestone.resources:
                    if resource_index < len(validated_resources):
                        validation = validated_resources[resource_index].get('validation', {})

                        # Only keep resources with high confidence (valid or temporarily unavailable)
                        if validation.get('valid', False) or validation.get('confidence', 0) >= 0.5:
                            validated_milestone_resources.append(resource)
                            if not validation.get('valid'):
                                print(f" ⚠️ Keeping potentially valid resource: {resource.url[:50]}... (confidence: {validation.get('confidence')})")
                        else:
                            print(f" ❌ Filtered out invalid resource: {resource.url[:50]}... ({validation.get('error', 'unknown error')})")

                    resource_index += 1

                # Update milestone with validated resources
                milestone.resources = validated_milestone_resources

            # Get validation stats
            stats = validator.get_validation_stats()
            print(f"✅ Validation complete: {stats['valid_count']}/{stats['total_checked']} resources valid ({stats['success_rate']}%)")

        except Exception as e:
            print(f"⚠️ Resource validation failed: {e}")
            print(f" Continuing with unvalidated resources...")
            import traceback
            traceback.print_exc()

        # Ensure each milestone has resources after validation; perform general search fallback if needed
        for milestone in learning_path.milestones:
            try:
                if not milestone.resources or len(milestone.resources) == 0:
                    print(f" ⚠️ No valid resources after validation for: {milestone.title}. Running general search fallback...")
                    contextualized_query = f"{topic}: {milestone.title}"
                    general_results = search_resources(contextualized_query, k=5, trusted_sources=None)
                    if general_results:
                        milestone.resources = [ResourceItem(**r) for r in general_results[:3]]

                if not milestone.resources or len(milestone.resources) == 0:
                    print(f" ⚠️ General search returned no results. Adding search links for: {milestone.title}")
                    yt_q = milestone.title.replace(' ', '+')
                    g_q = milestone.title.replace(' ', '+')
                    milestone.resources = [
                        ResourceItem(
                            type="Video",
                            url=f"https://www.youtube.com/results?search_query={yt_q}",
                            description=f"YouTube: {milestone.title}"
                        ),
                        ResourceItem(
                            type="Web Search",
                            url=f"https://www.google.com/search?q={g_q}",
                            description=f"Google: {milestone.title}"
                        ),
                    ]

                if len(milestone.resources) < 2:
                    print(f" ℹ️ Topping up resources for: {milestone.title}")
                    contextualized_query = f"{topic}: {milestone.title}"
                    more_results = search_resources(contextualized_query, k=5, trusted_sources=None)
                    if more_results:
                        for r in more_results:
                            if len(milestone.resources) >= 3:
                                break
                            try:
                                milestone.resources.append(ResourceItem(**r))
                            except Exception:
                                continue
            except Exception as _e:
                print(f" ⚠️ Post-validation fallback failed for {milestone.title}: {_e}")

        # Weight the weekly schedule by each milestone's estimated effort.
        topic_weights = {
            milestone.title: milestone.estimated_hours
            for milestone in learning_path.milestones
        }

        schedule = calculate_study_schedule(
            weeks=adjusted_duration,
            hours_per_week=hours_per_week,
            topic_weights=topic_weights,
        )
        learning_path.schedule = schedule

        # Re-rank each milestone's resources to favor the user's learning style.
        for milestone in learning_path.milestones:
            milestone.resources = match_resources_to_learning_style(
                resources=milestone.resources, learning_style=learning_style
            )

        learning_path.total_hours = sum(
            m.estimated_hours for m in learning_path.milestones if m.estimated_hours
        )
        learning_path.duration_weeks = adjusted_duration
        learning_path.id = str(uuid.uuid4())

        # Mark as successful
        # NOTE(review): 'success' is never read after this point — leftover flag?
        success = True

        # Log success metrics
        generation_time_ms = (time.time() - generation_start_time) * 1000
        self.obs_manager.log_metric("path_generation_success", 1.0, {
            "topic": topic,
            "expertise_level": expertise_level,
            "duration_ms": generation_time_ms,
            "milestone_count": len(learning_path.milestones),
            "user_id": user_id
        })

        self.obs_manager.log_event("path_generation_completed", {
            "topic": topic,
            "expertise_level": expertise_level,
            "milestone_count": len(learning_path.milestones),
            "total_hours": learning_path.total_hours,
            "duration_weeks": learning_path.duration_weeks,
            "generation_time_ms": generation_time_ms,
            "user_id": user_id
        })

        # --- Cache the final result ---
        self.document_store.cache_path(cache_key, learning_path.dict())
        # ---------------------------

        return learning_path
864
+
865
+ def save_path(
866
+ self, learning_path: LearningPath, output_dir: str = "learning_paths"
867
+ ) -> str:
868
+ """
869
+ Save a learning path to file.
870
+
871
+ Args:
872
+ learning_path (LearningPath): The learning path to save.
873
+ output_dir (str, optional): Directory to save the path. Defaults to "learning_paths".
874
+
875
+ Returns:
876
+ str: Path to the saved file.
877
+ """
878
+ path_dir = Path(output_dir)
879
+ path_dir.mkdir(exist_ok=True, parents=True)
880
+
881
+ safe_topic = learning_path.topic.lower().replace(" ", "_")[:30]
882
+ filename = f"{safe_topic}_{learning_path.id[:8]}.json"
883
+ file_path = path_dir / filename
884
+
885
+ with open(file_path, "w") as f:
886
+ f.write(json.dumps(learning_path.dict(), indent=2))
887
+
888
+ return str(file_path)
889
+
890
+ def load_path(
891
+ self, path_id: str, input_dir: str = "learning_paths"
892
+ ) -> Optional[LearningPath]:
893
+ """
894
+ Load a learning path from file by ID.
895
+
896
+ Args:
897
+ path_id (str): ID of the learning path to load.
898
+ input_dir (str, optional): Directory to search for the path. Defaults to "learning_paths".
899
+
900
+ Returns:
901
+ Optional[LearningPath]: The loaded learning path or None if not found.
902
+ """
903
+ path_dir = Path(input_dir)
904
+ if not path_dir.exists():
905
+ return None
906
+
907
+ for file_path in path_dir.glob(f"*_{path_id[:8]}.json"):
908
+ try:
909
+ with open(file_path, "r") as f:
910
+ path_data = json.load(f)
911
+ if path_data.get("id", "").startswith(path_id):
912
+ return LearningPath(**path_data)
913
+ except Exception:
914
+ continue
915
+
916
+ return None
src/ml/context_compressor.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Contextual compression module for reducing token usage in RAG.
3
+
4
+ Contextual compression uses an LLM to extract only the most relevant sentences
5
+ from retrieved documents, significantly reducing token count and cost.
6
+ """
7
+ import os
8
+ from typing import List, Optional
9
+ from langchain.schema import Document
10
+ from openai import OpenAI
11
+
12
+
13
class ContextCompressor:
    """
    LLM-based contextual compressor for RAG optimization.

    Takes retrieved documents and extracts only the sentences that are
    directly relevant to the user's query, reducing tokens by 40-60%.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        model: str = "gpt-3.5-turbo",
        max_tokens: int = 500
    ):
        """
        Initialize context compressor.

        Args:
            api_key: OpenAI API key (falls back to the OPENAI_API_KEY env var)
            model: Model to use for compression
            max_tokens: Maximum tokens per compressed chunk
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.model = model
        self.max_tokens = max_tokens
        # Without a key the compressor degrades to a pass-through (compress()
        # returns its input untouched).
        self.client = OpenAI(api_key=self.api_key) if self.api_key else None
        if self.client:
            print(f"✅ Context compressor initialized (model: {model})")
        else:
            print("❌ OPENAI_API_KEY not set. Compression disabled.")

    def compress(
        self,
        query: str,
        documents: List[Document]
    ) -> List[Document]:
        """
        Compress documents by extracting only the content relevant to the query.

        Args:
            query: Original search query
            documents: List of documents to compress

        Returns:
            Compressed documents; originals are kept when compression is
            disabled, a document is already short, or an API call fails.
        """
        if not self.client or not documents:
            return documents

        compressed_docs = []
        total_original_tokens = 0
        total_compressed_tokens = 0

        for doc in documents:
            # Rough token estimate: ~4 characters per token.
            original_tokens = len(doc.page_content) // 4
            total_original_tokens += original_tokens

            # A very short document is not worth an extra LLM round-trip.
            if original_tokens < 100:
                compressed_docs.append(doc)
                total_compressed_tokens += original_tokens
                continue

            try:
                condensed = self._compress_single(query, doc.page_content)
                compressed_docs.append(
                    Document(
                        page_content=condensed,
                        metadata={
                            **doc.metadata,
                            'compressed': True,
                            'original_length': len(doc.page_content),
                            'compressed_length': len(condensed)
                        }
                    )
                )
                total_compressed_tokens += len(condensed) // 4
            except Exception as e:
                print(f"⚠️ Compression failed for document: {e}")
                # Fail open: keep the original document.
                compressed_docs.append(doc)
                total_compressed_tokens += original_tokens

        # Report the aggregate token savings.
        if total_original_tokens > 0:
            savings_pct = ((total_original_tokens - total_compressed_tokens) / total_original_tokens) * 100
            print(f"📉 Compressed {total_original_tokens} → {total_compressed_tokens} tokens ({savings_pct:.1f}% reduction)")

        return compressed_docs

    def _compress_single(self, query: str, content: str) -> str:
        """
        Compress one document's text via a single chat-completion call.

        Args:
            query: Search query
            content: Document content

        Returns:
            Compressed content, or the original text when the model output is
            too short or the API call fails.
        """
        prompt = f"""You are a text compression expert. Extract only the sentences from the following text that are directly relevant to answering this query:

Query: "{query}"

Text:
{content}

Instructions:
1. Extract ONLY sentences that directly answer or relate to the query
2. Preserve the original wording - do not paraphrase
3. Remove redundant or tangential information
4. Keep the extracted sentences in their original order
5. If multiple sentences are relevant, separate them with a space

Relevant sentences:"""

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that extracts relevant information."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.1,  # low temperature keeps extraction consistent
                max_tokens=self.max_tokens
            )
            extracted = response.choices[0].message.content.strip()
        except Exception as e:
            print(f"⚠️ Single document compression failed: {e}")
            return content

        # If compression produced empty or very short text, keep the original.
        return content if len(extracted) < 50 else extracted

    def compress_batch(
        self,
        query: str,
        documents: List[Document],
        batch_size: int = 3
    ) -> List[Document]:
        """
        Compress documents in batches for efficiency.

        Args:
            query: Search query
            documents: Documents to compress
            batch_size: Number of documents to compress per API call
                (currently unused — see TODO below)

        Returns:
            Compressed documents
        """
        # For now, delegate to per-document compression.
        # TODO: Implement true batching for better efficiency
        return self.compress(query, documents)
src/ml/embeddings.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Vector embedding utilities for the AI Learning Path Generator.
3
+ Handles text vectorization for semantic search.
4
+ """
5
+ from typing import List, Dict, Any, Optional, Union
6
+ import numpy as np
7
+
8
+ # Import from langchain (old version compatible with Pydantic v1)
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from langchain.schema import Document
11
+
12
+ from src.utils.config import OPENAI_API_KEY, EMBEDDING_MODEL
13
+
14
class EmbeddingService:
    """
    Service for generating and managing text embeddings.

    Prefers the free local HuggingFace model ("all-MiniLM-L6-v2"); falls back
    to the OpenAI embedding API when the HuggingFace integration is not
    importable.
    """
    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize the embedding service.

        Args:
            api_key: Optional OpenAI API key (defaults to OPENAI_API_KEY).

        Raises:
            ValueError: If HuggingFace embeddings are unavailable and no
                OpenAI API key is provided.
        """
        self.api_key = api_key or OPENAI_API_KEY

        # Try to use free HuggingFace embeddings first, fallback to OpenAI
        try:
            from langchain.embeddings import HuggingFaceEmbeddings
            self.embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
            print("✅ EmbeddingService using free HuggingFace embeddings")
        except ImportError:
            if self.api_key:
                from langchain.embeddings import OpenAIEmbeddings
                self.embeddings = OpenAIEmbeddings(
                    api_key=self.api_key,
                    model=EMBEDDING_MODEL
                )
                print("✅ EmbeddingService using OpenAI embeddings")
            else:
                raise ValueError("HuggingFace embeddings not available and no OpenAI API key provided")

        # Text splitter used by chunk_text(); overlap keeps context across chunks.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100,
            length_function=len,
        )

    def embed_text(self, text: str) -> List[float]:
        """
        Generate embedding vector for a text string.

        Args:
            text: The text to embed

        Returns:
            Embedding vector as a list of floats

        Raises:
            ValueError: If the underlying embedding backend fails.
        """
        try:
            return self.embeddings.embed_query(text)
        except Exception as e:
            # Chain the original cause for easier debugging.
            raise ValueError(f"Failed to generate embedding: {str(e)}") from e

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """
        Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors

        Raises:
            ValueError: If the underlying embedding backend fails.
        """
        try:
            return self.embeddings.embed_documents(texts)
        except Exception as e:
            raise ValueError(f"Failed to generate document embeddings: {str(e)}") from e

    def chunk_text(
        self,
        text: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> List[Document]:
        """
        Split text into chunks for embedding.

        Args:
            text: The text to split
            metadata: Optional metadata to add to each chunk

        Returns:
            List of Document objects with text chunks
        """
        # Wrap the text in a Document so metadata propagates to every chunk.
        doc = Document(page_content=text, metadata=metadata or {})

        # Split into chunks using the splitter configured in __init__.
        chunks = self.text_splitter.split_documents([doc])

        return chunks

    def calculate_similarity(
        self,
        embedding1: List[float],
        embedding2: List[float]
    ) -> float:
        """
        Calculate cosine similarity between two embeddings.

        Args:
            embedding1: First embedding vector
            embedding2: Second embedding vector

        Returns:
            Cosine similarity in [-1, 1] (1 = same direction, 0 = orthogonal).
            Returns 0.0 when either vector has zero magnitude.
        """
        # Convert to numpy arrays
        vec1 = np.array(embedding1)
        vec2 = np.array(embedding2)

        # Calculate cosine similarity
        dot_product = np.dot(vec1, vec2)
        norm1 = np.linalg.norm(vec1)
        norm2 = np.linalg.norm(vec2)

        # Fix: return a float (previously int 0) so the return type is consistent.
        if norm1 == 0 or norm2 == 0:
            return 0.0  # Handle zero vectors

        return float(dot_product / (norm1 * norm2))
src/ml/job_market.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Helpers to fetch real-time job-market data using Perplexity API.
2
+
3
+ The function `get_job_market_stats` queries Perplexity (online search model)
4
+ with a carefully crafted prompt asking for a JSON-only response containing:
5
+ - open_positions: string (e.g. "15,000+")
6
+ - average_salary: string (e.g. "$110,000 - $150,000")
7
+ - trending_employers: array[str] of 3 employer names
8
+
9
+ Perplexity provides real-time web search results, making it perfect for
10
+ current job market data. Falls back to OpenAI if Perplexity is unavailable.
11
+
12
+ If the API or JSON parsing fails, we return a static fallback so the UI
13
+ still renders a snapshot.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import os
18
+ import json
19
+ import logging
20
+ from typing import Dict, Any
21
+
22
+ from openai import OpenAI
23
+
24
# Initialize clients
# NOTE(review): these module-level handles are never assigned or read in this
# module — clients are constructed per call inside _call_perplexity /
# _call_openai. Confirm nothing imports them before removing.
openai_client = None
perplexity_client = None

# Static snapshot returned when both providers fail (or for "__fallback__"),
# so the UI always has something to render.
_DEFAULT_SNAPSHOT: Dict[str, Any] = {
    "open_positions": "5,000+",
    "average_salary": "$120,000 - $160,000",
    "trending_employers": ["Big Tech Co", "Innovative Startup", "Data Insights Inc"],
}

# Prompt sent to the LLM; {topic} is filled in by get_job_market_stats.
# Asks for bare JSON so _extract_json can parse the reply.
PROMPT_TEMPLATE = (
    "Search the web for current US job market data for '{topic}' roles. "
    "Provide real-time statistics from job boards like LinkedIn, Indeed, Glassdoor. "
    "Return ONLY valid JSON (no markdown, no code blocks) with keys: "
    "open_positions (string like '15,000+'), "
    "average_salary (string like '$110,000 - $150,000'), "
    "trending_employers (array of 3 real company names currently hiring)."
)
42
+
43
+
44
def _call_perplexity(prompt: str, timeout: int = 45) -> str:
    """Call Perplexity API for real-time web search results."""
    api_key = os.getenv("PERPLEXITY_API_KEY")
    if not api_key:
        raise RuntimeError("PERPLEXITY_API_KEY env var not set")

    # Perplexity exposes an OpenAI-compatible endpoint, so the OpenAI SDK
    # works unchanged once pointed at their base URL.
    perplexity = OpenAI(
        api_key=api_key,
        base_url="https://api.perplexity.ai"
    )

    chat_messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that searches the web for current job market data. Always return valid JSON."
        },
        {"role": "user", "content": prompt},
    ]

    # "sonar-pro" is Perplexity's online (live web search) model.
    result = perplexity.chat.completions.create(
        model="sonar-pro",
        messages=chat_messages,
        temperature=0.2,
        max_tokens=500,
        timeout=timeout,
    )
    return result.choices[0].message.content
72
+
73
+
74
def _call_openai(prompt: str, timeout: int = 45) -> str:
    """Fallback to OpenAI if Perplexity is unavailable."""
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY env var not set")

    # Model name comes from the environment, defaulting to a cheap model.
    chosen_model = os.getenv("DEFAULT_MODEL", "gpt-4o-mini")

    openai_api = OpenAI(api_key=api_key)

    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant providing job market estimates."},
        {"role": "user", "content": prompt},
    ]

    result = openai_api.chat.completions.create(
        model=chosen_model,
        messages=chat_messages,
        temperature=0.2,
        max_tokens=300,
        timeout=timeout,
    )
    return result.choices[0].message.content
98
+
99
+
100
+ def _extract_json(text: str) -> Dict[str, Any]:
101
+ """Extract JSON from response, handling markdown code blocks."""
102
+ # Remove markdown code blocks if present
103
+ if "```" in text:
104
+ parts = text.split("```")
105
+ for part in parts:
106
+ if part.strip().startswith("json"):
107
+ text = part[4:].strip()
108
+ elif part.strip() and not part.strip().startswith("```"):
109
+ text = part.strip()
110
+
111
+ # Try direct parse
112
+ try:
113
+ return json.loads(text)
114
+ except json.JSONDecodeError:
115
+ # Try to find JSON object in text
116
+ start = text.find("{")
117
+ end = text.rfind("}") + 1
118
+ if start >= 0 and end > start:
119
+ try:
120
+ return json.loads(text[start:end])
121
+ except json.JSONDecodeError:
122
+ pass
123
+
124
+ raise ValueError("Unable to parse JSON from API response")
125
+
126
+
127
def get_job_market_stats(topic: str) -> Dict[str, Any]:
    """Return real-time job-market stats using Perplexity (with OpenAI fallback).

    Tries Perplexity first for real-time web search results.
    Falls back to OpenAI if Perplexity unavailable.
    Returns default snapshot on any failure.
    """
    # Sentinel topic used by callers that explicitly want the static snapshot.
    if topic == "__fallback__":
        return _DEFAULT_SNAPSHOT.copy()

    required_keys = ("open_positions", "average_salary", "trending_employers")
    search_prompt = PROMPT_TEMPLATE.format(topic=topic)

    # First choice: Perplexity, which performs a live web search.
    perplexity_key = os.getenv("PERPLEXITY_API_KEY")
    print(f"DEBUG: Perplexity API key present: {bool(perplexity_key)}")

    if perplexity_key:
        try:
            print(f"DEBUG: Attempting Perplexity search for '{topic}'...")
            logging.info(f"Fetching job market data for '{topic}' using Perplexity (real-time search)...")
            response_text = _call_perplexity(search_prompt)
            print(f"DEBUG: Perplexity raw response: {response_text[:200]}...")
            stats = _extract_json(response_text)
            print(f"DEBUG: Perplexity parsed data: {stats}")

            # Reject partial answers so the caller never sees a broken snapshot.
            if not all(key in stats for key in required_keys):
                raise ValueError("Missing required keys in Perplexity response")

            print(f"✅ Successfully fetched real-time job data via Perplexity")
            logging.info(f"✅ Successfully fetched real-time job data via Perplexity")
            return stats
        except Exception as err:
            print(f"ERROR: Perplexity failed: {err}")
            logging.warning(f"Perplexity job-market fetch failed: {err}. Falling back to OpenAI...")

    # Second choice: OpenAI (no live search, but a reasonable estimate).
    try:
        logging.info(f"Fetching job market data for '{topic}' using OpenAI...")
        response_text = _call_openai(search_prompt)
        stats = _extract_json(response_text)

        # Same shape validation as the Perplexity branch.
        if not all(key in stats for key in required_keys):
            raise ValueError("Missing required keys in OpenAI response")

        logging.info(f"✅ Successfully fetched job data via OpenAI")
        return stats
    except Exception as err:
        # Last resort: static snapshot so the UI still renders something.
        logging.warning(f"OpenAI job-market fetch failed: {err}. Using default snapshot.")
        return _DEFAULT_SNAPSHOT.copy()
src/ml/model_orchestrator.py ADDED
@@ -0,0 +1,1187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Model orchestrator for the AI Learning Path Generator.
3
+ Handles interactions with language models and embeddings.
4
+ """
5
+ from langchain.prompts import PromptTemplate, ChatPromptTemplate
6
+ from src.utils.observability import get_observability_manager, estimate_cost
7
+ from src.utils.cache import cache, cached
8
+ from src.utils.helpers import optimize_prompt, count_tokens, estimate_api_cost
9
+ from src.utils.config import (
10
+ OPENAI_API_KEY,
11
+ DEEPSEEK_API_KEY, # Kept for legacy compatibility
12
+ OPENROUTER_API_KEY, # OpenRouter support
13
+ DEFAULT_PROVIDER,
14
+ DEFAULT_MODEL,
15
+ OPENROUTER_FREE_MODEL, # Free model from OpenRouter
16
+ MAX_TOKENS,
17
+ TEMPERATURE
18
+ )
19
+ from langchain.chains import LLMChain
20
+ from typing import List, Dict, Any, Optional, Union, TypeVar, Type
21
+ import json
22
+ import os
23
+
24
+ # Using Pydantic v1
25
+ import pydantic
26
+ from pydantic import BaseModel as PydanticBaseModel
27
+
28
+ # Import from langchain (older version compatible with Pydantic v1)
29
+ from langchain.llms import OpenAI
30
+ from langchain.chat_models import ChatOpenAI
31
+
32
+ # For type hints
33
+ T = TypeVar('T', bound='BaseModel')
34
+
35
+
36
class BaseModel(PydanticBaseModel):
    """Base model using Pydantic v1."""
    class Config:
        # Allow non-pydantic field types (e.g. LangChain objects) without
        # Pydantic raising on schema generation.
        arbitrary_types_allowed = True
40
+
41
+ # We'll use only OpenAI for now to make the application work
42
+ # Both providers will default to using OpenAI
43
+
44
+
45
+ # Import token optimization utilities for cost savings
46
+
47
+ # Import caching utilities to avoid repeated API calls
48
+
49
+ # Import observability utilities for LLM monitoring
50
+
51
+
52
+ class ModelOrchestrator:
53
+ """
54
+ Manages AI model interactions with RAG capabilities.
55
+ """
56
+
57
+ def __init__(self, api_key: Optional[str] = None, provider: Optional[str] = None):
58
+ print("--- ModelOrchestrator.__init__ started ---")
59
+ """
60
+ Initialize the model orchestrator with RAG capabilities.
61
+
62
+ Args:
63
+ api_key: Optional API key (if not provided, will use from environment)
64
+ provider: Optional provider name ('openai', 'openrouter', or 'deepseek')
65
+ """
66
+ self.provider = provider.lower() if provider else DEFAULT_PROVIDER
67
+ self.context = []
68
+ self.goal = None
69
+ self.planning_enabled = True
70
+ self.memory = []
71
+
72
+ # Set up API key based on selected provider
73
+ if self.provider == 'openai':
74
+ self.api_key = api_key or OPENAI_API_KEY
75
+ if not self.api_key:
76
+ raise ValueError(
77
+ "OpenAI API key is required. Please provide it or set the OPENAI_API_KEY environment variable.")
78
+
79
+ print(
80
+ "--- ModelOrchestrator.__init__: Preparing to initialize ChatOpenAI ---")
81
+ print(
82
+ f"--- ModelOrchestrator.__init__: API Key: {str(self.api_key)[:15]}..., Model: {DEFAULT_MODEL}, Temp: {TEMPERATURE}, Max Tokens: {MAX_TOKENS} ---")
83
+ # self.llm = ChatOpenAI(
84
+ # api_key=self.api_key,
85
+ # model_name=DEFAULT_MODEL,
86
+ # temperature=TEMPERATURE,
87
+ # max_tokens=MAX_TOKENS
88
+ # )
89
+ print("--- ModelOrchestrator.__init__: ChatOpenAI initialization SKIPPED ---")
90
+
91
+ print(
92
+ "--- ModelOrchestrator.__init__: Preparing to initialize OpenAI (base_llm) ---")
93
+ # self.base_llm = OpenAI(
94
+ # api_key=self.api_key,
95
+ # model_name=DEFAULT_MODEL,
96
+ # temperature=TEMPERATURE,
97
+ # max_tokens=MAX_TOKENS
98
+ # )
99
+ print(
100
+ "--- ModelOrchestrator.__init__: OpenAI (base_llm) initialization SKIPPED ---")
101
+ elif self.provider == 'deepseek':
102
+ self.api_key = api_key or DEEPSEEK_API_KEY
103
+ if not self.api_key:
104
+ raise ValueError(
105
+ "DeepSeek API key is required. Please provide it or set the DEEPSEEK_API_KEY environment variable.")
106
+ print("--- ModelOrchestrator.__init__: DeepSeek provider selected, client initialization SKIPPED for now ---")
107
+ elif self.provider == 'openrouter':
108
+ self.api_key = api_key or OPENROUTER_API_KEY
109
+ if not self.api_key:
110
+ raise ValueError(
111
+ "OpenRouter API key is required. Please provide it or set the OPENROUTER_API_KEY environment variable.")
112
+ print(
113
+ "--- ModelOrchestrator.__init__: OpenRouter provider selected (free models available) ---")
114
+ # Only OpenAI, OpenRouter and DeepSeek providers are supported now
115
+ # (OpenAI is the primary and recommended provider)
116
+ else:
117
+ raise ValueError(
118
+ f"Unsupported provider: {self.provider}. Use 'openai', 'openrouter', or 'deepseek'.")
119
+
120
+ # Track current model name
121
+ self.model_name = DEFAULT_MODEL
122
+
123
+ # Initialize observability manager
124
+ self.obs_manager = get_observability_manager()
125
+
126
+ # Override default model if DeepSeek provider is selected
127
+ if self.provider == 'deepseek':
128
+ # Allow environment variable override but default to the official DeepSeek chat model
129
+ self.model_name = os.getenv("DEEPSEEK_MODEL", "deepseek-chat")
130
+ print(
131
+ f"--- ModelOrchestrator.__init__: DeepSeek provider detected, using model: {self.model_name} ---")
132
+
133
+ # Initialize the language model based on provider
134
+ print("--- ModelOrchestrator.__init__: Calling init_language_model ---")
135
+ self.init_language_model()
136
+ print("--- ModelOrchestrator.__init__ finished (LLM initialized) ---")
137
+
138
+ def init_language_model(self, model_name: Optional[str] = None, temperature: Optional[float] = None):
139
+ print(
140
+ f"--- ModelOrchestrator.init_language_model started (provider: {self.provider}, model: {model_name or self.model_name}) ---")
141
+ """
142
+ Initialize or switch the language model.
143
+
144
+ Args:
145
+ model_name: Name of the model to use
146
+ temperature: Temperature setting for the model
147
+ """
148
+ # Update model name if provided
149
+ if model_name:
150
+ self.model_name = model_name
151
+
152
+ temp = temperature if temperature is not None else TEMPERATURE
153
+
154
+ # Initialize based on provider
155
+ try:
156
+ if self.provider == 'openai':
157
+ print(
158
+ f"--- ModelOrchestrator.init_language_model: Initializing ChatOpenAI for {self.provider} ---")
159
+ self.llm = ChatOpenAI(
160
+ openai_api_key=self.api_key,
161
+ model=self.model_name,
162
+ temperature=temp,
163
+ max_tokens=MAX_TOKENS,
164
+ )
165
+ print(
166
+ f"--- ModelOrchestrator.init_language_model: ChatOpenAI for {self.provider} initialized ---")
167
+ elif self.provider == 'openrouter':
168
+ print(
169
+ f"--- ModelOrchestrator.init_language_model: Initializing ChatOpenAI for OpenRouter ---")
170
+ # Use OpenRouter free model for this provider
171
+ model_to_use = OPENROUTER_FREE_MODEL
172
+ self.model_name = model_to_use # Update model name
173
+ # OpenRouter uses OpenAI-compatible API with different endpoint
174
+ self.llm = ChatOpenAI(
175
+ openai_api_key=self.api_key,
176
+ openai_api_base="https://openrouter.ai/api/v1",
177
+ model=model_to_use,
178
+ temperature=temp,
179
+ max_tokens=MAX_TOKENS,
180
+ )
181
+ print(
182
+ f"--- ModelOrchestrator.init_language_model: ChatOpenAI for OpenRouter initialized with model: {model_to_use} ---")
183
+ elif self.provider == 'deepseek':
184
+ print(
185
+ f"--- ModelOrchestrator.init_language_model: Initializing ChatOpenAI for {self.provider} ---")
186
+ # DeepSeek uses OpenAI-compatible API
187
+ self.llm = ChatOpenAI(
188
+ openai_api_key=self.api_key,
189
+ openai_api_base="https://api.deepseek.com/v1",
190
+ model=self.model_name,
191
+ temperature=temp,
192
+ max_tokens=MAX_TOKENS,
193
+ )
194
+ print(
195
+ f"--- ModelOrchestrator.init_language_model: ChatOpenAI for DeepSeek initialized ---")
196
+ except Exception as e:
197
+ print(f"Error initializing language model: {str(e)}")
198
+ raise
199
+
200
+ def switch_provider(self, provider: str, api_key: Optional[str] = None, model_name: Optional[str] = None):
201
+ """
202
+ Switch between AI providers.
203
+
204
+ Args:
205
+ provider: The provider to switch to ('openai' or 'deepseek')
206
+ api_key: Optional API key for the provider
207
+ model_name: Optional model name to use
208
+
209
+ Returns:
210
+ str: Status message indicating the provider and model in use
211
+ """
212
+ try:
213
+ self.provider = provider.lower()
214
+
215
+ # Update API key if provided
216
+ if api_key:
217
+ self.api_key = api_key
218
+ elif self.provider == 'openai':
219
+ self.api_key = OPENAI_API_KEY
220
+ elif self.provider == 'deepseek':
221
+ self.api_key = DEEPSEEK_API_KEY
222
+ # OpenAI is the primary provider now
223
+ else:
224
+ raise ValueError(
225
+ f"Unsupported provider: {provider}. Use 'openai' or 'deepseek'.")
226
+
227
+ # Update model name if provided
228
+ if model_name:
229
+ self.model_name = model_name
230
+
231
+ # Re-initialize the language model
232
+ self.init_language_model()
233
+
234
+ return f"Switched to {self.provider} provider with model {self.model_name}"
235
+
236
+ except Exception as e:
237
+ error_msg = f"Error switching to provider {provider}: {str(e)}"
238
+ print(error_msg)
239
+ # Try to fallback to a working provider
240
+ if self.provider != 'openai':
241
+ print("Falling back to OpenAI provider")
242
+ return self.switch_provider('openai', OPENAI_API_KEY, model_name or DEFAULT_MODEL)
243
+ raise ValueError(error_msg) from e
244
+
245
    def generate_response(
        self,
        prompt: str,
        relevant_documents: Optional[List[str]] = None,
        temperature: Optional[float] = None,
        use_cache: bool = True  # NEW: Enable caching by default
    ) -> str:
        """
        Generate a text response from the language model.

        Flow: cache lookup -> prompt optimization -> direct OpenAI call ->
        observability logging -> cache store.

        Args:
            prompt: The prompt for the model
            relevant_documents: Optional list of relevant documents to add context
            temperature: Optional override for model temperature
            use_cache: Whether to use cached responses (default: True)

        Returns:
            The generated response as a string

        Raises:
            ValueError: Wrapping any failure from the underlying API call.
        """
        # Check cache first to save money! 💰
        if use_cache:
            # NOTE(review): `temperature or TEMPERATURE` treats an explicit
            # temperature of 0/0.0 as "unset" in the cache key — confirm intended.
            cache_key = cache.cache_key(
                "response",
                prompt[:200],  # First 200 chars of prompt
                str(relevant_documents)[:100] if relevant_documents else "",
                self.model_name,
                temperature or TEMPERATURE
            )

            cached_response = cache.get(cache_key)
            if cached_response:
                print("💰 Using cached response - $0.00 cost!")
                return cached_response

        # Optimize prompt to reduce token usage and save money! 💰
        full_prompt = optimize_prompt(
            prompt, relevant_documents, max_tokens=4000)

        # Log token count and estimated cost for monitoring
        input_token_count = count_tokens(full_prompt, self.model_name)
        estimated_input_cost = estimate_api_cost(
            input_token_count, self.model_name)
        print(
            f"💰 Token count: {input_token_count} (~${estimated_input_cost:.4f} input cost)")

        try:
            # Set up the temperature (explicit 0.0 IS honored here, unlike the cache key)
            temp = temperature if temperature is not None else TEMPERATURE

            print("DEBUG: About to make OpenAI API call using direct implementation...")

            # Local imports keep module import light and avoid cycles.
            import time
            from src.direct_openai import generate_completion

            try:
                start_time = time.time()
                print(f"DEBUG: Using model: {self.model_name}")
                print(f"DEBUG: Prompt length: {len(full_prompt)} chars")

                # Use our direct implementation that bypasses the client library
                response_text = generate_completion(
                    prompt=full_prompt,
                    system_message="You are an expert educational AI assistant that specializes in creating personalized learning paths.",
                    model=self.model_name,
                    temperature=temp,
                    max_tokens=MAX_TOKENS,
                    timeout=120
                )

                latency_ms = (time.time() - start_time) * 1000
                print(f"DEBUG: API call completed in {latency_ms:.2f}ms")

                # Estimate output tokens and total cost
                output_token_count = count_tokens(
                    response_text, self.model_name) if response_text else 0
                total_cost = estimate_cost(
                    self.model_name, input_token_count, output_token_count)

                # Log to observability platform (LangSmith + W&B)
                self.obs_manager.log_llm_call(
                    prompt=full_prompt,
                    response=response_text,
                    model=self.model_name,
                    metadata={
                        "temperature": temp,
                        "max_tokens": MAX_TOKENS,
                        "provider": self.provider,
                        "cached": False
                    },
                    latency_ms=latency_ms,
                    token_count=input_token_count + output_token_count,
                    cost=total_cost
                )

                # Cache the response for future use (save money!)
                # cache_key is only defined when use_cache is True, which guards this.
                if use_cache and response_text:
                    # Cache for 24 hours
                    cache.set(cache_key, response_text, ttl=86400)

                return response_text

            except Exception as e:
                # Inner handler only logs; the outer handler formats the error.
                print(f"DEBUG: API call failed: {str(e)}")
                raise

        except Exception as e:
            error_msg = f"Error generating response: {str(e)}"
            print(error_msg)
            # Try to extract more detailed error information
            try:
                import traceback
                error_traceback = traceback.format_exc()
                print(f"Error traceback:\n{error_traceback}")

                # Check if it's an OpenAI API error
                if hasattr(e, 'response') and hasattr(e.response, 'json'):
                    error_data = e.response.json()
                    print(f"OpenAI API Error: {error_data}")
                    error_msg += f"\nAPI Error: {error_data.get('error', {}).get('message', str(e))}"

            except Exception as inner_e:
                # Best-effort diagnostics only; never mask the original error.
                print(f"Error while processing error: {str(inner_e)}")

            raise ValueError(error_msg) from e
369
+
370
    def generate_response_stream(
        self,
        prompt: str,
        relevant_documents: Optional[List[str]] = None,
        temperature: Optional[float] = None,
    ):
        """
        Generate streaming response for real-time output.

        Why streaming:
        - Users see progress immediately
        - Perceived performance is better
        - Same cost as regular response!
        - Better UX = happier users

        Args:
            prompt: The prompt for the model
            relevant_documents: Optional list of relevant documents to add context
            temperature: Optional override for model temperature

        Yields:
            Chunks of response text as they arrive; on failure, a single
            "Error: ..." string is yielded instead of raising.
        """
        # Optimize prompt to reduce costs
        full_prompt = optimize_prompt(
            prompt, relevant_documents, max_tokens=4000)

        # Log token count
        token_count = count_tokens(full_prompt, self.model_name)
        estimated_cost = estimate_api_cost(token_count, self.model_name)
        print(
            f"💰 Streaming - Token count: {token_count} (~${estimated_cost:.4f} input cost)")

        temp = temperature if temperature is not None else TEMPERATURE

        try:
            # NOTE(review): this always uses the global OPENAI_API_KEY and the
            # OpenAI endpoint, ignoring self.provider / self.api_key — verify
            # this is intended for openrouter/deepseek configurations.
            from openai import OpenAI
            client = OpenAI(api_key=OPENAI_API_KEY)

            stream = client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You are an expert educational AI assistant that specializes in creating personalized learning paths."},
                    {"role": "user", "content": full_prompt}
                ],
                temperature=temp,
                max_tokens=MAX_TOKENS,
                stream=True  # Enable streaming!
            )

            # Relay only content deltas; tool/role deltas carry None content.
            for chunk in stream:
                if chunk.choices[0].delta.content is not None:
                    yield chunk.choices[0].delta.content

        except Exception as e:
            print(f"Streaming error: {str(e)}")
            yield f"Error: {str(e)}"
427
+
428
+ def generate_structured_response(
429
+ self,
430
+ prompt: str,
431
+ output_schema: str,
432
+ relevant_documents: Optional[List[str]] = None,
433
+ temperature: Optional[float] = None,
434
+ use_cache: bool = True # NEW: Enable caching by default
435
+ ) -> str:
436
+ """
437
+ Generate a structured response that follows a specific schema.
438
+
439
+ Args:
440
+ prompt: The prompt for the model
441
+ output_schema: The schema instructions for the output
442
+ relevant_documents: Optional list of relevant documents to add context
443
+ temperature: Optional override for model temperature
444
+ use_cache: Whether to use cached responses (default: True)
445
+
446
+ Returns:
447
+ The generated response as a JSON string
448
+ """
449
+ # Check cache first to save money! 💰
450
+ if use_cache:
451
+ cache_key = cache.cache_key(
452
+ "structured",
453
+ prompt[:200], # First 200 chars of prompt
454
+ output_schema[:100], # First 100 chars of schema
455
+ str(relevant_documents)[:100] if relevant_documents else "",
456
+ self.model_name,
457
+ temperature or 0.2
458
+ )
459
+
460
+ cached_response = cache.get(cache_key)
461
+ if cached_response:
462
+ print("💰 Using cached structured response - $0.00 cost!")
463
+ return cached_response
464
+ # Determine if this is a learning path generation
465
+ is_learning_path = 'LearningPath' in output_schema
466
+
467
+ # Prepare the prompt with schema instructions and emphasize required fields
468
+ required_fields_reminder = ""
469
+ if is_learning_path:
470
+ required_fields_reminder = """
471
+ IMPORTANT: Your response MUST include ALL of these required fields:
472
+ - title: String title of the learning path
473
+ - description: Detailed description of the learning path
474
+ - topic: Main topic of study
475
+ - expertise_level: Starting expertise level
476
+ - learning_style: Preferred learning style
477
+ - time_commitment: Weekly time commitment
478
+ - duration_weeks: Total duration in weeks (integer)
479
+ - goals: List of learning goals and objectives
480
+ - milestones: List of learning milestones
481
+ - prerequisites: List of prerequisites for this path
482
+ - total_hours: Total estimated hours (integer)
483
+
484
+ For each milestone, you MUST include:
485
+ - title: Short title for the milestone
486
+ - description: Detailed description
487
+ - estimated_hours: Estimated hours to complete (integer)
488
+ - resources: List of recommended learning resources
489
+ - skills_gained: List of skills gained after completion
490
+ """
491
+
492
+ schema_prompt = f"""
493
+ {prompt}
494
+
495
+ Your response should follow this schema format:
496
+ {output_schema}
497
+
498
+ {required_fields_reminder}
499
+
500
+ Please provide a valid JSON response that strictly follows this schema.
501
+ Do not include any explanatory text outside the JSON structure.
502
+ """
503
+
504
+ # Optimize prompt with context to reduce token usage 💰
505
+ full_prompt = optimize_prompt(
506
+ schema_prompt, relevant_documents, max_tokens=6000)
507
+
508
+ # Log token count and estimated cost
509
+ token_count = count_tokens(full_prompt, self.model_name)
510
+ estimated_cost = estimate_api_cost(token_count, self.model_name)
511
+ print(
512
+ f"💰 Structured response - Token count: {token_count} (~${estimated_cost:.4f} input cost)")
513
+
514
+ # Set up the temperature - lower for structured outputs
515
+ temp = temperature if temperature is not None else 0.2
516
+
517
+ # Use our direct implementation that bypasses the client library
518
+ import time
519
+ import requests
520
+ import traceback
521
+ response_text = None
522
+
523
+ try:
524
+ start_time = time.time()
525
+ print(
526
+ f"DEBUG: Generating structured response using provider: {self.provider}, model: {self.model_name}")
527
+ print(f"DEBUG: Prompt length: {len(full_prompt)} chars")
528
+
529
+ # Print the first 200 chars of the prompt for debugging
530
+ print(f"DEBUG: Prompt preview: {full_prompt[:200]}...")
531
+
532
+ # Print API key details for debugging (safely)
533
+ if self.provider == 'openai':
534
+ api_key = OPENAI_API_KEY
535
+ if api_key:
536
+ print(
537
+ f"DEBUG: Using OpenAI API key starting with: {api_key[:5]}{'*' * 10}")
538
+ else:
539
+ print("DEBUG: WARNING - No OpenAI API key found!")
540
+
541
+ elif self.provider == 'deepseek':
542
+ api_key = DEEPSEEK_API_KEY
543
+ if api_key:
544
+ print(
545
+ f"DEBUG: Using DeepSeek API key starting with: {api_key[:5]}{'*' * 10}")
546
+ else:
547
+ print("DEBUG: WARNING - No DeepSeek API key found!")
548
+
549
+ # OpenAI is the primary provider now
550
+
551
+ if self.provider == 'openai':
552
+ from src.direct_openai import generate_completion
553
+ print("Attempting to generate OpenAI completion...")
554
+ response_text = generate_completion(
555
+ prompt=full_prompt,
556
+ system_message="You are an expert AI assistant that specializes in generating structured responses following specified schemas. Always include all required fields in your JSON response.",
557
+ model=self.model_name,
558
+ temperature=temp,
559
+ max_tokens=MAX_TOKENS,
560
+ timeout=300 # Increase timeout for reliability
561
+ )
562
+ print(
563
+ f"Successfully generated completion with {len(response_text) if response_text else 0} characters")
564
+ elif self.provider == 'openrouter':
565
+ # OpenRouter uses OpenAI-compatible API via direct_openai with custom endpoint
566
+ from openai import OpenAI as OpenAIClient
567
+ print("Attempting to generate OpenRouter completion...")
568
+
569
+ client = OpenAIClient(
570
+ api_key=self.api_key,
571
+ base_url="https://openrouter.ai/api/v1"
572
+ )
573
+
574
+ # Use free model if not specified
575
+ model_to_use = self.model_name if self.model_name else OPENROUTER_FREE_MODEL
576
+
577
+ try:
578
+ completion = client.chat.completions.create(
579
+ model=model_to_use,
580
+ messages=[
581
+ {"role": "system", "content": "You are an expert AI assistant that specializes in generating structured responses following specified schemas. Always include all required fields in your JSON response."},
582
+ {"role": "user", "content": full_prompt}
583
+ ],
584
+ temperature=temp,
585
+ max_tokens=MAX_TOKENS,
586
+ timeout=300
587
+ )
588
+ response_text = completion.choices[0].message.content
589
+ print(
590
+ f"Successfully generated OpenRouter completion with {len(response_text) if response_text else 0} characters")
591
+ except Exception as e:
592
+ print(f"Error calling OpenRouter API: {e}")
593
+ response_text = None
594
+ elif self.provider == 'deepseek':
595
+ response_text = self._deepseek_completion(
596
+ full_prompt,
597
+ temp,
598
+ system_message="You are an expert AI assistant that specializes in generating structured responses following specified schemas. Always include all required fields in your JSON response."
599
+ )
600
+ # OpenAI is the primary provider now
601
+ else:
602
+ raise ValueError(f"Unknown provider: {self.provider}")
603
+
604
+ print(
605
+ f"DEBUG: API call completed in {time.time() - start_time:.2f} seconds")
606
+ if response_text:
607
+ print(
608
+ f"DEBUG: Received response with length: {len(response_text)} chars")
609
+ print(f"DEBUG: Response preview: {response_text[:100]}...")
610
+ else:
611
+ print("DEBUG: WARNING - Received empty response from API")
612
+ if is_learning_path:
613
+ # Return a fallback learning path
614
+ return self._create_fallback_learning_path()
615
+ else:
616
+ # Return a fallback generic response
617
+ return json.dumps({
618
+ "summary": "Sorry, I encountered an error retrieving information.",
619
+ "key_concepts": ["Error occurred while processing your request"],
620
+ "learning_path": ["Please try again with a different query"],
621
+ "resources": [],
622
+ "code_examples": [],
623
+ "advanced_topics": []
624
+ })
625
+
626
+ except Exception as e:
627
+ print(f"DEBUG: Structured response generation failed: {str(e)}")
628
+ print(traceback.format_exc())
629
+ if is_learning_path:
630
+ # Return a fallback learning path
631
+ return self._create_fallback_learning_path()
632
+ else:
633
+ # Return a fallback generic response
634
+ return json.dumps({
635
+ "summary": f"Sorry, I encountered an error: {str(e)}",
636
+ "key_concepts": ["Unable to extract structured information"],
637
+ "learning_path": ["Please try asking in a different way"],
638
+ "resources": [],
639
+ "code_examples": [],
640
+ "advanced_topics": [],
641
+ "career_applications": []
642
+ })
643
+
644
+ # Extract JSON from the response
645
+ try:
646
+ # Try to find JSON in the response (may be enclosed in ```json blocks)
647
+ if "```json" in response_text:
648
+ json_start = response_text.find("```json") + 7
649
+ json_end = response_text.find("```", json_start)
650
+ json_str = response_text[json_start:json_end].strip()
651
+ elif "```" in response_text:
652
+ json_start = response_text.find("```") + 3
653
+ json_end = response_text.find("```", json_start)
654
+ json_str = response_text[json_start:json_end].strip()
655
+ else:
656
+ json_str = response_text.strip()
657
+
658
+ # Validate JSON
659
+ data = json.loads(json_str)
660
+
661
+ # If expecting a learning path but received a list or wrong type, fallback
662
+ if is_learning_path and not isinstance(data, dict):
663
+ print(
664
+ "DEBUG: Expected learning path dict but received different type, returning fallback path.")
665
+ return self._create_fallback_learning_path()
666
+
667
+ # For learning paths, validate that all required fields are present
668
+ if is_learning_path:
669
+ required_fields = [
670
+ 'title', 'description', 'topic', 'expertise_level',
671
+ 'learning_style', 'time_commitment', 'duration_weeks',
672
+ 'goals', 'milestones', 'prerequisites', 'total_hours'
673
+ ]
674
+
675
+ missing_fields = [
676
+ field for field in required_fields if field not in data]
677
+ if missing_fields:
678
+ print(
679
+ f"DEBUG: Missing required fields in learning path: {missing_fields}")
680
+
681
+ # If any fields are missing, add them with default values
682
+ for field in missing_fields:
683
+ if field == 'title':
684
+ data['title'] = data.get(
685
+ 'topic', 'Learning Path') + ' Learning Path'
686
+ elif field == 'description':
687
+ data[
688
+ 'description'] = f"A comprehensive learning path for {data.get('topic', 'the requested topic')}."
689
+ elif field == 'topic':
690
+ data['topic'] = data.get(
691
+ 'title', 'General Learning').replace(' Learning Path', '')
692
+ elif field == 'expertise_level':
693
+ data['expertise_level'] = 'beginner'
694
+ elif field == 'learning_style':
695
+ data['learning_style'] = 'visual'
696
+ elif field == 'time_commitment':
697
+ data['time_commitment'] = 'moderate'
698
+ elif field == 'duration_weeks':
699
+ data['duration_weeks'] = 8
700
+ elif field == 'goals':
701
+ data['goals'] = [
702
+ f"Master {data.get('topic', 'the subject')}"]
703
+ elif field == 'milestones':
704
+ data['milestones'] = [{
705
+ 'title': 'Getting Started',
706
+ 'description': f"Introduction to {data.get('topic', 'the subject')}",
707
+ 'estimated_hours': 10,
708
+ 'resources': [{'name': 'Online Documentation', 'url': '', 'type': 'documentation'}],
709
+ 'skills_gained': [f"Basic {data.get('topic', 'subject')} knowledge"]
710
+ }]
711
+ elif field == 'prerequisites':
712
+ data['prerequisites'] = ['None']
713
+ elif field == 'total_hours':
714
+ data['total_hours'] = 40
715
+
716
+ # Also check that each milestone has the required fields
717
+ if 'milestones' in data and isinstance(data['milestones'], list):
718
+ milestone_required_fields = [
719
+ 'title', 'description', 'estimated_hours', 'resources', 'skills_gained']
720
+ for i, milestone in enumerate(data['milestones']):
721
+ milestone_missing_fields = [
722
+ field for field in milestone_required_fields if field not in milestone]
723
+
724
+ if milestone_missing_fields:
725
+ print(
726
+ f"DEBUG: Missing required fields in milestone {i+1}: {milestone_missing_fields}")
727
+
728
+ # Add missing fields with default values
729
+ for field in milestone_missing_fields:
730
+ if field == 'title':
731
+ milestone['title'] = f"Milestone {i+1}"
732
+ elif field == 'description':
733
+ milestone['description'] = f"A key learning milestone in this path."
734
+ elif field == 'estimated_hours':
735
+ milestone['estimated_hours'] = 10
736
+ elif field == 'resources':
737
+ milestone['resources'] = [
738
+ {'name': 'Online Resource', 'url': '', 'type': 'article'}]
739
+ elif field == 'skills_gained':
740
+ milestone['skills_gained'] = [
741
+ f"Skills related to {data.get('topic', 'the subject')}"]
742
+
743
+ # Cache the successful response for future use (save money!)
744
+ json_result = json.dumps(data)
745
+ if use_cache:
746
+ # Cache for 24 hours
747
+ cache.set(cache_key, json_result, ttl=86400)
748
+
749
+ return json_result
750
+ except Exception as e:
751
+ print(f"DEBUG: Error parsing initial JSON: {str(e)}")
752
+
753
+ # First cleanup attempt - remove markdown code block wrappers
754
+ cleaned_response = response_text.strip()
755
+
756
+ # Remove ```json...``` or ```...``` markdown wrappers
757
+ import re
758
+ markdown_match = re.search(
759
+ r'```(?:json)?\s*(.*?)\s*```', response_text, re.DOTALL)
760
+ if markdown_match:
761
+ cleaned_response = markdown_match.group(1).strip()
762
+ print(f"DEBUG: Extracted content from markdown code block")
763
+
764
+ # Remove common text prefixes
765
+ for prefix in ["+", "-", "*", "#", "Response:", "JSON:", "Here's", "```", "```json"]:
766
+ if cleaned_response.startswith(prefix):
767
+ cleaned_response = cleaned_response[len(prefix):].strip()
768
+
769
+ try:
770
+ # Try to parse the cleaned response
771
+ data = json.loads(cleaned_response)
772
+ print(f"DEBUG: Successfully parsed cleaned JSON")
773
+ return json.dumps(data)
774
+ except Exception as e2:
775
+ print(f"DEBUG: Error parsing cleaned JSON: {str(e2)}")
776
+
777
+ # Second attempt - find the main JSON object (start with first { and match closing })
778
+ try:
779
+ first_brace = cleaned_response.find('{')
780
+ if first_brace != -1:
781
+ # Count braces to find the matching closing brace
782
+ brace_count = 0
783
+ end_pos = first_brace
784
+ for i in range(first_brace, len(cleaned_response)):
785
+ if cleaned_response[i] == '{':
786
+ brace_count += 1
787
+ elif cleaned_response[i] == '}':
788
+ brace_count -= 1
789
+ if brace_count == 0:
790
+ end_pos = i + 1
791
+ break
792
+
793
+ potential_json = cleaned_response[first_brace:end_pos]
794
+ print(
795
+ f"DEBUG: Extracted JSON from position {first_brace} to {end_pos} ({len(potential_json)} chars)")
796
+ data = json.loads(potential_json)
797
+ print(f"DEBUG: Successfully parsed extracted JSON")
798
+ return json.dumps(data)
799
+ except Exception as e3:
800
+ print(f"DEBUG: Error in brace matching: {str(e3)}")
801
+
802
+ # Return a fallback JSON as last resort instead of raising an exception
803
+ print("DEBUG: Returning fallback JSON structure due to parsing failure")
804
+ return json.dumps({
805
+ "summary": "Failed to parse the AI's response. The content might not be in the expected JSON format.",
806
+ "key_concepts": ["JSON parsing error"],
807
+ "learning_path": ["Please try a different query or check the AI provider's output directly if possible."],
808
+ "resources": [],
809
+ "code_examples": [],
810
+ "advanced_topics": [],
811
+ "error_details": "The AI's response could not be successfully parsed as JSON after multiple attempts."
812
+ })
813
+ return json.dumps({
814
+ "summary": f"I processed your request but encountered a formatting issue. Your question was about: {response_text[:100]}...",
815
+ "key_concepts": ["Unable to extract structured information"],
816
+ "learning_path": ["Please try asking in a different way"],
817
+ "resources": [],
818
+ "code_examples": [],
819
+ "advanced_topics": [],
820
+ "career_applications": []
821
+ })
822
+
823
def _deepseek_completion(self, prompt: str, temperature: float, system_message: str = None):
    """Call the DeepSeek chat-completions API and return the raw response text.

    A system message is always included to remind the model to comply with the
    schema and strictly return JSON. Without this guard-rail the DeepSeek model
    occasionally omits required fields, which later causes Pydantic validation
    failures. If the first reply does not parse as JSON, the request is retried
    once with an extra "JSON only" instruction appended.

    Args:
        prompt: The fully rendered user prompt (already schema-annotated).
        temperature: Sampling temperature; ``None`` falls back to 0.2.
        system_message: Optional override for the default JSON guard-rail
            system prompt.

    Returns:
        The assistant message content as a string.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        Exception: Any other failure is logged with a traceback and re-raised.
    """
    import requests
    import traceback
    import json
    import time

    api_key = DEEPSEEK_API_KEY
    url = "https://api.deepseek.com/v1/chat/completions"

    system_msg = (
        system_message
        or "You are an expert AI assistant that MUST output ONLY valid JSON strictly "
        "following the user's schema instructions. Do not add any commentary, markdown "
        "code fences or explanations."
    )

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    payload_base = {
        "model": self.model_name if hasattr(self, "model_name") else "deepseek-chat",
        # BUGFIX: the previous `temperature or 0.2` silently replaced an
        # explicit 0.0 with 0.2 because 0.0 is falsy; only substitute the
        # default when the value is actually missing.
        "temperature": 0.2 if temperature is None else temperature,
        "max_tokens": MAX_TOKENS,
    }

    def _post(messages):
        # One API round-trip; logs payload size and latency for debugging.
        start = time.time()
        pl = {**payload_base, "messages": messages}
        print(
            f"DEBUG: DeepSeek request with {len(json.dumps(pl))} chars payload, "
            f"messages={len(messages)}"
        )
        resp = requests.post(url, headers=headers, json=pl, timeout=150)
        resp.raise_for_status()
        data = resp.json()
        content = data["choices"][0]["message"]["content"]
        print(
            f"DEBUG: DeepSeek response in {time.time()-start:.2f}s with "
            f"{len(content)} chars"
        )
        return content

    try:
        # 1st attempt – full prompt
        messages = [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": prompt},
        ]
        response_text = _post(messages)

        # Quick JSON sanity check; if it fails we retry with a reduced prompt.
        # BUGFIX: strip a leading "json" language tag as well as the backtick
        # fence — a correctly ```json-fenced reply previously failed this
        # check and triggered a wasted second API call.
        try:
            candidate = response_text.strip().strip("`").strip()
            if candidate.lower().startswith("json"):
                candidate = candidate[4:]
            json.loads(candidate.strip())
            return response_text
        except Exception:
            print(
                "DEBUG: DeepSeek response not valid JSON, retrying with simplified instructions...")

        # 2nd attempt – simplified prompt focusing on schema only
        simple_prompt = (
            "Provide ONLY the JSON that matches the schema. Do not wrap it in anything."
        )
        messages_retry = [
            {"role": "system", "content": system_msg},
            {"role": "user", "content": prompt + "\n\n" + simple_prompt},
        ]
        return _post(messages_retry)
    except Exception as e:
        print(f"DEBUG: DeepSeek API call failed: {str(e)}")
        print(traceback.format_exc())
        raise
904
+
905
+ def _create_fallback_learning_path(self):
906
+ """
907
+ Create a fallback learning path with default values when generation fails.
908
+ """
909
+ import datetime
910
+ import uuid
911
+ fallback_path = {
912
+ "id": str(uuid.uuid4()),
913
+ "title": "General Learning Path",
914
+ "description": "A default learning path created when specific generation failed.",
915
+ "topic": "General Topic",
916
+ "expertise_level": "beginner",
917
+ "learning_style": "visual",
918
+ "time_commitment": "moderate",
919
+ "duration_weeks": 8,
920
+ "goals": ["Build foundational knowledge", "Develop practical skills"],
921
+ "milestones": [
922
+ {
923
+ "title": "Getting Started",
924
+ "description": "Introduction to the fundamentals.",
925
+ "estimated_hours": 10,
926
+ "resources": [
927
+ {"name": "Online Documentation",
928
+ "url": "", "type": "documentation"}
929
+ ],
930
+ "skills_gained": ["Basic knowledge"]
931
+ },
932
+ {
933
+ "title": "Core Concepts",
934
+ "description": "Understanding core principles and practices.",
935
+ "estimated_hours": 15,
936
+ "resources": [
937
+ {"name": "Online Tutorial", "url": "", "type": "tutorial"}
938
+ ],
939
+ "skills_gained": ["Fundamental concepts"]
940
+ }
941
+ ],
942
+ "prerequisites": ["None"],
943
+ "total_hours": 25,
944
+ "created_at": datetime.datetime.now().isoformat()
945
+ }
946
+ return json.dumps(fallback_path)
947
+
948
def analyze_difficulty(self, content: str) -> float:
    """
    Analyze the difficulty level of educational content.

    Asks the model for a numeric rating, extracts the first number found in
    the reply, and clamps it into [0, 1]. Defaults to 0.5 (medium) when no
    score can be parsed.

    Args:
        content: The content to analyze (only the first 1000 chars are sent)

    Returns:
        Difficulty score between 0 (easiest) and 1 (hardest)
    """
    prompt = f"""
    Analyze the following educational content and rate its difficulty level on a scale from 0 to 1,
    where 0 is very basic (elementary level) and 1 is extremely advanced (expert/PhD level).

    Content:
    {content[:1000]}...

    Consider factors like:
    - Technical vocabulary and jargon
    - Complexity of concepts
    - Prerequisites required to understand
    - Density of information

    Return only a numeric score between 0 and 1 with up to 2 decimal places.
    """

    reply = self.generate_response(prompt, temperature=0.1)

    import re
    try:
        # Prefer a decimal like "0.75" (possibly embedded, e.g. "Difficulty: 0.75").
        decimal_hit = re.search(r"([0-9]\.[0-9]{1,2})", reply)
        if decimal_hit:
            return max(0.0, min(1.0, float(decimal_hit.group(1))))

        # Otherwise accept a reply that is a single bare digit.
        whole_hit = re.search(r"^([0-9])$", reply)
        if whole_hit:
            return max(0.0, min(1.0, float(whole_hit.group(1))))
    except Exception:
        pass
    return 0.5  # Default to middle difficulty
994
+
995
def generate_resource_recommendations(
        self,
        topic: str,
        learning_style: str,
        expertise_level: str,
        count: int = 5) -> List[Dict[str, Any]]:
    """
    Generate tailored resource recommendations for a topic.

    Args:
        topic: The topic to find resources for
        learning_style: Preferred learning style
        expertise_level: User's expertise level
        count: Number of resources to recommend

    Returns:
        List of resource dictionaries (empty list when the model reply
        cannot be parsed as JSON)
    """
    prompt = f"""
    Generate {count} learning resources for someone studying {topic}.

    Their learning style is {learning_style} and their expertise level is {expertise_level}.

    IMPORTANT: All resources MUST be in English only. Do not include resources in Portuguese, Spanish, or any other language.

    For each resource, include:
    1. Title (in English)
    2. Type (video, article, book, interactive, course, documentation, podcast, project)
    3. Description (1-2 sentences in English)
    4. Difficulty level (beginner, intermediate, advanced, expert)
    5. Estimated time to complete (in minutes or hours)
    6. URL (create a realistic but fictional URL if needed)

    Provide the response as a JSON array of resource objects. All text fields must be in English.
    """

    # Expected shape: a JSON array of flat resource objects.
    resource_schema = """
    [
        {
            "title": "string",
            "type": "string",
            "description": "string",
            "difficulty": "string",
            "time_estimate": "string",
            "url": "string"
        }
    ]
    """

    raw_reply = self.generate_structured_response(
        prompt=prompt,
        output_schema=resource_schema,
        temperature=0.7
    )

    # The structured-response helper should already return JSON, but guard
    # against malformed output and degrade to an empty recommendation list.
    try:
        return json.loads(raw_reply)
    except Exception:
        return []
1055
+
1056
def generate_path(self, topic: str, expertise_level: str, learning_style: str, context: Optional[List[str]] = None) -> str:
    """
    Generate a learning path based on user preferences and context using RAG.

    Args:
        topic: The learning topic
        expertise_level: User's expertise level
        learning_style: User's preferred learning style
        context: Optional extra context snippets, combined with the agent's
            stored context

    Returns:
        Generated learning path text
    """
    # FIX: annotation was `List[str] = None`, which mis-declares the default;
    # it is now `Optional[List[str]]`, consistent with `generate_answer`.
    # Combine provided context with stored context.
    full_context = self.context + (context or [])

    # Plan first when planning is enabled (hasattr guard keeps partial
    # subclasses without the planner working).
    if self.planning_enabled and hasattr(self, '_plan_path_generation'):
        self._plan_path_generation(
            topic, expertise_level, learning_style, full_context)

    # Generate path with context
    prompt = f"""Generate a learning path for the following topic:

    Topic: {topic}
    Expertise Level: {expertise_level}
    Learning Style: {learning_style}

    Context:
    {' '.join(full_context)}

    Previous answers:
    {' '.join(self.memory)}

    Generate a structured learning path with milestones and resources.
    """

    path = self._generate_text(prompt)

    # Record the generation event so later calls can reference it.
    self.memory.append(
        f"Generated path for {topic} with {expertise_level} level and {learning_style} style")

    return path
1100
+
1101
def generate_answer(self, question: str, context: Optional[List[str]] = None, temperature: Optional[float] = None) -> str:
    """
    Generate an answer to a question using RAG and agentic behavior.

    Args:
        question: The question to answer
        context: Optional extra context snippets, combined with the agent's
            stored context
        temperature: Optional temperature for response generation

    Returns:
        Generated answer
    """
    # Merge the caller-supplied context with the agent's accumulated context.
    combined_context = self.context + (context or [])

    # Run the planning step first when enabled (hasattr guard keeps partial
    # subclasses without the planner working).
    if self.planning_enabled and hasattr(self, '_plan_answer_generation'):
        self._plan_answer_generation(question, combined_context)

    # Build the RAG prompt from the merged context.
    context_text = ' '.join(combined_context)
    prompt = f"""Answer the following question based on the provided context:

    Context:
    {context_text}

    Question: {question}"""

    # Remember the question for future turns.
    self.memory.append(f"Question: {question}")

    # Delegate the actual completion to the shared response generator.
    return self.generate_response(prompt, relevant_documents=combined_context, temperature=temperature)
1133
+
1134
+ def _plan_answer_generation(self, question: str, context: List[str]) -> None:
1135
+ """
1136
+ Plan the answer generation process.
1137
+
1138
+ Args:
1139
+ question: The question to answer
1140
+ context: Context information
1141
+ """
1142
+ # Analyze the question to determine the best approach
1143
+ question_lower = question.lower()
1144
+
1145
+ # Determine if we need more context
1146
+ if len(context) < 2 and not any(keyword in question_lower for keyword in ["what", "how", "why", "when", "where", "who"]):
1147
+ self.context.append("Need more context for this question")
1148
+
1149
+ # Determine the type of question
1150
+ if "how" in question_lower:
1151
+ self.context.append("This is a procedural question")
1152
+ elif "why" in question_lower:
1153
+ self.context.append("This is an explanatory question")
1154
+ elif "what" in question_lower:
1155
+ self.context.append("This is a definitional question")
1156
+ elif "compare" in question_lower or "difference" in question_lower:
1157
+ self.context.append("This is a comparative question")
1158
+
1159
+ def _plan_path_generation(self, topic: str, expertise_level: str, learning_style: str, context: List[str]) -> None:
1160
+ """
1161
+ Plan the learning path generation process.
1162
+
1163
+ Args:
1164
+ topic: The learning topic
1165
+ expertise_level: User's expertise level
1166
+ learning_style: User's preferred learning style
1167
+ context: Context information
1168
+ """
1169
+ # Determine the appropriate depth and breadth based on expertise level
1170
+ if expertise_level == "beginner":
1171
+ self.context.append("Focus on fundamentals and basic concepts")
1172
+ elif expertise_level == "intermediate":
1173
+ self.context.append(
1174
+ "Include practical applications and case studies")
1175
+ elif expertise_level == "advanced":
1176
+ self.context.append(
1177
+ "Include advanced techniques and research papers")
1178
+
1179
+ # Adjust for learning style
1180
+ if learning_style == "visual":
1181
+ self.context.append("Prioritize video resources and diagrams")
1182
+ elif learning_style == "auditory":
1183
+ self.context.append("Prioritize podcasts and audio lectures")
1184
+ elif learning_style == "reading":
1185
+ self.context.append("Prioritize books and articles")
1186
+ elif learning_style == "kinesthetic":
1187
+ self.context.append("Prioritize hands-on projects and exercises")