"""

Configuration for different compute environments

Switch between configs based on available hardware

"""

import torch
import os

class Config:
    """Base configuration shared by all hardware-specific configs.

    Subclasses override the model/device settings; data paths and agent
    settings are inherited unchanged.
    """

    # Data paths
    CURRICULUM_DATA = "neu_graph_analyzed_clean.pkl"
    AGENT_DB = "curriculum_agent.db"

    # Model settings (override in subclasses)
    LLM_MODEL = None        # HuggingFace model id, or None for no LLM
    EMBEDDING_MODEL = None  # sentence-embedding model id
    DEVICE = None           # "cuda", "cpu", or "auto"; None behaves like "auto"
    QUANTIZATION = None     # e.g. "4bit", or None for full precision

    # Agent settings
    AGENT_CYCLE_MINUTES = 60
    MAX_COURSES_PER_SEMESTER = 4

    @classmethod
    def get_device(cls):
        """Resolve ``cls.DEVICE`` to a ``torch.device``.

        ``"auto"`` — and the base-class default of ``None``, which previously
        crashed ``torch.device(None)`` with a TypeError — selects CUDA when
        available, otherwise CPU.
        """
        if cls.DEVICE in (None, "auto"):
            return torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return torch.device(cls.DEVICE)


class H200Config(Config):
    """Settings for a high-memory data-center GPU (H200, 80GB VRAM)."""

    DEVICE = "cuda"
    LLM_MODEL = "meta-llama/Llama-3.1-70B-Instruct"
    EMBEDDING_MODEL = "BAAI/bge-large-en-v1.5"
    # 80GB of VRAM fits the 70B model without quantization.
    QUANTIZATION = None


class ColabConfig(Config):
    """Settings for a Google Colab T4 GPU (16GB VRAM)."""

    DEVICE = "cuda"
    LLM_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
    EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
    # 4-bit quantization keeps the 8B model within T4 memory.
    QUANTIZATION = "4bit"


class LocalGPUConfig(Config):
    """Settings for a consumer-class local GPU (8-12GB VRAM)."""

    DEVICE = "cuda"
    LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
    EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
    # 4-bit quantization is required to fit a 7B model in ~8GB.
    QUANTIZATION = "4bit"


class CPUConfig(Config):
    """Settings for machines without a GPU (CPU inference only)."""

    DEVICE = "cpu"
    # Small models chosen for tolerable CPU latency:
    LLM_MODEL = "microsoft/phi-2"  # 2.7B params
    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # 22M params
    QUANTIZATION = None


class MinimalConfig(Config):
    """Lightest-weight settings for testing and development."""

    DEVICE = "cpu"
    # Embeddings only — no LLM is loaded at all.
    LLM_MODEL = None
    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
    QUANTIZATION = None


def get_config():
    """Pick the best ``Config`` subclass for the current environment.

    Precedence:
      1. ``CURRICULUM_CONFIG`` env var — one of ``"h200"``, ``"colab"``,
         ``"local"``, ``"cpu"``, ``"minimal"`` (case-insensitive).
      2. GPU-memory auto-detection when CUDA is available.
      3. CPU fallback.

    Returns the config *class* (not an instance).
    """
    # Explicit override via environment variable wins over auto-detection.
    env_config = os.environ.get("CURRICULUM_CONFIG", "").lower()
    env_map = {
        "h200": H200Config,
        "colab": ColabConfig,
        # "local" was previously unreachable via the env var — every other
        # config had a key, so LocalGPUConfig gets one for consistency.
        "local": LocalGPUConfig,
        "cpu": CPUConfig,
        "minimal": MinimalConfig,
    }
    if env_config in env_map:
        return env_map[env_config]

    # Auto-detect based on hardware
    if not torch.cuda.is_available():
        print("No GPU detected, using CPUConfig")
        return CPUConfig

    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9  # GB
    if gpu_mem > 70:
        print(f"Detected high-end GPU ({gpu_mem:.1f}GB), using H200Config")
        return H200Config
    if gpu_mem > 14:
        print(f"Detected mid-range GPU ({gpu_mem:.1f}GB), using ColabConfig")
        return ColabConfig
    print(f"Detected small GPU ({gpu_mem:.1f}GB), using LocalGPUConfig")
    return LocalGPUConfig


# Usage example
if __name__ == "__main__":
    config = get_config()
    print(f"Selected config: {config.__name__}")
    # Report each setting of the chosen config, one per line.
    for label, value in (
        ("LLM", config.LLM_MODEL),
        ("Embedder", config.EMBEDDING_MODEL),
        ("Device", config.DEVICE),
        ("Quantization", config.QUANTIZATION),
    ):
        print(f"{label}: {value}")