/**
 * Multi-Algorithm Learning Engine
 * Supports nine RL algorithms for intelligent hook optimization
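 *
 * @example
 * // Hypothetical end-to-end usage; task names, actions, and reward values are illustrative.
 * const engine = new LearningEngine();
 * engine.configure('agent-routing', { algorithm: 'double-q', learningRate: 0.1 });
 * engine.update('agent-routing', {
 *     state: 'typescript-error', action: 'linter-agent',
 *     reward: 1, nextState: 'resolved', done: true,
 * });
 * const best = engine.getBestAction('agent-routing', 'typescript-error', ['linter-agent', 'builder-agent']);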
 */
export type LearningAlgorithm =
    | 'q-learning'
    | 'sarsa'
    | 'double-q'
    | 'actor-critic'
    | 'ppo'
    | 'decision-transformer'
    | 'monte-carlo'
    | 'td-lambda'
    | 'dqn';
export type TaskType =
    | 'agent-routing'
    | 'error-avoidance'
    | 'confidence-scoring'
    | 'trajectory-learning'
    | 'context-ranking'
    | 'memory-recall';
/** Hyperparameters for one task's learner; the optional fields apply only to specific algorithms. */
export interface LearningConfig {
    algorithm: LearningAlgorithm;
    /** Step size α */
    learningRate: number;
    /** Discount factor γ */
    discountFactor: number;
    /** Exploration rate ε */
    epsilon: number;
    /** Trace decay for 'td-lambda' */
    lambda?: number;
    /** Ratio clip for 'ppo' */
    clipRange?: number;
    /** Entropy bonus weight for 'ppo' */
    entropyCoef?: number;
    /** Context window for 'decision-transformer' */
    sequenceLength?: number;
}
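/** A single (state, action, reward, nextState) transition; `done` marks the end of an episode. */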
export interface Experience {
    state: string;
    action: string;
    reward: number;
    nextState: string;
    done: boolean;
    timestamp?: number;
}
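/** An ordered episode of experiences together with its accumulated reward. */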
export interface LearningTrajectory {
    experiences: Experience[];
    totalReward: number;
    completed: boolean;
}
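/** Per-algorithm bookkeeping returned by getStats(). */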
export interface AlgorithmStats {
    algorithm: LearningAlgorithm;
    updates: number;
    avgReward: number;
    convergenceScore: number;
    lastUpdate: number;
}
export declare class LearningEngine {
    private configs;
    private qTables;
    private qTables2;
    private eligibilityTraces;
    private actorWeights;
    private criticValues;
    private trajectories;
    private stats;
    private rewardHistory;
    constructor();
    /**
     * Configure algorithm for a specific task type
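     *
     * @example
     * // A sketch; hyperparameter values are illustrative, not documented defaults.
     * engine.configure('error-avoidance', {
     *     algorithm: 'td-lambda',
     *     learningRate: 0.05,
     *     discountFactor: 0.95,
     *     epsilon: 0.1,
     *     lambda: 0.9,
     * });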
     */
    configure(task: TaskType, config: Partial<LearningConfig>): void;
    /**
     * Get current configuration for a task
     */
    getConfig(task: TaskType): LearningConfig;
    /**
     * Update Q-value using the appropriate algorithm
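     *
     * @example
     * // A sketch; the returned number is assumed to be the updated value estimate.
     * const value = engine.update('error-avoidance', {
     *     state: 'edit:missing-import', action: 'run-typecheck',
     *     reward: 0.5, nextState: 'edit:clean', done: false,
     * });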
     */
    update(task: TaskType, experience: Experience): number;
    /**
     * Get best action for a state
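     *
     * @example
     * // A sketch with illustrative candidate actions; confidence is assumed to lie in [0, 1].
     * const { action, confidence } = engine.getBestAction(
     *     'agent-routing', 'typescript-error', ['linter-agent', 'builder-agent']);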
     */
    getBestAction(task: TaskType, state: string, actions: string[]): {
        action: string;
        confidence: number;
    };
    /**
     * Get action probabilities (for Actor-Critic and PPO)
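     *
     * @example
     * // A sketch; probabilities are assumed to sum to ~1 across the given actions.
     * const probs = engine.getActionProbabilities('typescript-error', ['linter-agent', 'builder-agent']);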
     */
    getActionProbabilities(state: string, actions: string[]): Map<string, number>;
    /**
     * Standard Q-Learning: Off-policy TD control
     * Q(s,a) += α * (r + γ * max_a' Q(s',a') - Q(s,a))
     */
    private qLearningUpdate;
    /**
     * SARSA: On-policy, more conservative; bootstraps from the action actually
     * taken rather than the greedy one
     * Q(s,a) += α * (r + γ * Q(s',a') - Q(s,a))
     */
    private sarsaUpdate;
    /**
     * Double Q-Learning: Reduces overestimation bias
     * Uses two Q-tables; each update randomly picks one table and uses the other for the target:
     * Q1(s,a) += α * (r + γ * Q2(s', argmax_a' Q1(s',a')) - Q1(s,a))
     */
    private doubleQUpdate;
    /**
     * Actor-Critic: Policy gradient with value baseline
     * δ = r + γ * V(s') - V(s); the critic moves V(s) toward the target by α * δ
     * and the actor adjusts its preference for the taken action in proportion to δ
     */
    private actorCriticUpdate;
    /**
     * PPO: Clipped policy gradient for stable training
     * L = min(ρ * A, clip(ρ, 1 - ε, 1 + ε) * A), where ρ is the new/old probability
     * ratio, A the advantage, and ε the configured clipRange
     */
    private ppoUpdate;
    /**
     * TD(λ): Temporal difference with eligibility traces
     * The visited pair's trace is incremented, every trace decays by γλ,
     * and each traced pair receives Q(s,a) += α * δ * e(s,a)
     */
    private tdLambdaUpdate;
    /**
     * Monte Carlo: Full-episode learning; values are updated toward the observed
     * discounted return G_t = r_t + γ*r_(t+1) + γ²*r_(t+2) + ... once an episode completes
     */
    private monteCarloUpdate;
    /**
     * Decision Transformer: Sequence modeling for trajectories
     */
    private decisionTransformerUpdate;
    /**
     * DQN: Simplified Deep Q-Network (no actual neural network);
     * borrows the experience-replay and target-network concepts
     */
    private dqnUpdate;
    private getQTable;
    private getQTable2;
    private getEligibilityTraces;
    private softmaxConfidence;
    private addToCurrentTrajectory;
    private sampleFromReplay;
    private updateStats;
    /**
     * Get statistics for all algorithms
     */
    getStats(): Map<LearningAlgorithm, AlgorithmStats>;
    /**
     * Get statistics summary
     */
    getStatsSummary(): {
        bestAlgorithm: LearningAlgorithm;
        totalUpdates: number;
        avgReward: number;
        algorithms: AlgorithmStats[];
    };
    /**
     * Export state for persistence
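     *
     * @example
     * // A persistence sketch; where the snapshot is stored is up to the caller.
     * const snapshot = JSON.stringify(engine.export());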
     */
    export(): {
        qTables: Record<string, Record<string, number>>;
        qTables2: Record<string, Record<string, number>>;
        criticValues: Record<string, number>;
        trajectories: LearningTrajectory[];
        stats: Record<string, AlgorithmStats>;
        configs: Record<string, LearningConfig>;
        rewardHistory: number[];
    };
    /**
     * Import state from persistence
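     *
     * @example
     * // Restores a snapshot produced by export(); pairs with the export() example.
     * const restored = new LearningEngine();
     * restored.import(JSON.parse(snapshot));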
     */
    import(data: ReturnType<LearningEngine['export']>): void;
    /**
     * Clear all learning data
     */
    clear(): void;
    /**
     * Get available algorithms
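     *
     * @example
     * // Enumerate the supported algorithms, e.g. to build a picker.
     * for (const { algorithm, bestFor } of LearningEngine.getAlgorithms()) {
     *     console.log(`${algorithm}: ${bestFor}`);
     * }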
     */
    static getAlgorithms(): {
        algorithm: LearningAlgorithm;
        description: string;
        bestFor: string;
    }[];
}
export default LearningEngine;
//# sourceMappingURL=learning-engine.d.ts.map