Spaces:
Sleeping
Sleeping
/**
 * AutoTune Feedback Loop Engine
 *
 * Collects quality signals (user ratings + automated heuristics) after each response,
 * stores them alongside the parameters that produced them, and uses an Exponential Moving
 * Average to learn optimal parameter adjustments per context type over time.
 *
 * The learned adjustments blend into AutoTune's parameter selection — more feedback data
 * means more influence, capped at 50% weight so base profiles remain the foundation.
 */
| import type { AutoTuneParams, ContextType } from './autotune' | |
// ── Types ────────────────────────────────────────────────────────────
/** Automated quality signals derived from the text of a single response. */
export interface ResponseHeuristics {
  /** Value of the response string's `length` property. */
  responseLength: number
  /** 0.0 = no repetition, 1.0 = very repetitive. */
  repetitionScore: number
  /** Mean number of whitespace-separated words per sentence. */
  averageSentenceLength: number
  /** Unique words / total words ratio. */
  vocabularyDiversity: number
}
/** A single user rating together with the settings and heuristics of the rated response. */
export interface FeedbackRecord {
  messageId: string
  timestamp: number
  /** Context category; selects which learned profile this record updates. */
  contextType: ContextType
  model: string
  persona: string
  /** Parameter values associated with the rated response; these feed the EMAs. */
  params: AutoTuneParams
  /** 1 = thumbs up, -1 = thumbs down. */
  rating: 1 | -1
  /** Automated heuristics stored alongside the rating. */
  heuristics: ResponseHeuristics
}
/** Running, per-context summary of what feedback has taught the engine so far. */
export interface LearnedProfile {
  contextType: ContextType
  /** Total feedback records folded into this profile. */
  sampleCount: number
  /** Number of thumbs-up records folded in. */
  positiveCount: number
  /** Number of thumbs-down records folded in. */
  negativeCount: number
  /** EMA of params from upvoted responses. */
  positiveParams: AutoTuneParams
  /** EMA of params from downvoted responses. */
  negativeParams: AutoTuneParams
  /** Computed delta to apply; a key is absent when its delta is negligible. */
  adjustments: Partial<AutoTuneParams>
  /** `Date.now()` value at the most recent update; 0 for a fresh profile. */
  lastUpdated: number
}
/** Complete state of the feedback loop: raw records plus the profiles learned from them. */
export interface FeedbackState {
  /** Feedback records in arrival order (oldest first). */
  history: FeedbackRecord[]
  /** One learned profile per context type. */
  learnedProfiles: Record<ContextType, LearnedProfile>
}
// ── Constants ────────────────────────────────────────────────────────
/** Weight for new observations in the EMA (higher = faster learning). */
const EMA_ALPHA = 0.3

/** Cap on the feedback history length, to prevent unbounded growth. */
const MAX_HISTORY = 500

/** Minimum feedback samples before learned adjustments kick in. */
const MIN_SAMPLES_TO_APPLY = 3

/** Maximum influence of learned adjustments (50%). */
const MAX_LEARNED_WEIGHT = 0.5

/** Samples needed to reach the maximum weight. */
const SAMPLES_FOR_MAX_WEIGHT = 20
/**
 * Neutral starting params.
 *
 * Used to seed both EMAs of a fresh learned profile and as the reference point
 * against which learned deltas are measured.
 */
const NEUTRAL_PARAMS: AutoTuneParams = {
  temperature: 0.7,
  top_p: 0.9,
  top_k: 50,
  frequency_penalty: 0.2,
  presence_penalty: 0.2,
  repetition_penalty: 1.1
}
// ── Heuristics Engine ────────────────────────────────────────────────
/**
 * Derive automated quality heuristics from the raw text of a response.
 * These supplement user ratings to provide signal even without explicit feedback.
 *
 * @param response - Response text to analyse.
 * @returns Length, repetition score, mean words per sentence and vocabulary
 *   diversity. Empty input yields an average sentence length of 0 and a
 *   diversity of 1.
 */
export function computeHeuristics(response: string): ResponseHeuristics {
  // Sentences are the non-blank fragments between runs of '.', '!' or '?'.
  const sentenceTexts: string[] = []
  for (const fragment of response.split(/[.!?]+/)) {
    const trimmed = fragment.trim()
    if (trimmed.length > 0) {
      sentenceTexts.push(trimmed)
    }
  }

  let wordsInSentences = 0
  for (const sentence of sentenceTexts) {
    wordsInSentences += sentence.split(/\s+/).length
  }

  let averageSentenceLength = 0
  if (sentenceTexts.length > 0) {
    averageSentenceLength = wordsInSentences / sentenceTexts.length
  }

  // Vocabulary diversity is case-insensitive; punctuation stays attached to tokens.
  const tokens = response.toLowerCase().split(/\s+/).filter(token => token.length > 0)
  let vocabularyDiversity = 1
  if (tokens.length > 0) {
    vocabularyDiversity = new Set(tokens).size / tokens.length
  }

  return {
    responseLength: response.length,
    repetitionScore: computeRepetitionScore(response),
    averageSentenceLength,
    vocabularyDiversity
  }
}
/**
 * Measure repetition as the share of word 3-grams that duplicate an earlier 3-gram.
 *
 * Words are lower-cased and split on whitespace. Texts with fewer than six
 * words are scored 0.
 *
 * @param text - Text to analyse.
 * @returns A value from 0.0 (no repetition) to 1.0 (extremely repetitive).
 */
function computeRepetitionScore(text: string): number {
  const tokens = text.toLowerCase().split(/\s+/).filter(token => token.length > 0)
  if (tokens.length < 6) return 0

  // One sliding window per start index; every occurrence after the first
  // sighting of a 3-gram counts as one repeat.
  const windowCount = tokens.length - 2
  const sightings = new Map<string, number>()
  let repeats = 0
  for (let start = 0; start < windowCount; start++) {
    const gram = `${tokens[start]} ${tokens[start + 1]} ${tokens[start + 2]}`
    const seenBefore = sightings.get(gram) || 0
    if (seenBefore > 0) {
      repeats += 1
    }
    sightings.set(gram, seenBefore + 1)
  }

  return Math.min(repeats / windowCount, 1.0)
}
// ── Learning Engine ──────────────────────────────────────────────────
/**
 * Build an empty feedback state.
 *
 * @returns A state with no history and one zero-count profile per context
 *   type, each with both EMAs seeded from a private copy of `NEUTRAL_PARAMS`.
 */
export function createInitialFeedbackState(): FeedbackState {
  const blankProfile = (contextType: ContextType): LearnedProfile => ({
    contextType,
    sampleCount: 0,
    positiveCount: 0,
    negativeCount: 0,
    // Separate copies so the two EMAs never share an object.
    positiveParams: { ...NEUTRAL_PARAMS },
    negativeParams: { ...NEUTRAL_PARAMS },
    adjustments: {},
    lastUpdated: 0
  })

  const allContexts: ContextType[] = ['code', 'creative', 'analytical', 'conversational', 'chaotic']
  const learnedProfiles = {} as Record<ContextType, LearnedProfile>
  allContexts.forEach(contextType => {
    learnedProfiles[contextType] = blankProfile(contextType)
  })

  return { history: [], learnedProfiles }
}
/**
 * Fold a new feedback record into the state and update the learned profile
 * for the record's context type using EMA.
 *
 * The input state is not mutated; a new state object is returned.
 *
 * @param state - Current feedback state.
 * @param record - Feedback record to add.
 * @returns The updated state: history trimmed to the newest `MAX_HISTORY`
 *   records, and the affected profile with refreshed counts, EMA and adjustments.
 */
export function processFeedback(
  state: FeedbackState,
  record: FeedbackRecord
): FeedbackState {
  // Append, then keep only the newest MAX_HISTORY entries.
  const history = [...state.history, record].slice(-MAX_HISTORY)

  // The state may lack an entry for this context (e.g. it was not built by
  // createInitialFeedbackState). Spreading `undefined` would yield an object
  // with no counters or EMAs, so start from a fresh neutral profile instead.
  const existing: LearnedProfile | undefined = state.learnedProfiles[record.contextType]
  const profile: LearnedProfile = existing
    ? { ...existing }
    : {
        contextType: record.contextType,
        sampleCount: 0,
        positiveCount: 0,
        negativeCount: 0,
        positiveParams: { ...NEUTRAL_PARAMS },
        negativeParams: { ...NEUTRAL_PARAMS },
        adjustments: {},
        lastUpdated: 0
      }

  profile.sampleCount++
  profile.lastUpdated = Date.now()

  if (record.rating === 1) {
    // Positive feedback: move the positive EMA toward the params that were used.
    profile.positiveCount++
    profile.positiveParams = emaUpdate(profile.positiveParams, record.params, EMA_ALPHA)
  } else {
    // Anything other than 1 is treated as negative feedback.
    profile.negativeCount++
    profile.negativeParams = emaUpdate(profile.negativeParams, record.params, EMA_ALPHA)
  }

  // Recompute adjustments: direction from negative toward positive.
  profile.adjustments = computeAdjustments(profile)

  return {
    history,
    learnedProfiles: {
      ...state.learnedProfiles,
      [record.contextType]: profile
    }
  }
}
/**
 * Exponential-moving-average step applied to every parameter:
 * new_value = (1 - alpha) * old_value + alpha * observation.
 *
 * @param current - Running average so far.
 * @param observation - New sample to blend in.
 * @param alpha - Weight of the new sample.
 * @returns A new params object; `top_k` is rounded to the nearest integer.
 */
function emaUpdate(
  current: AutoTuneParams,
  observation: AutoTuneParams,
  alpha: number
): AutoTuneParams {
  const retained = 1 - alpha
  const blend = (previous: number, sample: number): number =>
    previous * retained + sample * alpha

  return {
    temperature: blend(current.temperature, observation.temperature),
    top_p: blend(current.top_p, observation.top_p),
    top_k: Math.round(blend(current.top_k, observation.top_k)),
    frequency_penalty: blend(current.frequency_penalty, observation.frequency_penalty),
    presence_penalty: blend(current.presence_penalty, observation.presence_penalty),
    repetition_penalty: blend(current.repetition_penalty, observation.repetition_penalty)
  }
}
/**
 * Derive per-parameter adjustments from the gap between the EMA of upvoted
 * params and the EMA of downvoted params.
 *
 * Example: if upvoted responses used temp=0.3 and downvoted ones temp=0.8,
 * the temperature adjustment is negative.
 *
 * When one side has no samples there is no contrast to measure. In that case
 * a profile with at least `MIN_SAMPLES_TO_APPLY` upvotes gets a half-scale
 * nudge from neutral toward the upvoted average; otherwise nothing is returned.
 *
 * @param profile - Profile whose counts and EMAs are read.
 * @returns Deltas for parameters whose magnitude exceeds 0.01; other keys are omitted.
 */
function computeAdjustments(profile: LearnedProfile): Partial<AutoTuneParams> {
  const missingOneSide = profile.positiveCount < 1 || profile.negativeCount < 1
  if (missingOneSide) {
    return profile.positiveCount >= MIN_SAMPLES_TO_APPLY
      ? computeDeltaFromNeutral(profile.positiveParams, 0.5)
      : {}
  }

  const tunable: (keyof AutoTuneParams)[] = [
    'temperature', 'top_p', 'top_k',
    'frequency_penalty', 'presence_penalty', 'repetition_penalty'
  ]

  const result: Partial<AutoTuneParams> = {}
  tunable.forEach(name => {
    const likedOffset = profile.positiveParams[name] - NEUTRAL_PARAMS[name]
    const dislikedOffset = profile.negativeParams[name] - NEUTRAL_PARAMS[name]
    // Push toward what was liked and away from what was disliked, at half strength.
    const shift = (likedOffset - dislikedOffset) * 0.5
    // Drop trivial adjustments.
    if (Math.abs(shift) > 0.01) {
      result[name] = shift
    }
  })
  return result
}
/**
 * Build a scaled adjustment pointing from `NEUTRAL_PARAMS` toward the
 * positively-rated average.
 *
 * @param positiveParams - EMA of params from upvoted responses.
 * @param scale - Multiplier applied to each raw difference.
 * @returns Scaled deltas whose magnitude exceeds 0.01; other keys are omitted.
 */
function computeDeltaFromNeutral(
  positiveParams: AutoTuneParams,
  scale: number
): Partial<AutoTuneParams> {
  const tunable: (keyof AutoTuneParams)[] = [
    'temperature', 'top_p', 'top_k',
    'frequency_penalty', 'presence_penalty', 'repetition_penalty'
  ]

  return tunable.reduce<Partial<AutoTuneParams>>((nudges, name) => {
    const scaled = (positiveParams[name] - NEUTRAL_PARAMS[name]) * scale
    if (Math.abs(scaled) > 0.01) {
      nudges[name] = scaled
    }
    return nudges
  }, {})
}
// ── Integration with AutoTune ────────────────────────────────────────
/**
 * Apply learned adjustments to a base parameter set.
 *
 * The weight grows linearly with the profile's sample count and is capped at
 * `MAX_LEARNED_WEIGHT`, which is reached at `SAMPLES_FOR_MAX_WEIGHT` samples.
 *
 * @param baseParams - Parameters chosen before learning is applied; not mutated.
 * @param contextType - Context whose learned profile should be used.
 * @param learnedProfiles - Learned profiles keyed by context type.
 * @returns The resulting params, whether anything was applied, and a
 *   human-readable note. When the profile is missing, has fewer than
 *   `MIN_SAMPLES_TO_APPLY` samples or has no adjustments, `baseParams` itself
 *   is returned with `applied: false` and an empty note.
 */
export function applyLearnedAdjustments(
  baseParams: AutoTuneParams,
  contextType: ContextType,
  learnedProfiles: Record<ContextType, LearnedProfile>
): { params: AutoTuneParams; applied: boolean; note: string } {
  const profile = learnedProfiles[contextType]
  if (!profile || profile.sampleCount < MIN_SAMPLES_TO_APPLY || Object.keys(profile.adjustments).length === 0) {
    return { params: baseParams, applied: false, note: '' }
  }

  // Weight scales from 0 to MAX_LEARNED_WEIGHT based on sample count.
  const weight = Math.min(
    (profile.sampleCount / SAMPLES_FOR_MAX_WEIGHT) * MAX_LEARNED_WEIGHT,
    MAX_LEARNED_WEIGHT
  )

  const adjusted = { ...baseParams }
  const appliedKeys: string[] = []
  for (const [key, delta] of Object.entries(profile.adjustments)) {
    if (delta === undefined) continue
    const k = key as keyof AutoTuneParams
    const next = (adjusted[k] as number) + (delta as number) * weight
    // top_k is kept integral elsewhere in this module (emaUpdate rounds it),
    // so a fractional weighted delta must not leak into the final params.
    adjusted[k] = k === 'top_k' ? Math.round(next) : next
    appliedKeys.push(key)
  }
  // NOTE(review): results are not clamped to each parameter's valid range;
  // the ranges are not visible in this module — confirm the caller clamps.

  const note = `Learned: ${appliedKeys.length} params adjusted (${profile.sampleCount} samples, ${Math.round(weight * 100)}% weight)`
  return { params: adjusted, applied: true, note }
}
// ── Stats / Display Helpers ──────────────────────────────────────────
/**
 * Summarise the feedback learning system for display.
 *
 * `totalFeedback`, `positiveRate` and the oldest/newest timestamps are derived
 * from `state.history`. The per-context breakdown is read from the learned
 * profiles' own counters.
 *
 * @param state - Feedback state to summarise.
 * @returns Overall totals, a per-context breakdown, and the timestamps of the
 *   first and last history entries (`null` when the history is empty).
 */
export function getFeedbackStats(state: FeedbackState): {
  totalFeedback: number
  positiveRate: number
  contextBreakdown: Record<ContextType, { total: number; positive: number; negative: number; hasLearned: boolean }>
  oldestRecord: number | null
  newestRecord: number | null
} {
  const contexts: ContextType[] = ['code', 'creative', 'analytical', 'conversational', 'chaotic']
  const { history } = state

  const totalFeedback = history.length
  let positiveCount = 0
  for (const entry of history) {
    if (entry.rating === 1) positiveCount++
  }

  const contextBreakdown = {} as Record<ContextType, { total: number; positive: number; negative: number; hasLearned: boolean }>
  for (const ctx of contexts) {
    // A state may lack a profile for some context; report zeros instead of
    // throwing on a property access of undefined.
    const profile: LearnedProfile | undefined = state.learnedProfiles[ctx]
    contextBreakdown[ctx] = profile
      ? {
          total: profile.sampleCount,
          positive: profile.positiveCount,
          negative: profile.negativeCount,
          hasLearned: profile.sampleCount >= MIN_SAMPLES_TO_APPLY && Object.keys(profile.adjustments).length > 0
        }
      : { total: 0, positive: 0, negative: 0, hasLearned: false }
  }

  // Both are undefined for an empty history.
  const oldest = history[0]
  const newest = history[history.length - 1]

  return {
    totalFeedback,
    positiveRate: totalFeedback > 0 ? positiveCount / totalFeedback : 0,
    contextBreakdown,
    oldestRecord: oldest ? oldest.timestamp : null,
    newestRecord: newest ? newest.timestamp : null
  }
}