|
|
import { config } from '$lib/stores/settings.svelte'; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
export class SlotsService { |
|
|
private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set(); |
|
|
private isStreamingActive: boolean = false; |
|
|
private lastKnownState: ApiProcessingState | null = null; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
startStreaming(): void { |
|
|
this.isStreamingActive = true; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
stopStreaming(): void { |
|
|
this.isStreamingActive = false; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
clearState(): void { |
|
|
this.lastKnownState = null; |
|
|
|
|
|
for (const callback of this.callbacks) { |
|
|
try { |
|
|
callback(null); |
|
|
} catch (error) { |
|
|
console.error('Error in clearState callback:', error); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
isStreaming(): boolean { |
|
|
return this.isStreamingActive; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fetchAndNotify(): void { |
|
|
console.warn( |
|
|
'SlotsService.fetchAndNotify() is deprecated - use timing data from ChatService instead' |
|
|
); |
|
|
} |
|
|
|
|
|
subscribe(callback: (state: ApiProcessingState | null) => void): () => void { |
|
|
this.callbacks.add(callback); |
|
|
|
|
|
if (this.lastKnownState) { |
|
|
callback(this.lastKnownState); |
|
|
} |
|
|
|
|
|
return () => { |
|
|
this.callbacks.delete(callback); |
|
|
}; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async updateFromTimingData(timingData: { |
|
|
prompt_n: number; |
|
|
predicted_n: number; |
|
|
predicted_per_second: number; |
|
|
cache_n: number; |
|
|
prompt_progress?: ChatMessagePromptProgress; |
|
|
}): Promise<void> { |
|
|
const processingState = await this.parseCompletionTimingData(timingData); |
|
|
|
|
|
|
|
|
if (processingState === null) { |
|
|
console.warn('Failed to parse timing data - skipping update'); |
|
|
return; |
|
|
} |
|
|
|
|
|
this.lastKnownState = processingState; |
|
|
|
|
|
for (const callback of this.callbacks) { |
|
|
try { |
|
|
callback(processingState); |
|
|
} catch (error) { |
|
|
console.error('Error in timing callback:', error); |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private async getContextTotal(): Promise<number | null> { |
|
|
if (this.lastKnownState && this.lastKnownState.contextTotal > 0) { |
|
|
return this.lastKnownState.contextTotal; |
|
|
} |
|
|
|
|
|
try { |
|
|
const currentConfig = config(); |
|
|
const apiKey = currentConfig.apiKey?.toString().trim(); |
|
|
|
|
|
const response = await fetch(`./slots`, { |
|
|
headers: { |
|
|
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) |
|
|
} |
|
|
}); |
|
|
if (response.ok) { |
|
|
const slotsData = await response.json(); |
|
|
if (Array.isArray(slotsData) && slotsData.length > 0) { |
|
|
const slot = slotsData[0]; |
|
|
if (slot.n_ctx && slot.n_ctx > 0) { |
|
|
return slot.n_ctx; |
|
|
} |
|
|
} |
|
|
} |
|
|
} catch (error) { |
|
|
console.warn('Failed to fetch context total from /slots:', error); |
|
|
} |
|
|
|
|
|
return 4096; |
|
|
} |
|
|
|
|
|
private async parseCompletionTimingData( |
|
|
timingData: Record<string, unknown> |
|
|
): Promise<ApiProcessingState | null> { |
|
|
const promptTokens = (timingData.prompt_n as number) || 0; |
|
|
const predictedTokens = (timingData.predicted_n as number) || 0; |
|
|
const tokensPerSecond = (timingData.predicted_per_second as number) || 0; |
|
|
const cacheTokens = (timingData.cache_n as number) || 0; |
|
|
const promptProgress = timingData.prompt_progress as |
|
|
| { |
|
|
total: number; |
|
|
cache: number; |
|
|
processed: number; |
|
|
time_ms: number; |
|
|
} |
|
|
| undefined; |
|
|
|
|
|
const contextTotal = await this.getContextTotal(); |
|
|
|
|
|
if (contextTotal === null) { |
|
|
console.warn('No context total available - cannot calculate processing state'); |
|
|
return null; |
|
|
} |
|
|
|
|
|
const currentConfig = config(); |
|
|
const outputTokensMax = currentConfig.max_tokens || -1; |
|
|
|
|
|
const contextUsed = promptTokens + cacheTokens + predictedTokens; |
|
|
const outputTokensUsed = predictedTokens; |
|
|
|
|
|
const progressPercent = promptProgress |
|
|
? Math.round((promptProgress.processed / promptProgress.total) * 100) |
|
|
: undefined; |
|
|
|
|
|
return { |
|
|
status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle', |
|
|
tokensDecoded: predictedTokens, |
|
|
tokensRemaining: outputTokensMax - predictedTokens, |
|
|
contextUsed, |
|
|
contextTotal, |
|
|
outputTokensUsed, |
|
|
outputTokensMax, |
|
|
hasNextToken: predictedTokens > 0, |
|
|
tokensPerSecond, |
|
|
temperature: currentConfig.temperature ?? 0.8, |
|
|
topP: currentConfig.top_p ?? 0.95, |
|
|
speculative: false, |
|
|
progressPercent, |
|
|
promptTokens, |
|
|
cacheTokens |
|
|
}; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async getCurrentState(): Promise<ApiProcessingState | null> { |
|
|
if (this.lastKnownState) { |
|
|
return this.lastKnownState; |
|
|
} |
|
|
try { |
|
|
|
|
|
const { chatStore } = await import('$lib/stores/chat.svelte'); |
|
|
const messages = chatStore.activeMessages; |
|
|
|
|
|
for (let i = messages.length - 1; i >= 0; i--) { |
|
|
const message = messages[i]; |
|
|
if (message.role === 'assistant' && message.timings) { |
|
|
const restoredState = await this.parseCompletionTimingData({ |
|
|
prompt_n: message.timings.prompt_n || 0, |
|
|
predicted_n: message.timings.predicted_n || 0, |
|
|
predicted_per_second: |
|
|
message.timings.predicted_n && message.timings.predicted_ms |
|
|
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000 |
|
|
: 0, |
|
|
cache_n: message.timings.cache_n || 0 |
|
|
}); |
|
|
|
|
|
if (restoredState) { |
|
|
this.lastKnownState = restoredState; |
|
|
return restoredState; |
|
|
} |
|
|
} |
|
|
} |
|
|
} catch (error) { |
|
|
console.warn('Failed to restore timing data from messages:', error); |
|
|
} |
|
|
|
|
|
return null; |
|
|
} |
|
|
} |
|
|
|
|
|
// Shared singleton instance used throughout the app.
export const slotsService = new SlotsService();
|
|
|