Aliguinga01's picture
Upload folder using huggingface_hub
4d35814 verified
import { config } from '$lib/stores/settings.svelte';
/**
* SlotsService - Real-time processing state monitoring and token rate calculation
*
* This service provides real-time information about generation progress, token rates,
* and context usage based on timing data from ChatService streaming responses.
* It manages streaming session tracking and provides accurate processing state updates.
*
* **Architecture & Relationships:**
* - **SlotsService** (this class): Processing state monitoring
* - Receives timing data from ChatService streaming responses
* - Calculates token generation rates and context usage
* - Manages streaming session lifecycle
* - Provides real-time updates to UI components
*
* - **ChatService**: Provides timing data from `/chat/completions` streaming
* - **UI Components**: Subscribe to processing state for progress indicators
*
* **Key Features:**
* - **Real-time Monitoring**: Live processing state during generation
* - **Token Rate Calculation**: Accurate tokens/second from timing data
* - **Context Tracking**: Current context usage and remaining capacity
* - **Streaming Lifecycle**: Start/stop tracking for streaming sessions
* - **Timing Data Processing**: Converts streaming timing data to structured state
* - **Error Handling**: Graceful handling when timing data is unavailable
*
* **Processing States:**
* - `idle`: No active processing
* - `generating`: Actively generating tokens
*
* **Token Rate Calculation:**
* Uses timing data from `/chat/completions` streaming response for accurate
* real-time token generation rate measurement.
*/
export class SlotsService {
private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set();
private isStreamingActive: boolean = false;
private lastKnownState: ApiProcessingState | null = null;
/**
* Start streaming session tracking
*/
startStreaming(): void {
this.isStreamingActive = true;
}
/**
* Stop streaming session tracking
*/
stopStreaming(): void {
this.isStreamingActive = false;
}
/**
* Clear the current processing state
* Used when switching to a conversation without timing data
*/
clearState(): void {
this.lastKnownState = null;
for (const callback of this.callbacks) {
try {
callback(null);
} catch (error) {
console.error('Error in clearState callback:', error);
}
}
}
/**
* Check if currently in a streaming session
*/
isStreaming(): boolean {
return this.isStreamingActive;
}
/**
* @deprecated Polling is no longer used - timing data comes from ChatService streaming response
* This method logs a warning if called to help identify outdated usage
*/
fetchAndNotify(): void {
console.warn(
'SlotsService.fetchAndNotify() is deprecated - use timing data from ChatService instead'
);
}
subscribe(callback: (state: ApiProcessingState | null) => void): () => void {
this.callbacks.add(callback);
if (this.lastKnownState) {
callback(this.lastKnownState);
}
return () => {
this.callbacks.delete(callback);
};
}
/**
* Updates processing state with timing data from ChatService streaming response
*/
async updateFromTimingData(timingData: {
prompt_n: number;
predicted_n: number;
predicted_per_second: number;
cache_n: number;
prompt_progress?: ChatMessagePromptProgress;
}): Promise<void> {
const processingState = await this.parseCompletionTimingData(timingData);
// Only update if we successfully parsed the state
if (processingState === null) {
console.warn('Failed to parse timing data - skipping update');
return;
}
this.lastKnownState = processingState;
for (const callback of this.callbacks) {
try {
callback(processingState);
} catch (error) {
console.error('Error in timing callback:', error);
}
}
}
/**
* Gets context total from last known slots data or fetches from server
*/
private async getContextTotal(): Promise<number | null> {
if (this.lastKnownState && this.lastKnownState.contextTotal > 0) {
return this.lastKnownState.contextTotal;
}
try {
const currentConfig = config();
const apiKey = currentConfig.apiKey?.toString().trim();
const response = await fetch(`./slots`, {
headers: {
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
}
});
if (response.ok) {
const slotsData = await response.json();
if (Array.isArray(slotsData) && slotsData.length > 0) {
const slot = slotsData[0];
if (slot.n_ctx && slot.n_ctx > 0) {
return slot.n_ctx;
}
}
}
} catch (error) {
console.warn('Failed to fetch context total from /slots:', error);
}
return 4096;
}
private async parseCompletionTimingData(
timingData: Record<string, unknown>
): Promise<ApiProcessingState | null> {
const promptTokens = (timingData.prompt_n as number) || 0;
const predictedTokens = (timingData.predicted_n as number) || 0;
const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
const cacheTokens = (timingData.cache_n as number) || 0;
const promptProgress = timingData.prompt_progress as
| {
total: number;
cache: number;
processed: number;
time_ms: number;
}
| undefined;
const contextTotal = await this.getContextTotal();
if (contextTotal === null) {
console.warn('No context total available - cannot calculate processing state');
return null;
}
const currentConfig = config();
const outputTokensMax = currentConfig.max_tokens || -1;
const contextUsed = promptTokens + cacheTokens + predictedTokens;
const outputTokensUsed = predictedTokens;
const progressPercent = promptProgress
? Math.round((promptProgress.processed / promptProgress.total) * 100)
: undefined;
return {
status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle',
tokensDecoded: predictedTokens,
tokensRemaining: outputTokensMax - predictedTokens,
contextUsed,
contextTotal,
outputTokensUsed,
outputTokensMax,
hasNextToken: predictedTokens > 0,
tokensPerSecond,
temperature: currentConfig.temperature ?? 0.8,
topP: currentConfig.top_p ?? 0.95,
speculative: false,
progressPercent,
promptTokens,
cacheTokens
};
}
/**
* Get current processing state
* Returns the last known state from timing data, or null if no data available
*/
async getCurrentState(): Promise<ApiProcessingState | null> {
if (this.lastKnownState) {
return this.lastKnownState;
}
try {
// Import dynamically to avoid circular dependency
const { chatStore } = await import('$lib/stores/chat.svelte');
const messages = chatStore.activeMessages;
for (let i = messages.length - 1; i >= 0; i--) {
const message = messages[i];
if (message.role === 'assistant' && message.timings) {
const restoredState = await this.parseCompletionTimingData({
prompt_n: message.timings.prompt_n || 0,
predicted_n: message.timings.predicted_n || 0,
predicted_per_second:
message.timings.predicted_n && message.timings.predicted_ms
? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
: 0,
cache_n: message.timings.cache_n || 0
});
if (restoredState) {
this.lastKnownState = restoredState;
return restoredState;
}
}
}
} catch (error) {
console.warn('Failed to restore timing data from messages:', error);
}
return null;
}
}
export const slotsService = new SlotsService();