|
|
import { config } from '$lib/stores/settings.svelte'; |
|
|
import { slotsService } from './slots'; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Service for sending chat-completion requests to the llama.cpp server
 * (`./v1/chat/completions`), supporting both streaming (SSE) and
 * non-streaming responses, plus server-props retrieval.
 */
export class ChatService {
  // Controller for the single in-flight completion request; null when idle.
  // sendMessage() aborts any previous request before creating a new one.
  private abortController: AbortController | null = null;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Sends a chat-completion request to `./v1/chat/completions`.
 *
 * Accepts either ready-to-send API messages or database messages (which are
 * converted, attachments included), injects the configured system message,
 * forwards only explicitly-provided sampling options, and dispatches to the
 * streaming or non-streaming response handler.
 *
 * @param messages - API messages, or database messages with optional extras.
 * @param options - Callbacks plus optional sampling/server parameters.
 * @returns The full response text in non-streaming mode; void when streaming
 *          or when the request was aborted.
 * @throws A user-friendly `Error` on network/server failure (also passed to
 *         `onError`); never throws for a deliberate abort.
 */
async sendMessage(
  messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[],
  options: SettingsChatServiceOptions = {}
): Promise<string | void> {
  const {
    stream,
    onChunk,
    onComplete,
    onError,
    // Sampling parameters — forwarded to the server only when defined.
    temperature,
    max_tokens,
    dynatemp_range,
    dynatemp_exponent,
    top_k,
    top_p,
    min_p,
    xtc_probability,
    xtc_threshold,
    typ_p,
    // Penalty parameters.
    repeat_last_n,
    repeat_penalty,
    presence_penalty,
    frequency_penalty,
    dry_multiplier,
    dry_base,
    dry_allowed_length,
    dry_penalty_last_n,
    samplers,
    custom,
    timings_per_token
  } = options;

  const currentConfig = config();

  // Only one request may be in flight at a time: cancel any previous one,
  // then create a fresh controller for this request.
  this.abort();
  this.abortController = new AbortController();

  // Normalize input: database messages (recognized by their persistence
  // fields) are converted to API format; API messages pass through.
  const normalizedMessages: ApiChatMessageData[] = messages
    .map((msg) => {
      if ('id' in msg && 'convId' in msg && 'timestamp' in msg) {
        const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] };
        return ChatService.convertMessageToChatServiceData(dbMsg);
      } else {
        return msg as ApiChatMessageData;
      }
    })
    .filter((msg) => {
      // Drop system messages with empty/whitespace-only string content;
      // everything else is kept.
      if (msg.role === 'system') {
        const content = typeof msg.content === 'string' ? msg.content : '';
        return content.trim().length > 0;
      }
      return true;
    });

  // Ensure the configured system message (if any) leads the conversation.
  const processedMessages = this.injectSystemMessage(normalizedMessages);

  const requestBody: ApiChatCompletionRequest = {
    messages: processedMessages.map((msg: ApiChatMessageData) => ({
      role: msg.role,
      content: msg.content
    })),
    stream
  };

  requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto';

  // Forward sampling parameters only when explicitly provided, so the
  // server's own defaults apply otherwise.
  if (temperature !== undefined) requestBody.temperature = temperature;
  if (max_tokens !== undefined) {
    // null/0 are treated as "no limit"; the server expects -1 for that.
    requestBody.max_tokens = max_tokens !== null && max_tokens !== 0 ? max_tokens : -1;
  }

  if (dynatemp_range !== undefined) requestBody.dynatemp_range = dynatemp_range;
  if (dynatemp_exponent !== undefined) requestBody.dynatemp_exponent = dynatemp_exponent;
  if (top_k !== undefined) requestBody.top_k = top_k;
  if (top_p !== undefined) requestBody.top_p = top_p;
  if (min_p !== undefined) requestBody.min_p = min_p;
  if (xtc_probability !== undefined) requestBody.xtc_probability = xtc_probability;
  if (xtc_threshold !== undefined) requestBody.xtc_threshold = xtc_threshold;
  if (typ_p !== undefined) requestBody.typ_p = typ_p;

  if (repeat_last_n !== undefined) requestBody.repeat_last_n = repeat_last_n;
  if (repeat_penalty !== undefined) requestBody.repeat_penalty = repeat_penalty;
  if (presence_penalty !== undefined) requestBody.presence_penalty = presence_penalty;
  if (frequency_penalty !== undefined) requestBody.frequency_penalty = frequency_penalty;
  if (dry_multiplier !== undefined) requestBody.dry_multiplier = dry_multiplier;
  if (dry_base !== undefined) requestBody.dry_base = dry_base;
  if (dry_allowed_length !== undefined) requestBody.dry_allowed_length = dry_allowed_length;
  if (dry_penalty_last_n !== undefined) requestBody.dry_penalty_last_n = dry_penalty_last_n;

  // Samplers may arrive as a semicolon-separated string or as an array.
  if (samplers !== undefined) {
    requestBody.samplers =
      typeof samplers === 'string'
        ? samplers.split(';').filter((s: string) => s.trim())
        : samplers;
  }

  if (timings_per_token !== undefined) requestBody.timings_per_token = timings_per_token;

  // Custom parameters (JSON string or object) are merged last and may
  // override anything above; invalid JSON is ignored with a warning.
  if (custom) {
    try {
      const customParams = typeof custom === 'string' ? JSON.parse(custom) : custom;
      Object.assign(requestBody, customParams);
    } catch (error) {
      console.warn('Failed to parse custom parameters:', error);
    }
  }

  try {
    const apiKey = currentConfig.apiKey?.toString().trim();

    const response = await fetch(`./v1/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
      },
      body: JSON.stringify(requestBody),
      signal: this.abortController.signal
    });

    if (!response.ok) {
      // Surface server-reported errors through onError AND by throwing.
      const error = await this.parseErrorResponse(response);
      if (onError) {
        onError(error);
      }
      throw error;
    }

    if (stream) {
      return this.handleStreamResponse(
        response,
        onChunk,
        onComplete,
        onError,
        options.onReasoningChunk
      );
    } else {
      return this.handleNonStreamResponse(response, onComplete, onError);
    }
  } catch (error) {
    // A deliberate abort is not an error: exit quietly.
    if (error instanceof Error && error.name === 'AbortError') {
      console.log('Chat completion request was aborted');
      return;
    }

    // Map low-level network failures to user-friendly messages.
    let userFriendlyError: Error;

    if (error instanceof Error) {
      if (error.name === 'TypeError' && error.message.includes('fetch')) {
        userFriendlyError = new Error(
          'Unable to connect to server - please check if the server is running'
        );
        userFriendlyError.name = 'NetworkError';
      } else if (error.message.includes('ECONNREFUSED')) {
        userFriendlyError = new Error('Connection refused - server may be offline');
        userFriendlyError.name = 'NetworkError';
      } else if (error.message.includes('ETIMEDOUT')) {
        userFriendlyError = new Error('Request timed out - the server took too long to respond');
        userFriendlyError.name = 'TimeoutError';
      } else {
        userFriendlyError = error;
      }
    } else {
      userFriendlyError = new Error('Unknown error occurred while sending message');
    }

    console.error('Error in sendMessage:', error);
    if (onError) {
      onError(userFriendlyError);
    }
    throw userFriendlyError;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private async handleStreamResponse( |
|
|
response: Response, |
|
|
onChunk?: (chunk: string) => void, |
|
|
onComplete?: ( |
|
|
response: string, |
|
|
reasoningContent?: string, |
|
|
timings?: ChatMessageTimings |
|
|
) => void, |
|
|
onError?: (error: Error) => void, |
|
|
onReasoningChunk?: (chunk: string) => void |
|
|
): Promise<void> { |
|
|
const reader = response.body?.getReader(); |
|
|
|
|
|
if (!reader) { |
|
|
throw new Error('No response body'); |
|
|
} |
|
|
|
|
|
const decoder = new TextDecoder(); |
|
|
let aggregatedContent = ''; |
|
|
let fullReasoningContent = ''; |
|
|
let hasReceivedData = false; |
|
|
let lastTimings: ChatMessageTimings | undefined; |
|
|
let streamFinished = false; |
|
|
|
|
|
try { |
|
|
let chunk = ''; |
|
|
while (true) { |
|
|
const { done, value } = await reader.read(); |
|
|
if (done) break; |
|
|
|
|
|
chunk += decoder.decode(value, { stream: true }); |
|
|
const lines = chunk.split('\n'); |
|
|
chunk = lines.pop() || ''; |
|
|
|
|
|
for (const line of lines) { |
|
|
if (line.startsWith('data: ')) { |
|
|
const data = line.slice(6); |
|
|
if (data === '[DONE]') { |
|
|
streamFinished = true; |
|
|
continue; |
|
|
} |
|
|
|
|
|
try { |
|
|
const parsed: ApiChatCompletionStreamChunk = JSON.parse(data); |
|
|
|
|
|
const content = parsed.choices[0]?.delta?.content; |
|
|
const reasoningContent = parsed.choices[0]?.delta?.reasoning_content; |
|
|
const timings = parsed.timings; |
|
|
const promptProgress = parsed.prompt_progress; |
|
|
|
|
|
if (timings || promptProgress) { |
|
|
this.updateProcessingState(timings, promptProgress); |
|
|
|
|
|
|
|
|
if (timings) { |
|
|
lastTimings = timings; |
|
|
} |
|
|
} |
|
|
|
|
|
if (content) { |
|
|
hasReceivedData = true; |
|
|
aggregatedContent += content; |
|
|
onChunk?.(content); |
|
|
} |
|
|
|
|
|
if (reasoningContent) { |
|
|
hasReceivedData = true; |
|
|
fullReasoningContent += reasoningContent; |
|
|
onReasoningChunk?.(reasoningContent); |
|
|
} |
|
|
} catch (e) { |
|
|
console.error('Error parsing JSON chunk:', e); |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
if (streamFinished) { |
|
|
if (!hasReceivedData && aggregatedContent.length === 0) { |
|
|
const noResponseError = new Error('No response received from server. Please try again.'); |
|
|
throw noResponseError; |
|
|
} |
|
|
|
|
|
onComplete?.(aggregatedContent, fullReasoningContent || undefined, lastTimings); |
|
|
} |
|
|
} catch (error) { |
|
|
const err = error instanceof Error ? error : new Error('Stream error'); |
|
|
|
|
|
onError?.(err); |
|
|
|
|
|
throw err; |
|
|
} finally { |
|
|
reader.releaseLock(); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private async handleNonStreamResponse( |
|
|
response: Response, |
|
|
onComplete?: ( |
|
|
response: string, |
|
|
reasoningContent?: string, |
|
|
timings?: ChatMessageTimings |
|
|
) => void, |
|
|
onError?: (error: Error) => void |
|
|
): Promise<string> { |
|
|
try { |
|
|
const responseText = await response.text(); |
|
|
|
|
|
if (!responseText.trim()) { |
|
|
const noResponseError = new Error('No response received from server. Please try again.'); |
|
|
throw noResponseError; |
|
|
} |
|
|
|
|
|
const data: ApiChatCompletionResponse = JSON.parse(responseText); |
|
|
const content = data.choices[0]?.message?.content || ''; |
|
|
const reasoningContent = data.choices[0]?.message?.reasoning_content; |
|
|
|
|
|
if (reasoningContent) { |
|
|
console.log('Full reasoning content:', reasoningContent); |
|
|
} |
|
|
|
|
|
if (!content.trim()) { |
|
|
const noResponseError = new Error('No response received from server. Please try again.'); |
|
|
throw noResponseError; |
|
|
} |
|
|
|
|
|
onComplete?.(content, reasoningContent); |
|
|
|
|
|
return content; |
|
|
} catch (error) { |
|
|
const err = error instanceof Error ? error : new Error('Parse error'); |
|
|
|
|
|
onError?.(err); |
|
|
|
|
|
throw err; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Converts a database message (plus optional attachments) into API format.
 *
 * Messages without attachments keep their plain string content. Otherwise a
 * multipart content array is built: the text body first, then attachments
 * grouped by kind — images, text files, audio, and finally PDFs (rendered as
 * page images when pre-processed, as embedded text otherwise).
 *
 * @param message - The stored message, optionally carrying extras.
 * @returns The message in API chat format.
 */
static convertMessageToChatServiceData(
  message: DatabaseMessage & { extra?: DatabaseMessageExtra[] }
): ApiChatMessageData {
  const role = message.role as 'user' | 'assistant' | 'system';
  const extras = message.extra ?? [];

  // No attachments: plain string content needs no multipart encoding.
  if (extras.length === 0) {
    return { role, content: message.content };
  }

  const parts: ApiChatMessageContentPart[] = [];

  if (message.content) {
    parts.push({ type: 'text', text: message.content });
  }

  // Grouped passes keep the original part ordering: images, then text
  // files, then audio, then PDFs.
  for (const extra of extras) {
    if (extra.type === 'imageFile') {
      parts.push({ type: 'image_url', image_url: { url: extra.base64Url } });
    }
  }

  for (const extra of extras) {
    if (extra.type === 'textFile') {
      parts.push({
        type: 'text',
        text: `\n\n--- File: ${extra.name} ---\n${extra.content}`
      });
    }
  }

  for (const extra of extras) {
    if (extra.type === 'audioFile') {
      parts.push({
        type: 'input_audio',
        input_audio: {
          data: extra.base64Data,
          format: extra.mimeType.includes('wav') ? 'wav' : 'mp3'
        }
      });
    }
  }

  for (const extra of extras) {
    if (extra.type === 'pdfFile') {
      if (extra.processedAsImages && extra.images) {
        for (const pageImage of extra.images) {
          parts.push({ type: 'image_url', image_url: { url: pageImage } });
        }
      } else {
        parts.push({
          type: 'text',
          text: `\n\n--- PDF File: ${extra.name} ---\n${extra.content}`
        });
      }
    }
  }

  return { role, content: parts };
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static async getServerProps(): Promise<ApiLlamaCppServerProps> { |
|
|
try { |
|
|
const currentConfig = config(); |
|
|
const apiKey = currentConfig.apiKey?.toString().trim(); |
|
|
|
|
|
const response = await fetch(`./props`, { |
|
|
headers: { |
|
|
'Content-Type': 'application/json', |
|
|
...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) |
|
|
} |
|
|
}); |
|
|
|
|
|
if (!response.ok) { |
|
|
throw new Error(`Failed to fetch server props: ${response.status}`); |
|
|
} |
|
|
|
|
|
const data = await response.json(); |
|
|
return data; |
|
|
} catch (error) { |
|
|
console.error('Error fetching server props:', error); |
|
|
throw error; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public abort(): void { |
|
|
if (this.abortController) { |
|
|
this.abortController.abort(); |
|
|
this.abortController = null; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Ensures the configured system message (if any) is the first message.
 *
 * With no configured system message the list passes through untouched. An
 * existing leading system message is replaced only when its content differs;
 * otherwise the original array is returned as-is. The input is never mutated.
 *
 * @param messages - The normalized conversation messages.
 * @returns The messages with the configured system message in front.
 */
private injectSystemMessage(messages: ApiChatMessageData[]): ApiChatMessageData[] {
  const systemMessage = config().systemMessage?.toString().trim();

  if (!systemMessage) {
    return messages;
  }

  const systemMsg: ApiChatMessageData = {
    role: 'system',
    content: systemMessage
  };

  const [first, ...rest] = messages;

  if (first?.role === 'system') {
    // Keep the original array when the leading system message already
    // matches; otherwise swap it out for the configured one.
    return first.content === systemMessage ? messages : [systemMsg, ...rest];
  }

  return [systemMsg, ...messages];
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
private async parseErrorResponse(response: Response): Promise<Error> { |
|
|
try { |
|
|
const errorText = await response.text(); |
|
|
const errorData: ApiErrorResponse = JSON.parse(errorText); |
|
|
|
|
|
const message = errorData.error?.message || 'Unknown server error'; |
|
|
const error = new Error(message); |
|
|
error.name = response.status === 400 ? 'ServerError' : 'HttpError'; |
|
|
|
|
|
return error; |
|
|
} catch { |
|
|
|
|
|
const fallback = new Error(`Server error (${response.status}): ${response.statusText}`); |
|
|
fallback.name = 'HttpError'; |
|
|
return fallback; |
|
|
} |
|
|
} |
|
|
|
|
|
private updateProcessingState( |
|
|
timings?: ChatMessageTimings, |
|
|
promptProgress?: ChatMessagePromptProgress |
|
|
): void { |
|
|
|
|
|
const tokensPerSecond = |
|
|
timings?.predicted_ms && timings?.predicted_n |
|
|
? (timings.predicted_n / timings.predicted_ms) * 1000 |
|
|
: 0; |
|
|
|
|
|
|
|
|
slotsService |
|
|
.updateFromTimingData({ |
|
|
prompt_n: timings?.prompt_n || 0, |
|
|
predicted_n: timings?.predicted_n || 0, |
|
|
predicted_per_second: tokensPerSecond, |
|
|
cache_n: timings?.cache_n || 0, |
|
|
prompt_progress: promptProgress |
|
|
}) |
|
|
.catch((error) => { |
|
|
console.warn('Failed to update processing state:', error); |
|
|
}); |
|
|
} |
|
|
} |
|
|
|
|
|
// Shared singleton instance used by the rest of the app.
export const chatService = new ChatService();
|
|
|