File size: 19,031 Bytes
31dd200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
import { SvelteSet } from 'svelte/reactivity';
import { ServerModelStatus, ModelModality } from '$lib/enums';
import { ModelsService, PropsService } from '$lib/services';
import { serverStore } from '$lib/stores/server.svelte';
import { TTLCache } from '$lib/utils';
import { MODEL_PROPS_CACHE_TTL_MS, MODEL_PROPS_CACHE_MAX_ENTRIES } from '$lib/constants/cache';

/**

 * modelsStore - Reactive store for model management in both MODEL and ROUTER modes

 *

 * This store manages:

 * - Available models list

 * - Selected model for new conversations

 * - Loaded models tracking (ROUTER mode)

 * - Model usage tracking per conversation

 * - Automatic unloading of unused models

 *

 * **Architecture & Relationships:**

 * - **ModelsService**: Stateless service for model API communication

 * - **PropsService**: Stateless service for props/modalities fetching

 * - **modelsStore** (this class): Reactive store for model state

 * - **conversationsStore**: Tracks which conversations use which models

 *

 * **API Inconsistency Workaround:**

 * In MODEL mode, `/props` returns modalities for the single model.

 * In ROUTER mode, `/props` has no modalities - must use `/props?model=<id>` per model.

 * This store normalizes this behavior so consumers don't need to know the server mode.

 *

 * **Key Features:**

 * - **MODEL mode**: Single model, always loaded

 * - **ROUTER mode**: Multi-model with load/unload capability

 * - **Auto-unload**: Automatically unloads models not used by any conversation

 * - **Lazy loading**: ensureModelLoaded() loads models on demand

 */
class ModelsStore {
	/**

	 *

	 *

	 * State

	 *

	 *

	 */

	models = $state<ModelOption[]>([]);
	routerModels = $state<ApiModelDataEntry[]>([]);
	loading = $state(false);
	updating = $state(false);
	error = $state<string | null>(null);
	selectedModelId = $state<string | null>(null);
	selectedModelName = $state<string | null>(null);

	private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map());
	private modelLoadingStates = $state<Map<string, boolean>>(new Map());

	/**

	 * Model-specific props cache with TTL

	 * Key: modelId, Value: props data including modalities

	 * TTL: 10 minutes - props don't change frequently

	 */
	private modelPropsCache = new TTLCache<string, ApiLlamaCppServerProps>({
		ttlMs: MODEL_PROPS_CACHE_TTL_MS,
		maxEntries: MODEL_PROPS_CACHE_MAX_ENTRIES
	});
	private modelPropsFetching = $state<Set<string>>(new Set());

	/**

	 * Version counter for props cache - used to trigger reactivity when props are updated

	 */
	propsCacheVersion = $state(0);

	/**

	 *

	 *

	 * Computed Getters

	 *

	 *

	 */

	get selectedModel(): ModelOption | null {
		if (!this.selectedModelId) return null;
		return this.models.find((model) => model.id === this.selectedModelId) ?? null;
	}

	get loadedModelIds(): string[] {
		return this.routerModels
			.filter((m) => m.status.value === ServerModelStatus.LOADED)
			.map((m) => m.id);
	}

	get loadingModelIds(): string[] {
		return Array.from(this.modelLoadingStates.entries())
			.filter(([, loading]) => loading)
			.map(([id]) => id);
	}

	/**

	 * Get model name in MODEL mode (single model).

	 * Extracts from model_path or model_alias from server props.

	 * In ROUTER mode, returns null (model is per-conversation).

	 */
	get singleModelName(): string | null {
		if (serverStore.isRouterMode) return null;

		const props = serverStore.props;
		if (props?.model_alias) return props.model_alias;
		if (!props?.model_path) return null;

		return props.model_path.split(/(\\|\/)/).pop() || null;
	}

	/**

	 *

	 *

	 * Modalities

	 *

	 *

	 */

	/**

	 * Get modalities for a specific model

	 * Returns cached modalities from model props

	 */
	getModelModalities(modelId: string): ModelModalities | null {
		const model = this.models.find((m) => m.model === modelId || m.id === modelId);
		if (model?.modalities) {
			return model.modalities;
		}

		const props = this.modelPropsCache.get(modelId);
		if (props?.modalities) {
			return {
				vision: props.modalities.vision ?? false,
				audio: props.modalities.audio ?? false
			};
		}

		return null;
	}

	/**

	 * Check if a model supports vision modality

	 */
	modelSupportsVision(modelId: string): boolean {
		return this.getModelModalities(modelId)?.vision ?? false;
	}

	/**

	 * Check if a model supports audio modality

	 */
	modelSupportsAudio(modelId: string): boolean {
		return this.getModelModalities(modelId)?.audio ?? false;
	}

	/**

	 * Get model modalities as an array of ModelModality enum values

	 */
	getModelModalitiesArray(modelId: string): ModelModality[] {
		const modalities = this.getModelModalities(modelId);
		if (!modalities) return [];

		const result: ModelModality[] = [];

		if (modalities.vision) result.push(ModelModality.VISION);
		if (modalities.audio) result.push(ModelModality.AUDIO);

		return result;
	}

	/**

	 * Get props for a specific model (from cache)

	 */
	getModelProps(modelId: string): ApiLlamaCppServerProps | null {
		return this.modelPropsCache.get(modelId);
	}

	/**

	 * Get context size (n_ctx) for a specific model from cached props

	 */
	getModelContextSize(modelId: string): number | null {
		const props = this.getModelProps(modelId);
		const nCtx = props?.default_generation_settings?.n_ctx;

		return typeof nCtx === 'number' ? nCtx : null;
	}

	/**

	 * Get context size for the currently selected model or null if no model is selected

	 */
	get selectedModelContextSize(): number | null {
		if (!this.selectedModelName) return null;
		return this.getModelContextSize(this.selectedModelName);
	}

	/**

	 * Check if props are being fetched for a model

	 */
	isModelPropsFetching(modelId: string): boolean {
		return this.modelPropsFetching.has(modelId);
	}

	/**

	 *

	 *

	 * Status Queries

	 *

	 *

	 */

	isModelLoaded(modelId: string): boolean {
		const model = this.routerModels.find((m) => m.id === modelId);
		return model?.status.value === ServerModelStatus.LOADED || false;
	}

	isModelOperationInProgress(modelId: string): boolean {
		return this.modelLoadingStates.get(modelId) ?? false;
	}

	getModelStatus(modelId: string): ServerModelStatus | null {
		const model = this.routerModels.find((m) => m.id === modelId);
		return model?.status.value ?? null;
	}

	getModelUsage(modelId: string): SvelteSet<string> {
		return this.modelUsage.get(modelId) ?? new SvelteSet<string>();
	}

	isModelInUse(modelId: string): boolean {
		const usage = this.modelUsage.get(modelId);
		return usage !== undefined && usage.size > 0;
	}

	/**

	 *

	 *

	 * Data Fetching

	 *

	 *

	 */

	/**

	 * Fetch list of models from server and detect server role

	 * Also fetches modalities for MODEL mode (single model)

	 */
	async fetch(force = false): Promise<void> {
		if (this.loading) return;
		if (this.models.length > 0 && !force) return;

		this.loading = true;
		this.error = null;

		try {
			if (!serverStore.props) {
				await serverStore.fetch();
			}

			const response = await ModelsService.list();

			const models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => {
				const details = response.models?.[index];
				const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : [];
				const displayNameSource =
					details?.name && details.name.trim().length > 0 ? details.name : item.id;
				const displayName = this.toDisplayName(displayNameSource);

				return {
					id: item.id,
					name: displayName,
					model: details?.model || item.id,
					description: details?.description,
					capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)),
					details: details?.details,
					meta: item.meta ?? null
				} satisfies ModelOption;
			});

			this.models = models;

			// WORKAROUND: In MODEL mode, /props returns modalities for the single model,
			// but /v1/models doesn't include modalities. We bridge this gap here.
			const serverProps = serverStore.props;
			if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) {
				const modalities: ModelModalities = {
					vision: serverProps.modalities.vision ?? false,
					audio: serverProps.modalities.audio ?? false
				};
				this.modelPropsCache.set(this.models[0].model, serverProps);
				this.models = this.models.map((model, index) =>
					index === 0 ? { ...model, modalities } : model
				);
			}
		} catch (error) {
			this.models = [];
			this.error = error instanceof Error ? error.message : 'Failed to load models';
			throw error;
		} finally {
			this.loading = false;
		}
	}

	/**

	 * Fetch router models with full metadata (ROUTER mode only)

	 * This fetches the /models endpoint which returns status info for each model

	 */
	async fetchRouterModels(): Promise<void> {
		try {
			const response = await ModelsService.listRouter();
			this.routerModels = response.data;
			await this.fetchModalitiesForLoadedModels();

			const o = this.models.filter((option) => {
				const modelProps = this.getModelProps(option.model);

				return modelProps?.webui !== false;
			});

			if (o.length === 1 && this.isModelLoaded(o[0].model)) {
				this.selectModelById(o[0].id);
			}
		} catch (error) {
			console.warn('Failed to fetch router models:', error);
			this.routerModels = [];
		}
	}

	/**

	 * Fetch props for a specific model from /props endpoint

	 * Uses caching to avoid redundant requests

	 *

	 * In ROUTER mode, this will only fetch props if the model is loaded,

	 * since unloaded models return 400 from /props endpoint.

	 *

	 * @param modelId - Model identifier to fetch props for

	 * @returns Props data or null if fetch failed or model not loaded

	 */
	async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> {
		const cached = this.modelPropsCache.get(modelId);
		if (cached) return cached;

		if (serverStore.isRouterMode && !this.isModelLoaded(modelId)) {
			return null;
		}

		if (this.modelPropsFetching.has(modelId)) return null;

		this.modelPropsFetching.add(modelId);

		try {
			const props = await PropsService.fetchForModel(modelId);
			this.modelPropsCache.set(modelId, props);
			return props;
		} catch (error) {
			console.warn(`Failed to fetch props for model ${modelId}:`, error);
			return null;
		} finally {
			this.modelPropsFetching.delete(modelId);
		}
	}

	/**

	 * Fetch modalities for all loaded models from /props endpoint

	 * This updates the modalities field in models array

	 */
	async fetchModalitiesForLoadedModels(): Promise<void> {
		const loadedModelIds = this.loadedModelIds;
		if (loadedModelIds.length === 0) return;

		const propsPromises = loadedModelIds.map((modelId) => this.fetchModelProps(modelId));

		try {
			const results = await Promise.all(propsPromises);

			// Update models with modalities
			this.models = this.models.map((model) => {
				const modelIndex = loadedModelIds.indexOf(model.model);
				if (modelIndex === -1) return model;

				const props = results[modelIndex];
				if (!props?.modalities) return model;

				const modalities: ModelModalities = {
					vision: props.modalities.vision ?? false,
					audio: props.modalities.audio ?? false
				};

				return { ...model, modalities };
			});

			this.propsCacheVersion++;
		} catch (error) {
			console.warn('Failed to fetch modalities for loaded models:', error);
		}
	}

	/**

	 * Update modalities for a specific model

	 * Called when a model is loaded or when we need fresh modality data

	 */
	async updateModelModalities(modelId: string): Promise<void> {
		try {
			const props = await this.fetchModelProps(modelId);
			if (!props?.modalities) return;

			const modalities: ModelModalities = {
				vision: props.modalities.vision ?? false,
				audio: props.modalities.audio ?? false
			};

			this.models = this.models.map((model) =>
				model.model === modelId ? { ...model, modalities } : model
			);

			this.propsCacheVersion++;
		} catch (error) {
			console.warn(`Failed to update modalities for model ${modelId}:`, error);
		}
	}

	/**

	 *

	 *

	 * Model Selection

	 *

	 *

	 */

	/**

	 * Select a model for new conversations

	 */
	async selectModelById(modelId: string): Promise<void> {
		if (!modelId || this.updating) return;
		if (this.selectedModelId === modelId) return;

		const option = this.models.find((model) => model.id === modelId);
		if (!option) throw new Error('Selected model is not available');

		this.updating = true;
		this.error = null;

		try {
			this.selectedModelId = option.id;
			this.selectedModelName = option.model;
		} finally {
			this.updating = false;
		}
	}

	/**

	 * Select a model by its model name (used for syncing with conversation model)

	 * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest")

	 */
	selectModelByName(modelName: string): void {
		const option = this.models.find((model) => model.model === modelName);
		if (option) {
			this.selectedModelId = option.id;
			this.selectedModelName = option.model;
		}
	}

	clearSelection(): void {
		this.selectedModelId = null;
		this.selectedModelName = null;
	}

	findModelByName(modelName: string): ModelOption | null {
		return this.models.find((model) => model.model === modelName) ?? null;
	}

	findModelById(modelId: string): ModelOption | null {
		return this.models.find((model) => model.id === modelId) ?? null;
	}

	hasModel(modelName: string): boolean {
		return this.models.some((model) => model.model === modelName);
	}

	/**

	 *

	 *

	 * Loading/Unloading Models

	 *

	 *

	 */

	/**

	 * WORKAROUND: Polling for model status after load/unload operations.

	 *

	 * Currently, the `/models/load` and `/models/unload` endpoints return success

	 * before the operation actually completes on the server. This means an immediate

	 * request to `/models` returns stale status (e.g., "loading" after load request,

	 * "loaded" after unload request).

	 *

	 * TODO: Remove this polling once llama-server properly waits for the operation

	 * to complete before returning success from `/load` and `/unload` endpoints.

	 * At that point, a single `fetchRouterModels()` call after the operation will

	 * be sufficient to get the correct status.

	 */

	/** Polling interval in ms for checking model status */
	private static readonly STATUS_POLL_INTERVAL = 500;
	/** Maximum polling attempts before giving up */
	private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max

	/**

	 * Poll for expected model status after load/unload operation.

	 * Keeps polling until the model reaches the expected status or max attempts reached.

	 *

	 * @param modelId - Model identifier to check

	 * @param expectedStatus - Expected status to wait for

	 * @returns Promise that resolves when expected status is reached

	 */
	private async pollForModelStatus(
		modelId: string,
		expectedStatus: ServerModelStatus
	): Promise<void> {
		for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) {
			await this.fetchRouterModels();

			const currentStatus = this.getModelStatus(modelId);
			if (currentStatus === expectedStatus) {
				return;
			}

			await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL));
		}

		console.warn(
			`Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts`
		);
	}

	/**

	 * Load a model (ROUTER mode)

	 * @param modelId - Model identifier to load

	 */
	async loadModel(modelId: string): Promise<void> {
		if (this.isModelLoaded(modelId)) {
			return;
		}

		if (this.modelLoadingStates.get(modelId)) return;

		this.modelLoadingStates.set(modelId, true);
		this.error = null;

		try {
			await ModelsService.load(modelId);
			await this.pollForModelStatus(modelId, ServerModelStatus.LOADED);

			await this.updateModelModalities(modelId);
		} catch (error) {
			this.error = error instanceof Error ? error.message : 'Failed to load model';
			throw error;
		} finally {
			this.modelLoadingStates.set(modelId, false);
		}
	}

	/**

	 * Unload a model (ROUTER mode)

	 * @param modelId - Model identifier to unload

	 */
	async unloadModel(modelId: string): Promise<void> {
		if (!this.isModelLoaded(modelId)) {
			return;
		}

		if (this.modelLoadingStates.get(modelId)) return;

		this.modelLoadingStates.set(modelId, true);
		this.error = null;

		try {
			await ModelsService.unload(modelId);

			await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED);
		} catch (error) {
			this.error = error instanceof Error ? error.message : 'Failed to unload model';
			throw error;
		} finally {
			this.modelLoadingStates.set(modelId, false);
		}
	}

	/**

	 * Ensure a model is loaded before use

	 * @param modelId - Model identifier to ensure is loaded

	 */
	async ensureModelLoaded(modelId: string): Promise<void> {
		if (this.isModelLoaded(modelId)) {
			return;
		}

		await this.loadModel(modelId);
	}

	/**

	 *

	 *

	 * Utilities

	 *

	 *

	 */

	private toDisplayName(id: string): string {
		const segments = id.split(/\\|\//);
		const candidate = segments.pop();

		return candidate && candidate.trim().length > 0 ? candidate : id;
	}

	clear(): void {
		this.models = [];
		this.routerModels = [];
		this.loading = false;
		this.updating = false;
		this.error = null;
		this.selectedModelId = null;
		this.selectedModelName = null;
		this.modelUsage.clear();
		this.modelLoadingStates.clear();
		this.modelPropsCache.clear();
		this.modelPropsFetching.clear();
	}

	/**

	 * Prune expired entries from caches.

	 * Call periodically for proactive memory cleanup.

	 */
	pruneExpiredCache(): number {
		return this.modelPropsCache.prune();
	}
}

export const modelsStore = new ModelsStore();

export const modelOptions = () => modelsStore.models;
export const routerModels = () => modelsStore.routerModels;
export const modelsLoading = () => modelsStore.loading;
export const modelsUpdating = () => modelsStore.updating;
export const modelsError = () => modelsStore.error;
export const selectedModelId = () => modelsStore.selectedModelId;
export const selectedModelName = () => modelsStore.selectedModelName;
export const selectedModelOption = () => modelsStore.selectedModel;
export const loadedModelIds = () => modelsStore.loadedModelIds;
export const loadingModelIds = () => modelsStore.loadingModelIds;
export const propsCacheVersion = () => modelsStore.propsCacheVersion;
export const singleModelName = () => modelsStore.singleModelName;
export const selectedModelContextSize = () => modelsStore.selectedModelContextSize;