| import { CategoryEmbeddings } from "@midday/categories"; |
| import { logger } from "@midday/logger"; |
| import { eq, inArray } from "drizzle-orm"; |
| import type { Database } from "../client"; |
| import { upsertCategoryEmbedding } from "../queries/transaction-category-embeddings"; |
| import { transactionCategoryEmbeddings } from "../schema"; |
|
|
/**
 * Input accepted by `generateCategoryEmbedding`.
 */
export type GenerateCategoryEmbeddingParams = {
  /** Category name; it is both the text that gets embedded and the lookup key. */
  name: string;
  /** Forwarded as the `system` flag when the embedding is stored. Defaults to `false`. */
  system?: boolean;
  /**
   * Model identifier to store with the embedding. When omitted (or empty),
   * the model reported by the embedding service is stored instead.
   */
  model?: string;
};
|
|
| |
| |
| |
| |
/**
 * Makes sure a stored embedding exists for a single category name.
 *
 * The function first looks for a row with the same name. If one is found it
 * returns immediately without calling the embedding service. Otherwise it
 * embeds the name, upserts the result and reports that a new row was written.
 *
 * This function does not throw for failures inside its `try` block: any error
 * is logged and turned into a `{ success: false, error }` result.
 *
 * @param db - Database handle used for the lookup and the upsert.
 * @param params - Category name plus optional `system` flag and model override.
 * @returns `success` tells whether the embedding is available, `existed`
 *   whether it was already stored before this call, and `error` carries the
 *   failure message when `success` is `false`.
 */
export async function generateCategoryEmbedding(
  db: Database,
  params: GenerateCategoryEmbeddingParams,
) {
  const { name, system = false, model } = params;

  try {
    // A single matching row is enough to know the work is already done.
    const matches = await db
      .select({ name: transactionCategoryEmbeddings.name })
      .from(transactionCategoryEmbeddings)
      .where(eq(transactionCategoryEmbeddings.name, name))
      .limit(1);

    const alreadyStored = matches.length > 0;
    if (alreadyStored) {
      logger.info(`Embedding already exists for category: "${name}"`);
      return { success: true, existed: true };
    }

    const generated = await new CategoryEmbeddings().embed(name);

    await upsertCategoryEmbedding(db, {
      name,
      embedding: generated.embedding,
      system,
      // An explicit (non-empty) model wins over the one the service reports.
      model: model || generated.model,
    });

    logger.info(`Generated embedding for category: "${name}"`);
    return { success: true, existed: false };
  } catch (error) {
    logger.error(`Failed to generate embedding for "${name}":`, { error });

    const message = error instanceof Error ? error.message : "Unknown error";
    return { success: false, existed: false, error: message };
  }
}
|
|
| |
| |
| |
| |
/**
 * Generates and stores embeddings for several categories in one pass.
 *
 * Flow:
 * 1. A single query finds which of the given names already have a stored
 *    embedding; those are reported as successful and counted as `skipped`.
 * 2. The remaining names are embedded with one `embedMany` call and each
 *    vector is upserted concurrently. A failed upsert only fails its own
 *    entry.
 * 3. If the lookup or the bulk embedding call throws, the categories that
 *    have not been reported yet are retried one by one through
 *    `generateCategoryEmbedding`.
 *
 * Every input category ends up with exactly one entry in `results`, and is
 * counted in exactly one of `processed`, `skipped` or `errors`.
 *
 * @param db - Database handle used for the lookup and the upserts.
 * @param categories - Categories to embed; a missing `system` is stored as `false`.
 * @param model - Optional model identifier to store. When omitted (or empty),
 *   the model reported by the embedding service is stored instead.
 * @returns The three counters plus the per-category `results` list.
 */
export async function generateCategoryEmbeddingsBatch(
  db: Database,
  categories: Array<{ name: string; system?: boolean }>,
  model?: string,
) {
  let processed = 0;
  let skipped = 0;
  let errors = 0;
  const results: Array<{ name: string; success: boolean; error?: string }> = [];

  // Single place that records an outcome and bumps the matching counter.
  const report = (
    entry: { name: string; success: boolean; error?: string },
    existed = false,
  ) => {
    results.push(entry);
    if (!entry.success) {
      errors++;
    } else if (existed) {
      skipped++;
    } else {
      processed++;
    }
  };

  // Categories that have not been reported yet. The fallback path works on
  // this list so that names already counted as skipped are never reported
  // (and counted) a second time.
  let pending = categories;

  try {
    const categoryNames = categories.map((cat) => cat.name);
    const existingEmbeddings =
      categoryNames.length > 0
        ? await db
            .select({ name: transactionCategoryEmbeddings.name })
            .from(transactionCategoryEmbeddings)
            .where(inArray(transactionCategoryEmbeddings.name, categoryNames))
        : [];

    const existingNames = new Set(existingEmbeddings.map((e) => e.name));

    // Split the input: already-stored names are reported right away, the rest
    // still needs an embedding.
    const categoriesToProcess: typeof categories = [];
    for (const cat of categories) {
      if (existingNames.has(cat.name)) {
        logger.info(`Embedding already exists for category: "${cat.name}"`);
        report({ name: cat.name, success: true }, true);
      } else {
        categoriesToProcess.push(cat);
      }
    }
    pending = categoriesToProcess;

    if (categoriesToProcess.length === 0) {
      return { processed, skipped, errors, results };
    }

    const embedService = new CategoryEmbeddings();
    const newCategoryNames = categoriesToProcess.map((cat) => cat.name);

    // One bulk call; `embeddings[i]` is matched to `categoriesToProcess[i]`.
    const { embeddings, model: embeddingModel } =
      await embedService.embedMany(newCategoryNames);

    const batchResults = await Promise.allSettled(
      categoriesToProcess.map(async (category, index) => {
        try {
          const embedding = embeddings[index];
          if (!embedding) {
            throw new Error(
              `No embedding generated for category: ${category.name}`,
            );
          }

          await upsertCategoryEmbedding(db, {
            name: category.name,
            embedding,
            system: category.system ?? false,
            model: model || embeddingModel,
          });

          logger.info(`Generated embedding for category: "${category.name}"`);
          return { name: category.name, success: true };
        } catch (error) {
          const message =
            error instanceof Error ? error.message : "Unknown error";

          logger.error("Failed to store embedding for category", {
            name: category.name,
            error: message,
          });

          return { name: category.name, success: false, error: message };
        }
      }),
    );

    // `allSettled` keeps input order, so a rejected slot can still be
    // attributed to its category by index.
    batchResults.forEach((outcome, index) => {
      if (outcome.status === "fulfilled") {
        report(outcome.value);
        return;
      }

      report({
        name: categoriesToProcess[index]?.name ?? "unknown",
        success: false,
        error: outcome.reason?.message || "Promise rejected",
      });
    });
  } catch (error) {
    logger.error("Failed to generate batch embeddings:", { error });

    // Fall back to per-category generation, but only for what is still
    // pending. Anything reported before the failure keeps its single entry.
    const fallbackTargets = pending;
    const fallbackResults = await Promise.allSettled(
      fallbackTargets.map((category) =>
        generateCategoryEmbedding(db, {
          name: category.name,
          system: category.system ?? false,
          model,
        }),
      ),
    );

    fallbackResults.forEach((outcome, index) => {
      const name = fallbackTargets[index]?.name ?? "unknown";

      if (outcome.status === "rejected") {
        report({
          name,
          success: false,
          error: outcome.reason?.message || "Promise rejected",
        });
        return;
      }

      const result = outcome.value;
      // A pre-existing embedding counts as skipped, same as in the main path.
      report(
        { name, success: result.success, error: result.error },
        result.existed,
      );
    });
  }

  return { processed, skipped, errors, results };
}
|
|