| import * as fs from 'fs'; |
| import * as path from 'path'; |
| import FormData from 'form-data'; |
| import { logger } from '@librechat/data-schemas'; |
| import { HttpsProxyAgent } from 'https-proxy-agent'; |
| import { |
| FileSources, |
| envVarRegex, |
| extractEnvVariable, |
| extractVariableName, |
| } from 'librechat-data-provider'; |
| import type { TCustomConfig } from 'librechat-data-provider'; |
| import type { AxiosError, AxiosRequestConfig } from 'axios'; |
| import type { |
| MistralFileUploadResponse, |
| MistralSignedUrlResponse, |
| MistralOCRUploadResult, |
| MistralOCRError, |
| OCRResultPage, |
| ServerRequest, |
| OCRResult, |
| OCRImage, |
| } from '~/types'; |
| import { logAxiosError, createAxiosInstance } from '~/utils/axios'; |
| import { readFileAsBuffer } from '~/utils/files'; |
| import { loadServiceKey } from '~/utils/key'; |
|
|
/** HTTP client shared by every request issued from this module. */
const axios = createAxiosInstance();

/** Model name used when the OCR configuration does not provide one. */
const DEFAULT_MISTRAL_MODEL = 'mistral-ocr-latest';

/** API root used when no base URL is configured or can be resolved. */
const DEFAULT_MISTRAL_BASE_URL = 'https://api.mistral.ai/v1';
|
|
| |
/** Credentials and endpoint resolved for an OCR request. */
interface AuthConfig {
  /** API root that request paths such as `/files` and `/ocr` are appended to. */
  baseURL: string;
  /** Token sent as `Authorization: Bearer <apiKey>`. */
  apiKey: string;
}
|
|
| |
/**
 * Fields of a Google service-account key that this module reads.
 * All are optional in the type; callers validate their presence before use.
 */
interface GoogleServiceAccount {
  /** Project identifier placed in the Vertex AI request URL. */
  project_id?: string;
  /** Account e-mail used as the `iss` claim of the signed JWT. */
  client_email?: string;
  /** Private key used to sign the JWT with RSA-SHA256. */
  private_key?: string;
}
|
|
| |
/** Everything an OCR upload strategy needs for a single uploaded file. */
interface OCRContext {
  /** The uploaded file; its `path`, `size`, `mimetype` and `originalname` are read. */
  file: Express.Multer.File;
  /** Incoming request; supplies `config.ocr` and the current user's id. */
  req: ServerRequest;
  /**
   * Looks up the values of the named auth fields for a user.
   * Names listed in `optional` are passed through to the loader unchanged.
   */
  loadAuthValues: (params: {
    userId: string;
    authFields: string[];
    optional?: Set<string>;
  }) => Promise<Record<string, string | undefined>>;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Uploads a local file to `{baseURL}/files` as multipart form data with
 * `purpose` set to `ocr`.
 *
 * @param params.apiKey - Token sent as `Authorization: Bearer <apiKey>`.
 * @param params.filePath - Path of the file on local disk to stream.
 * @param params.baseURL - API root; defaults to `DEFAULT_MISTRAL_BASE_URL`.
 * @param params.fileName - File name reported in the multipart part; when
 *   empty, the basename of `filePath` is used instead.
 * @returns The response body of the upload request.
 * @throws Whatever the HTTP request rejects with, unmodified.
 */
export async function uploadDocumentToMistral({
  apiKey,
  filePath,
  baseURL = DEFAULT_MISTRAL_BASE_URL,
  fileName = '',
}: {
  apiKey: string;
  filePath: string;
  baseURL?: string;
  fileName?: string;
}): Promise<MistralFileUploadResponse> {
  const form = new FormData();
  form.append('purpose', 'ocr');
  const actualFileName = fileName || path.basename(filePath);
  const fileStream = fs.createReadStream(filePath);

  try {
    form.append('file', fileStream, { filename: actualFileName });

    const config: AxiosRequestConfig = {
      headers: {
        Authorization: `Bearer ${apiKey}`,
        ...form.getHeaders(),
      },
      // Uploaded documents can be large; do not let the client cap the body size.
      maxBodyLength: Infinity,
      maxContentLength: Infinity,
    };

    if (process.env.PROXY) {
      config.httpsAgent = new HttpsProxyAgent(process.env.PROXY);
    }

    const res = await axios.post(`${baseURL}/files`, form, config);
    return res.data;
  } finally {
    // A request that fails or ends early can leave the stream only partially
    // read, which keeps its file descriptor open. destroy() releases it and is
    // a no-op when the stream has already been fully consumed and closed.
    fileStream.destroy();
  }
}
|
|
/**
 * Requests a signed URL for a previously uploaded file.
 *
 * @param params.apiKey - Token sent as `Authorization: Bearer <apiKey>`.
 * @param params.fileId - Identifier of the uploaded file.
 * @param params.expiry - Value of the `expiry` query parameter; defaults to 24.
 * @param params.baseURL - API root; defaults to `DEFAULT_MISTRAL_BASE_URL`.
 * @returns The response body of `GET {baseURL}/files/{fileId}/url`.
 * @throws Re-throws the request error after logging its message.
 */
export async function getSignedUrl({
  apiKey,
  fileId,
  expiry = 24,
  baseURL = DEFAULT_MISTRAL_BASE_URL,
}: {
  apiKey: string;
  fileId: string;
  expiry?: number;
  baseURL?: string;
}): Promise<MistralSignedUrlResponse> {
  const requestConfig: AxiosRequestConfig = {
    headers: { Authorization: `Bearer ${apiKey}` },
  };

  const proxy = process.env.PROXY;
  if (proxy) {
    requestConfig.httpsAgent = new HttpsProxyAgent(proxy);
  }

  try {
    const { data } = await axios.get(
      `${baseURL}/files/${fileId}/url?expiry=${expiry}`,
      requestConfig,
    );
    return data;
  } catch (error) {
    logger.error('Error fetching signed URL:', (error as Error).message);
    throw error;
  }
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Sends a document or image reference to `{baseURL}/ocr` and returns the
 * service's response body.
 *
 * The request always sets `image_limit` to 0 and `include_image_base64` to
 * false.
 *
 * @param params.url - Location of the content (a URL or a data URL).
 * @param params.apiKey - Token sent as `Authorization: Bearer <apiKey>`.
 * @param params.model - Model name; defaults to `DEFAULT_MISTRAL_MODEL`.
 * @param params.baseURL - API root; defaults to `DEFAULT_MISTRAL_BASE_URL`.
 * @param params.documentType - Selects both the `document.type` value and the
 *   key under which `url` is sent; defaults to `document_url`.
 * @returns The OCR response body.
 * @throws Re-throws the request error after logging its message.
 */
export async function performOCR({
  url,
  apiKey,
  model = DEFAULT_MISTRAL_MODEL,
  baseURL = DEFAULT_MISTRAL_BASE_URL,
  documentType = 'document_url',
}: {
  url: string;
  apiKey: string;
  model?: string;
  baseURL?: string;
  documentType?: 'document_url' | 'image_url';
}): Promise<OCRResult> {
  // Anything other than 'image_url' is sent under the 'document_url' key.
  const urlField = documentType === 'image_url' ? 'image_url' : 'document_url';

  const requestConfig: AxiosRequestConfig = {
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${apiKey}`,
    },
  };

  const proxy = process.env.PROXY;
  if (proxy) {
    requestConfig.httpsAgent = new HttpsProxyAgent(proxy);
  }

  const payload = {
    model,
    image_limit: 0,
    include_image_base64: false,
    document: {
      type: documentType,
      [urlField]: url,
    },
  };

  try {
    const { data } = await axios.post(`${baseURL}/ocr`, payload, requestConfig);
    return data;
  } catch (error) {
    logger.error('Error performing OCR:', (error as Error).message);
    throw error;
  }
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Deletes an uploaded file via `DELETE {baseURL}/files/{fileId}`.
 *
 * Best effort: a failed request is logged and swallowed, so the returned
 * promise does not reject because of it.
 *
 * @param params.fileId - Identifier of the file to delete.
 * @param params.apiKey - Token sent as `Authorization: Bearer <apiKey>`.
 * @param params.baseURL - API root; defaults to `DEFAULT_MISTRAL_BASE_URL`.
 */
export async function deleteMistralFile({
  fileId,
  apiKey,
  baseURL = DEFAULT_MISTRAL_BASE_URL,
}: {
  fileId: string;
  apiKey: string;
  baseURL?: string;
}): Promise<void> {
  const requestConfig: AxiosRequestConfig = {
    headers: { Authorization: `Bearer ${apiKey}` },
  };

  const proxy = process.env.PROXY;
  if (proxy) {
    requestConfig.httpsAgent = new HttpsProxyAgent(proxy);
  }

  try {
    const { data } = await axios.delete(`${baseURL}/files/${fileId}`, requestConfig);
    logger.debug(`Mistral file ${fileId} deleted successfully:`, data);
  } catch (error) {
    // Cleanup must never fail the caller's OCR flow.
    logger.error(`Error deleting Mistral file ${fileId}:`, error);
  }
}
|
|
| |
| |
| |
/**
 * Tells whether a config value has to be looked up rather than used as-is.
 *
 * @param value - Raw configuration value.
 * @returns `true` when the value matches `envVarRegex` or is blank.
 */
function needsEnvLoad(value: string): boolean {
  if (envVarRegex.test(value)) {
    return true;
  }
  return value.trim().length === 0;
}
|
|
| |
| |
| |
/**
 * Picks the variable name to look up for a config value.
 *
 * @param configValue - Raw configuration value.
 * @param defaultName - Name to use when the value is not a variable reference
 *   or no name can be extracted from it.
 * @returns The name extracted from `configValue`, or `defaultName`.
 */
function getEnvVarName(configValue: string, defaultName: string): string {
  const referenced = envVarRegex.test(configValue) ? extractVariableName(configValue) : null;
  return referenced || defaultName;
}
|
|
| |
| |
| |
/**
 * Resolves one config value against already-loaded auth values.
 *
 * @param configValue - Raw configuration value.
 * @param defaultEnvName - Lookup key used when `configValue` names no variable.
 * @param authValues - Loaded values keyed by variable name.
 * @param defaultValue - Fallback when the lookup yields nothing.
 * @returns `configValue` itself when it needs no lookup; otherwise the loaded
 *   value, then `defaultValue`, then the empty string.
 */
async function resolveConfigValue(
  configValue: string,
  defaultEnvName: string,
  authValues: Record<string, string | undefined>,
  defaultValue?: string,
): Promise<string> {
  const isLiteral = !needsEnvLoad(configValue);
  if (isLiteral) {
    return configValue;
  }

  const lookupKey = getEnvVarName(configValue, defaultEnvName);
  const loaded = authValues[lookupKey];
  if (loaded) {
    return loaded;
  }
  return defaultValue || '';
}
|
|
| |
| |
| |
/**
 * Resolves the API key and base URL for an OCR request from `req.config.ocr`.
 *
 * Values that need no lookup are used directly. Otherwise the required
 * variable names are collected and fetched through `context.loadAuthValues`,
 * with `OCR_BASEURL` passed in the `optional` set. The base URL falls back to
 * `DEFAULT_MISTRAL_BASE_URL`; the API key falls back to the empty string.
 *
 * @param context - OCR context providing the request and the auth loader.
 * @returns The resolved `apiKey` and `baseURL`.
 */
async function loadAuthConfig(context: OCRContext): Promise<AuthConfig> {
  const ocrConfig = context.req.config?.ocr;
  const apiKeyConfig = ocrConfig?.apiKey || '';
  const baseURLConfig = ocrConfig?.baseURL || '';

  const mustLoadApiKey = needsEnvLoad(apiKeyConfig);
  const mustLoadBaseURL = needsEnvLoad(baseURLConfig);

  // Both values are literals: nothing to look up.
  if (!mustLoadApiKey && !mustLoadBaseURL) {
    return { apiKey: apiKeyConfig, baseURL: baseURLConfig };
  }

  // The base URL field is listed before the API key field.
  const authFields: string[] = [];
  if (mustLoadBaseURL) {
    authFields.push(getEnvVarName(baseURLConfig, 'OCR_BASEURL'));
  }
  if (mustLoadApiKey) {
    authFields.push(getEnvVarName(apiKeyConfig, 'OCR_API_KEY'));
  }

  const authValues = await context.loadAuthValues({
    userId: context.req.user?.id || '',
    authFields,
    optional: new Set(['OCR_BASEURL']),
  });

  const [apiKey, baseURL] = await Promise.all([
    resolveConfigValue(apiKeyConfig, 'OCR_API_KEY', authValues),
    resolveConfigValue(baseURLConfig, 'OCR_BASEURL', authValues, DEFAULT_MISTRAL_BASE_URL),
  ]);

  return { apiKey, baseURL };
}
|
|
| |
| |
| |
/**
 * Determines the OCR model name from the `mistralModel` config entry.
 *
 * @param ocrConfig - The `ocr` section of the custom config, if any.
 * @returns `DEFAULT_MISTRAL_MODEL` when the entry is missing or blank; the
 *   extracted variable value (or the default when that is empty) when the
 *   entry matches `envVarRegex`; otherwise the trimmed entry.
 */
function getModelConfig(ocrConfig?: TCustomConfig['ocr']): string {
  const configured = ocrConfig?.mistralModel || '';
  const trimmed = configured.trim();

  if (trimmed.length === 0) {
    return DEFAULT_MISTRAL_MODEL;
  }

  // The reference check and extraction use the untrimmed value.
  if (envVarRegex.test(configured)) {
    return extractEnvVariable(configured) || DEFAULT_MISTRAL_MODEL;
  }

  return trimmed;
}
|
|
| |
| |
| |
/**
 * Classifies an uploaded file as an image or a generic document.
 *
 * @param file - Uploaded file; only `mimetype` and `originalname` are read.
 * @returns `image_url` when the lower-cased MIME type starts with `image` or
 *   the original name ends in a known image extension; otherwise
 *   `document_url`.
 */
function getDocumentType(file: Express.Multer.File): 'image_url' | 'document_url' {
  const mime = (file.mimetype || '').toLowerCase();
  if (mime.startsWith('image')) {
    return 'image_url';
  }

  const name = file.originalname || '';
  if (/\.(png|jpe?g|gif|bmp|webp|tiff?)$/i.test(name)) {
    return 'image_url';
  }

  return 'document_url';
}
|
|
| |
| |
| |
/**
 * Flattens an OCR response into one text string and a list of images.
 *
 * Each page contributes its markdown followed by a blank line. When the
 * result has more than one page, every page is preceded by a `# PAGE n`
 * heading (1-based). Only images with a non-empty `image_base64` are kept.
 *
 * @param ocrResult - OCR response whose `pages` are iterated in order.
 * @returns The aggregated text and the collected base64 images.
 */
function processOCRResult(ocrResult: OCRResult): { text: string; images: string[] } {
  const hasMultiplePages = ocrResult.pages.length > 1;
  const chunks: string[] = [];
  const images: string[] = [];

  let pageNumber = 0;
  for (const page of ocrResult.pages) {
    pageNumber += 1;

    if (hasMultiplePages) {
      chunks.push(`# PAGE ${pageNumber}\n`);
    }
    chunks.push(page.markdown + '\n\n');

    for (const image of page.images || []) {
      if (image.image_base64) {
        images.push(image.image_base64);
      }
    }
  }

  return { text: chunks.join(''), images };
}
|
|
| |
| |
| |
/**
 * Builds the `Error` thrown to callers when an OCR flow fails.
 *
 * The error is passed to `logAxiosError` with the response's `detail` (or
 * `baseMessage` when there is none); the string it returns becomes the error
 * message, with the response's `message` appended after ` - ` when present.
 *
 * @param error - The caught value, treated as an axios error.
 * @param baseMessage - Message used when the response carries no `detail`.
 * @returns A new `Error` with the composed message.
 */
function createOCRError(error: unknown, baseMessage: string): Error {
  const axiosError = error as AxiosError<MistralOCRError>;
  const responseBody = axiosError?.response?.data;
  const detail = responseBody?.detail;
  const responseMessage = responseBody?.message;

  const logged = logAxiosError({ error: axiosError, message: detail || baseMessage });

  if (responseMessage) {
    return new Error(`${logged} - ${responseMessage}`);
  }
  return new Error(logged);
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Runs OCR on an uploaded file by uploading it, requesting a signed URL and
 * submitting that URL for OCR.
 *
 * The remote file is deleted again once the upload has produced a file id,
 * whether the rest of the flow succeeds or fails.
 *
 * @param context - OCR context with the request, the file and the auth loader.
 * @returns The extracted text and images, with `bytes` set to four times the
 *   text length and `filepath` set to `FileSources.mistral_ocr`.
 * @throws An `Error` built by `createOCRError` for any failure, including an
 *   OCR response without pages.
 */
export const uploadMistralOCR = async (context: OCRContext): Promise<MistralOCRUploadResult> => {
  // Populated once the upload succeeds, so cleanup knows what to delete.
  let uploaded: { fileId: string; apiKey: string; baseURL: string } | undefined;

  const removeUploadedFile = async (): Promise<void> => {
    if (uploaded && uploaded.fileId && uploaded.apiKey && uploaded.baseURL) {
      await deleteMistralFile(uploaded);
    }
  };

  try {
    const { apiKey, baseURL } = await loadAuthConfig(context);
    const model = getModelConfig(context.req.config?.ocr);

    const mistralFile = await uploadDocumentToMistral({
      filePath: context.file.path,
      fileName: context.file.originalname,
      apiKey,
      baseURL,
    });
    uploaded = { fileId: mistralFile.id, apiKey, baseURL };

    const signedUrlResponse = await getSignedUrl({
      apiKey,
      baseURL,
      fileId: mistralFile.id,
    });

    const ocrResult = await performOCR({
      url: signedUrlResponse.url,
      documentType: getDocumentType(context.file),
      baseURL,
      apiKey,
      model,
    });

    const pages = ocrResult?.pages;
    if (!pages || pages.length === 0) {
      throw new Error(
        'No OCR result returned from service, may be down or the file is not supported.',
      );
    }

    const { text, images } = processOCRResult(ocrResult);
    await removeUploadedFile();

    return {
      filename: context.file.originalname,
      bytes: text.length * 4,
      filepath: FileSources.mistral_ocr,
      text,
      images,
    };
  } catch (error) {
    // Clean up first, then surface the failure.
    await removeUploadedFile();
    throw createOCRError(error, 'Error uploading document to Mistral OCR API:');
  }
};
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Runs OCR on an uploaded file by sending its contents inline as a base64
 * data URL; nothing is uploaded beforehand.
 *
 * The data URL uses the file's MIME type, or `image/jpeg` when none is set.
 *
 * @param context - OCR context with the request, the file and the auth loader.
 * @returns The extracted text and images, with `bytes` set to four times the
 *   text length and `filepath` set to `FileSources.azure_mistral_ocr`.
 * @throws An `Error` built by `createOCRError` for any failure, including an
 *   OCR response without pages.
 */
export const uploadAzureMistralOCR = async (
  context: OCRContext,
): Promise<MistralOCRUploadResult> => {
  try {
    const { file } = context;
    const { apiKey, baseURL } = await loadAuthConfig(context);
    const model = getModelConfig(context.req.config?.ocr);

    const { content } = await readFileAsBuffer(file.path, { fileSize: file.size });
    const encoded = content.toString('base64');
    const mimeType = file.mimetype || 'image/jpeg';
    const dataUrl = `data:${mimeType};base64,${encoded}`;

    const ocrResult = await performOCR({
      apiKey,
      baseURL,
      model,
      url: dataUrl,
      documentType: getDocumentType(file),
    });

    const pages = ocrResult?.pages;
    if (!pages || pages.length === 0) {
      throw new Error(
        'No OCR result returned from service, may be down or the file is not supported.',
      );
    }

    const { text, images } = processOCRResult(ocrResult);

    return {
      filename: file.originalname,
      bytes: text.length * 4,
      filepath: FileSources.azure_mistral_ocr,
      text,
      images,
    };
  } catch (error) {
    throw createOCRError(error, 'Error uploading document to Azure Mistral OCR API:');
  }
};
|
|
| |
| |
| |
/**
 * Loads the Google service-account key and exchanges a signed JWT for an
 * access token.
 *
 * The key path comes from `GOOGLE_SERVICE_KEY_FILE`; when that is unset, a
 * path relative to this module (`../../../api/data/auth.json`) is used.
 *
 * @returns The loaded service account and the access token.
 * @throws When the key cannot be loaded, or lacks `client_email`,
 *   `private_key` or `project_id`.
 */
async function loadGoogleAuthConfig(): Promise<{
  serviceAccount: GoogleServiceAccount;
  accessToken: string;
}> {
  // The default path is only computed when the environment variable is unset.
  let serviceKeyPath = process.env.GOOGLE_SERVICE_KEY_FILE;
  if (!serviceKeyPath) {
    serviceKeyPath = path.join(__dirname, '..', '..', '..', 'api', 'data', 'auth.json');
  }

  const serviceKey = await loadServiceKey(serviceKeyPath);
  if (!serviceKey) {
    throw new Error(
      `Google service account not found or could not be loaded from ${serviceKeyPath}`,
    );
  }

  const hasRequiredFields =
    serviceKey.client_email && serviceKey.private_key && serviceKey.project_id;
  if (!hasRequiredFields) {
    throw new Error('Invalid Google service account configuration');
  }

  const serviceAccount = serviceKey as GoogleServiceAccount;
  const signedJwt = await createJWT(serviceAccount);
  const accessToken = await exchangeJWTForAccessToken(signedJwt);

  return { serviceAccount, accessToken };
}
|
|
| |
| |
| |
/**
 * Creates an RS256-signed JWT for the Google OAuth token endpoint.
 *
 * Claims: `iss` is the account's `client_email`, `scope` is the
 * cloud-platform scope, `aud` is the token endpoint, `iat` is the current
 * time in seconds and `exp` is one hour later.
 *
 * @param serviceKey - Service account; `private_key` must be set, since it is
 *   used to sign.
 * @returns The compact JWT: `header.payload.signature`, each part base64url.
 */
async function createJWT(serviceKey: GoogleServiceAccount): Promise<string> {
  const nodeCrypto = await import('crypto');

  const toSegment = (value: object): string =>
    Buffer.from(JSON.stringify(value)).toString('base64url');

  const issuedAt = Math.floor(Date.now() / 1000);

  const headerSegment = toSegment({
    alg: 'RS256',
    typ: 'JWT',
  });
  const payloadSegment = toSegment({
    iss: serviceKey.client_email,
    scope: 'https://www.googleapis.com/auth/cloud-platform',
    aud: 'https://oauth2.googleapis.com/token',
    exp: issuedAt + 3600,
    iat: issuedAt,
  });

  const signingInput = `${headerSegment}.${payloadSegment}`;

  const signer = nodeCrypto.createSign('RSA-SHA256');
  signer.update(signingInput);
  signer.end();
  const signature = signer.sign(serviceKey.private_key!, 'base64url');

  return `${signingInput}.${signature}`;
}
|
|
| |
| |
| |
/**
 * Exchanges a signed JWT for an access token at the Google OAuth token
 * endpoint, using the JWT-bearer grant type.
 *
 * @param jwt - Signed JWT sent as the `assertion` form field.
 * @returns The `access_token` from the response body.
 * @throws When the response carries no `access_token`, or the request fails.
 */
async function exchangeJWTForAccessToken(jwt: string): Promise<string> {
  const requestConfig: AxiosRequestConfig = {
    headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
  };

  const proxy = process.env.PROXY;
  if (proxy) {
    requestConfig.httpsAgent = new HttpsProxyAgent(proxy);
  }

  const formBody = new URLSearchParams({
    grant_type: 'urn:ietf:params:oauth:grant-type:jwt-bearer',
    assertion: jwt,
  });

  const response = await axios.post(
    'https://oauth2.googleapis.com/token',
    formBody,
    requestConfig,
  );

  const accessToken = response.data?.access_token;
  if (!accessToken) {
    throw new Error('No access token in response');
  }

  return accessToken;
}
|
|
| |
| |
| |
/**
 * Sends an OCR request to the Mistral publisher model on Google Vertex AI
 * through the `rawPredict` endpoint.
 *
 * The region comes from `GOOGLE_LOC` (default `us-central1`). The `global`
 * location uses the un-prefixed `aiplatform.googleapis.com` host; any other
 * location is used as a host prefix. The request asks for base64 images.
 *
 * @param params.url - Location of the content (a URL or a data URL).
 * @param params.accessToken - Token sent as `Authorization: Bearer <token>`.
 * @param params.projectId - Google Cloud project id used in the URL.
 * @param params.model - Model id; `mistral-ocr-2505` is used when empty.
 * @param params.documentType - Selects both the `document.type` value and the
 *   key under which `url` is sent; defaults to `document_url`.
 * @returns The response body.
 * @throws An `Error` whose message is the string returned by `logAxiosError`.
 */
async function performGoogleVertexOCR({
  url,
  accessToken,
  projectId,
  model,
  documentType = 'document_url',
}: {
  url: string;
  accessToken: string;
  projectId: string;
  model: string;
  documentType?: 'document_url' | 'image_url';
}): Promise<OCRResult> {
  const location = process.env.GOOGLE_LOC || 'us-central1';
  const modelId = model || 'mistral-ocr-2505';

  const host =
    location === 'global'
      ? 'aiplatform.googleapis.com'
      : `${location}-aiplatform.googleapis.com`;
  const baseURL = `https://${host}/v1/projects/${projectId}/locations/${location}/publishers/mistralai/models/${modelId}:rawPredict`;

  // Anything other than 'image_url' is sent under the 'document_url' key.
  const urlField = documentType === 'image_url' ? 'image_url' : 'document_url';

  const requestBody = {
    model: modelId,
    document: {
      type: documentType,
      [urlField]: url,
    },
    include_image_base64: true,
  };

  // Log the request with the document content replaced by a placeholder.
  const loggedDocument = { ...requestBody.document, [urlField]: 'base64_data_hidden' };
  logger.debug('Sending request to Google Vertex AI:', {
    url: baseURL,
    body: { ...requestBody, document: loggedDocument },
  });

  const requestConfig: AxiosRequestConfig = {
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${accessToken}`,
      Accept: 'application/json',
    },
  };

  const proxy = process.env.PROXY;
  if (proxy) {
    requestConfig.httpsAgent = new HttpsProxyAgent(proxy);
  }

  try {
    const res = await axios.post(baseURL, requestBody, requestConfig);
    logger.debug('Google Vertex AI response received');
    return res.data;
  } catch (error) {
    const failure = error as AxiosError;
    if (failure.response?.data) {
      logger.error('Vertex AI error response: ' + JSON.stringify(failure.response.data, null, 2));
    }
    throw new Error(
      logAxiosError({
        error: failure,
        message: 'Error calling Google Vertex AI Mistral OCR',
      }),
    );
  }
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Runs OCR on an uploaded file through Google Vertex AI by sending its
 * contents inline as a base64 data URL.
 *
 * The data URL uses the file's MIME type, or `application/pdf` when none is
 * set.
 *
 * @param context - OCR context with the request and the file.
 * @returns The extracted text and images, with `bytes` set to four times the
 *   text length and `filepath` set to `FileSources.vertexai_mistral_ocr`.
 * @throws An `Error` built by `createOCRError` for any failure, including an
 *   OCR response without pages.
 */
export const uploadGoogleVertexMistralOCR = async (
  context: OCRContext,
): Promise<MistralOCRUploadResult> => {
  try {
    const { file } = context;
    const { serviceAccount, accessToken } = await loadGoogleAuthConfig();
    const model = getModelConfig(context.req.config?.ocr);

    const { content } = await readFileAsBuffer(file.path, { fileSize: file.size });
    const encoded = content.toString('base64');
    const mimeType = file.mimetype || 'application/pdf';
    const dataUrl = `data:${mimeType};base64,${encoded}`;

    const ocrResult = await performGoogleVertexOCR({
      url: dataUrl,
      accessToken,
      projectId: serviceAccount.project_id!,
      model,
      documentType: getDocumentType(file),
    });

    const pages = ocrResult?.pages;
    if (!pages || pages.length === 0) {
      throw new Error(
        'No OCR result returned from service, may be down or the file is not supported.',
      );
    }

    const { text, images } = processOCRResult(ocrResult);

    return {
      filename: file.originalname,
      bytes: text.length * 4,
      filepath: FileSources.vertexai_mistral_ocr as string,
      text,
      images,
    };
  } catch (error) {
    throw createOCRError(error, 'Error uploading document to Google Vertex AI Mistral OCR:');
  }
};
|
|