| | import axios from 'axios'; |
| | import FormData from 'form-data'; |
| | import { createReadStream } from 'fs'; |
| | import { logger } from '@librechat/data-schemas'; |
| | import { FileSources } from 'librechat-data-provider'; |
| | import type { ServerRequest } from '~/types'; |
| | import { logAxiosError, readFileAsString } from '~/utils'; |
| | import { generateShortLivedToken } from '~/crypto/jwt'; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Extracts text from an uploaded file, preferring the RAG API and falling back
 * to native parsing whenever the RAG API cannot be used.
 *
 * The native fallback (`parseTextNative`) is taken when:
 * - `process.env.RAG_API_URL` is not set,
 * - the request carries no user id,
 * - the `/health` probe throws or reports neither `statusText === 'OK'` nor `status === 200`,
 * - the `/text` request throws or its response has no string `text` field.
 *
 * @param params.req - Request whose `user.id` is used to generate the short-lived bearer token.
 * @param params.file - Uploaded file; `file.path` is streamed to the RAG API.
 * @param params.file_id - Identifier sent as the `file_id` form field.
 * @returns The parsed text, its UTF-8 byte length, and `FileSources.text` as the source.
 */
export async function parseText({
  req,
  file,
  file_id,
}: {
  req: ServerRequest;
  file: Express.Multer.File;
  file_id: string;
}): Promise<{ text: string; bytes: number; source: string }> {
  if (!process.env.RAG_API_URL) {
    logger.debug('[parseText] RAG_API_URL not defined, falling back to native text parsing');
    return parseTextNative(file);
  }

  const userId = req.user?.id;
  if (!userId) {
    logger.debug('[parseText] No user ID provided, falling back to native text parsing');
    return parseTextNative(file);
  }

  try {
    const healthResponse = await axios.get(`${process.env.RAG_API_URL}/health`, {
      timeout: 10000,
    });
    // Healthy if EITHER indicator matches; fall back only when both disagree.
    if (healthResponse?.statusText !== 'OK' && healthResponse?.status !== 200) {
      logger.debug('[parseText] RAG API health check failed, falling back to native parsing');
      return parseTextNative(file);
    }
  } catch (healthError) {
    logAxiosError({
      message: '[parseText] RAG API health check failed, falling back to native parsing:',
      error: healthError,
    });
    return parseTextNative(file);
  }

  // Held outside the `try` so `finally` can release the file handle on every path.
  let fileStream: ReturnType<typeof createReadStream> | undefined;
  try {
    const jwtToken = generateShortLivedToken(userId);
    const formData = new FormData();
    formData.append('file_id', file_id);
    fileStream = createReadStream(file.path);
    formData.append('file', fileStream);

    const formHeaders = formData.getHeaders();

    const response = await axios.post(`${process.env.RAG_API_URL}/text`, formData, {
      headers: {
        Authorization: `Bearer ${jwtToken}`,
        accept: 'application/json',
        ...formHeaders,
      },
      timeout: 300000,
    });

    logger.debug(`[parseText] RAG API completed successfully (${response.status})`);

    // Validate the payload shape explicitly: a non-object body or a non-string
    // `text` must surface as this error rather than an unrelated TypeError.
    const text: unknown = response.data?.text;
    if (typeof text !== 'string') {
      throw new Error('RAG API did not return parsed text');
    }

    return {
      text,
      bytes: Buffer.byteLength(text, 'utf8'),
      source: FileSources.text,
    };
  } catch (error) {
    logAxiosError({
      message: '[parseText] RAG API text parsing failed, falling back to native parsing',
      error,
    });
    return parseTextNative(file);
  } finally {
    // If the upload failed or timed out before the stream was fully read, the
    // descriptor would otherwise stay open; destroying a finished stream is a no-op.
    fileStream?.destroy();
  }
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
/**
 * Reads the uploaded file from disk as a string without calling any external service.
 *
 * @param file - Uploaded file; `file.path` is read and `file.size` is passed as the `fileSize` option.
 * @returns The file content, the byte count reported by `readFileAsString`, and `FileSources.text`.
 */
export async function parseTextNative(file: Express.Multer.File): Promise<{
  text: string;
  bytes: number;
  source: string;
}> {
  const readOptions = { fileSize: file.size };
  const parsed = await readFileAsString(file.path, readOptions);

  return { text: parsed.content, bytes: parsed.bytes, source: FileSources.text };
}
| |
|