| import type { FC } from 'react' |
| import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' |
| import useSWR from 'swr' |
| import { useRouter } from 'next/navigation' |
| import { useTranslation } from 'react-i18next' |
| import { omit } from 'lodash-es' |
| import { ArrowRightIcon } from '@heroicons/react/24/solid' |
| import { |
| RiErrorWarningFill, |
| } from '@remixicon/react' |
| import s from './index.module.css' |
| import cn from '@/utils/classnames' |
| import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' |
| import Button from '@/app/components/base/button' |
| import type { FullDocumentDetail, IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' |
| import { fetchIndexingStatusBatch as doFetchIndexingStatus, fetchProcessRule } from '@/service/datasets' |
| import { DataSourceType } from '@/models/datasets' |
| import NotionIcon from '@/app/components/base/notion-icon' |
| import PriorityLabel from '@/app/components/billing/priority-label' |
| import { Plan } from '@/app/components/billing/type' |
| import { ZapFast } from '@/app/components/base/icons/src/vender/solid/general' |
| import UpgradeBtn from '@/app/components/billing/upgrade-btn' |
| import { useProviderContext } from '@/context/provider-context' |
| import Tooltip from '@/app/components/base/tooltip' |
| import { sleep } from '@/utils' |
|
|
/** Props accepted by the `EmbeddingProcess` component. */
type Props = {
  /** Dataset id; used for the indexing-status request and the document-list route. */
  datasetId: string
  /** Batch id sent with the indexing-status request. */
  batchId: string
  /** Documents of the batch; used to resolve each row's name, source type and icon. */
  documents?: FullDocumentDetail[]
  /** Indexing type of the dataset (accepted but not read by the component in this file). */
  indexingType?: string
}
|
|
/**
 * Read-only summary of a process rule: mode, segment length and text-cleaning rules.
 *
 * Renders one `FieldInfo` row per entry. While `sourceData` is not available
 * (for example, the rule is still being fetched) every row shows `-` instead
 * of the string "undefined" or a misleading mode.
 *
 * @param sourceData - The process rule to summarise; may be `undefined`.
 */
const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => {
  const { t } = useTranslation()

  // Row labels, keyed by the field name understood by `getValue`.
  const segmentationRuleMap = {
    mode: t('datasetDocuments.embedding.mode'),
    segmentLength: t('datasetDocuments.embedding.segmentLength'),
    textCleaning: t('datasetDocuments.embedding.textCleaning'),
  }

  /**
   * Resolves the text displayed for a field.
   *
   * @param field - One of the keys of `segmentationRuleMap`; any key other than
   *   `mode` and `segmentLength` is treated as the text-cleaning row.
   * @returns The value to display, or `-` when nothing is available.
   */
  const getValue = useCallback((field: string): string | number => {
    // Translated name of a pre-processing rule; undefined for unknown rule ids.
    const getRuleName = (key: string) => {
      switch (key) {
        case 'remove_extra_spaces':
          return t('datasetCreation.stepTwo.removeExtraSpaces')
        case 'remove_urls_emails':
          return t('datasetCreation.stepTwo.removeUrlEmails')
        case 'remove_stopwords':
          return t('datasetCreation.stepTwo.removeStopwords')
        default:
          return undefined
      }
    }

    // No rule yet: show the placeholder rather than guessing a mode.
    if (!sourceData)
      return '-'

    const isAutomatic = sourceData.mode === 'automatic'

    switch (field) {
      case 'mode':
        return isAutomatic
          ? (t('datasetDocuments.embedding.automatic') as string)
          : (t('datasetDocuments.embedding.custom') as string)
      case 'segmentLength':
        // `??` keeps a legitimate 0 while replacing a missing value.
        return sourceData.rules?.segmentation?.max_tokens ?? '-'
      default: {
        if (isAutomatic)
          return t('datasetDocuments.embedding.automatic') as string

        const enabledRuleNames = (sourceData.rules?.pre_processing_rules ?? [])
          .filter(rule => rule.enabled)
          .map(rule => getRuleName(rule.id))
          .filter(Boolean)

        // An empty list would otherwise render as a blank row.
        return enabledRuleNames.join(';') || '-'
      }
    }
  }, [sourceData, t])

  return (
    <div className='flex flex-col pt-8 pb-10 first:mt-0'>
      {Object.keys(segmentationRuleMap).map(field => (
        <FieldInfo
          key={field}
          label={segmentationRuleMap[field as keyof typeof segmentationRuleMap]}
          displayedValue={String(getValue(field))}
        />
      ))}
    </div>
  )
}
|
|
/** Delay between two indexing-status requests, in milliseconds. */
const POLLING_INTERVAL_MS = 2500

/** Statuses after which polling stops once every document of the batch has one of them. */
const FINISHED_STATUSES: string[] = ['completed', 'error', 'paused']

/** Statuses that make the headline show the "processing" text. */
const EMBEDDING_STATUSES: string[] = ['indexing', 'splitting', 'parsing', 'cleaning']

/** Statuses for which a row shows its progress bar and percentage. */
const IN_PROGRESS_STATUSES: string[] = [...EMBEDDING_STATUSES, 'waiting']

/**
 * Shows the embedding progress of a batch of documents.
 *
 * Polls the batch indexing status every `POLLING_INTERVAL_MS` until every
 * document reports a finished status, renders one progress row per document,
 * a summary of the process rule of the first document, and a button that
 * navigates to the dataset's document list.
 *
 * @param datasetId - Dataset the batch belongs to.
 * @param batchId - Batch whose indexing status is polled.
 * @param documents - Documents of the batch; defaults to an empty list.
 */
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [] }) => {
  const { t } = useTranslation()
  const { enableBilling, plan } = useProviderContext()
  const router = useRouter()

  // `documents` may be empty, so the first document is not guaranteed to exist.
  const firstDocument: FullDocumentDetail | undefined = documents[0]

  const [indexingStatusBatchDetail, setIndexingStatusDetail] = useState<IndexingStatusResponse[]>([])

  // Identifies the polling run that is allowed to continue. Bumping it cancels
  // the run in flight, including after unmount when state updates no longer apply.
  const pollingRunRef = useRef(0)

  useEffect(() => {
    const runId = ++pollingRunRef.current
    const isCurrentRun = () => pollingRunRef.current === runId

    const pollIndexingStatus = async () => {
      // A loop (instead of awaiting a recursive call) avoids building an
      // ever-growing chain of pending promises during long indexing jobs.
      while (isCurrentRun()) {
        try {
          const { data } = await doFetchIndexingStatus({ datasetId, batchId })
          // The run may have been cancelled while the request was in flight.
          if (!isCurrentRun())
            return
          setIndexingStatusDetail(data)
          if (data.every(detail => FINISHED_STATUSES.includes(detail.indexing_status || '')))
            return
        }
        catch {
          // Best effort: a failed request is retried after the usual interval.
        }
        await sleep(POLLING_INTERVAL_MS)
      }
    }

    pollIndexingStatus()

    return () => {
      pollingRunRef.current += 1
    }
  }, [datasetId, batchId])

  // Process rule of the first document. A `null` key tells SWR to skip the
  // request, which avoids dereferencing a missing document.
  const { data: ruleDetail } = useSWR(
    firstDocument
      ? {
        action: 'fetchProcessRule',
        params: { documentId: firstDocument.id },
      }
      : null,
    apiParams => fetchProcessRule(omit(apiParams, 'action')),
    {
      revalidateOnFocus: false,
    },
  )

  const navToDocumentList = () => {
    router.push(`/datasets/${datasetId}/documents`)
  }

  const isEmbedding = useMemo(() => {
    return indexingStatusBatchDetail.some(detail => EMBEDDING_STATUSES.includes(detail?.indexing_status || ''))
  }, [indexingStatusBatchDetail])

  const isEmbeddingCompleted = useMemo(() => {
    // `every` is true for an empty array; require at least one status so the
    // "completed" text is not shown before the first response arrives.
    return indexingStatusBatchDetail.length > 0
      && indexingStatusBatchDetail.every(detail => FINISHED_STATUSES.includes(detail?.indexing_status || ''))
  }, [indexingStatusBatchDetail])

  // Document from `documents` matching a status entry's id, if any.
  const findDocument = (id: string) => documents.find(document => document.id === id)

  const getFileType = (name?: string) => name?.split('.').pop() || 'txt'

  // Completed share of segments as an integer percentage, clamped to 100.
  const getSourcePercent = (detail: IndexingStatusResponse) => {
    const completedCount = detail.completed_segments || 0
    const totalCount = detail.total_segments || 0
    if (totalCount === 0)
      return 0
    return Math.min(100, Math.round(completedCount * 100 / totalCount))
  }

  const isSourceEmbedding = (detail: IndexingStatusResponse) => IN_PROGRESS_STATUSES.includes(detail.indexing_status || '')

  return (
    <>
      <div className='h-5 flex items-center mb-5'>
        <div className={s.embeddingStatus}>
          {isEmbedding && t('datasetDocuments.embedding.processing')}
          {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
        </div>
      </div>
      {
        enableBilling && plan.type !== Plan.team && (
          <div className='flex items-center mb-3 p-3 h-14 bg-white border-[0.5px] border-black/5 shadow-md rounded-xl'>
            <div className='shrink-0 flex items-center justify-center w-8 h-8 bg-[#FFF6ED] rounded-lg'>
              <ZapFast className='w-4 h-4 text-[#FB6514]' />
            </div>
            <div className='grow mx-3 text-[13px] font-medium text-gray-700'>
              {t('billing.plansCommon.documentProcessingPriorityUpgrade')}
            </div>
            <UpgradeBtn loc='knowledge-speed-up' />
          </div>
        )
      }
      <div className={s.progressContainer}>
        {indexingStatusBatchDetail.map((detail) => {
          const sourceDocument = findDocument(detail.id)
          const sourceName = sourceDocument?.name
          const sourceType = sourceDocument?.data_source_type as DataSourceType
          const inProgress = isSourceEmbedding(detail)
          const percent = getSourcePercent(detail)
          const hasFailed = detail.indexing_status === 'error'
          const hasCompleted = detail.indexing_status === 'completed'

          return (
            <div key={detail.id} className={cn(
              s.sourceItem,
              hasFailed && s.error,
              hasCompleted && s.success,
            )}>
              {inProgress && (
                <div className={s.progressbar} style={{ width: `${percent}%` }} />
              )}
              <div className={`${s.info} grow`}>
                {sourceType === DataSourceType.FILE && (
                  <div className={cn(s.fileIcon, s[getFileType(sourceName)])} />
                )}
                {sourceType === DataSourceType.NOTION && (
                  <NotionIcon
                    className='shrink-0 mr-1'
                    type='page'
                    src={sourceDocument?.data_source_info?.notion_page_icon}
                  />
                )}
                <div className={`${s.name} truncate`} title={sourceName}>{sourceName}</div>
                {
                  enableBilling && (
                    <PriorityLabel />
                  )
                }
              </div>
              <div className='shrink-0'>
                {inProgress && (
                  <div className={s.percent}>{`${percent}%`}</div>
                )}
                {hasFailed && detail.error && (
                  <Tooltip
                    popupContent={(
                      <div className='max-w-[400px]'>
                        {detail.error}
                      </div>
                    )}
                  >
                    <div className={cn(s.percent, s.error, 'flex items-center')}>
                      Error
                      <RiErrorWarningFill className='ml-1 w-4 h-4' />
                    </div>
                  </Tooltip>
                )}
                {hasFailed && !detail.error && (
                  <div className={cn(s.percent, s.error, 'flex items-center')}>
                    Error
                  </div>
                )}
                {hasCompleted && (
                  <div className={cn(s.percent, s.success)}>100%</div>
                )}
              </div>
            </div>
          )
        })}
      </div>
      <RuleDetail sourceData={ruleDetail} />
      <div className='flex items-center gap-2 mt-10'>
        <Button className='w-fit' variant='primary' onClick={navToDocumentList}>
          <span>{t('datasetCreation.stepThree.navTo')}</span>
          <ArrowRightIcon className='h-4 w-4 ml-2 stroke-current stroke-1' />
        </Button>
      </div>
    </>
  )
}

export default EmbeddingProcess
|
|