balibabu
committed on
Commit
·
f28c040
1
Parent(s):
eb0c846
feat: Add delimiter field to naive parsing method #1909 (#1911)
Browse files
### What problem does this PR solve?
feat: Add delimiter field to naive parsing method #1909
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
web/src/components/chunk-method-modal/index.tsx
CHANGED
|
@@ -22,6 +22,7 @@ import React, { useEffect, useMemo } from 'react';
|
|
| 22 |
import { useFetchParserListOnMount } from './hooks';
|
| 23 |
|
| 24 |
import { useTranslate } from '@/hooks/common-hooks';
|
|
|
|
| 25 |
import EntityTypesItem from '../entity-types-item';
|
| 26 |
import LayoutRecognize from '../layout-recognize';
|
| 27 |
import ParseConfiguration, {
|
|
@@ -268,7 +269,12 @@ const ChunkMethodModal: React.FC<IProps> = ({
|
|
| 268 |
}
|
| 269 |
</Form.Item>
|
| 270 |
)}
|
| 271 |
-
{showMaxTokenNumber &&
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
{showRaptorParseConfiguration(selectedTag) && (
|
| 273 |
<ParseConfiguration></ParseConfiguration>
|
| 274 |
)}
|
|
|
|
| 22 |
import { useFetchParserListOnMount } from './hooks';
|
| 23 |
|
| 24 |
import { useTranslate } from '@/hooks/common-hooks';
|
| 25 |
+
import Delimiter from '../delimiter';
|
| 26 |
import EntityTypesItem from '../entity-types-item';
|
| 27 |
import LayoutRecognize from '../layout-recognize';
|
| 28 |
import ParseConfiguration, {
|
|
|
|
| 269 |
}
|
| 270 |
</Form.Item>
|
| 271 |
)}
|
| 272 |
+
{showMaxTokenNumber && (
|
| 273 |
+
<>
|
| 274 |
+
<MaxTokenNumber></MaxTokenNumber>
|
| 275 |
+
<Delimiter></Delimiter>
|
| 276 |
+
</>
|
| 277 |
+
)}
|
| 278 |
{showRaptorParseConfiguration(selectedTag) && (
|
| 279 |
<ParseConfiguration></ParseConfiguration>
|
| 280 |
)}
|
web/src/components/delimiter.tsx
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { Form, Input } from 'antd';
|
| 2 |
+
import { useTranslation } from 'react-i18next';
|
| 3 |
+
|
| 4 |
+
/**
 * Props accepted by DelimiterInput: the usual controlled-input pair of a
 * current value and a change callback. Both are optional.
 */
interface IProps {
  /** Current delimiter text; may be absent. */
  value?: string;
  /** Invoked with the updated delimiter text whenever the input changes. */
  onChange?: (val: string | undefined) => void;
}
|
| 8 |
+
|
| 9 |
+
/**
 * Controlled text input for editing delimiter characters.
 *
 * The incoming `value` is displayed with every line-feed character shown as
 * the two-character escape `\n`. When the user edits the field, every typed
 * `\n` escape is converted back into a real line feed before `onChange` is
 * called, so the parent always receives real line feeds.
 */
const DelimiterInput = ({ value, onChange }: IProps) => {
  // Render real line feeds as the visible escape sequence `\n`.
  const displayed = value?.replace(/\n/g, '\\n');

  const handleInputChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    // Reverse the display escaping before reporting the new value upward.
    const typed = event.target.value;
    onChange?.(typed.replace(/\\n/g, '\n'));
  };

  return <Input value={displayed} onChange={handleInputChange} />;
};
|
| 18 |
+
|
| 19 |
+
/**
 * Required form field for the delimiter setting, bound to the form path
 * `parser_config.delimiter` and labelled with the translated
 * `knowledgeDetails.delimiter` string.
 *
 * The field is edited through DelimiterInput, which keeps real line-feed
 * characters in the form value and only displays them as the escape `\n`.
 */
const Delimiter = () => {
  const { t } = useTranslation();

  // Seed the field with a real line feed rather than the escaped text `\n`
  // (backslash + "n"). DelimiterInput shows a real line feed as `\n`, so the
  // user sees the same default, and an untouched form now submits the same
  // representation that any edit would produce.
  const defaultDelimiters = '\n!?;。;!?';

  return (
    <Form.Item
      name={['parser_config', 'delimiter']}
      label={t('knowledgeDetails.delimiter')}
      initialValue={defaultDelimiters}
      rules={[{ required: true }]}
    >
      <DelimiterInput />
    </Form.Item>
  );
};
|
| 33 |
+
|
| 34 |
+
export default Delimiter;
|
web/src/locales/en.ts
CHANGED
|
@@ -148,6 +148,7 @@ export default {
|
|
| 148 |
rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`,
|
| 149 |
topK: 'Top-K',
|
| 150 |
topKTip: `K chunks will be fed into rerank models.`,
|
|
|
|
| 151 |
},
|
| 152 |
knowledgeConfiguration: {
|
| 153 |
titleDescription:
|
|
|
|
| 148 |
rerankTip: `If it's empty. It uses embeddings of query and chunks to compuste vector cosine similarity. Otherwise, it uses rerank score in place of vector cosine similarity.`,
|
| 149 |
topK: 'Top-K',
|
| 150 |
topKTip: `K chunks will be fed into rerank models.`,
|
| 151 |
+
delimiter: `Delimiter`,
|
| 152 |
},
|
| 153 |
knowledgeConfiguration: {
|
| 154 |
titleDescription:
|
web/src/locales/zh-traditional.ts
CHANGED
|
@@ -144,6 +144,7 @@ export default {
|
|
| 144 |
rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則,它使用rerank評分代替矢量餘弦相似性。`,
|
| 145 |
topK: 'Top-K',
|
| 146 |
topKTip: `K塊將被送入Rerank型號。`,
|
|
|
|
| 147 |
},
|
| 148 |
knowledgeConfiguration: {
|
| 149 |
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
|
|
|
|
| 144 |
rerankTip: `如果是空的。它使用查詢和塊的嵌入來構成矢量餘弦相似性。否則,它使用rerank評分代替矢量餘弦相似性。`,
|
| 145 |
topK: 'Top-K',
|
| 146 |
topKTip: `K塊將被送入Rerank型號。`,
|
| 147 |
+
delimiter: `分段標識符`,
|
| 148 |
},
|
| 149 |
knowledgeConfiguration: {
|
| 150 |
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
|
web/src/locales/zh.ts
CHANGED
|
@@ -145,6 +145,7 @@ export default {
|
|
| 145 |
rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则,它使用rerank评分代替矢量余弦相似性。`,
|
| 146 |
topK: 'Top-K',
|
| 147 |
topKTip: `K块将被送入Rerank型号。`,
|
|
|
|
| 148 |
},
|
| 149 |
knowledgeConfiguration: {
|
| 150 |
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
|
|
|
|
| 145 |
rerankTip: `如果是空的。它使用查询和块的嵌入来构成矢量余弦相似性。否则,它使用rerank评分代替矢量余弦相似性。`,
|
| 146 |
topK: 'Top-K',
|
| 147 |
topKTip: `K块将被送入Rerank型号。`,
|
| 148 |
+
delimiter: `分段标识符`,
|
| 149 |
},
|
| 150 |
knowledgeConfiguration: {
|
| 151 |
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
|
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import EntityTypesItem from '@/components/entity-types-item';
|
| 2 |
import LayoutRecognize from '@/components/layout-recognize';
|
| 3 |
import MaxTokenNumber from '@/components/max-token-number';
|
|
@@ -111,6 +112,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
|
|
| 111 |
{parserId === 'naive' && (
|
| 112 |
<>
|
| 113 |
<MaxTokenNumber></MaxTokenNumber>
|
|
|
|
| 114 |
<LayoutRecognize></LayoutRecognize>
|
| 115 |
</>
|
| 116 |
)}
|
|
|
|
| 1 |
+
import Delimiter from '@/components/delimiter';
|
| 2 |
import EntityTypesItem from '@/components/entity-types-item';
|
| 3 |
import LayoutRecognize from '@/components/layout-recognize';
|
| 4 |
import MaxTokenNumber from '@/components/max-token-number';
|
|
|
|
| 112 |
{parserId === 'naive' && (
|
| 113 |
<>
|
| 114 |
<MaxTokenNumber></MaxTokenNumber>
|
| 115 |
+
<Delimiter></Delimiter>
|
| 116 |
<LayoutRecognize></LayoutRecognize>
|
| 117 |
</>
|
| 118 |
)}
|