daihui.zhang
committed on
Commit
·
4d9049a
1
Parent(s):
eabbf72
fix bug of punctuation
Browse files
config.py
CHANGED
|
@@ -23,13 +23,13 @@ PAUSE_END_MARKERS = [',', ',', '、']
|
|
| 23 |
# 合并所有标点
|
| 24 |
ALL_MARKERS = SENTENCE_END_MARKERS + PAUSE_END_MARKERS
|
| 25 |
# 构造正则表达式字符类
|
| 26 |
-
REGEX_MARKERS = re.compile(r'[' + re.escape(''.join(ALL_MARKERS)) + r']')
|
| 27 |
|
| 28 |
sentence_end_chars = ''.join([re.escape(char) for char in SENTENCE_END_MARKERS])
|
| 29 |
SENTENCE_END_PATTERN = re.compile(f'[{sentence_end_chars}]')
|
| 30 |
|
| 31 |
# Method 2: Alternative approach with a character class
|
| 32 |
-
pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + ']'
|
| 33 |
PAUSEE_END_PATTERN = re.compile(pattern_string)
|
| 34 |
# whisper推理参数
|
| 35 |
WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
|
|
|
|
| 23 |
# 合并所有标点
|
| 24 |
ALL_MARKERS = SENTENCE_END_MARKERS + PAUSE_END_MARKERS
|
| 25 |
# 构造正则表达式字符类
|
| 26 |
+
REGEX_MARKERS = re.compile(r'[' + re.escape(''.join(ALL_MARKERS)) + r']$')
|
| 27 |
|
| 28 |
sentence_end_chars = ''.join([re.escape(char) for char in SENTENCE_END_MARKERS])
|
| 29 |
SENTENCE_END_PATTERN = re.compile(f'[{sentence_end_chars}]')
|
| 30 |
|
| 31 |
# Method 2: Alternative approach with a character class
|
| 32 |
+
pattern_string = '[' + ''.join([re.escape(char) for char in PAUSE_END_MARKERS]) + r']$'
|
| 33 |
PAUSEE_END_PATTERN = re.compile(pattern_string)
|
| 34 |
# whisper推理参数
|
| 35 |
WHISPER_PROMPT_ZH = "以下是简体中文普通话的句子。"
|