Spaces:
Sleeping
Sleeping
Kolesnikov Dmitry
committed on
Commit
·
4c753f6
1
Parent(s):
68545bc
fix: Неверный replace_urls
Browse files
- src/streamlit_app.py +2 -2
src/streamlit_app.py
CHANGED
|
@@ -30,7 +30,7 @@ if project_root not in sys.path:
|
|
| 30 |
|
| 31 |
# Импорты наших модулей
|
| 32 |
from src.text_cleaner import clean_text, clean_corpus_jsonl
|
| 33 |
-
from src.universal_preprocessor import UniversalPreprocessor, PreprocessingConfig
|
| 34 |
from src.tokenizers_cmp import TokenizationComparator, load_corpus_from_jsonl
|
| 35 |
from src.train_subword import SubwordModelTrainer, SubwordModelConfig
|
| 36 |
from src.classical_vectorizers import (
|
|
@@ -312,7 +312,7 @@ def main():
|
|
| 312 |
# Применяем предобработку и очистку, параллельно сохраняя обе версии
|
| 313 |
processed_texts = list(raw_texts)
|
| 314 |
if use_preprocessing:
|
| 315 |
-
config = PreprocessingConfig(**preprocessing_options)
|
| 316 |
preprocessor = UniversalPreprocessor(config)
|
| 317 |
tmp = []
|
| 318 |
for text in raw_texts:
|
|
|
|
| 30 |
|
| 31 |
# Импорты наших модулей
|
| 32 |
from src.text_cleaner import clean_text, clean_corpus_jsonl
|
| 33 |
+
from src.universal_preprocessor import UniversalPreprocessor, PreprocessingConfig as UniversalPreprocessingConfig
|
| 34 |
from src.tokenizers_cmp import TokenizationComparator, load_corpus_from_jsonl
|
| 35 |
from src.train_subword import SubwordModelTrainer, SubwordModelConfig
|
| 36 |
from src.classical_vectorizers import (
|
|
|
|
| 312 |
# Применяем предобработку и очистку, параллельно сохраняя обе версии
|
| 313 |
processed_texts = list(raw_texts)
|
| 314 |
if use_preprocessing:
|
| 315 |
+
config = UniversalPreprocessingConfig(**preprocessing_options)
|
| 316 |
preprocessor = UniversalPreprocessor(config)
|
| 317 |
tmp = []
|
| 318 |
for text in raw_texts:
|