Alain Vaucher committed on
Commit ·
80ffb8e
1
Parent(s): 4d89100
Explicitly download the CDE data; add logs
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import functools
|
| 2 |
import html
|
| 3 |
import logging
|
|
|
|
| 4 |
import traceback
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import List
|
|
@@ -10,7 +11,7 @@ import pandas as pd
|
|
| 10 |
from rxn.utilities.logging import setup_console_logger
|
| 11 |
from rxn.utilities.strings import remove_postfix
|
| 12 |
|
| 13 |
-
from utils import TranslatorWithSentencePiece, split_into_sentences
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
logger.addHandler(logging.NullHandler())
|
|
@@ -64,6 +65,10 @@ def sentence_and_actions_to_html(
|
|
| 64 |
|
| 65 |
|
| 66 |
def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
model = load_model(model_type)
|
| 68 |
sentences = split_into_sentences(text)
|
| 69 |
action_strings = model.translate(sentences)
|
|
|
|
| 1 |
import functools
|
| 2 |
import html
|
| 3 |
import logging
|
| 4 |
+
import textwrap
|
| 5 |
import traceback
|
| 6 |
from pathlib import Path
|
| 7 |
from typing import List
|
|
|
|
| 11 |
from rxn.utilities.logging import setup_console_logger
|
| 12 |
from rxn.utilities.strings import remove_postfix
|
| 13 |
|
| 14 |
+
from utils import TranslatorWithSentencePiece, download_cde_data, split_into_sentences
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
logger.addHandler(logging.NullHandler())
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
def try_action_extraction(model_type: str, text: str, show_sentences: bool) -> str:
|
| 68 |
+
logger.info(f'Extracting actions from paragraph "{textwrap.shorten(text, 60)}".')
|
| 69 |
+
|
| 70 |
+
download_cde_data()
|
| 71 |
+
|
| 72 |
model = load_model(model_type)
|
| 73 |
sentences = split_into_sentences(text)
|
| 74 |
action_strings = model.translate(sentences)
|
utils.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import logging
|
| 2 |
from typing import Iterable, Iterator, List, Union
|
| 3 |
|
| 4 |
-
import sentencepiece as spm
|
| 5 |
import chemdataextractor
|
|
|
|
|
|
|
| 6 |
from rxn.onmt_utils.internal_translation_utils import TranslationResult
|
| 7 |
from rxn.onmt_utils.translator import Translator
|
| 8 |
|
|
@@ -10,6 +11,16 @@ logger = logging.getLogger(__name__)
|
|
| 10 |
logger.addHandler(logging.NullHandler())
|
| 11 |
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def split_into_sentences(text: str) -> List[str]:
|
| 14 |
paragraph = chemdataextractor.doc.Paragraph(text)
|
| 15 |
return [sentence.text for sentence in paragraph.sentences]
|
|
|
|
| 1 |
import logging
|
| 2 |
from typing import Iterable, Iterator, List, Union
|
| 3 |
|
|
|
|
| 4 |
import chemdataextractor
|
| 5 |
+
import sentencepiece as spm
|
| 6 |
+
from chemdataextractor.data import Package
|
| 7 |
from rxn.onmt_utils.internal_translation_utils import TranslationResult
|
| 8 |
from rxn.onmt_utils.translator import Translator
|
| 9 |
|
|
|
|
| 11 |
logger.addHandler(logging.NullHandler())
|
| 12 |
|
| 13 |
|
| 14 |
+
def download_cde_data() -> None:
|
| 15 |
+
package = Package("models/punkt_chem-1.0.pickle")
|
| 16 |
+
if package.local_exists():
|
| 17 |
+
return
|
| 18 |
+
|
| 19 |
+
logger.info("Downloading the necessary ChemDataExtractor data...")
|
| 20 |
+
package.download()
|
| 21 |
+
logger.info("Downloading the necessary ChemDataExtractor data... Done.")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
def split_into_sentences(text: str) -> List[str]:
|
| 25 |
paragraph = chemdataextractor.doc.Paragraph(text)
|
| 26 |
return [sentence.text for sentence in paragraph.sentences]
|