Spaces:

klasocki
/

comma-fixer

Running

klasocki committed on Aug 25, 2023

Commit

8868222

1 Parent(s): f42d24c

Refactor, introduce CommaFixerInterface and remove duplication

Files changed (6) hide show

commafixer/routers/baseline.py CHANGED Viewed

@@ -1,8 +1,8 @@
-from fastapi import APIRouter, HTTPException
 import logging
 from commafixer.src.baseline import BaselineCommaFixer
 logger = logging.Logger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -16,10 +16,4 @@ router.model = BaselineCommaFixer()
 @router.post('/fix-commas/')
 async def fix_commas_with_baseline(data: dict):
     json_field_name = 's'
-    if json_field_name in data:
-        logger.debug('Fixing commas.')
-        return {json_field_name: router.model.fix_commas(data['s'])}
-    else:
-        msg = f"Text '{json_field_name}' missing"
-        logger.debug(msg)
-        raise HTTPException(status_code=400, detail=msg)

+from fastapi import APIRouter
 import logging
 from commafixer.src.baseline import BaselineCommaFixer
+from common import fix_commas_request_handler
 logger = logging.Logger(__name__)
 logging.basicConfig(level=logging.INFO)
 @router.post('/fix-commas/')
 async def fix_commas_with_baseline(data: dict):
     json_field_name = 's'
+    return fix_commas_request_handler(json_field_name, data, logger, router.model)

commafixer/routers/common.py ADDED Viewed

+from fastapi import HTTPException
+from logging import Logger
+from comma_fixer_interface import CommaFixerInterface
+def fix_commas_request_handler(
+        json_field_name: str,
+        data: dict[str, str],
+        logger: Logger,
+        model: CommaFixerInterface
+) -> dict[str, str]:
+    if json_field_name in data:
+        logger.debug('Fixing commas.')
+        return {json_field_name: model.fix_commas(data['s'])}
+    else:
+        msg = f"Text '{json_field_name}' missing"
+        logger.debug(msg)
+        raise HTTPException(status_code=400, detail=msg)

commafixer/routers/fixer.py CHANGED Viewed

@@ -2,6 +2,7 @@ from fastapi import APIRouter, HTTPException
 import logging
 from commafixer.src.fixer import CommaFixer
 logger = logging.Logger(__name__)
@@ -16,10 +17,4 @@ router.model = CommaFixer()
 @router.post('/')
 async def fix_commas(data: dict):
     json_field_name = 's'
-    if json_field_name in data:
-        logger.debug('Fixing commas.')
-        return {json_field_name: router.model.fix_commas(data['s'])}
-    else:
-        msg = f"Text '{json_field_name}' missing"
-        logger.debug(msg)
-        raise HTTPException(status_code=400, detail=msg)

 import logging
 from commafixer.src.fixer import CommaFixer
+from commafixer.routers.common import fix_commas_request_handler
 logger = logging.Logger(__name__)
 @router.post('/')
 async def fix_commas(data: dict):
     json_field_name = 's'
+    return fix_commas_request_handler(json_field_name, data, logger, router.model)

commafixer/src/baseline.py CHANGED Viewed

@@ -1,8 +1,10 @@
 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline
 import re
-class BaselineCommaFixer:
     """
     A wrapper class for the oliverguhr/fullstop-punctuation-multilang-large baseline punctuation restoration model.
     It adapts the model to perform comma fixing instead of full punctuation restoration, that is, removes the

 from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline
 import re
+from commafixer.src.comma_fixer_interface import CommaFixerInterface
+class BaselineCommaFixer(CommaFixerInterface):
     """
     A wrapper class for the oliverguhr/fullstop-punctuation-multilang-large baseline punctuation restoration model.
     It adapts the model to perform comma fixing instead of full punctuation restoration, that is, removes the

commafixer/src/comma_fixer_interface.py ADDED Viewed

+from abc import ABC, abstractmethod
+class CommaFixerInterface(ABC):
+    @abstractmethod
+    def fix_commas(self, s: str) -> str:
+        pass

commafixer/src/fixer.py CHANGED Viewed

@@ -3,8 +3,10 @@ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipelin
 import nltk
 import re
-class CommaFixer:
     """
     A wrapper class for the fine-tuned comma fixer model.
     """
@@ -84,7 +86,7 @@ def _fix_commas_based_on_labels_and_offsets(
 def _should_insert_comma(label, result, current_offset) -> bool:
     # Only insert commas for the final token of a word, that is, if next word starts with a space.
-    # TODO perharps for low confidence tokens, we should use the original decision of the user in the input?
     return label == 'B-COMMA' and result[current_offset].isspace()

 import nltk
 import re
+from commafixer.src.comma_fixer_interface import CommaFixerInterface
+class CommaFixer(CommaFixerInterface):
     """
     A wrapper class for the fine-tuned comma fixer model.
     """
 def _should_insert_comma(label, result, current_offset) -> bool:
     # Only insert commas for the final token of a word, that is, if next word starts with a space.
+    # TODO perhaps for low confidence tokens, we should use the original decision of the user in the input?
     return label == 'B-COMMA' and result[current_offset].isspace()