Spaces:
Runtime error
Runtime error
Remove now-unnecessary post-processing steps
Browse files
app.py
CHANGED
|
@@ -14,7 +14,6 @@ def check_additional_requirements():
|
|
| 14 |
os.system("pip install gradio==3.44.3")
|
| 15 |
else:
|
| 16 |
os.system("pip install gradio==3.44.3")
|
| 17 |
-
os.system(os.environ["DD_ADDONS"])
|
| 18 |
return
|
| 19 |
|
| 20 |
|
|
@@ -24,15 +23,12 @@ check_additional_requirements()
|
|
| 24 |
import deepdoctection as dd
|
| 25 |
from deepdoctection.dataflow.serialize import DataFromList
|
| 26 |
import time
|
| 27 |
-
from dd_addons.extern import PdfTextDetector, PostProcessor, get_xsl_path
|
| 28 |
-
from dd_addons.pipe.conn import PostProcessorService
|
| 29 |
import gradio as gr
|
| 30 |
from botocore.config import Config
|
| 31 |
|
| 32 |
|
| 33 |
# work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
|
| 34 |
_DD_ONE = "conf_dd_one.yaml"
|
| 35 |
-
_XSL_PATH = get_xsl_path()
|
| 36 |
|
| 37 |
dd.ModelCatalog.register("xrf_layout/model_final_inf_only.pt",dd.ModelProfile(
|
| 38 |
name="xrf_layout/model_final_inf_only.pt",
|
|
@@ -106,8 +102,6 @@ categories_item = dd.ModelCatalog.get_profile(cfg.WEIGHTS.D2ITEM).categories
|
|
| 106 |
assert categories_item is not None
|
| 107 |
d_item = dd.D2FrcnnDetector(item_config_path, item_weights_path, categories_item, device=cfg.DEVICE)
|
| 108 |
|
| 109 |
-
# pdf miner
|
| 110 |
-
pdf_text = PdfTextDetector(_XSL_PATH)
|
| 111 |
|
| 112 |
# text detector
|
| 113 |
credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],
|
|
@@ -164,10 +158,7 @@ def build_gradio_analyzer():
|
|
| 164 |
|
| 165 |
if cfg.OCR:
|
| 166 |
|
| 167 |
-
|
| 168 |
-
pipe_component_list.append(d_text)
|
| 169 |
-
|
| 170 |
-
t_text = dd.TextExtractionService(tex_text,skip_if_text_extracted=True)
|
| 171 |
pipe_component_list.append(t_text)
|
| 172 |
|
| 173 |
match_words = dd.MatchingService(
|
|
@@ -188,10 +179,6 @@ def build_gradio_analyzer():
|
|
| 188 |
|
| 189 |
pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list)
|
| 190 |
|
| 191 |
-
post_processor = PostProcessor("deepdoctection", **credentials_kwargs)
|
| 192 |
-
post_service = PostProcessorService(post_processor)
|
| 193 |
-
pipe_component_list.append(post_service)
|
| 194 |
-
|
| 195 |
return pipe
|
| 196 |
|
| 197 |
|
|
|
|
| 14 |
os.system("pip install gradio==3.44.3")
|
| 15 |
else:
|
| 16 |
os.system("pip install gradio==3.44.3")
|
|
|
|
| 17 |
return
|
| 18 |
|
| 19 |
|
|
|
|
| 23 |
import deepdoctection as dd
|
| 24 |
from deepdoctection.dataflow.serialize import DataFromList
|
| 25 |
import time
|
|
|
|
|
|
|
| 26 |
import gradio as gr
|
| 27 |
from botocore.config import Config
|
| 28 |
|
| 29 |
|
| 30 |
# work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
|
| 31 |
_DD_ONE = "conf_dd_one.yaml"
|
|
|
|
| 32 |
|
| 33 |
dd.ModelCatalog.register("xrf_layout/model_final_inf_only.pt",dd.ModelProfile(
|
| 34 |
name="xrf_layout/model_final_inf_only.pt",
|
|
|
|
| 102 |
assert categories_item is not None
|
| 103 |
d_item = dd.D2FrcnnDetector(item_config_path, item_weights_path, categories_item, device=cfg.DEVICE)
|
| 104 |
|
|
|
|
|
|
|
| 105 |
|
| 106 |
# text detector
|
| 107 |
credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],
|
|
|
|
| 158 |
|
| 159 |
if cfg.OCR:
|
| 160 |
|
| 161 |
+
t_text = dd.TextExtractionService(tex_text)
|
|
|
|
|
|
|
|
|
|
| 162 |
pipe_component_list.append(t_text)
|
| 163 |
|
| 164 |
match_words = dd.MatchingService(
|
|
|
|
| 179 |
|
| 180 |
pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list)
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
return pipe
|
| 183 |
|
| 184 |
|