Spaces:
Runtime error
Runtime error
update space
Browse files
app.py
CHANGED
|
@@ -1,13 +1,11 @@
|
|
| 1 |
import os
|
| 2 |
-
os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
|
| 3 |
-
|
| 4 |
-
credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],"aws_secret_access_key": os.environ["SECRET_KEY"]}
|
| 5 |
|
| 6 |
# work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
|
| 7 |
os.system("pip uninstall -y gradio")
|
| 8 |
os.system("pip install gradio==3.4.1")
|
| 9 |
os.system(os.environ["DD_ADDONS"])
|
| 10 |
|
|
|
|
| 11 |
from os import getcwd, path, environ
|
| 12 |
import deepdoctection as dd
|
| 13 |
from deepdoctection.dataflow.serialize import DataFromList
|
|
@@ -16,6 +14,7 @@ from dd_addons.extern import PdfTextDetector, PostProcessor, get_xsl_path
|
|
| 16 |
from dd_addons.pipe.conn import PostProcessorService
|
| 17 |
|
| 18 |
import gradio as gr
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
_DD_ONE = "conf_dd_one.yaml"
|
|
@@ -97,6 +96,9 @@ d_item = dd.D2FrcnnDetector(item_config_path, item_weights_path, categories_item
|
|
| 97 |
pdf_text = PdfTextDetector(_XSL_PATH)
|
| 98 |
|
| 99 |
# text detector
|
|
|
|
|
|
|
|
|
|
| 100 |
tex_text = dd.TextractOcrDetector(**credentials_kwargs)
|
| 101 |
|
| 102 |
|
|
@@ -161,10 +163,9 @@ def build_gradio_analyzer():
|
|
| 161 |
|
| 162 |
order = dd.TextOrderService(
|
| 163 |
text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER,
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
)
|
| 168 |
pipe_component_list.append(order)
|
| 169 |
|
| 170 |
pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list)
|
|
@@ -182,7 +183,7 @@ def analyze_image(img, pdf, max_datapoints):
|
|
| 182 |
analyzer = build_gradio_analyzer()
|
| 183 |
|
| 184 |
if img is not None:
|
| 185 |
-
image = dd.Image(file_name="
|
| 186 |
image.image = img[:, :, ::-1]
|
| 187 |
|
| 188 |
df = DataFromList(lst=[image])
|
|
|
|
| 1 |
import os
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
# work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
|
| 4 |
os.system("pip uninstall -y gradio")
|
| 5 |
os.system("pip install gradio==3.4.1")
|
| 6 |
os.system(os.environ["DD_ADDONS"])
|
| 7 |
|
| 8 |
+
import time
|
| 9 |
from os import getcwd, path, environ
|
| 10 |
import deepdoctection as dd
|
| 11 |
from deepdoctection.dataflow.serialize import DataFromList
|
|
|
|
| 14 |
from dd_addons.pipe.conn import PostProcessorService
|
| 15 |
|
| 16 |
import gradio as gr
|
| 17 |
+
from botocore.config import Config
|
| 18 |
|
| 19 |
|
| 20 |
_DD_ONE = "conf_dd_one.yaml"
|
|
|
|
| 96 |
pdf_text = PdfTextDetector(_XSL_PATH)
|
| 97 |
|
| 98 |
# text detector
|
| 99 |
+
credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],
|
| 100 |
+
"aws_secret_access_key": os.environ["SECRET_KEY"],
|
| 101 |
+
"config": Config(region_name=os.environ["REGION"])}
|
| 102 |
tex_text = dd.TextractOcrDetector(**credentials_kwargs)
|
| 103 |
|
| 104 |
|
|
|
|
| 163 |
|
| 164 |
order = dd.TextOrderService(
|
| 165 |
text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER,
|
| 166 |
+
floating_text_block_categories=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK,
|
| 167 |
+
text_block_categories=cfg.TEXT_ORDERING.TEXT_BLOCK,
|
| 168 |
+
include_residual_text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER_TO_TEXT_BLOCK)
|
|
|
|
| 169 |
pipe_component_list.append(order)
|
| 170 |
|
| 171 |
pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list)
|
|
|
|
| 183 |
analyzer = build_gradio_analyzer()
|
| 184 |
|
| 185 |
if img is not None:
|
| 186 |
+
image = dd.Image(file_name=str(time.time()).replace(".","") + ".png", location="")
|
| 187 |
image.image = img[:, :, ::-1]
|
| 188 |
|
| 189 |
df = DataFromList(lst=[image])
|