Spaces:

NeuML
/

txtai

Running

App Files Files Community

davidmezzetti committed on Nov 12, 2021

Commit

0a6ea65

1 Parent(s): daa1a9a

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -5

app.py CHANGED Viewed

@@ -16,8 +16,9 @@ import streamlit as st
 from txtai.embeddings import Documents, Embeddings
 from txtai.pipeline import Segmentation, Summary, Tabular, Translation
-from txtai.workflow import ServiceTask, Task, Workflow
 class Application:
     """
@@ -245,6 +246,7 @@ class Application:
             dict with component settings
         """
         options = {"type": component}
         st.markdown("---")
@@ -268,8 +270,12 @@ class Application:
             options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
             options["upsert"] = self.boolean("Upsert", config, "upsert")
-        elif component == "segmentation":
-            st.markdown("**Segment**  \n*Split text into semantic units*")
             options["sentences"] = self.boolean("Split sentences", config, "sentences")
             options["lines"] = self.boolean("Split lines", config, "lines")
             options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
@@ -346,6 +352,10 @@ class Application:
                 self.pipelines[wtype] = Tabular(**self.components["tabular"])
                 tasks.append(Task(self.pipelines[wtype]))
             elif wtype == "translation":
                 self.pipelines[wtype] = Translation()
                 tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
@@ -398,6 +408,10 @@ class Application:
                 data[wtype] = component
                 tasks.append({"action": wtype})
             elif wtype == "translation":
                 data[wtype] = {}
                 tasks.append({"action": wtype, "args": list(component.values())})
@@ -519,8 +533,8 @@ class Application:
             st.markdown("---")
             # Component configuration
-            labels = {"segmentation": "segment", "translation": "translate"}
-            components = ["embeddings", "segmentation", "service", "summary", "tabular", "translation"]
             selected, workflow = self.load(components)
             selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))

 from txtai.embeddings import Documents, Embeddings
 from txtai.pipeline import Segmentation, Summary, Tabular, Translation
+from txtai.workflow import ServiceTask, Task, UrlTask, Workflow
+from textractor import Textractor
 class Application:
     """
             dict with component settings
         """
+        # pylint: disable=R0912, R0915
         options = {"type": component}
         st.markdown("---")
             options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
             options["upsert"] = self.boolean("Upsert", config, "upsert")
+        elif component in ("segmentation", "textractor"):
+            if component == "segmentation":
+                st.markdown("**Segment**  \n*Split text into semantic units*")
+            else:
+                st.markdown("**Textract**  \n*Extract text from documents*")
             options["sentences"] = self.boolean("Split sentences", config, "sentences")
             options["lines"] = self.boolean("Split lines", config, "lines")
             options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
                 self.pipelines[wtype] = Tabular(**self.components["tabular"])
                 tasks.append(Task(self.pipelines[wtype]))
+            elif wtype == "textractor":
+                self.pipelines[wtype] = Textractor(**self.components["textract"])
+                tasks.append(UrlTask(self.pipelines[wtype]))
             elif wtype == "translation":
                 self.pipelines[wtype] = Translation()
                 tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
                 data[wtype] = component
                 tasks.append({"action": wtype})
+            elif wtype == "textractor":
+                data[wtype] = component
+                tasks.append({"action": wtype, "task": "url"})
             elif wtype == "translation":
                 data[wtype] = {}
                 tasks.append({"action": wtype, "args": list(component.values())})
             st.markdown("---")
             # Component configuration
+            labels = {"segmentation": "segment", "textractor": "textract", "translation": "translate"}
+            components = ["embeddings", "segmentation", "service", "summary", "tabular", "textractor", "translation"]
             selected, workflow = self.load(components)
             selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))