Commit
·
0a6ea65
1
Parent(s):
daa1a9a
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,8 +16,9 @@ import streamlit as st
|
|
| 16 |
|
| 17 |
from txtai.embeddings import Documents, Embeddings
|
| 18 |
from txtai.pipeline import Segmentation, Summary, Tabular, Translation
|
| 19 |
-
from txtai.workflow import ServiceTask, Task, Workflow
|
| 20 |
|
|
|
|
| 21 |
|
| 22 |
class Application:
|
| 23 |
"""
|
|
@@ -245,6 +246,7 @@ class Application:
|
|
| 245 |
dict with component settings
|
| 246 |
"""
|
| 247 |
|
|
|
|
| 248 |
options = {"type": component}
|
| 249 |
|
| 250 |
st.markdown("---")
|
|
@@ -268,8 +270,12 @@ class Application:
|
|
| 268 |
options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
|
| 269 |
options["upsert"] = self.boolean("Upsert", config, "upsert")
|
| 270 |
|
| 271 |
-
elif component == "segmentation":
|
| 272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
options["sentences"] = self.boolean("Split sentences", config, "sentences")
|
| 274 |
options["lines"] = self.boolean("Split lines", config, "lines")
|
| 275 |
options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
|
|
@@ -346,6 +352,10 @@ class Application:
|
|
| 346 |
self.pipelines[wtype] = Tabular(**self.components["tabular"])
|
| 347 |
tasks.append(Task(self.pipelines[wtype]))
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
elif wtype == "translation":
|
| 350 |
self.pipelines[wtype] = Translation()
|
| 351 |
tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
|
|
@@ -398,6 +408,10 @@ class Application:
|
|
| 398 |
data[wtype] = component
|
| 399 |
tasks.append({"action": wtype})
|
| 400 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
elif wtype == "translation":
|
| 402 |
data[wtype] = {}
|
| 403 |
tasks.append({"action": wtype, "args": list(component.values())})
|
|
@@ -519,8 +533,8 @@ class Application:
|
|
| 519 |
st.markdown("---")
|
| 520 |
|
| 521 |
# Component configuration
|
| 522 |
-
labels = {"segmentation": "segment", "translation": "translate"}
|
| 523 |
-
components = ["embeddings", "segmentation", "service", "summary", "tabular", "translation"]
|
| 524 |
|
| 525 |
selected, workflow = self.load(components)
|
| 526 |
selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))
|
|
|
|
| 16 |
|
| 17 |
from txtai.embeddings import Documents, Embeddings
|
| 18 |
from txtai.pipeline import Segmentation, Summary, Tabular, Translation
|
| 19 |
+
from txtai.workflow import ServiceTask, Task, UrlTask, Workflow
|
| 20 |
|
| 21 |
+
from textractor import Textractor
|
| 22 |
|
| 23 |
class Application:
|
| 24 |
"""
|
|
|
|
| 246 |
dict with component settings
|
| 247 |
"""
|
| 248 |
|
| 249 |
+
# pylint: disable=R0912, R0915
|
| 250 |
options = {"type": component}
|
| 251 |
|
| 252 |
st.markdown("---")
|
|
|
|
| 270 |
options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
|
| 271 |
options["upsert"] = self.boolean("Upsert", config, "upsert")
|
| 272 |
|
| 273 |
+
elif component in ("segmentation", "textractor"):
|
| 274 |
+
if component == "segmentation":
|
| 275 |
+
st.markdown("**Segment** \n*Split text into semantic units*")
|
| 276 |
+
else:
|
| 277 |
+
st.markdown("**Textract** \n*Extract text from documents*")
|
| 278 |
+
|
| 279 |
options["sentences"] = self.boolean("Split sentences", config, "sentences")
|
| 280 |
options["lines"] = self.boolean("Split lines", config, "lines")
|
| 281 |
options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
|
|
|
|
| 352 |
self.pipelines[wtype] = Tabular(**self.components["tabular"])
|
| 353 |
tasks.append(Task(self.pipelines[wtype]))
|
| 354 |
|
| 355 |
+
elif wtype == "textractor":
|
| 356 |
+
self.pipelines[wtype] = Textractor(**self.components["textract"])
|
| 357 |
+
tasks.append(UrlTask(self.pipelines[wtype]))
|
| 358 |
+
|
| 359 |
elif wtype == "translation":
|
| 360 |
self.pipelines[wtype] = Translation()
|
| 361 |
tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
|
|
|
|
| 408 |
data[wtype] = component
|
| 409 |
tasks.append({"action": wtype})
|
| 410 |
|
| 411 |
+
elif wtype == "textractor":
|
| 412 |
+
data[wtype] = component
|
| 413 |
+
tasks.append({"action": wtype, "task": "url"})
|
| 414 |
+
|
| 415 |
elif wtype == "translation":
|
| 416 |
data[wtype] = {}
|
| 417 |
tasks.append({"action": wtype, "args": list(component.values())})
|
|
|
|
| 533 |
st.markdown("---")
|
| 534 |
|
| 535 |
# Component configuration
|
| 536 |
+
labels = {"segmentation": "segment", "textractor": "textract", "translation": "translate"}
|
| 537 |
+
components = ["embeddings", "segmentation", "service", "summary", "tabular", "textractor", "translation"]
|
| 538 |
|
| 539 |
selected, workflow = self.load(components)
|
| 540 |
selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))
|