Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- .gitattributes +9 -0
- data/datasets/lilac/wikitext-2-raw-v1/config.yml +32 -0
- data/datasets/lilac/wikitext-2-raw-v1/data-00000-of-00001.parquet +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/manifest.json +18 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.hnswlib.bin +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.lookup.pkl +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/signal_manifest.json +35 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/spans.pkl +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/data-00000-of-00001.parquet +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/signal_manifest.json +31 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/data-00000-of-00001.parquet +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/signal_manifest.json +36 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/pii/data-00000-of-00001.parquet +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/pii/signal_manifest.json +45 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/spacy_ner/data-00000-of-00001.parquet +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/spacy_ner/signal_manifest.json +38 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/data-00000-of-00001.parquet +3 -0
- data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/signal_manifest.json +59 -0
.gitattributes
CHANGED
|
@@ -76,3 +76,12 @@ data/datasets/lilac/imdb/text/near_dup/data-00000-of-00001.parquet filter=lfs di
|
|
| 76 |
data/datasets/lilac/imdb/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 77 |
data/datasets/lilac/imdb/text/spacy_ner/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 78 |
data/datasets/lilac/imdb/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
data/datasets/lilac/imdb/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 77 |
data/datasets/lilac/imdb/text/spacy_ner/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 78 |
data/datasets/lilac/imdb/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
data/datasets/lilac/wikitext-2-raw-v1/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.hnswlib.bin filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.lookup.pkl filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/spans.pkl filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/pii/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/spacy_ner/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
data/datasets/lilac/wikitext-2-raw-v1/config.yml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
embeddings:
|
| 2 |
+
- embedding: gte-small
|
| 3 |
+
path: text
|
| 4 |
+
name: wikitext-2-raw-v1
|
| 5 |
+
namespace: local
|
| 6 |
+
settings:
|
| 7 |
+
preferred_embedding: gte-small
|
| 8 |
+
ui:
|
| 9 |
+
media_paths:
|
| 10 |
+
- text
|
| 11 |
+
signals:
|
| 12 |
+
- path: text
|
| 13 |
+
signal:
|
| 14 |
+
signal_name: pii
|
| 15 |
+
- path: text
|
| 16 |
+
signal:
|
| 17 |
+
signal_name: near_dup
|
| 18 |
+
- path: text
|
| 19 |
+
signal:
|
| 20 |
+
signal_name: lang_detection
|
| 21 |
+
- path: text
|
| 22 |
+
signal:
|
| 23 |
+
signal_name: text_statistics
|
| 24 |
+
- path: text
|
| 25 |
+
signal:
|
| 26 |
+
signal_name: spacy_ner
|
| 27 |
+
source:
|
| 28 |
+
config_name: wikitext-2-raw-v1
|
| 29 |
+
dataset_name: wikitext
|
| 30 |
+
source_name: huggingface
|
| 31 |
+
tags:
|
| 32 |
+
- machine-learning
|
data/datasets/lilac/wikitext-2-raw-v1/data-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d2722d7f4c6b75fdd39f4477d67ed046ab1da08f1fa6d757109d8df77a1a4c2
|
| 3 |
+
size 9171984
|
data/datasets/lilac/wikitext-2-raw-v1/manifest.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [
|
| 3 |
+
"data-00000-of-00001.parquet"
|
| 4 |
+
],
|
| 5 |
+
"data_schema": {
|
| 6 |
+
"fields": {
|
| 7 |
+
"text": {
|
| 8 |
+
"dtype": "string"
|
| 9 |
+
},
|
| 10 |
+
"__hfsplit__": {
|
| 11 |
+
"dtype": "string"
|
| 12 |
+
},
|
| 13 |
+
"__rowid__": {
|
| 14 |
+
"dtype": "string"
|
| 15 |
+
}
|
| 16 |
+
}
|
| 17 |
+
}
|
| 18 |
+
}
|
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.hnswlib.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9c5174ef8444ec37b0cc39acb01225d77ef8e131d6af4501ac85e904d80a9b2
|
| 3 |
+
size 91233592
|
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/hnsw.lookup.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6de9264ae1f50b8dd78b4fec2601cd88625cfe253eaab24542890a870ecd900
|
| 3 |
+
size 1848900
|
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/signal_manifest.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [],
|
| 3 |
+
"parquet_id": "gte-small(text)",
|
| 4 |
+
"data_schema": {
|
| 5 |
+
"fields": {
|
| 6 |
+
"__rowid__": {
|
| 7 |
+
"dtype": "string"
|
| 8 |
+
},
|
| 9 |
+
"text": {
|
| 10 |
+
"fields": {
|
| 11 |
+
"gte-small": {
|
| 12 |
+
"repeated_field": {
|
| 13 |
+
"fields": {
|
| 14 |
+
"embedding": {
|
| 15 |
+
"dtype": "embedding"
|
| 16 |
+
}
|
| 17 |
+
},
|
| 18 |
+
"dtype": "string_span"
|
| 19 |
+
},
|
| 20 |
+
"signal": {
|
| 21 |
+
"signal_name": "gte-small"
|
| 22 |
+
}
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
"signal": {
|
| 29 |
+
"signal_name": "gte-small"
|
| 30 |
+
},
|
| 31 |
+
"enriched_path": [
|
| 32 |
+
"text"
|
| 33 |
+
],
|
| 34 |
+
"vector_store": "hnsw"
|
| 35 |
+
}
|
data/datasets/lilac/wikitext-2-raw-v1/text/gte-small/spans.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:064d18ca7f0f3b021ebfa93323bcbe9f50271da2ea2234393581a8113322683a
|
| 3 |
+
size 1629720
|
data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/data-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68e4d6290a46a7dac484299be15b8536324cb677ced9721934bd150a277ddaf9
|
| 3 |
+
size 1532348
|
data/datasets/lilac/wikitext-2-raw-v1/text/lang_detection/signal_manifest.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [
|
| 3 |
+
"data-00000-of-00001.parquet"
|
| 4 |
+
],
|
| 5 |
+
"parquet_id": "lang_detection(text)",
|
| 6 |
+
"data_schema": {
|
| 7 |
+
"fields": {
|
| 8 |
+
"__rowid__": {
|
| 9 |
+
"dtype": "string"
|
| 10 |
+
},
|
| 11 |
+
"text": {
|
| 12 |
+
"fields": {
|
| 13 |
+
"lang_detection": {
|
| 14 |
+
"dtype": "string",
|
| 15 |
+
"signal": {
|
| 16 |
+
"split_by_paragraph": false,
|
| 17 |
+
"signal_name": "lang_detection"
|
| 18 |
+
}
|
| 19 |
+
}
|
| 20 |
+
}
|
| 21 |
+
}
|
| 22 |
+
}
|
| 23 |
+
},
|
| 24 |
+
"signal": {
|
| 25 |
+
"split_by_paragraph": false,
|
| 26 |
+
"signal_name": "lang_detection"
|
| 27 |
+
},
|
| 28 |
+
"enriched_path": [
|
| 29 |
+
"text"
|
| 30 |
+
]
|
| 31 |
+
}
|
data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/data-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f316a8a3f031470cf9fa99475b81a04e962f2da7704d61de8a44089bbf7e59b
|
| 3 |
+
size 1703637
|
data/datasets/lilac/wikitext-2-raw-v1/text/near_dup/signal_manifest.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [
|
| 3 |
+
"data-00000-of-00001.parquet"
|
| 4 |
+
],
|
| 5 |
+
"parquet_id": "near_dup(text)",
|
| 6 |
+
"data_schema": {
|
| 7 |
+
"fields": {
|
| 8 |
+
"__rowid__": {
|
| 9 |
+
"dtype": "string"
|
| 10 |
+
},
|
| 11 |
+
"text": {
|
| 12 |
+
"fields": {
|
| 13 |
+
"near_dup": {
|
| 14 |
+
"fields": {
|
| 15 |
+
"cluster_id": {
|
| 16 |
+
"dtype": "uint32",
|
| 17 |
+
"categorical": true
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"signal": {
|
| 21 |
+
"threshold": 0.85,
|
| 22 |
+
"signal_name": "near_dup"
|
| 23 |
+
}
|
| 24 |
+
}
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
},
|
| 29 |
+
"signal": {
|
| 30 |
+
"threshold": 0.85,
|
| 31 |
+
"signal_name": "near_dup"
|
| 32 |
+
},
|
| 33 |
+
"enriched_path": [
|
| 34 |
+
"text"
|
| 35 |
+
]
|
| 36 |
+
}
|
data/datasets/lilac/wikitext-2-raw-v1/text/pii/data-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb257c0e65788f446b23d8f389db299c51a896baf0dcbd4d37114b625868f0ad
|
| 3 |
+
size 1517126
|
data/datasets/lilac/wikitext-2-raw-v1/text/pii/signal_manifest.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [
|
| 3 |
+
"data-00000-of-00001.parquet"
|
| 4 |
+
],
|
| 5 |
+
"parquet_id": "pii(text)",
|
| 6 |
+
"data_schema": {
|
| 7 |
+
"fields": {
|
| 8 |
+
"__rowid__": {
|
| 9 |
+
"dtype": "string"
|
| 10 |
+
},
|
| 11 |
+
"text": {
|
| 12 |
+
"fields": {
|
| 13 |
+
"pii": {
|
| 14 |
+
"fields": {
|
| 15 |
+
"emails": {
|
| 16 |
+
"repeated_field": {
|
| 17 |
+
"dtype": "string_span"
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"ip_addresses": {
|
| 21 |
+
"repeated_field": {
|
| 22 |
+
"dtype": "string_span"
|
| 23 |
+
}
|
| 24 |
+
},
|
| 25 |
+
"secrets": {
|
| 26 |
+
"repeated_field": {
|
| 27 |
+
"dtype": "string_span"
|
| 28 |
+
}
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"signal": {
|
| 32 |
+
"signal_name": "pii"
|
| 33 |
+
}
|
| 34 |
+
}
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"signal": {
|
| 40 |
+
"signal_name": "pii"
|
| 41 |
+
},
|
| 42 |
+
"enriched_path": [
|
| 43 |
+
"text"
|
| 44 |
+
]
|
| 45 |
+
}
|
data/datasets/lilac/wikitext-2-raw-v1/text/spacy_ner/data-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84a98ab86da1750c73ce707415d10487541b2b3265844bc7d2d0a5a2eefe8d15
|
| 3 |
+
size 2718467
|
data/datasets/lilac/wikitext-2-raw-v1/text/spacy_ner/signal_manifest.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [
|
| 3 |
+
"data-00000-of-00001.parquet"
|
| 4 |
+
],
|
| 5 |
+
"parquet_id": "spacy_ner(text)",
|
| 6 |
+
"data_schema": {
|
| 7 |
+
"fields": {
|
| 8 |
+
"__rowid__": {
|
| 9 |
+
"dtype": "string"
|
| 10 |
+
},
|
| 11 |
+
"text": {
|
| 12 |
+
"fields": {
|
| 13 |
+
"spacy_ner": {
|
| 14 |
+
"repeated_field": {
|
| 15 |
+
"fields": {
|
| 16 |
+
"label": {
|
| 17 |
+
"dtype": "string"
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
"dtype": "string_span"
|
| 21 |
+
},
|
| 22 |
+
"signal": {
|
| 23 |
+
"model": "en_core_web_sm",
|
| 24 |
+
"signal_name": "spacy_ner"
|
| 25 |
+
}
|
| 26 |
+
}
|
| 27 |
+
}
|
| 28 |
+
}
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"signal": {
|
| 32 |
+
"model": "en_core_web_sm",
|
| 33 |
+
"signal_name": "spacy_ner"
|
| 34 |
+
},
|
| 35 |
+
"enriched_path": [
|
| 36 |
+
"text"
|
| 37 |
+
]
|
| 38 |
+
}
|
data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/data-00000-of-00001.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96b6d56208f0c74e7c9bc1d858ebabaae941eaf2c95cdf9207a7bdd1418a59ce
|
| 3 |
+
size 1827010
|
data/datasets/lilac/wikitext-2-raw-v1/text/text_statistics/signal_manifest.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"files": [
|
| 3 |
+
"data-00000-of-00001.parquet"
|
| 4 |
+
],
|
| 5 |
+
"parquet_id": "text_statistics(text)",
|
| 6 |
+
"data_schema": {
|
| 7 |
+
"fields": {
|
| 8 |
+
"__rowid__": {
|
| 9 |
+
"dtype": "string"
|
| 10 |
+
},
|
| 11 |
+
"text": {
|
| 12 |
+
"fields": {
|
| 13 |
+
"text_statistics": {
|
| 14 |
+
"fields": {
|
| 15 |
+
"num_characters": {
|
| 16 |
+
"dtype": "int32"
|
| 17 |
+
},
|
| 18 |
+
"readability": {
|
| 19 |
+
"dtype": "float32"
|
| 20 |
+
},
|
| 21 |
+
"log(type_token_ratio)": {
|
| 22 |
+
"dtype": "float32"
|
| 23 |
+
},
|
| 24 |
+
"frac_non_ascii": {
|
| 25 |
+
"dtype": "float32",
|
| 26 |
+
"bins": [
|
| 27 |
+
[
|
| 28 |
+
"Low",
|
| 29 |
+
null,
|
| 30 |
+
0.15
|
| 31 |
+
],
|
| 32 |
+
[
|
| 33 |
+
"Medium",
|
| 34 |
+
0.15,
|
| 35 |
+
0.3
|
| 36 |
+
],
|
| 37 |
+
[
|
| 38 |
+
"High",
|
| 39 |
+
0.3,
|
| 40 |
+
null
|
| 41 |
+
]
|
| 42 |
+
]
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"signal": {
|
| 46 |
+
"signal_name": "text_statistics"
|
| 47 |
+
}
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
}
|
| 52 |
+
},
|
| 53 |
+
"signal": {
|
| 54 |
+
"signal_name": "text_statistics"
|
| 55 |
+
},
|
| 56 |
+
"enriched_path": [
|
| 57 |
+
"text"
|
| 58 |
+
]
|
| 59 |
+
}
|