Spaces:
Runtime error
Runtime error
Commit ·
79ca28d
1
Parent(s): cce51a6
edits
Browse files
app.py
CHANGED
|
@@ -144,9 +144,21 @@ def scisearch(query, language, num_results=10):
|
|
| 144 |
|
| 145 |
return results, highlight_terms
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
def highlight_string(paragraph: str, highlight_terms: list) -> str:
|
| 148 |
for term in highlight_terms:
|
| 149 |
-
paragraph = re.sub(f"\\b{term}\\b", f"<
|
|
|
|
| 150 |
return paragraph
|
| 151 |
|
| 152 |
def process_results(hits: list, highlight_terms: list) -> str:
|
|
@@ -154,7 +166,7 @@ def process_results(hits: list, highlight_terms: list) -> str:
|
|
| 154 |
for i, hit in enumerate(hits):
|
| 155 |
res_head = f"""
|
| 156 |
<div class="searchresult">
|
| 157 |
-
<h2>{i}. Document ID: {hit['docid']}</h2>
|
| 158 |
<p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
|
| 159 |
"""
|
| 160 |
for subhit in hit['meta']['docs']:
|
|
@@ -221,8 +233,8 @@ if st.sidebar.button("Search"):
|
|
| 221 |
}
|
| 222 |
|
| 223 |
.searchresult h2 {
|
| 224 |
-
font-size:
|
| 225 |
-
line-height:
|
| 226 |
font-weight: normal;
|
| 227 |
color: rgb(7, 111, 222);
|
| 228 |
margin-bottom: 0px;
|
|
|
|
| 144 |
|
| 145 |
return results, highlight_terms
|
| 146 |
|
| 147 |
+
# Tag names that may follow PII_PREFIX in the indexed text (e.g. "PI:EMAIL").
PII_TAGS = {"KEY", "EMAIL", "USER", "IP_ADDRESS", "ID", "IPv4", "IPv6"}
PII_PREFIX = "PI:"

# HTML badge substituted for every PII marker; "{}" receives the tag name.
_PII_MARKUP = """<b><mark style="background: Fuchsia; color: Lime;">REDACTED {}</mark></b>"""


def _pii_marker_regex():
    """Return a compiled regex matching any ``PII_PREFIX + tag`` marker.

    The tag name is captured in group 1.  Tags are tried longest-first so the
    result never depends on set iteration order.  Returns ``None`` when
    ``PII_TAGS`` is empty (an empty alternation would match the bare prefix).
    """
    if not PII_TAGS:
        return None
    ordered_tags = sorted(PII_TAGS, key=lambda tag: (-len(tag), tag))
    alternatives = "|".join(re.escape(tag) for tag in ordered_tags)
    return re.compile(f"{re.escape(PII_PREFIX)}({alternatives})")


def _highlight_terms(segment: str, highlight_terms: list) -> str:
    """Wrap every whole-word, case-insensitive occurrence of a term in ``<b>``."""
    # Drop empty terms: an empty pattern matches at every position.
    terms = sorted({term for term in highlight_terms if term}, key=lambda t: (-len(t), t))
    if not terms:
        return segment
    alternatives = "|".join(re.escape(term) for term in terms)
    # Lookarounds behave like \b for terms that start/end with a word
    # character, but also work for terms such as "c++" where \b would not.
    pattern = f"(?<!\\w)(?:{alternatives})(?!\\w)"
    # One pass over the text, so the inserted <b> tags are never re-scanned by
    # another term; m.group(0) keeps the document's own casing.
    return re.sub(pattern, lambda m: f"<b>{m.group(0)}</b>", segment, flags=re.I)


def process_pii(text: str) -> str:
    """Replace each ``PI:<TAG>`` marker in *text* with a "REDACTED <TAG>" badge.

    Matching is case-sensitive and only covers the tags in ``PII_TAGS``; any
    other text is returned unchanged.
    """
    marker_re = _pii_marker_regex()
    if marker_re is None:
        return text
    return marker_re.sub(lambda m: _PII_MARKUP.format(m.group(1)), text)


def highlight_string(paragraph: str, highlight_terms: list) -> str:
    """Return *paragraph* as HTML with search terms bolded and PII redacted.

    Args:
        paragraph: Text of a search hit.
        highlight_terms: Query terms to emphasise; treated as literal text
            (regex metacharacters are escaped) and matched case-insensitively
            on whole words.

    Returns:
        The paragraph with each term occurrence wrapped in ``<b>...</b>`` and
        each PII marker replaced by its redaction badge.  Terms are never
        highlighted inside a PII marker or inside the badge markup.
    """
    marker_re = _pii_marker_regex()
    if marker_re is None:
        return _highlight_terms(paragraph, highlight_terms)

    # Splitting on a pattern with one capture group yields plain text at even
    # indices and the captured tag names at odd indices.
    pieces = marker_re.split(paragraph)
    rendered = []
    for index, piece in enumerate(pieces):
        if index % 2:
            rendered.append(_PII_MARKUP.format(piece))
        else:
            rendered.append(_highlight_terms(piece, highlight_terms))
    return "".join(rendered)
|
| 163 |
|
| 164 |
def process_results(hits: list, highlight_terms: list) -> str:
|
|
|
|
| 166 |
for i, hit in enumerate(hits):
|
| 167 |
res_head = f"""
|
| 168 |
<div class="searchresult">
|
| 169 |
+
<h2>{i+1}. Document ID: {hit['docid']}</h2>
|
| 170 |
<p>Language: <string>{hit['lang']}</string>, Score: {round(hit['score'], 2)}</p>
|
| 171 |
"""
|
| 172 |
for subhit in hit['meta']['docs']:
|
|
|
|
| 233 |
}
|
| 234 |
|
| 235 |
.searchresult h2 {
|
| 236 |
+
font-size: 19px;
|
| 237 |
+
line-height: 18px;
|
| 238 |
font-weight: normal;
|
| 239 |
color: rgb(7, 111, 222);
|
| 240 |
margin-bottom: 0px;
|