Spaces:
Runtime error
Runtime error
Commit
·
89b186a
1
Parent(s):
bd8327d
Remove subtoken indicators ('##') in token attribution
Browse files
app.py
CHANGED
|
@@ -62,11 +62,17 @@ toxicity_pipeline, cls_explainer = load_pipeline()
|
|
| 62 |
# Auxiliary functions
|
| 63 |
def format_explainer_html(html_string):
|
| 64 |
"""Extract tokens with attribution-based background color."""
|
|
|
|
| 65 |
soup = BeautifulSoup(html_string, 'html.parser')
|
| 66 |
p = soup.new_tag('p')
|
|
|
|
| 67 |
# Select token elements and remove model specific tokens
|
| 68 |
for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
return p.prettify()
|
| 71 |
|
| 72 |
|
|
|
|
| 62 |
# Auxiliary functions
|
| 63 |
def format_explainer_html(html_string):
|
| 64 |
"""Extract tokens with attribution-based background color."""
|
| 65 |
+
inside_token_prefix = '##'
|
| 66 |
soup = BeautifulSoup(html_string, 'html.parser')
|
| 67 |
p = soup.new_tag('p')
|
| 68 |
+
p.append(soup.new_tag('font', attrs={'color': 'black'}))
|
| 69 |
# Select token elements and remove model specific tokens
|
| 70 |
for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
|
| 71 |
+
text = token.font.text.strip()
|
| 72 |
+
if text.startswith(inside_token_prefix):
|
| 73 |
+
text = text[len(inside_token_prefix):]
|
| 74 |
+
token.string = text
|
| 75 |
+
p.font.append(token)
|
| 76 |
return p.prettify()
|
| 77 |
|
| 78 |
|