Spaces:
Runtime error
Runtime error
Commit
·
c80f1e3
1
Parent(s):
89b186a
Remove padding in words in the token attribution
Browse files
app.py
CHANGED
|
@@ -64,16 +64,35 @@ def format_explainer_html(html_string):
|
|
| 64 |
"""Extract tokens with attribution-based background color."""
|
| 65 |
inside_token_prefix = '##'
|
| 66 |
soup = BeautifulSoup(html_string, 'html.parser')
|
| 67 |
-
p = soup.new_tag('p'
|
| 68 |
-
|
| 69 |
# Select token elements and remove model specific tokens
|
|
|
|
| 70 |
for token in soup.find_all('td')[-1].find_all('mark')[1:-1]:
|
| 71 |
text = token.font.text.strip()
|
| 72 |
if text.startswith(inside_token_prefix):
|
| 73 |
text = text[len(inside_token_prefix):]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
token.string = text
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
def classify_comment(comment):
|
|
|
|
def format_explainer_html(html_string):
    """Extract tokens with attribution-based background color.

    Parses ``html_string``, takes the ``<mark>`` elements of the last
    ``<td>`` cell, and regroups them into words: a token whose text starts
    with ``##`` is treated as a continuation of the previous word and has
    that prefix removed. Each word becomes a ``<span>`` of adjacent marks
    (no padding between sub-tokens), and words are separated by a single
    space inside one ``<p>`` element.

    Args:
        html_string: HTML markup containing at least one ``<td>`` whose
            ``<mark>`` children each wrap their text in a ``<font>`` tag.

    Returns:
        The newly created ``<p>`` tag holding one ``<span>`` per word. It is
        empty when no tokens remain after dropping the first and last mark.

    Raises:
        IndexError: If ``html_string`` contains no ``<td>`` element.
    """
    inside_token_prefix = '##'
    soup = BeautifulSoup(html_string, 'html.parser')
    p = soup.new_tag('p',
                     attrs={'style': 'color: black; background-color: white;'})

    # Select token elements and remove model specific tokens: the first and
    # last marks are dropped (presumably the tokenizer's special start/end
    # tokens -- confirm against the explainer output).
    tokens = soup.find_all('td')[-1].find_all('mark')[1:-1]

    # Group sub-tokens into words; each word is a span of adjacent marks.
    words = []
    for token in tokens:
        text = token.font.text.strip()
        is_continuation = text.startswith(inside_token_prefix)
        if is_continuation:
            text = text[len(inside_token_prefix):]
        # Open a new word for every non-continuation token, and also when a
        # continuation token comes first and there is no word to extend yet.
        if not is_continuation or not words:
            words.append(soup.new_tag('span'))

        # Vertical padding only; horizontal padding is added per word below
        # so that sub-tokens of one word touch each other.
        style = token.attrs.get('style', '')
        if style:
            token.attrs['style'] = f"{style}; padding: 0.2em 0em;"
        else:
            token.attrs['style'] = "padding: 0.2em 0em;"
        token.string = text
        # Appending moves the mark out of the parsed table into the span.
        words[-1].append(token)

    for index, word in enumerate(words):
        if index:
            p.append(' ')
        p.append(word)

        # Add left and right-padding to each word (first and last sub-token;
        # for a single-token word both land on the same mark).
        marks = word.find_all('mark')
        marks[0].attrs['style'] = (
            f"{marks[0].attrs['style']} padding-left: 0.2em;")
        marks[-1].attrs['style'] = (
            f"{marks[-1].attrs['style']} padding-right: 0.2em;")

    return p
|
| 96 |
|
| 97 |
|
| 98 |
def classify_comment(comment):
|