Spaces:
Runtime error
Runtime error
Commit
·
0242b2e
1
Parent(s):
0c1f2c6
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,18 +2,91 @@ import time
|
|
| 2 |
import streamlit as st
|
| 3 |
import string
|
| 4 |
from io import StringIO
|
|
|
|
| 5 |
import json
|
| 6 |
-
from
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
|
|
|
| 9 |
|
| 10 |
model_names = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
{ "name":"SGPT-125M",
|
| 12 |
"model":"Muennighoff/SGPT-125M-weightedmean-nli-bitfit",
|
| 13 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"class":"SGPTModel"},
|
| 15 |
-
|
| 16 |
-
|
| 17 |
{ "name":"SGPT-5.8B",
|
| 18 |
"model": "Muennighoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit" ,
|
| 19 |
"fork_url":"https://github.com/taskswithcode/sgpt",
|
|
@@ -27,28 +100,39 @@ model_names = [
|
|
| 27 |
"mark":True,
|
| 28 |
"class":"SGPTModel"},
|
| 29 |
|
| 30 |
-
{ "name":"
|
| 31 |
-
"model":
|
| 32 |
-
"
|
| 33 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
{
|
| 36 |
-
"model":"
|
| 37 |
-
"fork_url":"https://github.com/taskswithcode/
|
| 38 |
-
"orig_author_url":"https://github.com/
|
| 39 |
-
"orig_author":"
|
| 40 |
"sota_info": {
|
| 41 |
-
"task":"
|
| 42 |
-
"sota_link":"https://
|
| 43 |
},
|
| 44 |
-
"paper_url":"https://arxiv.org/abs/
|
| 45 |
"mark":True,
|
| 46 |
-
"class":"
|
|
|
|
| 47 |
|
| 48 |
]
|
| 49 |
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
| 52 |
example_file_names = {
|
| 53 |
"Machine learning terms (30+ phrases)": "tests/small_test.txt",
|
| 54 |
"Customer feedback mixed with noise (50+ sentences)":"tests/larger_test.txt"
|
|
@@ -61,15 +145,17 @@ def construct_model_info_for_display():
|
|
| 61 |
for node in model_names:
|
| 62 |
options_arr .append(node["name"])
|
| 63 |
if (node["mark"] == True):
|
| 64 |
-
markdown_str += f"<div style=\"font-size:16px; color: #5f5f5f; text-align: left\"> • Model: <a href=\'{node['paper_url']}\' target='_blank'>{node['name']}</a><br/> Code released by: <a href=\'{node['orig_author_url']}\' target='_blank'>{node['orig_author']}</a><br/> Model info: <a href=\'{node['sota_info']['sota_link']}\' target='_blank'>{node['sota_info']['task']}</a><br
|
| 65 |
markdown_str += "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><b>Note:</b><br/>• Uploaded files are loaded into non-persistent memory for the duration of the computation. They are not saved</div>"
|
| 66 |
limit = "{:,}".format(MAX_INPUT)
|
| 67 |
markdown_str += f"<div style=\"font-size:12px; color: #9f9f9f; text-align: left\">• User uploaded file has a maximum limit of {limit} sentences.</div>"
|
| 68 |
return options_arr,markdown_str
|
| 69 |
|
| 70 |
|
| 71 |
-
st.set_page_config(page_title='TWC - Compare state-of-the-art models for Sentence Similarity task', page_icon="logo.jpg", layout='centered', initial_sidebar_state='auto',
|
| 72 |
menu_items={
|
|
|
|
|
|
|
| 73 |
'About': 'This app was created by taskswithcode. http://taskswithcode.com'
|
| 74 |
})
|
| 75 |
col,pad = st.columns([85,15])
|
|
@@ -153,7 +239,7 @@ def init_session():
|
|
| 153 |
|
| 154 |
def main():
|
| 155 |
init_session()
|
| 156 |
-
st.markdown("<
|
| 157 |
|
| 158 |
|
| 159 |
try:
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
import string
|
| 4 |
from io import StringIO
|
| 5 |
+
import pdb
|
| 6 |
import json
|
| 7 |
+
from twc_embeddings import HFModel,SimCSEModel,SGPTModel
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
MAX_INPUT = 10000
|
| 11 |
|
| 12 |
+
|
| 13 |
+
from transformers import BertTokenizer, BertForMaskedLM
|
| 14 |
|
| 15 |
model_names = [
|
| 16 |
+
|
| 17 |
+
{ "name":"sentence-transformers/all-MiniLM-L6-v2",
|
| 18 |
+
"model":"sentence-transformers/all-MiniLM-L6-v2",
|
| 19 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
| 20 |
+
"orig_author_url":"https://github.com/UKPLab",
|
| 21 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
| 22 |
+
"sota_info": {
|
| 23 |
+
"task":"Over 3.8 million downloads from huggingface",
|
| 24 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
| 25 |
+
},
|
| 26 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
| 27 |
+
"mark":True,
|
| 28 |
+
"class":"HFModel"},
|
| 29 |
+
{ "name":"sentence-transformers/paraphrase-MiniLM-L6-v2",
|
| 30 |
+
"model":"sentence-transformers/paraphrase-MiniLM-L6-v2",
|
| 31 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
| 32 |
+
"orig_author_url":"https://github.com/UKPLab",
|
| 33 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
| 34 |
+
"sota_info": {
|
| 35 |
+
"task":"Over 2.4 million downloads from huggingface",
|
| 36 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
| 37 |
+
},
|
| 38 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
| 39 |
+
"mark":True,
|
| 40 |
+
"class":"HFModel"},
|
| 41 |
+
{ "name":"sentence-transformers/bert-base-nli-mean-tokens",
|
| 42 |
+
"model":"sentence-transformers/bert-base-nli-mean-tokens",
|
| 43 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
| 44 |
+
"orig_author_url":"https://github.com/UKPLab",
|
| 45 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
| 46 |
+
"sota_info": {
|
| 47 |
+
"task":"Over 700,000 downloads from huggingface",
|
| 48 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
| 49 |
+
},
|
| 50 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
| 51 |
+
"mark":True,
|
| 52 |
+
"class":"HFModel"},
|
| 53 |
+
{ "name":"sentence-transformers/all-mpnet-base-v2",
|
| 54 |
+
"model":"sentence-transformers/all-mpnet-base-v2",
|
| 55 |
+
"fork_url":"https://github.com/taskswithcode/sentence_similarity_hf_model",
|
| 56 |
+
"orig_author_url":"https://github.com/UKPLab",
|
| 57 |
+
"orig_author":"Ubiquitous Knowledge Processing Lab",
|
| 58 |
+
"sota_info": {
|
| 59 |
+
"task":"Over 500,000 downloads from huggingface",
|
| 60 |
+
"sota_link":"https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
|
| 61 |
+
},
|
| 62 |
+
"paper_url":"https://arxiv.org/abs/1908.10084",
|
| 63 |
+
"mark":True,
|
| 64 |
+
"class":"HFModel"},
|
| 65 |
+
|
| 66 |
{ "name":"SGPT-125M",
|
| 67 |
"model":"Muennighoff/SGPT-125M-weightedmean-nli-bitfit",
|
| 68 |
+
"fork_url":"https://github.com/taskswithcode/sgpt",
|
| 69 |
+
"orig_author_url":"https://github.com/Muennighoff",
|
| 70 |
+
"orig_author":"Niklas Muennighoff",
|
| 71 |
+
"sota_info": {
|
| 72 |
+
"task":"#1 in multiple information retrieval & search tasks(smaller variant)",
|
| 73 |
+
"sota_link":"https://paperswithcode.com/paper/sgpt-gpt-sentence-embeddings-for-semantic",
|
| 74 |
+
},
|
| 75 |
+
"paper_url":"https://arxiv.org/abs/2202.08904v5",
|
| 76 |
+
"mark":True,
|
| 77 |
+
"class":"SGPTModel"},
|
| 78 |
+
{ "name":"SGPT-1.3B",
|
| 79 |
+
"model": "Muennighoff/SGPT-1.3B-weightedmean-msmarco-specb-bitfit",
|
| 80 |
+
"fork_url":"https://github.com/taskswithcode/sgpt",
|
| 81 |
+
"orig_author_url":"https://github.com/Muennighoff",
|
| 82 |
+
"orig_author":"Niklas Muennighoff",
|
| 83 |
+
"sota_info": {
|
| 84 |
+
"task":"#1 in multiple information retrieval & search tasks(smaller variant)",
|
| 85 |
+
"sota_link":"https://paperswithcode.com/paper/sgpt-gpt-sentence-embeddings-for-semantic",
|
| 86 |
+
},
|
| 87 |
+
"paper_url":"https://arxiv.org/abs/2202.08904v5",
|
| 88 |
+
"mark":True,
|
| 89 |
"class":"SGPTModel"},
|
|
|
|
|
|
|
| 90 |
{ "name":"SGPT-5.8B",
|
| 91 |
"model": "Muennighoff/SGPT-5.8B-weightedmean-msmarco-specb-bitfit" ,
|
| 92 |
"fork_url":"https://github.com/taskswithcode/sgpt",
|
|
|
|
| 100 |
"mark":True,
|
| 101 |
"class":"SGPTModel"},
|
| 102 |
|
| 103 |
+
{ "name":"SIMCSE-large" ,
|
| 104 |
+
"model":"princeton-nlp/sup-simcse-roberta-large",
|
| 105 |
+
"fork_url":"https://github.com/taskswithcode/SimCSE",
|
| 106 |
+
"orig_author_url":"https://github.com/princeton-nlp",
|
| 107 |
+
"orig_author":"Princeton Natural Language Processing",
|
| 108 |
+
"sota_info": {
|
| 109 |
+
"task":"Within top 10 in multiple semantic textual similarity tasks",
|
| 110 |
+
"sota_link":"https://paperswithcode.com/paper/simcse-simple-contrastive-learning-of"
|
| 111 |
+
},
|
| 112 |
+
"paper_url":"https://arxiv.org/abs/2104.08821v4",
|
| 113 |
+
"mark":True,
|
| 114 |
+
"class":"SimCSEModel","sota_link":"https://paperswithcode.com/sota/semantic-textual-similarity-on-sick"},
|
| 115 |
|
| 116 |
+
{ "name":"SIMCSE-base" ,
|
| 117 |
+
"model":"princeton-nlp/sup-simcse-roberta-base",
|
| 118 |
+
"fork_url":"https://github.com/taskswithcode/SimCSE",
|
| 119 |
+
"orig_author_url":"https://github.com/princeton-nlp",
|
| 120 |
+
"orig_author":"Princeton Natural Language Processing",
|
| 121 |
"sota_info": {
|
| 122 |
+
"task":"Within top 10 in multiple semantic textual similarity tasks(smaller variant)",
|
| 123 |
+
"sota_link":"https://paperswithcode.com/paper/simcse-simple-contrastive-learning-of"
|
| 124 |
},
|
| 125 |
+
"paper_url":"https://arxiv.org/abs/2104.08821v4",
|
| 126 |
"mark":True,
|
| 127 |
+
"class":"SimCSEModel","sota_link":"https://paperswithcode.com/sota/semantic-textual-similarity-on-sick"},
|
| 128 |
+
|
| 129 |
|
| 130 |
]
|
| 131 |
|
| 132 |
|
| 133 |
|
| 134 |
+
|
| 135 |
+
|
| 136 |
example_file_names = {
|
| 137 |
"Machine learning terms (30+ phrases)": "tests/small_test.txt",
|
| 138 |
"Customer feedback mixed with noise (50+ sentences)":"tests/larger_test.txt"
|
|
|
|
| 145 |
for node in model_names:
|
| 146 |
options_arr .append(node["name"])
|
| 147 |
if (node["mark"] == True):
|
| 148 |
+
markdown_str += f"<div style=\"font-size:16px; color: #5f5f5f; text-align: left\"> • Model: <a href=\'{node['paper_url']}\' target='_blank'>{node['name']}</a><br/> Code released by: <a href=\'{node['orig_author_url']}\' target='_blank'>{node['orig_author']}</a><br/> Model info: <a href=\'{node['sota_info']['sota_link']}\' target='_blank'>{node['sota_info']['task']}</a><br/><br/></div>"
|
| 149 |
markdown_str += "<div style=\"font-size:12px; color: #9f9f9f; text-align: left\"><b>Note:</b><br/>• Uploaded files are loaded into non-persistent memory for the duration of the computation. They are not saved</div>"
|
| 150 |
limit = "{:,}".format(MAX_INPUT)
|
| 151 |
markdown_str += f"<div style=\"font-size:12px; color: #9f9f9f; text-align: left\">• User uploaded file has a maximum limit of {limit} sentences.</div>"
|
| 152 |
return options_arr,markdown_str
|
| 153 |
|
| 154 |
|
| 155 |
+
st.set_page_config(page_title='TWC - Compare popular/state-of-the-art models for Sentence Similarity task', page_icon="logo.jpg", layout='centered', initial_sidebar_state='auto',
|
| 156 |
menu_items={
|
| 157 |
+
'Get help': "mailto:taskswithcode@gmail.com",
|
| 158 |
+
'Report a Bug': "mailto:taskswithcode@gmail.com",
|
| 159 |
'About': 'This app was created by taskswithcode. http://taskswithcode.com'
|
| 160 |
})
|
| 161 |
col,pad = st.columns([85,15])
|
|
|
|
| 239 |
|
| 240 |
def main():
|
| 241 |
init_session()
|
| 242 |
+
st.markdown("<h5 style='text-align: center;'>Compare popular/state-of-the-art models for Sentence Similarity task</h5>", unsafe_allow_html=True)
|
| 243 |
|
| 244 |
|
| 245 |
try:
|