Upload 10 files
Browse files
- README.md +457 -0
- config.json +27 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- vocab.txt +0 -0
README.md
ADDED
|
@@ -0,0 +1,457 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:92
|
| 9 |
+
- loss:ContrastiveLoss
|
| 10 |
+
base_model: nlpaueb/legal-bert-base-uncased
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: Party B may terminate this Agreement for convenience.
|
| 13 |
+
sentences:
|
| 14 |
+
- Company at its sole discretion may at any time alter or cease providing the Customer
|
| 15 |
+
Service which it has agreed to provide to Client relating to Client Website pursuant
|
| 16 |
+
to this Agreement without any liability to Company.
|
| 17 |
+
- Except for claims arising out of Articles 4.3 and 7.0, or as may be set forth
|
| 18 |
+
in a SOW, neither Party will be liable for any consequential damages, lost profits,
|
| 19 |
+
lost savings, loss of anticipated revenue, or any exemplary, punitive, special
|
| 20 |
+
or indirect damages, even if advised of their possibility.
|
| 21 |
+
- This Agreement may only be terminated by mutual written agreement of both Parties,
|
| 22 |
+
with each party providing thirty (30) days' prior written notice to the other,
|
| 23 |
+
or upon material breach by either party, provided that the breaching party fails
|
| 24 |
+
to cure such breach within thirty (30) days of receiving written notice of the
|
| 25 |
+
breach.
|
| 26 |
+
- source_sentence: Party A shall not compete with Party B.
|
| 27 |
+
sentences:
|
| 28 |
+
- Party A's liability for any and all claims arising under this Agreement, including
|
| 29 |
+
claims under Section 10 or failures to pay under Section 6, shall in no event
|
| 30 |
+
exceed the total fees actually paid by Boxlot to The Globe hereunder during the
|
| 31 |
+
twelve (12) month period preceding the claim.
|
| 32 |
+
- Regardless of the term, Licensee can terminate Agreement with at least one hundred
|
| 33 |
+
eighty (180) days written notice with no further obligation.
|
| 34 |
+
- Throughout the Term, XSPA shall have the explicit right to sell, offer for sale,
|
| 35 |
+
market, or promote any digital meditation or digital sleep products (other than
|
| 36 |
+
the Products) exclusively through online channels in the Territory without Calm's
|
| 37 |
+
consent. Additionally, Calm shall be permitted to sell digital meditation or digital
|
| 38 |
+
sleep products in airport retail locations solely within the metropolitan area
|
| 39 |
+
of City A for a limited period of three (3) months following termination of this
|
| 40 |
+
Agreement, without XSPA's collaboration or prior written consent.
|
| 41 |
+
- source_sentence: Party B may terminate this Agreement for convenience.
|
| 42 |
+
sentences:
|
| 43 |
+
- EXCEPT FOR [**EXCLUDED DAMAGES**], IN NO EVENT SHALL PARTY A'S TOTAL LIABILITY
|
| 44 |
+
TO PARTY B UNDER OR IN CONNECTION WITH THIS AGREEMENT EXCEED THE TOTAL FEES PAID
|
| 45 |
+
BY PARTY B TO PARTY A UNDER THIS AGREEMENT IN THE 12 MONTH PERIOD PRECEDING THE
|
| 46 |
+
CLAIM.
|
| 47 |
+
- Party A's liability for breach of this Agreement, including but not limited to
|
| 48 |
+
Channel Partner's payment obligations, liability for early termination fees or
|
| 49 |
+
charges, breaches of confidentiality by either party, misappropriation of intellectual
|
| 50 |
+
property rights of the other party, and the parties' indemnification obligations
|
| 51 |
+
under this Agreement, is in all cases limited to the total fees paid or payable
|
| 52 |
+
under this Agreement during the 12 months preceding the event giving rise to such
|
| 53 |
+
liability.
|
| 54 |
+
- The term of this Agreement shall commence on the Effective Date and shall continue
|
| 55 |
+
indefinitely unless terminated (a) by mutual written agreement of both Parties,
|
| 56 |
+
with at least 30 days' prior notice to the other Party, or (b) by either Party
|
| 57 |
+
upon material breach by the other Party, provided that the breaching Party fails
|
| 58 |
+
to cure such breach within 30 days of receiving written notice specifying the
|
| 59 |
+
breach. In no event shall either Party terminate this Agreement for convenience
|
| 60 |
+
alone.
|
| 61 |
+
- source_sentence: Party B may terminate this Agreement for convenience.
|
| 62 |
+
sentences:
|
| 63 |
+
- Except as agreed by the Parties in writing or as otherwise stated in the Exhibits,
|
| 64 |
+
neither Party may terminate any Transition Service or Reverse Transition Service
|
| 65 |
+
for convenience. Termination of any Service shall only be permitted (i) upon mutual
|
| 66 |
+
written agreement of both Parties, effective upon 30 days' prior written notice,
|
| 67 |
+
or (ii) for material breach by the other Party following cure periods specified
|
| 68 |
+
herein. With respect to the Services described in Section G1 of Exhibit A, unless
|
| 69 |
+
otherwise indicated therein, those Services may not be terminated independently
|
| 70 |
+
except in accordance with an agreed Migration Plan. Any unamortized costs associated
|
| 71 |
+
with Provider's purchase of any license or other costs incurred specifically for
|
| 72 |
+
the purpose of providing the Services hereunder will be passed through to the
|
| 73 |
+
Terminating Party.
|
| 74 |
+
- Following the Initial Term, either Party may terminate this Agreement without
|
| 75 |
+
cause upon written notice to the other Party of at least 3 months.
|
| 76 |
+
- The provisions of Section 9.1 and Section 9.2 shall fully apply to limit the Licensee's
|
| 77 |
+
indemnification obligations under Section 8.2, even in the case of the Licensee's
|
| 78 |
+
gross negligence or wilful misconduct, and such indemnification shall in all circumstances
|
| 79 |
+
be limited to the total fees paid by Licensee under this Agreement during the
|
| 80 |
+
twelve (12) months preceding the claim.
|
| 81 |
+
- source_sentence: Party A shall not compete with Party B.
|
| 82 |
+
sentences:
|
| 83 |
+
- '["NOTWITHSTANDING ANY OTHER PROVISION OF THIS AGREEMENT TO THE CONTRARY, EXCEPT
|
| 84 |
+
FOR DAMAGES OR CLAIMS ARISING OUT OF (I) A BREACH OF SECTION 13 OF THIS AGREEMENT,
|
| 85 |
+
(II) CUSTOMER LIABILITIES PURSUANT TO, AND SUBJECT TO THE LIMITATIONS SET FORTH
|
| 86 |
+
IN, SECTION 2.5(E), (III) A PARTY''S OR ITS PERSONNEL''S GROSS NEGLIGENCE, FRAUD
|
| 87 |
+
OR WILLFUL MISCONDUCT, (IV) A PARTY''S WILLFUL BREACH OF THIS AGREEMENT, OR (V)
|
| 88 |
+
A PARTY''S INDEMNIFICATION OBLIGATION WITH RESPECT TO THIRD PARTY CLAIMS UNDER
|
| 89 |
+
SECTION 10.1 OR SECTION 10.2, IN NO EVENT SHALL EITHER PARTY BE LIABLE TO THE
|
| 90 |
+
OTHER PARTY OR ANY INDEMNIFIED PARTY HEREUNDER FOR ANY CONSEQUENTIAL DAMAGES,
|
| 91 |
+
SPECIAL DAMAGES, INCIDENTAL OR INDIRECT DAMAGES, LOSS OF REVENUE OR PROFITS, DIMINUTION
|
| 92 |
+
IN VALUE, DAMAGES BASED ON MULTIPLE OF REVENUE OR EARNINGS OR OTHER PERFORMANCE
|
| 93 |
+
METRIC, LOSS OF BUSINESS REPUTATION, PUNITIVE AND EXEMPLARY DAMAGES OR ANY SIMILAR
|
| 94 |
+
DAMAGES ARISING OR RESULTING FROM OR RELATING TO THIS AGREEMENT, WHETHER SUCH
|
| 95 |
+
ACTION IS BASED ON WARRANTY, CONTRACT, TORT (INCLUDING NEGLIGENCE OR STRICT LIABILITY)
|
| 96 |
+
OR OTHERWISE.", ''Except in the event of (i) Third Party Claims subject to a Party\''s
|
| 97 |
+
indemnification obligations pursuant to Section 10.1, (ii) Third Party Claims
|
| 98 |
+
subject to a Party\''s indemnification obligations pursuant to Section 10.2, (iii)
|
| 99 |
+
the gross negligence, fraud or willful misconduct of a Party or its Personnel,
|
| 100 |
+
(iv) a Party\''s willful breach of this Agreement, (v) a breach of Section 13
|
| 101 |
+
or (vi) customer liabilities pursuant to, and subject to the limitations set forth
|
| 102 |
+
in, Section 2.5(e), neither Party\''s aggregate liability to the other Party (or
|
| 103 |
+
its Personnel that are indemnitees under Section 10.1 or Section 10.2, as applicable)
|
| 104 |
+
under this Agreement for the initial twelve (12) month period immediately following
|
| 105 |
+
the Effective Date, and for any twelve (12) month period thereafter during the
|
| 106 |
+
Term, shall exceed, on a cumulative basis, the amount that is one and one half
|
| 107 |
+
(1 1/2) times the aggregate amounts paid or payable pursuant to this Agreement
|
| 108 |
+
in the preceding twelve (12) month period preceding the loss date by Customer
|
| 109 |
+
to Manufacturer but solely with respect to the supply hereunder of Product (or
|
| 110 |
+
Products) for which such corresponding liability arose (the "Affected Products")
|
| 111 |
+
and not any other Products (or if, as of the time the liability arises, this Agreement
|
| 112 |
+
has not been in effect for twelve (12) months, then the amounts paid or payable
|
| 113 |
+
by Customer to Manufacturer hereunder during the period from the Effective Date
|
| 114 |
+
until such time the liability arises, shall be annualized to a full twelve (12)
|
| 115 |
+
months but solely with respect to the supply hereunder of the Affected Product(s)
|
| 116 |
+
and not any other Products).'']'
|
| 117 |
+
- '["EXCEPT FOR LIABILITY ARISING FROM BREACHES OF A PARTY''S CONFIDENTIALITY OBLIGATIONS
|
| 118 |
+
CONTAINED IN THE NON-DISCLOSURE CLAUSE IN SECTION 12.17 OF THE CHINA JV OPERATING
|
| 119 |
+
AGREEMENT, BREACHES OF LICENSE GRANTS CONTAINED HEREIN, AND EXCEPT FOR AMOUNTS
|
| 120 |
+
PAYABLE TO THIRD PARTIES TO FULFILL INDEMNITY OBLIGATIONS DESCRIBED IN ARTICLE
|
| 121 |
+
8, (A) IN NO EVENT SHALL ANY PARTY HAVE ANY LIABILITY TO THE OTHERS, OR TO ANY
|
| 122 |
+
PARTY CLAIMING THROUGH OR UNDER THE OTHER, FOR ANY LOST PROFITS, ANY INDIRECT,
|
| 123 |
+
INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES OF ANY KIND IN ANY WAY ARISING OUT
|
| 124 |
+
OF OR RELATED TO THIS AGREEMENT, HOWEVER CAUSED AND UNDER ANY THEORY OF LIABILITY,
|
| 125 |
+
EVEN IF SUCH PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES; AND (B)
|
| 126 |
+
IN NO EVENT SHALL A PARTY''S CUMULATIVE LIABILITY ARISING OUT OF THIS AGREEMENT
|
| 127 |
+
EXCEED THE AMOUNTS ACTUALLY PAID, PAYABLE, RECEIVED OR RECEIVABLE BY SUCH PARTY
|
| 128 |
+
FOR THE PRODUCTS CONCERNED THEREWITH HEREUNDER PURSUANT TO THIS AGREEMENT DURING
|
| 129 |
+
THE TWELVE (12) MONTHS PRIOR TO THE OCCURRENCE OF THE INITIAL EVENT FOR WHICH
|
| 130 |
+
A PARTY RECOVERS DAMAGES HEREUNDER."]'
|
| 131 |
+
- The Distributor shall not act as the agent or the buying agent, for any person,
|
| 132 |
+
for any goods which are competitive with the Product, **except within the geographic
|
| 133 |
+
area of City Y and for a period not exceeding two years from the effective date
|
| 134 |
+
of this Agreement.**
|
| 135 |
+
pipeline_tag: sentence-similarity
|
| 136 |
+
library_name: sentence-transformers
|
| 137 |
+
---
|
| 138 |
+
|
| 139 |
+
# SentenceTransformer based on nlpaueb/legal-bert-base-uncased
|
| 140 |
+
|
| 141 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [nlpaueb/legal-bert-base-uncased](https://huggingface.co/nlpaueb/legal-bert-base-uncased). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 142 |
+
|
| 143 |
+
## Model Details
|
| 144 |
+
|
| 145 |
+
### Model Description
|
| 146 |
+
- **Model Type:** Sentence Transformer
|
| 147 |
+
- **Base model:** [nlpaueb/legal-bert-base-uncased](https://huggingface.co/nlpaueb/legal-bert-base-uncased) <!-- at revision 15b570cbf88259610b082a167dacc190124f60f6 -->
|
| 148 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 149 |
+
- **Output Dimensionality:** 768 dimensions
|
| 150 |
+
- **Similarity Function:** Cosine Similarity
|
| 151 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 152 |
+
<!-- - **Language:** Unknown -->
|
| 153 |
+
<!-- - **License:** Unknown -->
|
| 154 |
+
|
| 155 |
+
### Model Sources
|
| 156 |
+
|
| 157 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 158 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
|
| 159 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 160 |
+
|
| 161 |
+
### Full Model Architecture
|
| 162 |
+
|
| 163 |
+
```
|
| 164 |
+
SentenceTransformer(
|
| 165 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'BertModel'})
|
| 166 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 167 |
+
)
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## Usage
|
| 171 |
+
|
| 172 |
+
### Direct Usage (Sentence Transformers)
|
| 173 |
+
|
| 174 |
+
First install the Sentence Transformers library:
|
| 175 |
+
|
| 176 |
+
```bash
|
| 177 |
+
pip install -U sentence-transformers
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
Then you can load this model and run inference.
|
| 181 |
+
```python
|
| 182 |
+
from sentence_transformers import SentenceTransformer
|
| 183 |
+
|
| 184 |
+
# Download from the 🤗 Hub (the model id below is a placeholder; replace it with this model's Hub repository id)
|
| 185 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 186 |
+
# Run inference
|
| 187 |
+
sentences = [
|
| 188 |
+
'Party A shall not compete with Party B.',
|
| 189 |
+
'The Distributor shall not act as the agent or the buying agent, for any person, for any goods which are competitive with the Product, **except within the geographic area of City Y and for a period not exceeding two years from the effective date of this Agreement.**',
|
| 190 |
+
'["EXCEPT FOR LIABILITY ARISING FROM BREACHES OF A PARTY\'S CONFIDENTIALITY OBLIGATIONS CONTAINED IN THE NON-DISCLOSURE CLAUSE IN SECTION 12.17 OF THE CHINA JV OPERATING AGREEMENT, BREACHES OF LICENSE GRANTS CONTAINED HEREIN, AND EXCEPT FOR AMOUNTS PAYABLE TO THIRD PARTIES TO FULFILL INDEMNITY OBLIGATIONS DESCRIBED IN ARTICLE 8, (A) IN NO EVENT SHALL ANY PARTY HAVE ANY LIABILITY TO THE OTHERS, OR TO ANY PARTY CLAIMING THROUGH OR UNDER THE OTHER, FOR ANY LOST PROFITS, ANY INDIRECT, INCIDENTAL, SPECIAL OR CONSEQUENTIAL DAMAGES OF ANY KIND IN ANY WAY ARISING OUT OF OR RELATED TO THIS AGREEMENT, HOWEVER CAUSED AND UNDER ANY THEORY OF LIABILITY, EVEN IF SUCH PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES; AND (B) IN NO EVENT SHALL A PARTY\'S CUMULATIVE LIABILITY ARISING OUT OF THIS AGREEMENT EXCEED THE AMOUNTS ACTUALLY PAID, PAYABLE, RECEIVED OR RECEIVABLE BY SUCH PARTY FOR THE PRODUCTS CONCERNED THEREWITH HEREUNDER PURSUANT TO THIS AGREEMENT DURING THE TWELVE (12) MONTHS PRIOR TO THE OCCURRENCE OF THE INITIAL EVENT FOR WHICH A PARTY RECOVERS DAMAGES HEREUNDER."]',
|
| 191 |
+
]
|
| 192 |
+
embeddings = model.encode(sentences)
|
| 193 |
+
print(embeddings.shape)
|
| 194 |
+
# (3, 768)
|
| 195 |
+
|
| 196 |
+
# Get the similarity scores for the embeddings
|
| 197 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 198 |
+
print(similarities)
|
| 199 |
+
# tensor([[1.0000, 0.7770, 0.7303],
|
| 200 |
+
# [0.7770, 1.0000, 0.9041],
|
| 201 |
+
# [0.7303, 0.9041, 1.0000]])
|
| 202 |
+
```
|
| 203 |
+
|
| 204 |
+
<!--
|
| 205 |
+
### Direct Usage (Transformers)
|
| 206 |
+
|
| 207 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 208 |
+
|
| 209 |
+
</details>
|
| 210 |
+
-->
|
| 211 |
+
|
| 212 |
+
<!--
|
| 213 |
+
### Downstream Usage (Sentence Transformers)
|
| 214 |
+
|
| 215 |
+
You can finetune this model on your own dataset.
|
| 216 |
+
|
| 217 |
+
<details><summary>Click to expand</summary>
|
| 218 |
+
|
| 219 |
+
</details>
|
| 220 |
+
-->
|
| 221 |
+
|
| 222 |
+
<!--
|
| 223 |
+
### Out-of-Scope Use
|
| 224 |
+
|
| 225 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 226 |
+
-->
|
| 227 |
+
|
| 228 |
+
<!--
|
| 229 |
+
## Bias, Risks and Limitations
|
| 230 |
+
|
| 231 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 232 |
+
-->
|
| 233 |
+
|
| 234 |
+
<!--
|
| 235 |
+
### Recommendations
|
| 236 |
+
|
| 237 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 238 |
+
-->
|
| 239 |
+
|
| 240 |
+
## Training Details
|
| 241 |
+
|
| 242 |
+
### Training Dataset
|
| 243 |
+
|
| 244 |
+
#### Unnamed Dataset
|
| 245 |
+
|
| 246 |
+
* Size: 92 training samples
|
| 247 |
+
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
|
| 248 |
+
* Approximate statistics based on the first 92 samples:
|
| 249 |
+
| | sentence_0 | sentence_1 | label |
|
| 250 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:--------------------------------------------------------------|
|
| 251 |
+
| type | string | string | float |
|
| 252 |
+
| details | <ul><li>min: 11 tokens</li><li>mean: 13.09 tokens</li><li>max: 17 tokens</li></ul> | <ul><li>min: 22 tokens</li><li>mean: 99.57 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.5</li><li>max: 1.0</li></ul> |
|
| 253 |
+
* Samples:
|
| 254 |
+
| sentence_0 | sentence_1 | label |
|
| 255 |
+
|:---------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
|
| 256 |
+
| <code>Party A's liability for breach of this Agreement is uncapped.</code> | <code>["NOTWITHSTANDING ANY OTHER PROVISION OF THIS AGREEMENT TO THE CONTRARY, EXCEPT FOR DAMAGES OR CLAIMS ARISING OUT OF (I) A BREACH OF SECTION 13 OF THIS AGREEMENT, (II) CUSTOMER LIABILITIES PURSUANT TO, AND SUBJECT TO THE LIMITATIONS SET FORTH IN, SECTION 2.5(E), (III) A PARTY'S OR ITS PERSONNEL'S GROSS NEGLIGENCE, FRAUD OR WILLFUL MISCONDUCT, (IV) A PARTY'S WILLFUL BREACH OF THIS AGREEMENT, OR (V) A PARTY'S INDEMNIFICATION OBLIGATION WITH RESPECT TO THIRD PARTY CLAIMS UNDER SECTION 10.1 OR SECTION 10.2, IN NO EVENT SHALL EITHER PARTY BE LIABLE TO THE OTHER PARTY OR ANY INDEMNIFIED PARTY HEREUNDER FOR ANY CONSEQUENTIAL DAMAGES, SPECIAL DAMAGES, INCIDENTAL OR INDIRECT DAMAGES, LOSS OF REVENUE OR PROFITS, DIMINUTION IN VALUE, DAMAGES BASED ON MULTIPLE OF REVENUE OR EARNINGS OR OTHER PERFORMANCE METRIC, LOSS OF BUSINESS REPUTATION, PUNITIVE AND EXEMPLARY DAMAGES OR ANY SIMILAR DAMAGES ARISING OR RESULTING FROM OR RELATING TO THIS AGREEMENT, WHETHER SUCH ACTION IS BASED ON WARRANTY, CONTRAC...</code> | <code>1.0</code> |
|
| 257 |
+
| <code>Party B may terminate this Agreement for convenience.</code> | <code>Party B may terminate this Agreement only upon thirty (30) days’ prior written notice to Party A and with valid cause, or by mutual written agreement of both parties. In all events, any termination shall be no earlier than six (6) months after the Effective Date of this Agreement.</code> | <code>0.0</code> |
|
| 258 |
+
| <code>Party B may terminate this Agreement for convenience.</code> | <code>["Either Party may terminate this Agreement by giving the other Party thirty (30) days' prior written notice."]</code> | <code>1.0</code> |
|
| 259 |
+
* Loss: [<code>ContrastiveLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastiveloss) with these parameters:
|
| 260 |
+
```json
|
| 261 |
+
{
|
| 262 |
+
"distance_metric": "SiameseDistanceMetric.COSINE_DISTANCE",
|
| 263 |
+
"margin": 0.5,
|
| 264 |
+
"size_average": true
|
| 265 |
+
}
|
| 266 |
+
```
|
| 267 |
+
|
| 268 |
+
### Training Hyperparameters
|
| 269 |
+
#### Non-Default Hyperparameters
|
| 270 |
+
|
| 271 |
+
- `per_device_train_batch_size`: 16
|
| 272 |
+
- `per_device_eval_batch_size`: 16
|
| 273 |
+
- `num_train_epochs`: 4
|
| 274 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 275 |
+
|
| 276 |
+
#### All Hyperparameters
|
| 277 |
+
<details><summary>Click to expand</summary>
|
| 278 |
+
|
| 279 |
+
- `overwrite_output_dir`: False
|
| 280 |
+
- `do_predict`: False
|
| 281 |
+
- `eval_strategy`: no
|
| 282 |
+
- `prediction_loss_only`: True
|
| 283 |
+
- `per_device_train_batch_size`: 16
|
| 284 |
+
- `per_device_eval_batch_size`: 16
|
| 285 |
+
- `per_gpu_train_batch_size`: None
|
| 286 |
+
- `per_gpu_eval_batch_size`: None
|
| 287 |
+
- `gradient_accumulation_steps`: 1
|
| 288 |
+
- `eval_accumulation_steps`: None
|
| 289 |
+
- `torch_empty_cache_steps`: None
|
| 290 |
+
- `learning_rate`: 5e-05
|
| 291 |
+
- `weight_decay`: 0.0
|
| 292 |
+
- `adam_beta1`: 0.9
|
| 293 |
+
- `adam_beta2`: 0.999
|
| 294 |
+
- `adam_epsilon`: 1e-08
|
| 295 |
+
- `max_grad_norm`: 1
|
| 296 |
+
- `num_train_epochs`: 4
|
| 297 |
+
- `max_steps`: -1
|
| 298 |
+
- `lr_scheduler_type`: linear
|
| 299 |
+
- `lr_scheduler_kwargs`: {}
|
| 300 |
+
- `warmup_ratio`: 0.0
|
| 301 |
+
- `warmup_steps`: 0
|
| 302 |
+
- `log_level`: passive
|
| 303 |
+
- `log_level_replica`: warning
|
| 304 |
+
- `log_on_each_node`: True
|
| 305 |
+
- `logging_nan_inf_filter`: True
|
| 306 |
+
- `save_safetensors`: True
|
| 307 |
+
- `save_on_each_node`: False
|
| 308 |
+
- `save_only_model`: False
|
| 309 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 310 |
+
- `no_cuda`: False
|
| 311 |
+
- `use_cpu`: False
|
| 312 |
+
- `use_mps_device`: False
|
| 313 |
+
- `seed`: 42
|
| 314 |
+
- `data_seed`: None
|
| 315 |
+
- `jit_mode_eval`: False
|
| 316 |
+
- `bf16`: False
|
| 317 |
+
- `fp16`: False
|
| 318 |
+
- `fp16_opt_level`: O1
|
| 319 |
+
- `half_precision_backend`: auto
|
| 320 |
+
- `bf16_full_eval`: False
|
| 321 |
+
- `fp16_full_eval`: False
|
| 322 |
+
- `tf32`: None
|
| 323 |
+
- `local_rank`: 0
|
| 324 |
+
- `ddp_backend`: None
|
| 325 |
+
- `tpu_num_cores`: None
|
| 326 |
+
- `tpu_metrics_debug`: False
|
| 327 |
+
- `debug`: []
|
| 328 |
+
- `dataloader_drop_last`: False
|
| 329 |
+
- `dataloader_num_workers`: 0
|
| 330 |
+
- `dataloader_prefetch_factor`: None
|
| 331 |
+
- `past_index`: -1
|
| 332 |
+
- `disable_tqdm`: False
|
| 333 |
+
- `remove_unused_columns`: True
|
| 334 |
+
- `label_names`: None
|
| 335 |
+
- `load_best_model_at_end`: False
|
| 336 |
+
- `ignore_data_skip`: False
|
| 337 |
+
- `fsdp`: []
|
| 338 |
+
- `fsdp_min_num_params`: 0
|
| 339 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 340 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 341 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 342 |
+
- `parallelism_config`: None
|
| 343 |
+
- `deepspeed`: None
|
| 344 |
+
- `label_smoothing_factor`: 0.0
|
| 345 |
+
- `optim`: adamw_torch_fused
|
| 346 |
+
- `optim_args`: None
|
| 347 |
+
- `adafactor`: False
|
| 348 |
+
- `group_by_length`: False
|
| 349 |
+
- `length_column_name`: length
|
| 350 |
+
- `project`: huggingface
|
| 351 |
+
- `trackio_space_id`: trackio
|
| 352 |
+
- `ddp_find_unused_parameters`: None
|
| 353 |
+
- `ddp_bucket_cap_mb`: None
|
| 354 |
+
- `ddp_broadcast_buffers`: False
|
| 355 |
+
- `dataloader_pin_memory`: True
|
| 356 |
+
- `dataloader_persistent_workers`: False
|
| 357 |
+
- `skip_memory_metrics`: True
|
| 358 |
+
- `use_legacy_prediction_loop`: False
|
| 359 |
+
- `push_to_hub`: False
|
| 360 |
+
- `resume_from_checkpoint`: None
|
| 361 |
+
- `hub_model_id`: None
|
| 362 |
+
- `hub_strategy`: every_save
|
| 363 |
+
- `hub_private_repo`: None
|
| 364 |
+
- `hub_always_push`: False
|
| 365 |
+
- `hub_revision`: None
|
| 366 |
+
- `gradient_checkpointing`: False
|
| 367 |
+
- `gradient_checkpointing_kwargs`: None
|
| 368 |
+
- `include_inputs_for_metrics`: False
|
| 369 |
+
- `include_for_metrics`: []
|
| 370 |
+
- `eval_do_concat_batches`: True
|
| 371 |
+
- `fp16_backend`: auto
|
| 372 |
+
- `push_to_hub_model_id`: None
|
| 373 |
+
- `push_to_hub_organization`: None
|
| 374 |
+
- `mp_parameters`:
|
| 375 |
+
- `auto_find_batch_size`: False
|
| 376 |
+
- `full_determinism`: False
|
| 377 |
+
- `torchdynamo`: None
|
| 378 |
+
- `ray_scope`: last
|
| 379 |
+
- `ddp_timeout`: 1800
|
| 380 |
+
- `torch_compile`: False
|
| 381 |
+
- `torch_compile_backend`: None
|
| 382 |
+
- `torch_compile_mode`: None
|
| 383 |
+
- `include_tokens_per_second`: False
|
| 384 |
+
- `include_num_input_tokens_seen`: no
|
| 385 |
+
- `neftune_noise_alpha`: None
|
| 386 |
+
- `optim_target_modules`: None
|
| 387 |
+
- `batch_eval_metrics`: False
|
| 388 |
+
- `eval_on_start`: False
|
| 389 |
+
- `use_liger_kernel`: False
|
| 390 |
+
- `liger_kernel_config`: None
|
| 391 |
+
- `eval_use_gather_object`: False
|
| 392 |
+
- `average_tokens_across_devices`: True
|
| 393 |
+
- `prompts`: None
|
| 394 |
+
- `batch_sampler`: batch_sampler
|
| 395 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 396 |
+
- `router_mapping`: {}
|
| 397 |
+
- `learning_rate_mapping`: {}
|
| 398 |
+
|
| 399 |
+
</details>
|
| 400 |
+
|
| 401 |
+
### Framework Versions
|
| 402 |
+
- Python: 3.12.12
|
| 403 |
+
- Sentence Transformers: 5.1.2
|
| 404 |
+
- Transformers: 4.57.3
|
| 405 |
+
- PyTorch: 2.9.0+cu126
|
| 406 |
+
- Accelerate: 1.12.0
|
| 407 |
+
- Datasets: 4.0.0
|
| 408 |
+
- Tokenizers: 0.22.1
|
| 409 |
+
|
| 410 |
+
## Citation
|
| 411 |
+
|
| 412 |
+
### BibTeX
|
| 413 |
+
|
| 414 |
+
#### Sentence Transformers
|
| 415 |
+
```bibtex
|
| 416 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 417 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 418 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 419 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 420 |
+
month = "11",
|
| 421 |
+
year = "2019",
|
| 422 |
+
publisher = "Association for Computational Linguistics",
|
| 423 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 424 |
+
}
|
| 425 |
+
```
|
| 426 |
+
|
| 427 |
+
#### ContrastiveLoss
|
| 428 |
+
```bibtex
|
| 429 |
+
@inproceedings{hadsell2006dimensionality,
|
| 430 |
+
author={Hadsell, R. and Chopra, S. and LeCun, Y.},
|
| 431 |
+
booktitle={2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'06)},
|
| 432 |
+
title={Dimensionality Reduction by Learning an Invariant Mapping},
|
| 433 |
+
year={2006},
|
| 434 |
+
volume={2},
|
| 435 |
+
number={},
|
| 436 |
+
pages={1735-1742},
|
| 437 |
+
doi={10.1109/CVPR.2006.100}
|
| 438 |
+
}
|
| 439 |
+
```
|
| 440 |
+
|
| 441 |
+
<!--
|
| 442 |
+
## Glossary
|
| 443 |
+
|
| 444 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 445 |
+
-->
|
| 446 |
+
|
| 447 |
+
<!--
|
| 448 |
+
## Model Card Authors
|
| 449 |
+
|
| 450 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 451 |
+
-->
|
| 452 |
+
|
| 453 |
+
<!--
|
| 454 |
+
## Model Card Contact
|
| 455 |
+
|
| 456 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 457 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"bos_token_id": 0,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"dtype": "float32",
|
| 9 |
+
"eos_token_ids": 0,
|
| 10 |
+
"hidden_act": "gelu",
|
| 11 |
+
"hidden_dropout_prob": 0.1,
|
| 12 |
+
"hidden_size": 768,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 3072,
|
| 15 |
+
"layer_norm_eps": 1e-12,
|
| 16 |
+
"max_position_embeddings": 512,
|
| 17 |
+
"model_type": "bert",
|
| 18 |
+
"num_attention_heads": 12,
|
| 19 |
+
"num_hidden_layers": 12,
|
| 20 |
+
"output_past": true,
|
| 21 |
+
"pad_token_id": 0,
|
| 22 |
+
"position_embedding_type": "absolute",
|
| 23 |
+
"transformers_version": "4.57.3",
|
| 24 |
+
"type_vocab_size": 2,
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 30522
|
| 27 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "SentenceTransformer",
|
| 3 |
+
"__version__": {
|
| 4 |
+
"sentence_transformers": "5.1.2",
|
| 5 |
+
"transformers": "4.57.3",
|
| 6 |
+
"pytorch": "2.9.0+cu126"
|
| 7 |
+
},
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2057f247e5f9d7952a7f9e81d149018ad20e37c5cc902ddb88a623567ba5a21f
|
| 3 |
+
size 437951328
|
modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": true,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": true,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "[MASK]",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"never_split": null,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"strip_accents": null,
|
| 55 |
+
"tokenize_chinese_chars": true,
|
| 56 |
+
"tokenizer_class": "BertTokenizer",
|
| 57 |
+
"unk_token": "[UNK]"
|
| 58 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|