Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +565 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 384,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- generated_from_trainer
|
| 7 |
+
- dataset_size:939
|
| 8 |
+
- loss:CosineSimilarityLoss
|
| 9 |
+
widget:
|
| 10 |
+
- source_sentence: Diazepam was found to be the most surface active substance in aqueous
|
| 11 |
+
solutions, chlorpromazine was less active and the lowest activity was found for
|
| 12 |
+
haloperidol. The surface activity in blood was similar. The effect of pH on the
|
| 13 |
+
surface activity of chlorpromazine solutions was observed.
|
| 14 |
+
sentences:
|
| 15 |
+
- The range of the microbial contamination of 1 g of dust in minimum-morbidity pig
|
| 16 |
+
rearing houses is from 2.9 . 10(7) to 7.6 . 10(9) in the microbes growing on meat-peptone
|
| 17 |
+
agar (MPA), from 4.6 . 10(7) to 4.6 . 10(8) in moulds growing on the Czapek-Dox
|
| 18 |
+
agar, from 1.4 . 10(2) to 7.0 . 10(4) in the total number of microbes growing
|
| 19 |
+
on End's agar, and from 3.1 . 10(2) to 5.7 . 10(3) in the typically lactose-positive
|
| 20 |
+
microbes growing on End's agar. The numbers of microbes in 1 g of dust in the
|
| 21 |
+
conventional pig houses are close to the numbers found in pig rearing houses with
|
| 22 |
+
minimum morbidity. However, in the moulds on the Czapek-Dox agar, the range of
|
| 23 |
+
the microbial contamination of dust in the minimum-morbidity pig houses was much
|
| 24 |
+
higher. The highest level of the contamination of dust with microbes was found
|
| 25 |
+
on all types of media from houses with continual operation. It was revealed by
|
| 26 |
+
the control of the effectiveness of disinfection in the minimum-morbidity piglet
|
| 27 |
+
rearing section, operated on the all-in all-out basis, that in a half of the cases
|
| 28 |
+
studied the disinfection had low effectiveness. The total number of microbes per
|
| 29 |
+
1 sq. cm was lower than required on the basis of research published in literature,
|
| 30 |
+
but the occurrence rate of positive smears with lactose-positive microbes after
|
| 31 |
+
disinfection was higher in percentual expression.
|
| 32 |
+
- Urinary arylsulphatases (E.C.3.1.6.1) A and B were increased in male rats fasted
|
| 33 |
+
for 24 hours. Excretion of non dialysable protein nitrogen decreased whereas creatinine
|
| 34 |
+
excretion increased. On refeeding diet arylsulphatase A activity was restored
|
| 35 |
+
to normal whereas arylsulphatase B was not normalised. A single oral supplementation
|
| 36 |
+
of vitamin A acetate (20 000 IU) to rats fasted for 24 hours resulted in a significant
|
| 37 |
+
reduction of both arylsulphatase A and B even though no further reduction of protein
|
| 38 |
+
nitrogen excretion was evident. In vitamin A deficient male rats significant reduction
|
| 39 |
+
in urinary excretion of both arylsulphatases A and B occurred. In a smaller number
|
| 40 |
+
of female rats depression of only arylsulphatase A was observed. This effect of
|
| 41 |
+
vitamin A deficiency leading to reduced urinary arylsulphatase activity was evident
|
| 42 |
+
even at the "weight plateau" stage when no reduction in food intake or growth
|
| 43 |
+
had occurred. These results suggest a possible direct or indirect role for vitamin
|
| 44 |
+
A on urinary excretion pattern of arylsulphatases presumably released from lysosomes
|
| 45 |
+
of tissues.
|
| 46 |
+
- 1. Among the several enzyme activities in rabbit liver cytosol able to dehydrogenate
|
| 47 |
+
1-indanol, only the main activity was not separable from 3-hydroxyhexobarbital
|
| 48 |
+
dehydrogenase during purification including polyacrylamide gel disc electrophoresis.
|
| 49 |
+
2. Results of mixed substrate method indicated that the same enzyme catalyses
|
| 50 |
+
the dehydrogenation of 1-indanol and 3-hydroxyhexobarbital. The ratio between
|
| 51 |
+
the two dehydrogenation activities was almost constant as the enzyme underwent
|
| 52 |
+
thermal inactivation. The Ki values of p-chloromercuribenzoate, the Km values
|
| 53 |
+
for NAD+, and the Km values for NADP+ were very similar for the two dehydrogenations.
|
| 54 |
+
These results lead to the conclusion that the same enzyme catalyses the dehydrogenation
|
| 55 |
+
of 3-hydroxyhexobarbital and 1-indanol. 3. 1-Tetralol, 1-acenaphthenol, 9-fluorenol,
|
| 56 |
+
thiochroman-4-ol and 4-chromanol also served as substrate of the enzyme, but 2-indanol,
|
| 57 |
+
2-tetralol, and trans- and cis-indan-1,2-diol were not oxidized. 4. Reversibility
|
| 58 |
+
of the reaction was also confirmed using 1-indanone as substrate.
|
| 59 |
+
- source_sentence: Radioimmunoassays were developed for the main plasma and urinary
|
| 60 |
+
metabolites of PGF2alpha in the human, viz., 15-keto-13,14-dihydro-PGF2alpha and
|
| 61 |
+
5alpha, 7alpha-dihydroxy-11-ketotetranorprosta-1,16-dioic acid. The methods allowed
|
| 62 |
+
assay of unextracted samples and had a sensitivity of 5 pg and 2 pg, respectively.
|
| 63 |
+
The assays were evaluated by comparison with mass spectrometric data. Normal levels
|
| 64 |
+
of these metabolites were estimated in a number of subjects. Levels in plasma
|
| 65 |
+
and urine of the metabolites were followed after injection or during infusion
|
| 66 |
+
of PGF2alpha and during administration of various antiinflammatory drugs.
|
| 67 |
+
sentences:
|
| 68 |
+
- Bacterium Arthrobacter GJM-1 known in the literature as a good producer of alpha-mannanase
|
| 69 |
+
was found to accumulate in the culture fluid lytic activities against viable yeast
|
| 70 |
+
cells during growth on isolated cell walls or beta-glucan fractions of yeast.
|
| 71 |
+
The accumulation of the lytic activities showed an inducible character. The lytic
|
| 72 |
+
system produced in the medium containing baker's yeast cell walls was capable
|
| 73 |
+
of complete solubilization of cell walls in vitro. The system lysed viable cells
|
| 74 |
+
of a number of yeast species and induced their conversion to protoplasts in an
|
| 75 |
+
osmotically stabilized medium. The lytic system showed different pH and temperature
|
| 76 |
+
optima when viable cells or isolated cell walls were used as substrates. The pH
|
| 77 |
+
optimum of the lysis of isolated cell walls was identical with pH optimum of beta-glucanase
|
| 78 |
+
activities in the crude system. The results pointed out that in the lysis of intact
|
| 79 |
+
cells, in addition to beta-glucanases, some other factor is involved. Substantial
|
| 80 |
+
differences in the nature of the outer and the inner surface of cell walls of
|
| 81 |
+
Saccharomyces cerevisiae were confirmed in this paper based on the different
|
| 82 |
+
susceptibility to lysis of the cell walls in vivo and in vitro.
|
| 83 |
+
- The need for sophisticated component therapy has resulted in improved techniques
|
| 84 |
+
for obtaining concentrates of platelets and granulocytes. The use of single donors
|
| 85 |
+
as a source for these products is advisable to avoid multiple sensitizations.
|
| 86 |
+
Obtaining concentrated granulocytes represents a problem because of the difficulty
|
| 87 |
+
in separating granulocytes from red blood cells by differential centrifugation
|
| 88 |
+
or sedimentation since the specific gravities are similar. Hydroxyethyl starch
|
| 89 |
+
(HES) makes the separation more effective. A solution made of 250 ml of 6 per
|
| 90 |
+
cent HES, 250 ml of distilled water, and 15 g of sodium citrate in 30 ml distilled
|
| 91 |
+
water provided a satisfactory anticoagulant solution for this purpose. The granulocytes
|
| 92 |
+
collected averaged 49 per cent of the total available in the processed blood;
|
| 93 |
+
the platelets averages 82 per cent. A satisfactory yield could thus be obtained
|
| 94 |
+
from a single donor, and this could be repeated several times in a month. The
|
| 95 |
+
value of ABO, Rh, and HL-A compatibility between donor and recipient probably
|
| 96 |
+
increases the viability and safety of this procedure. The Haemonetic No. 30 Cell
|
| 97 |
+
Separator provided an easy and rapid method for this procedure.
|
| 98 |
+
- The number of lymphocytes forming spontaneous rosettes with sheep erythrocytes,
|
| 99 |
+
a property of thymus-dependent (T) cells, and the number of lymphocytes bearing
|
| 100 |
+
surface immunoglobulins, a characteristic feature of bone marrow-dependent (B)
|
| 101 |
+
cells, were determined in the peripheral blood of normals and of patients with
|
| 102 |
+
chronic lymphocytic leukemia (CLL) and Hodgkin's disease. As compared with normal
|
| 103 |
+
individuals CLL patients had an increased percentage of lymphocytes with membrane-bound
|
| 104 |
+
immunoglobulins, whereas the proportion of rosette-forming lymphocytes was reduced. In
|
| 105 |
+
Hodgkin's disease either normal, diminished, or increased B cell values were obtained;
|
| 106 |
+
the percentage of T cells was decreased or within the lower range of normals. Lymphocyte
|
| 107 |
+
transformation by various mitogenic agents in vitro may be regarded as a model
|
| 108 |
+
of lymphocyte reactivity during immunologic processes in vivo. In order to study
|
| 109 |
+
the functional capacity of lymphocytes in CLL and Hodgkin's disease in comparison
|
| 110 |
+
with normal cells, purified peripheral blood lymphocytes from normals and patients
|
| 111 |
+
with these diseases were incubated in vitro with phytohemagglutinin (PHA) and
|
| 112 |
+
pokeweed mitogen (PWM) over 7 to 11 days. DNA synthesis was determined by incorporation
|
| 113 |
+
of 3-H-thymidine. The cyto-architectural features of the cells before and during
|
| 114 |
+
incubation with these phytomitogens were studied by electron microscopy. Planimetric
|
| 115 |
+
measurements were performed on micrographs of comparable cell sections (through
|
| 116 |
+
nucleus and Golgi zone) for the determination of cell, nuclear, cytoplasmic,
|
| 117 |
+
and mitochondrial area. Furthermore, the number of mitochondria and of membrane-bounded
|
| 118 |
+
acid phosphatase-positive lysosome-like organelles was determined in comparable
|
| 119 |
+
sections of unstimulated and mitogen transformed lymphocytes.
|
| 120 |
+
- source_sentence: Depressive illness may affect the patient's response to dental
|
| 121 |
+
care. It has been implicated both as a causal factor and a sequela in facial pain
|
| 122 |
+
syndromes. Depression is treated with various medications that may influence function
|
| 123 |
+
and health of the oral cavity and that may adversely interact with drugs used
|
| 124 |
+
to control pain and anxiety.
|
| 125 |
+
sentences:
|
| 126 |
+
- The presence of cones in potto's retina has been proved beyond doubt although
|
| 127 |
+
they are very restricted in number (1 cone for 300 rods). Morphologically, speaking
|
| 128 |
+
there is no point in calling these cones "rudimentary" except for their slender
|
| 129 |
+
outer segment. There are red sensitive elements in that retina at wavelengths
|
| 130 |
+
beyond the spectral sensitivity of visual purple and it is tempting to assume
|
| 131 |
+
that these elements are cones. The ERG evoked from these elements by red light
|
| 132 |
+
differs from that in response to white and blue light. They dark-adapt faster
|
| 133 |
+
than the receptors sensitive to blue and white flashes. However in some of their
|
| 134 |
+
properties, for example fusion frequency, these cones behave like rods in other
|
| 135 |
+
species. As these few cones seem to activate the bipolar cells nearly as effectively
|
| 136 |
+
as the numerous rods, it is suggested that these cones may be responsible for
|
| 137 |
+
day vision in the potto.
|
| 138 |
+
- Solubilized antigen was prepared from P1798 lymphoma cells by sonication, 3 M
|
| 139 |
+
KCl extraction, or isolated from the ascites fluid of syngeneic tumor-bearing
|
| 140 |
+
BALB/c mice. Antigen was detected and quantitated by its ability to block activity
|
| 141 |
+
of anti-P1798 serum raised in syngeneic mice, as assayed by cytotoxic and indirect
|
| 142 |
+
immunofluorescence tests. It was established that the reaction was immunologically
|
| 143 |
+
specific as the P1798 antigen did not inhibit the binding to L1210 lymphoma cells
|
| 144 |
+
of antisera raised against L1210 in syngeneic DBA/2 or allogeneic BALB/c mice.
|
| 145 |
+
Vaccination of BALB/c mice with different subcellular fractions of sonicated antigen
|
| 146 |
+
or with ascites fluid resulted in protection against a live P1798 challenge with
|
| 147 |
+
results comparable to those obtained using iodoacetamide-modified tumor cells.
|
| 148 |
+
Solubilized antigen prepared by each of the three methods eluted from a Bio-Gel
|
| 149 |
+
A5m agarose column exclusively in an early peak that had a molecular weight estimated
|
| 150 |
+
to be greater than 2 X 10(6). This column-fractionated antigen was shown to cross-react
|
| 151 |
+
with antiserum raised against Thy-1.2 antigen, which is present on P1798 cells.
|
| 152 |
+
The purified P1798 antigen sedimented at 200,000 g and was shown to protect syngeneic
|
| 153 |
+
mice in immunoprophylactic tests.
|
| 154 |
+
- The purpose of this study was to identify pitfalls in the Gram staining technic
|
| 155 |
+
that limit its diagnostic value. In our clinical experience, gram-positive organisms
|
| 156 |
+
were often decolorized too easily. Factors have been identified that alter the
|
| 157 |
+
susceptibility of gram-positive organisms to decolorization in the Gram staining
|
| 158 |
+
technic. The age of the bacterial culture, the preparation of the smear, the fixation
|
| 159 |
+
technic, and the mordant have an important influence on the ease with which gram-positive
|
| 160 |
+
organisms are decolorized. On the basis of these studies, a more reliable and
|
| 161 |
+
reproducible Gram staining technic has been developed for the diagnosis of surgical
|
| 162 |
+
infections.
|
| 163 |
+
- source_sentence: The cellular immune system was tested in nine patients with Whipples'
|
| 164 |
+
disease. Three patients had active disease, and six had been in remission for
|
| 165 |
+
up to 10 years. Intradermal delayed hypersensitivity reactions to candidin, trichophytin,
|
| 166 |
+
tuberculin and varidase, T-cell counts as determined by E-rosettes, allogeneic
|
| 167 |
+
stimulation of lymphocytes in the mixed lymphocyte culture, and mitogenic activation
|
| 168 |
+
of lymphocytes by concanavalin A, phytohaemagglutinin and by pokeweed mitogen,
|
| 169 |
+
were tested in the patients and compared with control subjects. HLA typing was
|
| 170 |
+
performed in all patients. The reaction to tuberculin and varidase, the T-cell
|
| 171 |
+
counts and the activation of lymphocytes by concanavalin A were significantly
|
| 172 |
+
reduced in patients with active disease and in patients during remission. The
|
| 173 |
+
reaction to candidin and trichophytin was poor even in the controls. The mean
|
| 174 |
+
results of the mixed lymphocyte culture, phytohaemagglutinin, and pokeweed mitogen
|
| 175 |
+
activation tests were not significantly different from the controls. In patients
|
| 176 |
+
with active disease the mixed lymphocyte culture reaction and the T-cell counts
|
| 177 |
+
were less than in patients in remission. The results suggest a persistent defect
|
| 178 |
+
of T-cells in patients with Whipple's disease, a defect that is more severe in
|
| 179 |
+
patients with active disease. The finding of HLA B27 in four of the nine patients
|
| 180 |
+
supports the hypothesis of primary rather than secondary impairment of the cellular
|
| 181 |
+
immune system in Whipple's disease.
|
| 182 |
+
sentences:
|
| 183 |
+
- 'Rabbit liver purine nucleoside phosphorylase (purine nucleoside: orthophosphate
|
| 184 |
+
ribosyltransferase EC 2.4.2.1.) was purified to homogeneity by column chromatography
|
| 185 |
+
and ammonium sulfate fractionation. Homogeneity was established by disc gel electrophoresis
|
| 186 |
+
in presence and absence of sodium dodecyl sulfate, and isoelectric focusing. Molecular
|
| 187 |
+
weights of 46,000 and 39,000 were determined, respectively, by gel filtration
|
| 188 |
+
and by sodium dodecyl sulfate-polyacrylamide disc gel electrophoresis. Product
|
| 189 |
+
inhibition was observed with guanine and hypoxanthine as strong competitive inhibitors
|
| 190 |
+
for the enzymatic phosphorolysis of guanosine. Respective Kis calculated were
|
| 191 |
+
1.25 x 10(-5) M for guanine and 2.5 x 10(-5) M for hypoxanthine. Ribose 1-phosphate,
|
| 192 |
+
another product of the reaction, gave noncompetitive inhibition with guanosine
|
| 193 |
+
as variable substrate, and an inhibition constant of 3.61 x 10(-4) M was calculated.
|
| 194 |
+
The protection of essential --SH groups on the enzyme, by 2-mercaptoethanol or
|
| 195 |
+
dithiothreitol, was necessary for the maintenance of enzyme activity. Noncompetitive
|
| 196 |
+
inhibition was observed for p-chloromercuribenzoate with an inhibition constant
|
| 197 |
+
of 5.68 x 10(-6)M. Complete reversal of this inhibition by an excess of 2-mercaptoethanol
|
| 198 |
+
or dithiothreitol was demonstrated. In the presence of methylene blue, the enzyme
|
| 199 |
+
showed a high sensitivity to photooxidation and a dependence of photoinactivation
|
| 200 |
+
on pH, strongly implicating histidine as the susceptible group at the active site
|
| 201 |
+
of the enzyme. The pKa values determined for ionizable groups of the active site
|
| 202 |
+
of the enzyme were near pH 5.5 and pH 8.5. The chemical and kinetic evidences suggest
|
| 203 |
+
that histidine and cysteine may be essential for catalysis. Inorganic orthophosphate
|
| 204 |
+
(Km 1.54 x 10(-2) M) was an obligatory anion requirement, and arsenate substituted
|
| 205 |
+
for phosphate with comparable results. Guanosine (Km 5.00 x 10(-5) M), deoxyguanosine
|
| 206 |
+
(Km 1.00 x 10(-4)M) and inosine (Km 1.33 x 10(-4)M), were substrates for enzymatic
|
| 207 |
+
phosphorolysis. Xanthosine was an extremely poor substrate, and adenosine was
|
| 208 |
+
not phosphorylyzed at 20-fold excess of the homogeneous enzyme. Guanine (Km 1.82
|
| 209 |
+
x 10(-5)M),ribose 1-phosphate (Km 1.34 x 10(-4) M) and hypoxanthine were substrates
|
| 210 |
+
for the reverse reaction, namely, the enzymatic synthesis of nucleosides. The
|
| 211 |
+
initial velocity studies of the saturation of the enzyme with guanosine, at various
|
| 212 |
+
fixed concentrations of inorganic orthophosphate, suggest a sequential bireactant
|
| 213 |
+
catalytic mechanism for the enzyme.'
|
| 214 |
+
- Using absorptiometry, the bone calcium mass (BCM) at two different sites of the
|
| 215 |
+
radius was measured in 12 male patients referred for vestibuloplasty of the mandible.
|
| 216 |
+
The most distal site contained both cortical and cancellous bone while the other
|
| 217 |
+
site contained cortical bone only. The BCM values were compared with those for
|
| 218 |
+
normal patients of the same age. The patients with mandibular atrophy all had
|
| 219 |
+
less bone in the radius than their age-matched controls (p less than 0.01). This
|
| 220 |
+
is indicative of a systemic factor or factors operating both in the mandible and
|
| 221 |
+
in the radius. The examination prior to oral surgery should therefore include
|
| 222 |
+
an evaluation for diseases causing general bone loss.
|
| 223 |
+
- Carbamylation of epsilon-amino groups of lysine of human blood group MM glycoprotein,
|
| 224 |
+
some of its precursors and the blood group A B antigens gave products with 41%
|
| 225 |
+
- 91% epsilon-amino group substitution. Even the most extensive carbamylation
|
| 226 |
+
led to only marginal changes in the circular dichroic (CD) spectra of these substances
|
| 227 |
+
and none in sedimentation coefficients studied. Nevertheless, carbamylation resulted
|
| 228 |
+
in either increased or unchanged or decreased inhibitory activity of all blood
|
| 229 |
+
group antigens tested depending solely on the source of the hemagglutinin used.
|
| 230 |
+
Carbamylation of epsilon-amino groups of these blood group glycoproteins therefore
|
| 231 |
+
leads to minor conformational changes, not involved with the primary blood group
|
| 232 |
+
specificity, which is recognized by a large proportion but not by all corresponding
|
| 233 |
+
antibodies and lectins.
|
| 234 |
+
- source_sentence: Two patients with chronic renal failure developed transient sinus
|
| 235 |
+
node dysfunction requiring insertion of a temporary pacemaker while receiving
|
| 236 |
+
procainamide to control ventricular arrhythmias. Blood levels of procainamide
|
| 237 |
+
were found to be elevated, although at these levels, sinus node dysfunction has
|
| 238 |
+
not previously been reported. Following discontinuance of procainamide, sinus
|
| 239 |
+
rhythm returned. A combination of factors, including elevated levels of N-acetyl
|
| 240 |
+
procainamide, the metabolite of procainamide with anti-arrhythmic properties,
|
| 241 |
+
are suggested as possible contributory causes for the ECG findings. Thus, procainamide
|
| 242 |
+
may produce electrophysiologic features of "sick sinus syndrome" in patients with
|
| 243 |
+
chronic renal failure even when blood levels of this substance are being monitored.
|
| 244 |
+
sentences:
|
| 245 |
+
- We investigated the possible side chain/side chain interactions of four potential
|
| 246 |
+
transmembrane proteins. The basic assumptions are that the proteins are alpha-helical,
|
| 247 |
+
and that the proteins aggregate with knobs-into-holes packing. It was found that
|
| 248 |
+
these four proteins can be assembled into stereochemically feasible bundles of
|
| 249 |
+
alpha-helices with hydrophobic exteriors and with hydrogen bonds between the side
|
| 250 |
+
chains of one alpha-helix and the side chains of its knobs-into-holes packed neighbors.
|
| 251 |
+
- The distribution of IgG in the endothelium of the paracortical post-capillary
|
| 252 |
+
venules (PCV) of human lymph nodes was studied in frozen sections by using an
|
| 253 |
+
indirect immunoperoxidase technique. Three different patterns of distribution
|
| 254 |
+
could be differentiated (luminal site, intraendothelial and basement membrane
|
| 255 |
+
site). Each of these patterns was characteristically encoutered in association
|
| 256 |
+
with one of three grades of PCV (graded on the basis of the height of their endothelial
|
| 257 |
+
cells). The significance of this close relationship between the IgG distribution
|
| 258 |
+
and the changes in PCV endothelium was discussed in regard to the T-lymphocyte
|
| 259 |
+
recirculation. A hypothesis was introduced describing the possible sequence of
|
| 260 |
+
events involved in the traversing of T-lymphocyte through the PCV endothelium.
|
| 261 |
+
- A randomised controlled trial of antipyrine (phenazone) in the prophylaxis of
|
| 262 |
+
neonatal jaundice was completed in 48 women. Treatment of mothers from the 38th
|
| 263 |
+
week of pregnancy reduced neonatal plasma-bilirubin concentrations on the 4th
|
| 264 |
+
day after birth by an average of 44%. Prophylaxis of neonatal jaundice merits
|
| 265 |
+
a wider trial.
|
| 266 |
+
pipeline_tag: sentence-similarity
|
| 267 |
+
library_name: sentence-transformers
|
| 268 |
+
---
|
| 269 |
+
|
| 270 |
+
# SentenceTransformer
|
| 271 |
+
|
| 272 |
+
This is a trained [sentence-transformers](https://www.SBERT.net) model. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 273 |
+
|
| 274 |
+
## Model Details
|
| 275 |
+
|
| 276 |
+
### Model Description
|
| 277 |
+
- **Model Type:** Sentence Transformer
|
| 278 |
+
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
|
| 279 |
+
- **Maximum Sequence Length:** 256 tokens
|
| 280 |
+
- **Output Dimensionality:** 384 dimensions
|
| 281 |
+
- **Similarity Function:** Cosine Similarity
|
| 282 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 283 |
+
<!-- - **Language:** Unknown -->
|
| 284 |
+
<!-- - **License:** Unknown -->
|
| 285 |
+
|
| 286 |
+
### Model Sources
|
| 287 |
+
|
| 288 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 289 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 290 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 291 |
+
|
| 292 |
+
### Full Model Architecture
|
| 293 |
+
|
| 294 |
+
```
|
| 295 |
+
SentenceTransformer(
|
| 296 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
|
| 297 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 298 |
+
(2): Normalize()
|
| 299 |
+
)
|
| 300 |
+
```
|
| 301 |
+
|
| 302 |
+
## Usage
|
| 303 |
+
|
| 304 |
+
### Direct Usage (Sentence Transformers)
|
| 305 |
+
|
| 306 |
+
First install the Sentence Transformers library:
|
| 307 |
+
|
| 308 |
+
```bash
|
| 309 |
+
pip install -U sentence-transformers
|
| 310 |
+
```
|
| 311 |
+
|
| 312 |
+
Then you can load this model and run inference.
|
| 313 |
+
```python
|
| 314 |
+
from sentence_transformers import SentenceTransformer
|
| 315 |
+
|
| 316 |
+
# Download from the 🤗 Hub
|
| 317 |
+
model = SentenceTransformer("jaimevera1107/all-MiniLM-pubmed")
|
| 318 |
+
# Run inference
|
| 319 |
+
sentences = [
|
| 320 |
+
'Two patients with chronic renal failure developed transient sinus node dysfunction requiring insertion of a temporary pacemaker while receiving procainamide to control ventricular arrhythmias. Blood levels of procainamide were found to be elevated, although at these levels, sinus node dysfunction has not previously been reported. Following discontinuance of procainamide, sinus rhythm returned. A combination of factors, including elevated levels of N-acetyl procainamide, the metabolite of procainamide with anti-arrhythmic properties, are suggested as possible contributory causes for the ECG findings. Thus, procainamide may produce electrophysiologic features of "sick sinus syndrome" in patients with chronic renal failure even when blood levels of this substance are being monitored.',
|
| 321 |
+
'The distribution of IgG in the endothelium of the paracortical post-capillary venules (PCV) of human lymph nodes was studied in frozen sections by using an indirect immunoperoxidase technique. Three different patterns of distribution could be differentiated (luminal site, intraendothelial and basement membrane site). Each of these patterns was characteristically encoutered in association with one of three grades of PCV (graded on the basis of the height of their endothelial cells). The significance of this close relationship between the IgG distribution and the changes in PCV endothelium was discussed in regard to the T-lymphocyte recirculation. A hypothesis was introduced describing the possible sequence of events involved in the traversing of T-lymphocyte through the PCV endothelium.',
|
| 322 |
+
'A randomised controlled trial of antipyrine (phenazone) in the prophylaxis of neonatal jaundice was completed in 48 women. Treatment of mothers from the 38th week of pregnancy reduced neonatal plasma-bilirubin concentrations on the 4th day after birth by an average of 44%. Prophylaxis of neonatal jaundice merits a wider trial.',
|
| 323 |
+
]
|
| 324 |
+
embeddings = model.encode(sentences)
|
| 325 |
+
print(embeddings.shape)
|
| 326 |
+
# [3, 384]
|
| 327 |
+
|
| 328 |
+
# Get the similarity scores for the embeddings
|
| 329 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 330 |
+
print(similarities.shape)
|
| 331 |
+
# [3, 3]
|
| 332 |
+
```
|
| 333 |
+
|
| 334 |
+
<!--
|
| 335 |
+
### Direct Usage (Transformers)
|
| 336 |
+
|
| 337 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 338 |
+
|
| 339 |
+
</details>
|
| 340 |
+
-->
|
| 341 |
+
|
| 342 |
+
<!--
|
| 343 |
+
### Downstream Usage (Sentence Transformers)
|
| 344 |
+
|
| 345 |
+
You can finetune this model on your own dataset.
|
| 346 |
+
|
| 347 |
+
<details><summary>Click to expand</summary>
|
| 348 |
+
|
| 349 |
+
</details>
|
| 350 |
+
-->
|
| 351 |
+
|
| 352 |
+
<!--
|
| 353 |
+
### Out-of-Scope Use
|
| 354 |
+
|
| 355 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 356 |
+
-->
|
| 357 |
+
|
| 358 |
+
<!--
|
| 359 |
+
## Bias, Risks and Limitations
|
| 360 |
+
|
| 361 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 362 |
+
-->
|
| 363 |
+
|
| 364 |
+
<!--
|
| 365 |
+
### Recommendations
|
| 366 |
+
|
| 367 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 368 |
+
-->
|
| 369 |
+
|
| 370 |
+
## Training Details
|
| 371 |
+
|
| 372 |
+
### Training Dataset
|
| 373 |
+
|
| 374 |
+
#### Unnamed Dataset
|
| 375 |
+
|
| 376 |
+
* Size: 939 training samples
|
| 377 |
+
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
|
| 378 |
+
* Approximate statistics based on the first 939 samples:
|
| 379 |
+
| | sentence_0 | sentence_1 | label |
|
| 380 |
+
|:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:--------------------------------------------------------------|
|
| 381 |
+
| type | string | string | float |
|
| 382 |
+
| details | <ul><li>min: 27 tokens</li><li>mean: 190.25 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 29 tokens</li><li>mean: 187.93 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 0.0</li><li>mean: 0.1</li><li>max: 0.7</li></ul> |
|
| 383 |
+
* Samples:
|
| 384 |
+
| sentence_0 | sentence_1 | label |
|
| 385 |
+
|:-----------|:-----------|:------|
|
| 386 |
+
| <code>We have previously reported increased neutrophil chemotactic activity in sera obtained after positive antigen inhalation responses in atopic subjects. This report describes the kinetics of appearance of this serum activity and the effects of antigen dose and disodium cromoglycate pretreatment on the response in 10 ragweed-sensitive subjects. Significantly increased chemotactic activity was present as early as 1 min, peaked at 10 min, and persisted through 24 hr after inhalation of antigen. The increased chemotactic activity correlated with the degree of bronchospasm induced by antigen inhalation and the amount of antigen administered. The increased chemotactic activity and bronchospasm were blocked by administration of disodium cromoglycate prior to antigen challenge. These findings are consistent with a postulated antigen-induced anaphylactic release of chemotactic activity. The correlation of this activity with the degree of bronchospasm and its appearance after administration of eve...</code> | <code>Urinary arylsulphatases (E.C.3.1.6.1) A and B were increased in male rats fasted for 24 hours. Excretion of non dialysable protein nitrogen decreased whereas creatinine excretion increased. On refeeding diet arylsulphatase A activity was restored to normal whereas arylsulphatase B was not normalised. A single oral supplementation of vitamin A acetate (20 000 IU) to rats fasted for 24 hours resulted in a significant reduction of both arylsulphatase A and B eventhough no further reduction of protein nitrogen excretion was evident. In vitamin A deficient male rats significant reduction in urinary excretion of both arylsulphatases A and B occured. In a smaller number of female rats depression of only arylsulphatase A was observed. This effect of vitamin A deficiency leading to reduced urinary arylsulphatase activity was evident even at the "weight plateau" stage when no reduction in food intake or growth had occurred. These results suggest a possible direct or indirect role for vitamin A ...</code> | <code>0.1</code> |
|
| 387 |
+
| <code>Antibodies to herpes simplex virus type 1 and type 2 were detected in the sera of rabbits by release of histamine from basophils sensitized in vitro with the sera. The time course of the appearance of the antibodies, the dose-response curve of the release of histamine in relation to antigen concentration, the sedimentation characteristics of the antibodies in sucrose gradients, and the ability to destroy the sensitizing capacity of the sera with heat suggest that the antibodies being assessed were of the IgE class. These antibodies were induced in animals injected intradermally, intramuscularly, and i.p. with live virus. The antibodies were detected 1 week after primary injection and a similar time course of antibody appearance was observed after a second or third injection. The same cross-reactivity between type 1 and type 2 virus observed with IgG antibody was also observed with IgE antibody.</code> | <code>Chemical, physical and optical properties of chromomycin A3 are examined so as to ascertain appropriate staining and analysis procedures for flow cytometry of human gynecologic samples. Fluorescence excitation and emission spectra of chromomycin A3-stained cervical cells are compared with those of chromomycin A3-stained deoxyribonucleic acid. Conditions for deoxyribonucleic acid-specific staining of cervical cells are presented, and staining specificity of cervical cells with chromomycin A3 is compared to that obtained with ethidium bromide, propidium iodide and Hoechst 33258. Also presented is a brief review of two parameter flow cytometry as a prescreening procedure for detection of cervical neoplasia. Results of flow cytometry and cell sorting are interpreted based on the deoxyribonucleic acid-specificity of chromomycin A3 staining.</code> | <code>0.1</code> |
|
| 388 |
+
| <code>Peptide hydrolase activities against glycyl-L-leucine and glycylglycine were investigated in the soluble fractions of blood, liver, kidney cortex, skeletal muscle (gastrocnemius), and jejunal and ileal mucosa of rats. The maximal hydrolase activity in each tissue was determined when the incubation conditions, such as time, pH, substrate and enzyme concentrations, and ionic requirements were optimal. The kinetic constants (apparent Km and Vmax) were determined from Lineweaver-Burk double reciprocal graphs. Maximal hydrolysis rates against both dipeptides were many times greater by kidney and intestinal segments than by those of muscle, liver, or blood. The order of Vmax for hydrolase activity against glycylleucine was kidney greater than ileum greater than jejunum greater than liver greater than muscle greater than blood, and the order against glycylglycine was ileum greater than kidney greater than jejunum greater than liver greater than muscle. Those tissues that had the lowest Vmax v...</code> | <code>The authors studied 19 cases of brain abscess. The investigation included one or more EEG records from 13 patients and one or more cranial computerized tomograms in all cases. In this work the two techniques were associated to establish the diagnosis of cerebral abscess. The EEG is almost always abnormal and pronounced EEG disturbances are, in most cases, sufficient for indicating a diagnosis of a space-occupying process. On the other hand, EEG patterns suggestive of brain abscess were detected in only 38% of the patients. In brain abscess, CCT is of considerable value since the existence of 'suppurative' images was demonstrated in all cases and of these, 70% were characteristic. The association of these two techniques is thus extremely useful in the diagnosis of cerebral abscess, to follow its evolution and to detect postoperative relapse or to evaluate the efficiency of medical management alone.</code> | <code>0.1</code> |
|
| 389 |
+
* Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
|
| 390 |
+
```json
|
| 391 |
+
{
|
| 392 |
+
"loss_fct": "torch.nn.modules.loss.MSELoss"
|
| 393 |
+
}
|
| 394 |
+
```
|
| 395 |
+
|
| 396 |
+
### Training Hyperparameters
|
| 397 |
+
#### Non-Default Hyperparameters
|
| 398 |
+
|
| 399 |
+
- `per_device_train_batch_size`: 16
|
| 400 |
+
- `per_device_eval_batch_size`: 16
|
| 401 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 402 |
+
|
| 403 |
+
#### All Hyperparameters
|
| 404 |
+
<details><summary>Click to expand</summary>
|
| 405 |
+
|
| 406 |
+
- `overwrite_output_dir`: False
|
| 407 |
+
- `do_predict`: False
|
| 408 |
+
- `eval_strategy`: no
|
| 409 |
+
- `prediction_loss_only`: True
|
| 410 |
+
- `per_device_train_batch_size`: 16
|
| 411 |
+
- `per_device_eval_batch_size`: 16
|
| 412 |
+
- `per_gpu_train_batch_size`: None
|
| 413 |
+
- `per_gpu_eval_batch_size`: None
|
| 414 |
+
- `gradient_accumulation_steps`: 1
|
| 415 |
+
- `eval_accumulation_steps`: None
|
| 416 |
+
- `torch_empty_cache_steps`: None
|
| 417 |
+
- `learning_rate`: 5e-05
|
| 418 |
+
- `weight_decay`: 0.0
|
| 419 |
+
- `adam_beta1`: 0.9
|
| 420 |
+
- `adam_beta2`: 0.999
|
| 421 |
+
- `adam_epsilon`: 1e-08
|
| 422 |
+
- `max_grad_norm`: 1
|
| 423 |
+
- `num_train_epochs`: 3
|
| 424 |
+
- `max_steps`: -1
|
| 425 |
+
- `lr_scheduler_type`: linear
|
| 426 |
+
- `lr_scheduler_kwargs`: {}
|
| 427 |
+
- `warmup_ratio`: 0.0
|
| 428 |
+
- `warmup_steps`: 0
|
| 429 |
+
- `log_level`: passive
|
| 430 |
+
- `log_level_replica`: warning
|
| 431 |
+
- `log_on_each_node`: True
|
| 432 |
+
- `logging_nan_inf_filter`: True
|
| 433 |
+
- `save_safetensors`: True
|
| 434 |
+
- `save_on_each_node`: False
|
| 435 |
+
- `save_only_model`: False
|
| 436 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 437 |
+
- `no_cuda`: False
|
| 438 |
+
- `use_cpu`: False
|
| 439 |
+
- `use_mps_device`: False
|
| 440 |
+
- `seed`: 42
|
| 441 |
+
- `data_seed`: None
|
| 442 |
+
- `jit_mode_eval`: False
|
| 443 |
+
- `use_ipex`: False
|
| 444 |
+
- `bf16`: False
|
| 445 |
+
- `fp16`: False
|
| 446 |
+
- `fp16_opt_level`: O1
|
| 447 |
+
- `half_precision_backend`: auto
|
| 448 |
+
- `bf16_full_eval`: False
|
| 449 |
+
- `fp16_full_eval`: False
|
| 450 |
+
- `tf32`: None
|
| 451 |
+
- `local_rank`: 0
|
| 452 |
+
- `ddp_backend`: None
|
| 453 |
+
- `tpu_num_cores`: None
|
| 454 |
+
- `tpu_metrics_debug`: False
|
| 455 |
+
- `debug`: []
|
| 456 |
+
- `dataloader_drop_last`: False
|
| 457 |
+
- `dataloader_num_workers`: 0
|
| 458 |
+
- `dataloader_prefetch_factor`: None
|
| 459 |
+
- `past_index`: -1
|
| 460 |
+
- `disable_tqdm`: False
|
| 461 |
+
- `remove_unused_columns`: True
|
| 462 |
+
- `label_names`: None
|
| 463 |
+
- `load_best_model_at_end`: False
|
| 464 |
+
- `ignore_data_skip`: False
|
| 465 |
+
- `fsdp`: []
|
| 466 |
+
- `fsdp_min_num_params`: 0
|
| 467 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 468 |
+
- `tp_size`: 0
|
| 469 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 470 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 471 |
+
- `deepspeed`: None
|
| 472 |
+
- `label_smoothing_factor`: 0.0
|
| 473 |
+
- `optim`: adamw_torch
|
| 474 |
+
- `optim_args`: None
|
| 475 |
+
- `adafactor`: False
|
| 476 |
+
- `group_by_length`: False
|
| 477 |
+
- `length_column_name`: length
|
| 478 |
+
- `ddp_find_unused_parameters`: None
|
| 479 |
+
- `ddp_bucket_cap_mb`: None
|
| 480 |
+
- `ddp_broadcast_buffers`: False
|
| 481 |
+
- `dataloader_pin_memory`: True
|
| 482 |
+
- `dataloader_persistent_workers`: False
|
| 483 |
+
- `skip_memory_metrics`: True
|
| 484 |
+
- `use_legacy_prediction_loop`: False
|
| 485 |
+
- `push_to_hub`: False
|
| 486 |
+
- `resume_from_checkpoint`: None
|
| 487 |
+
- `hub_model_id`: None
|
| 488 |
+
- `hub_strategy`: every_save
|
| 489 |
+
- `hub_private_repo`: None
|
| 490 |
+
- `hub_always_push`: False
|
| 491 |
+
- `gradient_checkpointing`: False
|
| 492 |
+
- `gradient_checkpointing_kwargs`: None
|
| 493 |
+
- `include_inputs_for_metrics`: False
|
| 494 |
+
- `include_for_metrics`: []
|
| 495 |
+
- `eval_do_concat_batches`: True
|
| 496 |
+
- `fp16_backend`: auto
|
| 497 |
+
- `push_to_hub_model_id`: None
|
| 498 |
+
- `push_to_hub_organization`: None
|
| 499 |
+
- `mp_parameters`:
|
| 500 |
+
- `auto_find_batch_size`: False
|
| 501 |
+
- `full_determinism`: False
|
| 502 |
+
- `torchdynamo`: None
|
| 503 |
+
- `ray_scope`: last
|
| 504 |
+
- `ddp_timeout`: 1800
|
| 505 |
+
- `torch_compile`: False
|
| 506 |
+
- `torch_compile_backend`: None
|
| 507 |
+
- `torch_compile_mode`: None
|
| 508 |
+
- `include_tokens_per_second`: False
|
| 509 |
+
- `include_num_input_tokens_seen`: False
|
| 510 |
+
- `neftune_noise_alpha`: None
|
| 511 |
+
- `optim_target_modules`: None
|
| 512 |
+
- `batch_eval_metrics`: False
|
| 513 |
+
- `eval_on_start`: False
|
| 514 |
+
- `use_liger_kernel`: False
|
| 515 |
+
- `eval_use_gather_object`: False
|
| 516 |
+
- `average_tokens_across_devices`: False
|
| 517 |
+
- `prompts`: None
|
| 518 |
+
- `batch_sampler`: batch_sampler
|
| 519 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 520 |
+
|
| 521 |
+
</details>
|
| 522 |
+
|
| 523 |
+
### Framework Versions
|
| 524 |
+
- Python: 3.11.9
|
| 525 |
+
- Sentence Transformers: 4.1.0
|
| 526 |
+
- Transformers: 4.51.3
|
| 527 |
+
- PyTorch: 2.7.0+cu118
|
| 528 |
+
- Accelerate: 1.7.0
|
| 529 |
+
- Datasets: 3.6.0
|
| 530 |
+
- Tokenizers: 0.21.1
|
| 531 |
+
|
| 532 |
+
## Citation
|
| 533 |
+
|
| 534 |
+
### BibTeX
|
| 535 |
+
|
| 536 |
+
#### Sentence Transformers
|
| 537 |
+
```bibtex
|
| 538 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 539 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 540 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 541 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 542 |
+
month = "11",
|
| 543 |
+
year = "2019",
|
| 544 |
+
publisher = "Association for Computational Linguistics",
|
| 545 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 546 |
+
}
|
| 547 |
+
```
|
| 548 |
+
|
| 549 |
+
<!--
|
| 550 |
+
## Glossary
|
| 551 |
+
|
| 552 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 553 |
+
-->
|
| 554 |
+
|
| 555 |
+
<!--
|
| 556 |
+
## Model Card Authors
|
| 557 |
+
|
| 558 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 559 |
+
-->
|
| 560 |
+
|
| 561 |
+
<!--
|
| 562 |
+
## Model Card Contact
|
| 563 |
+
|
| 564 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 565 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"BertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_probs_dropout_prob": 0.1,
|
| 6 |
+
"classifier_dropout": null,
|
| 7 |
+
"gradient_checkpointing": false,
|
| 8 |
+
"hidden_act": "gelu",
|
| 9 |
+
"hidden_dropout_prob": 0.1,
|
| 10 |
+
"hidden_size": 384,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 1536,
|
| 13 |
+
"layer_norm_eps": 1e-12,
|
| 14 |
+
"max_position_embeddings": 512,
|
| 15 |
+
"model_type": "bert",
|
| 16 |
+
"num_attention_heads": 12,
|
| 17 |
+
"num_hidden_layers": 6,
|
| 18 |
+
"pad_token_id": 0,
|
| 19 |
+
"position_embedding_type": "absolute",
|
| 20 |
+
"torch_dtype": "float32",
|
| 21 |
+
"transformers_version": "4.51.3",
|
| 22 |
+
"type_vocab_size": 2,
|
| 23 |
+
"use_cache": true,
|
| 24 |
+
"vocab_size": 30522
|
| 25 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "4.1.0",
|
| 4 |
+
"transformers": "4.51.3",
|
| 5 |
+
"pytorch": "2.7.0+cu118"
|
| 6 |
+
},
|
| 7 |
+
"prompts": {},
|
| 8 |
+
"default_prompt_name": null,
|
| 9 |
+
"similarity_fn_name": "cosine"
|
| 10 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:984879d1a7ded5fdfb90673519399ca0f0dd8e19bdcaaa8382f29a7da75e88a7
|
| 3 |
+
size 90864192
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 256,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": false,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": true,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "[MASK]",
|
| 50 |
+
"max_length": 128,
|
| 51 |
+
"model_max_length": 256,
|
| 52 |
+
"never_split": null,
|
| 53 |
+
"pad_to_multiple_of": null,
|
| 54 |
+
"pad_token": "[PAD]",
|
| 55 |
+
"pad_token_type_id": 0,
|
| 56 |
+
"padding_side": "right",
|
| 57 |
+
"sep_token": "[SEP]",
|
| 58 |
+
"stride": 0,
|
| 59 |
+
"strip_accents": null,
|
| 60 |
+
"tokenize_chinese_chars": true,
|
| 61 |
+
"tokenizer_class": "BertTokenizer",
|
| 62 |
+
"truncation_side": "right",
|
| 63 |
+
"truncation_strategy": "longest_first",
|
| 64 |
+
"unk_token": "[UNK]"
|
| 65 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|