Add new SentenceTransformer model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +702 -0
- adapter_config.json +37 -0
- adapter_model.safetensors +3 -0
- config_sentence_transformers.json +14 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 1024,
|
| 3 |
+
"pooling_mode_cls_token": true,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,702 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
language:
|
| 3 |
+
- en
|
| 4 |
+
license: mit
|
| 5 |
+
tags:
|
| 6 |
+
- sentence-transformers
|
| 7 |
+
- sentence-similarity
|
| 8 |
+
- feature-extraction
|
| 9 |
+
- dense
|
| 10 |
+
- generated_from_trainer
|
| 11 |
+
- dataset_size:80184
|
| 12 |
+
- loss:CachedMultipleNegativesRankingLoss
|
| 13 |
+
base_model: BAAI/bge-large-en-v1.5
|
| 14 |
+
widget:
|
| 15 |
+
- source_sentence: who does lennie choose in the sky is everywhere
|
| 16 |
+
sentences:
|
| 17 |
+
- Bank of England £1 note The Bank of England £1 note was a banknote of the pound
|
| 18 |
+
sterling. After the ten shilling note was withdrawn in 1970 it became the smallest
|
| 19 |
+
denomination note issued by the Bank of England. The one pound note was issued
|
| 20 |
+
by the Bank of England for the first time in 1797 and continued to be printed
|
| 21 |
+
until 1984. The note was withdrawn in 1988 in favour of the one pound coin.
|
| 22 |
+
- The Sky Is Everywhere Lennie tries to make up with Joe by taking him some of Gram's
|
| 23 |
+
roses, but doesn't succeed. Gram becomes furious with Lennie for cutting her roses
|
| 24 |
+
and criticizes her for being selfish. Lennie realizes that she needs to change,
|
| 25 |
+
apologizes to her grandmother, and tells her about the situation with Joe. Gram
|
| 26 |
+
reassures Lennie that Joe is in love with her. Lennie writes Joe a letter expressing
|
| 27 |
+
her feelings, and Joe ultimately forgives her and they reconcile. Toby and Lennie
|
| 28 |
+
become good friends and visit Bailey's grave together to apologize to her. Lennie
|
| 29 |
+
walks away from the grave with a smile, knowing that her sister would have forgiven
|
| 30 |
+
her and that the only way to deal with grief is to accept that it is a part of
|
| 31 |
+
you and to look ahead to the future.
|
| 32 |
+
- 'Senate of the Philippines The Senate of the Philippines (Filipino: Senado ng
|
| 33 |
+
Pilipinas, also Mataas na Kapulungan ng Pilipinas or "upper chamber") is the upper
|
| 34 |
+
house of the bicameral legislature of the Philippines, the Congress; the House
|
| 35 |
+
of Representatives is the lower house. The Senate is composed of 24 senators who
|
| 36 |
+
are elected at-large with the country as one district under plurality-at-large
|
| 37 |
+
voting.'
|
| 38 |
+
- source_sentence: who played charlie in charlie and the chocolate factory 2005
|
| 39 |
+
sentences:
|
| 40 |
+
- Charlie and the Chocolate Factory (film) Charlie and the Chocolate Factory is
|
| 41 |
+
a 2005 musical fantasy comedy film directed by Tim Burton and written by John
|
| 42 |
+
August, based on the 1964 British novel of the same name by Roald Dahl. The film
|
| 43 |
+
stars Johnny Depp as Willy Wonka and Freddie Highmore as Charlie Bucket. The storyline
|
| 44 |
+
follows Charlie, who wins a contest and is along with four other contest winners,
|
| 45 |
+
subsequently led by Wonka on a tour of his chocolate factory, the most magnificent
|
| 46 |
+
in the world.
|
| 47 |
+
- The Punisher (TV series) The Punisher is scheduled to be released on November
|
| 48 |
+
17, 2017.
|
| 49 |
+
- The Vampire Diaries (season 2) The Vampire Diaries, an American supernatural drama,
|
| 50 |
+
was officially renewed by The CW for a full 22-episode season on February 16,
|
| 51 |
+
2010.[1] The first episode premiered on September 9, 2010, at 8 p.m. ET.[2] The
|
| 52 |
+
season picks up immediately after the events of the season one finale. All the
|
| 53 |
+
series regulars returned.[3] Season two focuses on the return of Elena Gilbert's
|
| 54 |
+
(Nina Dobrev) doppelgänger, Katherine Pierce, the introduction of werewolves,
|
| 55 |
+
the sun and moon curse, and the arrival of the original vampires. Tyler Lockwood's
|
| 56 |
+
(Michael Trevino) uncle, Mason Lockwood (Taylor Kinney), arrives in town searching
|
| 57 |
+
for the moonstone, a family heirloom. Tyler later learns of his family's werewolf
|
| 58 |
+
curse. Meanwhile, Caroline Forbes (Candice Accola) is killed by Katherine while
|
| 59 |
+
having Damon Salvatore's (Ian Somerhalder) blood in her system, turning her into
|
| 60 |
+
a vampire. The arrival of the original vampires, Elijah (Daniel Gillies) and Klaus
|
| 61 |
+
Mikaelson (Joseph Morgan), also bring about complications. Klaus is a vampire-werewolf
|
| 62 |
+
hybrid, but his werewolf side had been forced into dormancy by witches, as nature
|
| 63 |
+
would not stand for such an imbalance in power. Therefore, Klaus arrives in town
|
| 64 |
+
with plans to break the curse and unleash his werewolf side by channelling the
|
| 65 |
+
power of the full moon into the moonstone, sacrificing a vampire and a werewolf,
|
| 66 |
+
and drinking the blood of the doppelgänger. The season is currently on air in
|
| 67 |
+
Urdu on filmax channel in Pakistan. It became available on DVD and Blu-ray on
|
| 68 |
+
August 30, 2011.[4]
|
| 69 |
+
- source_sentence: most of the really good agricultural land in mexico is owned by
|
| 70 |
+
sentences:
|
| 71 |
+
- 'State of the art The origin of the concept of "state of the art" took place in
|
| 72 |
+
the beginning of the twentieth century.[3] The earliest use of the term "state
|
| 73 |
+
of the art" documented by the Oxford English Dictionary dates back to 1910, from
|
| 74 |
+
an engineering manual by Henry Harrison Suplee (1856-post 1943), an engineering
|
| 75 |
+
graduate (University of Pennsylvania, 1876), titled Gas Turbine: progress in the
|
| 76 |
+
design and construction of turbines operated by gases of combustion. The relevant
|
| 77 |
+
passage reads: "In the present state of the art this is all that can be done".
|
| 78 |
+
The term "art" refers to technics, rather than performing or fine arts.[4]'
|
| 79 |
+
- London sewerage system Joseph Bazalgette, a civil engineer and Chief Engineer
|
| 80 |
+
of the Metropolitan Board of Works, was given responsibility for the work. He
|
| 81 |
+
designed an extensive underground sewerage system that diverted waste to the Thames
|
| 82 |
+
Estuary, downstream of the main centre of population. Six main interceptor sewers,
|
| 83 |
+
totalling almost 160 km (100 miles) in length, were constructed, some incorporating
|
| 84 |
+
stretches of London's "lost" rivers. Three of these sewers were north of the river,
|
| 85 |
+
the southernmost, low-level one being incorporated in the Thames Embankment. The
|
| 86 |
+
Embankment also allowed new roads, new public gardens, and the Circle line of
|
| 87 |
+
the London Underground. Victoria Embankment was finally officially opened on 13
|
| 88 |
+
July 1870.[3][4]
|
| 89 |
+
- Agriculture in Mexico During the early colonial period, the Spanish introduced
|
| 90 |
+
more plants and the concept of domesticated animals, principally cattle, horses,
|
| 91 |
+
donkeys, mules, goats and sheep, and barn yard animals such as chickens and pigs.
|
| 92 |
+
Farming from the colonial period until the Mexican Revolution was focused on large
|
| 93 |
+
private properties. After the Revolution these were broken up and the land redistributed.
|
| 94 |
+
Since the latter 20th century NAFTA and economic policies have again favored large
|
| 95 |
+
scale commercial agricultural holdings.
|
| 96 |
+
- source_sentence: who is the person who plays black panther
|
| 97 |
+
sentences:
|
| 98 |
+
- United States Capitol The United States Capitol, often called the Capitol Building,
|
| 99 |
+
is the home of the United States Congress, and the seat of the legislative branch
|
| 100 |
+
of the U.S. federal government. It is located on Capitol Hill at the eastern end
|
| 101 |
+
of the National Mall in Washington, D.C. Though not at the geographic center of
|
| 102 |
+
the Federal District, the Capitol forms the origin point for the District's street-numbering
|
| 103 |
+
system and the District's four quadrants.
|
| 104 |
+
- Supreme Court of the United States The Supreme Court of the United States is the
|
| 105 |
+
highest federal court of the United States. Established pursuant to Article Three
|
| 106 |
+
of the United States Constitution in 1789, it has ultimate (and largely discretionary)
|
| 107 |
+
appellate jurisdiction over all federal courts and state court cases involving
|
| 108 |
+
issues of federal law plus original jurisdiction over a small range of cases.
|
| 109 |
+
In the legal system of the United States, the Supreme Court is generally the final
|
| 110 |
+
interpreter of federal law including the United States Constitution, but it may
|
| 111 |
+
act only within the context of a case in which it has jurisdiction. The Court
|
| 112 |
+
may decide cases having political overtones but does not have power to decide
|
| 113 |
+
nonjusticiable political questions, and its enforcement arm is in the executive
|
| 114 |
+
rather than judicial branch of government.
|
| 115 |
+
- Chadwick Boseman Chadwick Aaron Boseman[1] (born November 29, 1977)[2][3] is an
|
| 116 |
+
American actor. He is known for portraying Jackie Robinson in 42 (2013), James
|
| 117 |
+
Brown in Get on Up (2014), Black Panther in the Marvel Cinematic Universe (since
|
| 118 |
+
2016), and Thurgood Marshall in Marshall (2017). He also had roles in the television
|
| 119 |
+
series Lincoln Heights (2008) and Persons Unknown (2010), and the films The Express
|
| 120 |
+
(2008), Draft Day (2014), and Message from the King (2016).
|
| 121 |
+
- source_sentence: can you find a pearl in a mussel
|
| 122 |
+
sentences:
|
| 123 |
+
- Freshwater pearl mussel Although the name "freshwater pearl mussel" is often used
|
| 124 |
+
for this species, other freshwater mussel species can also create pearls and some
|
| 125 |
+
can also be used as a source of mother of pearl. In fact, most cultured pearls
|
| 126 |
+
today come from Hyriopsis species in Asia, or Amblema species in North America,
|
| 127 |
+
both members of the related family Unionidae; pearls are also found within species
|
| 128 |
+
in the genus Unio.
|
| 129 |
+
- Ellis Island Generally, those immigrants who were approved spent from two to five
|
| 130 |
+
hours at Ellis Island. Arrivals were asked 29 questions including name, occupation,
|
| 131 |
+
and the amount of money carried. It was important to the American government that
|
| 132 |
+
the new arrivals could support themselves and have money to get started. The average
|
| 133 |
+
the government wanted the immigrants to have was between 18 and 25 dollars ($600
|
| 134 |
+
in 2015 adjusted for inflation). Those with visible health problems or diseases
|
| 135 |
+
were sent home or held in the island's hospital facilities for long periods of
|
| 136 |
+
time. More than 3,000 would-be immigrants died on Ellis Island while being held
|
| 137 |
+
in the hospital facilities. Some unskilled workers were rejected because they
|
| 138 |
+
were considered "likely to become a public charge." About 2% were denied admission
|
| 139 |
+
to the U.S. and sent back to their countries of origin for reasons such as having
|
| 140 |
+
a chronic contagious disease, criminal background, or insanity.[43] Ellis Island
|
| 141 |
+
was sometimes known as "The Island of Tears" or "Heartbreak Island"[44] because
|
| 142 |
+
of those 2% who were not admitted after the long transatlantic voyage. The Kissing
|
| 143 |
+
Post is a wooden column outside the Registry Room, where new arrivals were greeted
|
| 144 |
+
by their relatives and friends, typically with tears, hugs, and kisses.[45][46]
|
| 145 |
+
- Glee (season 1) The first season of the musical comedy-drama television series
|
| 146 |
+
Glee originally aired on Fox in the United States. The pilot episode was broadcast
|
| 147 |
+
as an advanced preview of the series on May 19, 2009, with the remainder of the
|
| 148 |
+
season airing between September 9, 2009 and June 8, 2010. The season consisted
|
| 149 |
+
of 22 episodes; the first 13 aired on Wednesdays at 9 pm (ET) and the final 9
|
| 150 |
+
aired on Tuesdays at 9 pm (ET). The season was executive produced by Ryan Murphy,
|
| 151 |
+
Brad Falchuk, and Dante Di Loreto; Murphy's production company helped co-produce
|
| 152 |
+
the series alongside 20th Century Fox.
|
| 153 |
+
datasets:
|
| 154 |
+
- sentence-transformers/natural-questions
|
| 155 |
+
pipeline_tag: sentence-similarity
|
| 156 |
+
library_name: sentence-transformers
|
| 157 |
+
metrics:
|
| 158 |
+
- cosine_accuracy@1
|
| 159 |
+
- cosine_accuracy@3
|
| 160 |
+
- cosine_accuracy@5
|
| 161 |
+
- cosine_accuracy@10
|
| 162 |
+
- cosine_precision@1
|
| 163 |
+
- cosine_precision@3
|
| 164 |
+
- cosine_precision@5
|
| 165 |
+
- cosine_precision@10
|
| 166 |
+
- cosine_recall@1
|
| 167 |
+
- cosine_recall@3
|
| 168 |
+
- cosine_recall@5
|
| 169 |
+
- cosine_recall@10
|
| 170 |
+
- cosine_ndcg@10
|
| 171 |
+
- cosine_mrr@10
|
| 172 |
+
- cosine_map@100
|
| 173 |
+
model-index:
|
| 174 |
+
- name: bge-large-en-v1.5
|
| 175 |
+
results:
|
| 176 |
+
- task:
|
| 177 |
+
type: information-retrieval
|
| 178 |
+
name: Information Retrieval
|
| 179 |
+
dataset:
|
| 180 |
+
name: NanoQuoraRetrieval
|
| 181 |
+
type: NanoQuoraRetrieval
|
| 182 |
+
metrics:
|
| 183 |
+
- type: cosine_accuracy@1
|
| 184 |
+
value: 0.88
|
| 185 |
+
name: Cosine Accuracy@1
|
| 186 |
+
- type: cosine_accuracy@3
|
| 187 |
+
value: 0.98
|
| 188 |
+
name: Cosine Accuracy@3
|
| 189 |
+
- type: cosine_accuracy@5
|
| 190 |
+
value: 0.98
|
| 191 |
+
name: Cosine Accuracy@5
|
| 192 |
+
- type: cosine_accuracy@10
|
| 193 |
+
value: 1.0
|
| 194 |
+
name: Cosine Accuracy@10
|
| 195 |
+
- type: cosine_precision@1
|
| 196 |
+
value: 0.88
|
| 197 |
+
name: Cosine Precision@1
|
| 198 |
+
- type: cosine_precision@3
|
| 199 |
+
value: 0.4133333333333333
|
| 200 |
+
name: Cosine Precision@3
|
| 201 |
+
- type: cosine_precision@5
|
| 202 |
+
value: 0.25199999999999995
|
| 203 |
+
name: Cosine Precision@5
|
| 204 |
+
- type: cosine_precision@10
|
| 205 |
+
value: 0.13999999999999999
|
| 206 |
+
name: Cosine Precision@10
|
| 207 |
+
- type: cosine_recall@1
|
| 208 |
+
value: 0.7673333333333332
|
| 209 |
+
name: Cosine Recall@1
|
| 210 |
+
- type: cosine_recall@3
|
| 211 |
+
value: 0.9520000000000001
|
| 212 |
+
name: Cosine Recall@3
|
| 213 |
+
- type: cosine_recall@5
|
| 214 |
+
value: 0.9553333333333334
|
| 215 |
+
name: Cosine Recall@5
|
| 216 |
+
- type: cosine_recall@10
|
| 217 |
+
value: 1.0
|
| 218 |
+
name: Cosine Recall@10
|
| 219 |
+
- type: cosine_ndcg@10
|
| 220 |
+
value: 0.9435612217207588
|
| 221 |
+
name: Cosine Ndcg@10
|
| 222 |
+
- type: cosine_mrr@10
|
| 223 |
+
value: 0.9295238095238095
|
| 224 |
+
name: Cosine Mrr@10
|
| 225 |
+
- type: cosine_map@100
|
| 226 |
+
value: 0.919404761904762
|
| 227 |
+
name: Cosine Map@100
|
| 228 |
+
- type: cosine_accuracy@1
|
| 229 |
+
value: 0.88
|
| 230 |
+
name: Cosine Accuracy@1
|
| 231 |
+
- type: cosine_accuracy@3
|
| 232 |
+
value: 0.98
|
| 233 |
+
name: Cosine Accuracy@3
|
| 234 |
+
- type: cosine_accuracy@5
|
| 235 |
+
value: 0.98
|
| 236 |
+
name: Cosine Accuracy@5
|
| 237 |
+
- type: cosine_accuracy@10
|
| 238 |
+
value: 1.0
|
| 239 |
+
name: Cosine Accuracy@10
|
| 240 |
+
- type: cosine_precision@1
|
| 241 |
+
value: 0.88
|
| 242 |
+
name: Cosine Precision@1
|
| 243 |
+
- type: cosine_precision@3
|
| 244 |
+
value: 0.4133333333333333
|
| 245 |
+
name: Cosine Precision@3
|
| 246 |
+
- type: cosine_precision@5
|
| 247 |
+
value: 0.25199999999999995
|
| 248 |
+
name: Cosine Precision@5
|
| 249 |
+
- type: cosine_precision@10
|
| 250 |
+
value: 0.13999999999999999
|
| 251 |
+
name: Cosine Precision@10
|
| 252 |
+
- type: cosine_recall@1
|
| 253 |
+
value: 0.7673333333333332
|
| 254 |
+
name: Cosine Recall@1
|
| 255 |
+
- type: cosine_recall@3
|
| 256 |
+
value: 0.9520000000000001
|
| 257 |
+
name: Cosine Recall@3
|
| 258 |
+
- type: cosine_recall@5
|
| 259 |
+
value: 0.9553333333333334
|
| 260 |
+
name: Cosine Recall@5
|
| 261 |
+
- type: cosine_recall@10
|
| 262 |
+
value: 1.0
|
| 263 |
+
name: Cosine Recall@10
|
| 264 |
+
- type: cosine_ndcg@10
|
| 265 |
+
value: 0.9435612217207588
|
| 266 |
+
name: Cosine Ndcg@10
|
| 267 |
+
- type: cosine_mrr@10
|
| 268 |
+
value: 0.9295238095238095
|
| 269 |
+
name: Cosine Mrr@10
|
| 270 |
+
- type: cosine_map@100
|
| 271 |
+
value: 0.919404761904762
|
| 272 |
+
name: Cosine Map@100
|
| 273 |
+
---
|
| 274 |
+
|
| 275 |
+
# bge-large-en-v1.5
|
| 276 |
+
|
| 277 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) on the [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) dataset. It maps sentences and paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 278 |
+
|
| 279 |
+
## Model Details
|
| 280 |
+
|
| 281 |
+
### Model Description
|
| 282 |
+
- **Model Type:** Sentence Transformer
|
| 283 |
+
- **Base model:** [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) <!-- at revision d4aa6901d3a41ba39fb536a557fa166f842b0e09 -->
|
| 284 |
+
- **Maximum Sequence Length:** 512 tokens
|
| 285 |
+
- **Output Dimensionality:** 1024 dimensions
|
| 286 |
+
- **Similarity Function:** Cosine Similarity
|
| 287 |
+
- **Training Dataset:**
|
| 288 |
+
- [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions)
|
| 289 |
+
- **Language:** en
|
| 290 |
+
- **License:** mit
|
| 291 |
+
|
| 292 |
+
### Model Sources
|
| 293 |
+
|
| 294 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 295 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 296 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 297 |
+
|
| 298 |
+
### Full Model Architecture
|
| 299 |
+
|
| 300 |
+
```
|
| 301 |
+
SentenceTransformer(
|
| 302 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': True, 'architecture': 'BertModel'})
|
| 303 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 304 |
+
(2): Normalize()
|
| 305 |
+
)
|
| 306 |
+
```
|
| 307 |
+
|
| 308 |
+
## Usage
|
| 309 |
+
|
| 310 |
+
### Direct Usage (Sentence Transformers)
|
| 311 |
+
|
| 312 |
+
First install the Sentence Transformers library:
|
| 313 |
+
|
| 314 |
+
```bash
|
| 315 |
+
pip install -U sentence-transformers
|
| 316 |
+
```
|
| 317 |
+
|
| 318 |
+
Then you can load this model and run inference.
|
| 319 |
+
```python
|
| 320 |
+
from sentence_transformers import SentenceTransformer
|
| 321 |
+
|
| 322 |
+
# Download from the 🤗 Hub
|
| 323 |
+
model = SentenceTransformer("DannyAI/embedding_fine_tuning_with_peft_bge_large_en_v1.5")
|
| 324 |
+
# Run inference
|
| 325 |
+
queries = [
|
| 326 |
+
"can you find a pearl in a mussel",
|
| 327 |
+
]
|
| 328 |
+
documents = [
|
| 329 |
+
'Freshwater pearl mussel Although the name "freshwater pearl mussel" is often used for this species, other freshwater mussel species can also create pearls and some can also be used as a source of mother of pearl. In fact, most cultured pearls today come from Hyriopsis species in Asia, or Amblema species in North America, both members of the related family Unionidae; pearls are also found within species in the genus Unio.',
|
| 330 |
+
'Ellis Island Generally, those immigrants who were approved spent from two to five hours at Ellis Island. Arrivals were asked 29 questions including name, occupation, and the amount of money carried. It was important to the American government that the new arrivals could support themselves and have money to get started. The average the government wanted the immigrants to have was between 18 and 25 dollars ($600 in 2015 adjusted for inflation). Those with visible health problems or diseases were sent home or held in the island\'s hospital facilities for long periods of time. More than 3,000 would-be immigrants died on Ellis Island while being held in the hospital facilities. Some unskilled workers were rejected because they were considered "likely to become a public charge." About 2% were denied admission to the U.S. and sent back to their countries of origin for reasons such as having a chronic contagious disease, criminal background, or insanity.[43] Ellis Island was sometimes known as "The Island of Tears" or "Heartbreak Island"[44] because of those 2% who were not admitted after the long transatlantic voyage. The Kissing Post is a wooden column outside the Registry Room, where new arrivals were greeted by their relatives and friends, typically with tears, hugs, and kisses.[45][46]',
|
| 331 |
+
"Glee (season 1) The first season of the musical comedy-drama television series Glee originally aired on Fox in the United States. The pilot episode was broadcast as an advanced preview of the series on May 19, 2009, with the remainder of the season airing between September 9, 2009 and June 8, 2010. The season consisted of 22 episodes; the first 13 aired on Wednesdays at 9\xa0pm (ET) and the final 9 aired on Tuesdays at 9\xa0pm (ET). The season was executive produced by Ryan Murphy, Brad Falchuk, and Dante Di Loreto; Murphy's production company helped co-produce the series alongside 20th Century Fox.",
|
| 332 |
+
]
|
| 333 |
+
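# NOTE: training and evaluation used the prompts "query: " / "document: ". If the saved
# config stores empty prompts, pass prompt="query: " / prompt="document: " explicitly.
query_embeddings = model.encode_query(queries)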
|
| 334 |
+
document_embeddings = model.encode_document(documents)
|
| 335 |
+
print(query_embeddings.shape, document_embeddings.shape)
|
| 336 |
+
# (1, 1024) (3, 1024)
|
| 337 |
+
|
| 338 |
+
# Get the similarity scores for the embeddings
|
| 339 |
+
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 340 |
+
print(similarities)
|
| 341 |
+
# tensor([[0.7103, 0.3918, 0.2758]])
|
| 342 |
+
```
|
| 343 |
+
|
| 344 |
+
<!--
|
| 345 |
+
### Direct Usage (Transformers)
|
| 346 |
+
|
| 347 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 348 |
+
|
| 349 |
+
</details>
|
| 350 |
+
-->
|
| 351 |
+
|
| 352 |
+
<!--
|
| 353 |
+
### Downstream Usage (Sentence Transformers)
|
| 354 |
+
|
| 355 |
+
You can finetune this model on your own dataset.
|
| 356 |
+
|
| 357 |
+
<details><summary>Click to expand</summary>
|
| 358 |
+
|
| 359 |
+
</details>
|
| 360 |
+
-->
|
| 361 |
+
|
| 362 |
+
<!--
|
| 363 |
+
### Out-of-Scope Use
|
| 364 |
+
|
| 365 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 366 |
+
-->
|
| 367 |
+
|
| 368 |
+
## Evaluation
|
| 369 |
+
|
| 370 |
+
### Metrics
|
| 371 |
+
|
| 372 |
+
#### Information Retrieval
|
| 373 |
+
|
| 374 |
+
* Dataset: `NanoQuoraRetrieval`
|
| 375 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 376 |
+
```json
|
| 377 |
+
{
|
| 378 |
+
"query_prompt": "query: ",
|
| 379 |
+
"corpus_prompt": "document: "
|
| 380 |
+
}
|
| 381 |
+
```
|
| 382 |
+
|
| 383 |
+
| Metric | Value |
|
| 384 |
+
|:--------------------|:-----------|
|
| 385 |
+
| cosine_accuracy@1 | 0.88 |
|
| 386 |
+
| cosine_accuracy@3 | 0.98 |
|
| 387 |
+
| cosine_accuracy@5 | 0.98 |
|
| 388 |
+
| cosine_accuracy@10 | 1.0 |
|
| 389 |
+
| cosine_precision@1 | 0.88 |
|
| 390 |
+
| cosine_precision@3 | 0.4133 |
|
| 391 |
+
| cosine_precision@5 | 0.252 |
|
| 392 |
+
| cosine_precision@10 | 0.14 |
|
| 393 |
+
| cosine_recall@1 | 0.7673 |
|
| 394 |
+
| cosine_recall@3 | 0.952 |
|
| 395 |
+
| cosine_recall@5 | 0.9553 |
|
| 396 |
+
| cosine_recall@10 | 1.0 |
|
| 397 |
+
| **cosine_ndcg@10** | **0.9436** |
|
| 398 |
+
| cosine_mrr@10 | 0.9295 |
|
| 399 |
+
| cosine_map@100 | 0.9194 |
|
| 400 |
+
|
| 401 |
+
#### Information Retrieval
|
| 402 |
+
|
| 403 |
+
* Dataset: `NanoQuoraRetrieval`
|
| 404 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
|
| 405 |
+
```json
|
| 406 |
+
{
|
| 407 |
+
"query_prompt": "query: ",
|
| 408 |
+
"corpus_prompt": "document: "
|
| 409 |
+
}
|
| 410 |
+
```
|
| 411 |
+
|
| 412 |
+
| Metric | Value |
|
| 413 |
+
|:--------------------|:-----------|
|
| 414 |
+
| cosine_accuracy@1 | 0.88 |
|
| 415 |
+
| cosine_accuracy@3 | 0.98 |
|
| 416 |
+
| cosine_accuracy@5 | 0.98 |
|
| 417 |
+
| cosine_accuracy@10 | 1.0 |
|
| 418 |
+
| cosine_precision@1 | 0.88 |
|
| 419 |
+
| cosine_precision@3 | 0.4133 |
|
| 420 |
+
| cosine_precision@5 | 0.252 |
|
| 421 |
+
| cosine_precision@10 | 0.14 |
|
| 422 |
+
| cosine_recall@1 | 0.7673 |
|
| 423 |
+
| cosine_recall@3 | 0.952 |
|
| 424 |
+
| cosine_recall@5 | 0.9553 |
|
| 425 |
+
| cosine_recall@10 | 1.0 |
|
| 426 |
+
| **cosine_ndcg@10** | **0.9436** |
|
| 427 |
+
| cosine_mrr@10 | 0.9295 |
|
| 428 |
+
| cosine_map@100 | 0.9194 |
|
| 429 |
+
|
| 430 |
+
<!--
|
| 431 |
+
## Bias, Risks and Limitations
|
| 432 |
+
|
| 433 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 434 |
+
-->
|
| 435 |
+
|
| 436 |
+
<!--
|
| 437 |
+
### Recommendations
|
| 438 |
+
|
| 439 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 440 |
+
-->
|
| 441 |
+
|
| 442 |
+
## Training Details
|
| 443 |
+
|
| 444 |
+
### Training Dataset
|
| 445 |
+
|
| 446 |
+
#### natural-questions
|
| 447 |
+
|
| 448 |
+
* Dataset: [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) at [f9e894e](https://huggingface.co/datasets/sentence-transformers/natural-questions/tree/f9e894e1081e206e577b4eaa9ee6de2b06ae6f17)
|
| 449 |
+
* Size: 80,184 training samples
|
| 450 |
+
* Columns: <code>query</code> and <code>answer</code>
|
| 451 |
+
* Approximate statistics based on the first 1000 samples:
|
| 452 |
+
| | query | answer |
|
| 453 |
+
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 454 |
+
| type | string | string |
|
| 455 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 11.72 tokens</li><li>max: 24 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 132.91 tokens</li><li>max: 512 tokens</li></ul> |
|
| 456 |
+
* Samples:
|
| 457 |
+
| query | answer |
|
| 458 |
+
|:--------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 459 |
+
| <code>who wrote i came in like a wrecking ball</code> | <code>Wrecking Ball (Miley Cyrus song) "Wrecking Ball" is a song recorded by American singer Miley Cyrus for her fourth studio album Bangerz (2013). It was released on August 25, 2013, by RCA Records as the album's second single. The song was written by MoZella, Stephan Moccio, Sacha Skarbek, Kiyanu Kim,[2] Lukasz Gottwald, and Henry Russell Walter;[3] production was helmed by the last two. "Wrecking Ball" is a pop ballad which lyrically discusses the deterioration of a relationship.</code> |
|
| 460 |
+
| <code>what was the purpose of the three-field system</code> | <code>Three-field system The three-field system is a regime of crop rotation that was used in medieval and early-modern Europe. Crop rotation is the practice of growing a series of different types of crops in the same area in sequential seasons. Under this system, the arable land of an estate or village was divided into three large fields: one was planted in the autumn with winter wheat or rye; the second field was planted with other crops such as peas, lentils, or beans; and the third was left fallow, in order to allow the soil of that field to regain its nutrients. With each rotation, the field would be used differently, so that a field would be planted for two out of the three years used, whilst one year it "rested". Previously a "two field system" had been in place, with half the land being left fallow. The three field system allowed farmers to plant more crops and therefore to increase production and legumes have the ability to fix nitrogen and so fertilize the soil. With more crops ava...</code> |
|
| 461 |
+
| <code>who is the main person in the legislative branch</code> | <code>Article One of the United States Constitution Section 1 is a vesting clause that bestows federal legislative power exclusively to Congress. Similar clauses are found in Articles II and III. The former confers executive power upon the President alone, and the latter grants judicial power solely to the federal judiciary. These three articles create a separation of powers among the three branches of the federal government. This separation of powers, by which each department may exercise only its own constitutional powers and no others,[1][2] is fundamental to the idea of a limited government accountable to the people.</code> |
|
| 462 |
+
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
|
| 463 |
+
```json
|
| 464 |
+
{
|
| 465 |
+
"scale": 20.0,
|
| 466 |
+
"similarity_fct": "cos_sim",
|
| 467 |
+
"mini_batch_size": 16,
|
| 468 |
+
"gather_across_devices": false
|
| 469 |
+
}
|
| 470 |
+
```
|
| 471 |
+
|
| 472 |
+
### Evaluation Dataset
|
| 473 |
+
|
| 474 |
+
#### natural-questions
|
| 475 |
+
|
| 476 |
+
* Dataset: [natural-questions](https://huggingface.co/datasets/sentence-transformers/natural-questions) at [f9e894e](https://huggingface.co/datasets/sentence-transformers/natural-questions/tree/f9e894e1081e206e577b4eaa9ee6de2b06ae6f17)
|
| 477 |
+
* Size: 20,047 evaluation samples
|
| 478 |
+
* Columns: <code>query</code> and <code>answer</code>
|
| 479 |
+
* Approximate statistics based on the first 1000 samples:
|
| 480 |
+
| | query | answer |
|
| 481 |
+
|:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 482 |
+
| type | string | string |
|
| 483 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 11.79 tokens</li><li>max: 25 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 135.48 tokens</li><li>max: 512 tokens</li></ul> |
|
| 484 |
+
* Samples:
|
| 485 |
+
| query | answer |
|
| 486 |
+
|:--------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 487 |
+
| <code>when did call of duty ww2 come out</code> | <code>Call of Duty: WWII Call of Duty: WWII is a first-person shooter video game developed by Sledgehammer Games and published by Activision. It is the fourteenth main installment in the Call of Duty series and was released worldwide on November 3, 2017 for Microsoft Windows, PlayStation 4 and Xbox One. It is the first title in the series to be set primarily during World War II since Call of Duty: World at War in 2008.[2] The game is set in the European theatre, and is centered around a squad in the 1st Infantry Division, following their battles on the Western Front, and set mainly in the historical events of Operation Overlord; the multiplayer expands to different fronts not seen in the campaign.</code> |
|
| 488 |
+
| <code>who is doing the half time super bowl</code> | <code>Super Bowl LII halftime show The Super Bowl LII Halftime Show (officially known as the Pepsi Super Bowl LII Halftime Show) took place on February 4, 2018 at U.S. Bank Stadium in Minneapolis, Minnesota, as part of Super Bowl LII. Justin Timberlake was the featured performer, as confirmed by the National Football League (NFL) on October 22, 2017.[1] It was televised nationally by NBC.</code> |
|
| 489 |
+
| <code>when was the sewage system built in london</code> | <code>London sewerage system Joseph Bazalgette, a civil engineer and Chief Engineer of the Metropolitan Board of Works, was given responsibility for the work. He designed an extensive underground sewerage system that diverted waste to the Thames Estuary, downstream of the main centre of population. Six main interceptor sewers, totalling almost 160 km (100 miles) in length, were constructed, some incorporating stretches of London's "lost" rivers. Three of these sewers were north of the river, the southernmost, low-level one being incorporated in the Thames Embankment. The Embankment also allowed new roads, new public gardens, and the Circle line of the London Underground. Victoria Embankment was finally officially opened on 13 July 1870.[3][4]</code> |
|
| 490 |
+
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
|
| 491 |
+
```json
|
| 492 |
+
{
|
| 493 |
+
"scale": 20.0,
|
| 494 |
+
"similarity_fct": "cos_sim",
|
| 495 |
+
"mini_batch_size": 16,
|
| 496 |
+
"gather_across_devices": false
|
| 497 |
+
}
|
| 498 |
+
```
|
| 499 |
+
|
| 500 |
+
### Training Hyperparameters
|
| 501 |
+
#### Non-Default Hyperparameters
|
| 502 |
+
|
| 503 |
+
- `eval_strategy`: steps
|
| 504 |
+
- `per_device_train_batch_size`: 5
|
| 505 |
+
- `per_device_eval_batch_size`: 5
|
| 506 |
+
- `learning_rate`: 2e-05
|
| 507 |
+
- `max_steps`: 100
|
| 508 |
+
- `warmup_ratio`: 0.1
|
| 509 |
+
- `seed`: 30
|
| 510 |
+
- `bf16`: True
|
| 511 |
+
- `load_best_model_at_end`: True
|
| 512 |
+
- `prompts`: {'query': 'query: ', 'answer': 'document: '}
|
| 513 |
+
- `batch_sampler`: no_duplicates
|
| 514 |
+
|
| 515 |
+
#### All Hyperparameters
|
| 516 |
+
<details><summary>Click to expand</summary>
|
| 517 |
+
|
| 518 |
+
- `overwrite_output_dir`: False
|
| 519 |
+
- `do_predict`: False
|
| 520 |
+
- `eval_strategy`: steps
|
| 521 |
+
- `prediction_loss_only`: True
|
| 522 |
+
- `per_device_train_batch_size`: 5
|
| 523 |
+
- `per_device_eval_batch_size`: 5
|
| 524 |
+
- `per_gpu_train_batch_size`: None
|
| 525 |
+
- `per_gpu_eval_batch_size`: None
|
| 526 |
+
- `gradient_accumulation_steps`: 1
|
| 527 |
+
- `eval_accumulation_steps`: None
|
| 528 |
+
- `torch_empty_cache_steps`: None
|
| 529 |
+
- `learning_rate`: 2e-05
|
| 530 |
+
- `weight_decay`: 0.0
|
| 531 |
+
- `adam_beta1`: 0.9
|
| 532 |
+
- `adam_beta2`: 0.999
|
| 533 |
+
- `adam_epsilon`: 1e-08
|
| 534 |
+
- `max_grad_norm`: 1.0
|
| 535 |
+
- `num_train_epochs`: 3.0
|
| 536 |
+
- `max_steps`: 100
|
| 537 |
+
- `lr_scheduler_type`: linear
|
| 538 |
+
- `lr_scheduler_kwargs`: {}
|
| 539 |
+
- `warmup_ratio`: 0.1
|
| 540 |
+
- `warmup_steps`: 0
|
| 541 |
+
- `log_level`: passive
|
| 542 |
+
- `log_level_replica`: warning
|
| 543 |
+
- `log_on_each_node`: True
|
| 544 |
+
- `logging_nan_inf_filter`: True
|
| 545 |
+
- `save_safetensors`: True
|
| 546 |
+
- `save_on_each_node`: False
|
| 547 |
+
- `save_only_model`: False
|
| 548 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 549 |
+
- `no_cuda`: False
|
| 550 |
+
- `use_cpu`: False
|
| 551 |
+
- `use_mps_device`: False
|
| 552 |
+
- `seed`: 30
|
| 553 |
+
- `data_seed`: None
|
| 554 |
+
- `jit_mode_eval`: False
|
| 555 |
+
- `use_ipex`: False
|
| 556 |
+
- `bf16`: True
|
| 557 |
+
- `fp16`: False
|
| 558 |
+
- `fp16_opt_level`: O1
|
| 559 |
+
- `half_precision_backend`: auto
|
| 560 |
+
- `bf16_full_eval`: False
|
| 561 |
+
- `fp16_full_eval`: False
|
| 562 |
+
- `tf32`: None
|
| 563 |
+
- `local_rank`: 0
|
| 564 |
+
- `ddp_backend`: None
|
| 565 |
+
- `tpu_num_cores`: None
|
| 566 |
+
- `tpu_metrics_debug`: False
|
| 567 |
+
- `debug`: []
|
| 568 |
+
- `dataloader_drop_last`: False
|
| 569 |
+
- `dataloader_num_workers`: 0
|
| 570 |
+
- `dataloader_prefetch_factor`: None
|
| 571 |
+
- `past_index`: -1
|
| 572 |
+
- `disable_tqdm`: False
|
| 573 |
+
- `remove_unused_columns`: True
|
| 574 |
+
- `label_names`: None
|
| 575 |
+
- `load_best_model_at_end`: True
|
| 576 |
+
- `ignore_data_skip`: False
|
| 577 |
+
- `fsdp`: []
|
| 578 |
+
- `fsdp_min_num_params`: 0
|
| 579 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 580 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 581 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 582 |
+
- `parallelism_config`: None
|
| 583 |
+
- `deepspeed`: None
|
| 584 |
+
- `label_smoothing_factor`: 0.0
|
| 585 |
+
- `optim`: adamw_torch_fused
|
| 586 |
+
- `optim_args`: None
|
| 587 |
+
- `adafactor`: False
|
| 588 |
+
- `group_by_length`: False
|
| 589 |
+
- `length_column_name`: length
|
| 590 |
+
- `ddp_find_unused_parameters`: None
|
| 591 |
+
- `ddp_bucket_cap_mb`: None
|
| 592 |
+
- `ddp_broadcast_buffers`: False
|
| 593 |
+
- `dataloader_pin_memory`: True
|
| 594 |
+
- `dataloader_persistent_workers`: False
|
| 595 |
+
- `skip_memory_metrics`: True
|
| 596 |
+
- `use_legacy_prediction_loop`: False
|
| 597 |
+
- `push_to_hub`: False
|
| 598 |
+
- `resume_from_checkpoint`: None
|
| 599 |
+
- `hub_model_id`: None
|
| 600 |
+
- `hub_strategy`: every_save
|
| 601 |
+
- `hub_private_repo`: None
|
| 602 |
+
- `hub_always_push`: False
|
| 603 |
+
- `hub_revision`: None
|
| 604 |
+
- `gradient_checkpointing`: False
|
| 605 |
+
- `gradient_checkpointing_kwargs`: None
|
| 606 |
+
- `include_inputs_for_metrics`: False
|
| 607 |
+
- `include_for_metrics`: []
|
| 608 |
+
- `eval_do_concat_batches`: True
|
| 609 |
+
- `fp16_backend`: auto
|
| 610 |
+
- `push_to_hub_model_id`: None
|
| 611 |
+
- `push_to_hub_organization`: None
|
| 612 |
+
- `mp_parameters`:
|
| 613 |
+
- `auto_find_batch_size`: False
|
| 614 |
+
- `full_determinism`: False
|
| 615 |
+
- `torchdynamo`: None
|
| 616 |
+
- `ray_scope`: last
|
| 617 |
+
- `ddp_timeout`: 1800
|
| 618 |
+
- `torch_compile`: False
|
| 619 |
+
- `torch_compile_backend`: None
|
| 620 |
+
- `torch_compile_mode`: None
|
| 621 |
+
- `include_tokens_per_second`: False
|
| 622 |
+
- `include_num_input_tokens_seen`: False
|
| 623 |
+
- `neftune_noise_alpha`: None
|
| 624 |
+
- `optim_target_modules`: None
|
| 625 |
+
- `batch_eval_metrics`: False
|
| 626 |
+
- `eval_on_start`: False
|
| 627 |
+
- `use_liger_kernel`: False
|
| 628 |
+
- `liger_kernel_config`: None
|
| 629 |
+
- `eval_use_gather_object`: False
|
| 630 |
+
- `average_tokens_across_devices`: False
|
| 631 |
+
- `prompts`: {'query': 'query: ', 'answer': 'document: '}
|
| 632 |
+
- `batch_sampler`: no_duplicates
|
| 633 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 634 |
+
- `router_mapping`: {}
|
| 635 |
+
- `learning_rate_mapping`: {}
|
| 636 |
+
|
| 637 |
+
</details>
|
| 638 |
+
|
| 639 |
+
### Training Logs
|
| 640 |
+
| Epoch | Step | Training Loss | Validation Loss | NanoQuoraRetrieval_cosine_ndcg@10 |
|
| 641 |
+
|:----------:|:-------:|:-------------:|:---------------:|:---------------------------------:|
|
| 642 |
+
| -1 | -1 | - | - | 0.9583 |
|
| 643 |
+
| **0.0062** | **100** | **0.0156** | **0.0067** | **0.9436** |
|
| 644 |
+
| -1 | -1 | - | - | 0.9436 |
|
| 645 |
+
|
| 646 |
+
* The bold row denotes the saved checkpoint.
|
| 647 |
+
|
| 648 |
+
### Framework Versions
|
| 649 |
+
- Python: 3.12.11
|
| 650 |
+
- Sentence Transformers: 5.1.0
|
| 651 |
+
- Transformers: 4.56.1
|
| 652 |
+
- PyTorch: 2.8.0+cu126
|
| 653 |
+
- Accelerate: 1.10.1
|
| 654 |
+
- Datasets: 4.0.0
|
| 655 |
+
- Tokenizers: 0.22.0
|
| 656 |
+
|
| 657 |
+
## Citation
|
| 658 |
+
|
| 659 |
+
### BibTeX
|
| 660 |
+
|
| 661 |
+
#### Sentence Transformers
|
| 662 |
+
```bibtex
|
| 663 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 664 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 665 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 666 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 667 |
+
month = "11",
|
| 668 |
+
year = "2019",
|
| 669 |
+
publisher = "Association for Computational Linguistics",
|
| 670 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 671 |
+
}
|
| 672 |
+
```
|
| 673 |
+
|
| 674 |
+
#### CachedMultipleNegativesRankingLoss
|
| 675 |
+
```bibtex
|
| 676 |
+
@misc{gao2021scaling,
|
| 677 |
+
title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
|
| 678 |
+
author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
|
| 679 |
+
year={2021},
|
| 680 |
+
eprint={2101.06983},
|
| 681 |
+
archivePrefix={arXiv},
|
| 682 |
+
primaryClass={cs.LG}
|
| 683 |
+
}
|
| 684 |
+
```
|
| 685 |
+
|
| 686 |
+
<!--
|
| 687 |
+
## Glossary
|
| 688 |
+
|
| 689 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 690 |
+
-->
|
| 691 |
+
|
| 692 |
+
<!--
|
| 693 |
+
## Model Card Authors
|
| 694 |
+
|
| 695 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 696 |
+
-->
|
| 697 |
+
|
| 698 |
+
<!--
|
| 699 |
+
## Model Card Contact
|
| 700 |
+
|
| 701 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 702 |
+
-->
|
adapter_config.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "BAAI/bge-large-en-v1.5",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": false,
|
| 10 |
+
"inference_mode": false,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 128,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.1,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 64,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"query",
|
| 29 |
+
"value"
|
| 30 |
+
],
|
| 31 |
+
"target_parameters": null,
|
| 32 |
+
"task_type": "FEATURE_EXTRACTION",
|
| 33 |
+
"trainable_token_indices": null,
|
| 34 |
+
"use_dora": false,
|
| 35 |
+
"use_qalora": false,
|
| 36 |
+
"use_rslora": false
|
| 37 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9e47ce461f4c937b1fd1b3b74632019771f4a16c99ee2e1992512526f4dce32
|
| 3 |
+
size 25179176
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.0",
|
| 4 |
+
"transformers": "4.56.1",
|
| 5 |
+
"pytorch": "2.8.0+cu126"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 512,
|
| 3 |
+
"do_lower_case": true
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
}
|
| 43 |
+
},
|
| 44 |
+
"clean_up_tokenization_spaces": true,
|
| 45 |
+
"cls_token": "[CLS]",
|
| 46 |
+
"do_basic_tokenize": true,
|
| 47 |
+
"do_lower_case": true,
|
| 48 |
+
"extra_special_tokens": {},
|
| 49 |
+
"mask_token": "[MASK]",
|
| 50 |
+
"model_max_length": 512,
|
| 51 |
+
"never_split": null,
|
| 52 |
+
"pad_token": "[PAD]",
|
| 53 |
+
"sep_token": "[SEP]",
|
| 54 |
+
"strip_accents": null,
|
| 55 |
+
"tokenize_chinese_chars": true,
|
| 56 |
+
"tokenizer_class": "BertTokenizer",
|
| 57 |
+
"unk_token": "[UNK]"
|
| 58 |
+
}
|
vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|