Add new SentenceTransformer model

Browse files

Files changed (11) hide show

1_Pooling/config.json +10 -0
README.md +1016 -0
config.json +25 -0
config_sentence_transformers.json +10 -0
model.safetensors +3 -0
modules.json +20 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +63 -0
vocab.txt +0 -0

1_Pooling/config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "word_embedding_dimension": 768,
+  "pooling_mode_cls_token": false,
+  "pooling_mode_mean_tokens": true,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}

README.md ADDED Viewed

	@@ -0,0 +1,1016 @@

+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:113450
+- loss:ContrastiveTensionLoss
+base_model: sucharush/e5_stem_finetuned
+widget:
+- source_sentence: 'Question: what channel is nj news 12 on fios, Answer: News 12
+    Networks The networks are carried by Altice''s Optimum systems throughout the
+    region (except for certain systems in Dutchess, Orange, and Putnam counties in
+    New York, and Litchfield Count in Connecticut); these systems were originally
+    owned by Dolan-owned Cablevision before being spun off to Altice. Since the sale,
+    News 12 networks are also available on most Charter Communications, Comcast, and
+    Service Electric systems in the Tri-State area (in the case of Charter, this is
+    part of a cross-provider agreement that also allows Charter''s 24-hour local news
+    channel, NY1, to be seen on Altice providers). However, none of the channels are
+    available on Verizon FiOS (which offers a competing FiOS 1 service) or satellite
+    providers DirecTV or Dish Network; this is referenced in the slogan used by the
+    networks, "Only on cable. Never on FiOS. Never on satellite." Although the seven
+    channels have individual and unique news operations, they share the same mission,
+    graphics, and music packages.'
+  sentences:
+  - 'Question: who won the hockey game between usa and russia, Answer: Miracle on
+    Ice For the first game in the medal round, the United States played the Soviets.
+    Finishing the first period tied at 2–2, and the Soviets leading 3–2 following
+    the second, the U.S. team scored two more goals to take their first lead during
+    the third and final period, winning the game 4–3 in a major upset against the
+    Cold War rival. Following the game, the U.S. went on to clinch the gold medal
+    by beating Finland in their final match of the tournament. Likewise, the Soviet
+    Union took the silver medal by beating Sweden.'
+  - 'Question: when did the vietnam war start for the us, Answer: Vietnam War Beginning
+    in 1950, American military advisors arrived in what was then French Indochina.[86][A
+    3] Most of the funding for the French war effort was provided by the U.S.[87]
+    The Việt Cộng, also known as Front national de libération du Sud-Viêt Nam or FNL
+    (the National Liberation Front), a South Vietnamese communist common front aided
+    by the North, fought a guerrilla war against anti-communist forces in the region,
+    while the People''s Army of Vietnam, also known as the North Vietnamese Army (NVA),
+    engaged in more conventional warfare, and had launched armed struggles from 1959
+    onward. U.S. involvement escalated in 1960 under President John F. Kennedy, with
+    troop levels gradually surging under the MAAG program from just under a thousand
+    in 1959 to 16,000 in 1963.[88][89]'
+  - 'Question: how many medals did gb win at the winter olympics, Answer: Great Britain
+    at the 2018 Winter Olympics Great Britain competed at the 2018 Winter Olympics
+    in Pyeongchang, South Korea, from 9 to 25 February 2018, with 58 competitors in
+    11 sports. They won five medals in total, one gold and four bronze, ranking 19th
+    in the medal table.'
+- source_sentence: 'Question: when did wet n wild close las vegas, Answer: Wet ''n
+    Wild (Las Vegas) The park was acquired by Universal Studios in October 1998, Ogden
+    Corp in March 1999, Alfa SmartParks in March 2000, and Palace Entertainment on
+    July 10, 2002.[8] The park closed in 2004.[9][10][8]'
+  sentences:
+  - 'Question: how many times has north america hosted the olympics, Answer: List
+    of Olympic Games host cities The Games have primarily been hosted in the continents
+    of Europe (32 editions) and North America (12 editions); seven Games have been
+    hosted in Asia and two have been hosted in Oceania. In 2010, Singapore became
+    Southeast Asia''s first Olympic host city for the inaugural Summer Youth Olympics,
+    while Rio de Janeiro became South America''s first Olympic host city with the
+    2016 Summer Olympics, followed by Buenos Aires with the 2018 Summer Youth Olympics.
+    The 2022 Summer Youth Olympics in Dakar will become the first-ever Games to be
+    held on the African continent. Other major geographic regions which have never
+    hosted the Olympics include the Middle East, the Indian subcontinent, Central
+    America and the Caribbean.'
+  - '[{''from'': ''system'', ''value'': ''You are an AI assistant. You will be given
+    a task. You must generate a detailed and long answer.''}, {''from'': ''human'',
+    ''value'': ''Answer this question: where does ground pepper come from?\nAnswer:''},
+    {''from'': ''gpt'', ''value'': ''Ground pepper, a widely used and popular spice
+    across the globe, comes from the dried berries of the Piper nigrum plant. Piper
+    nigrum is a perennial flowering vine, native to Southern India and Sri Lanka,
+    but now cultivated in various tropical regions such as Vietnam, Indonesia, Malaysia,
+    Brazil, and Madagascar. The plant bears small, green, unripe berries that turn
+    red as they ripen. The resulting ground pepper differs in color (black, green,
+    or white) depending on the time the berries are harvested and the processing method
+    employed.\n\nBlack ground pepper, which is the most common variety, is derived
+    from the unripe green berries of the Piper nigrum plant. These berries are harvested
+    and then blanched briefly in boiling water, a process that activates the enzyme
+    responsible for the oxidation of the outer skin, promoting its dark coloration.
+    The berries are then spread out in the sun or other drying methods to dry over
+    several days. The outer skin wrinkles, and the remaining seed dries out, developing
+    its characteristic black color and distinct pungent aroma. Once fully dried, the
+    berries, known as black peppercorns, are ground to produce black ground pepper.\n\nGreen
+    ground pepper is produced from fresh green berries that are harvested before they
+    ripen. Rather than being dried, these unripe berries are treated to preserve their
+    green color, either through freeze-drying or by soaking them in a saltwater or
+    vinegar solution. This process retains the green hue and a milder, herbaceous
+    flavor profile compared to black pepper. After preservation, green peppercorns
+    are ground into green ground pepper.\n\nWhite ground pepper, on the other hand,
+    comes from ripe red berries. The red berries are harvested at full maturity and
+    soaked in water for an extended period, a process that softens and loosens their
+    outer skin. The outer skin is then removed, leaving behind the inner seed, which
+    is pale in color. These seeds are then dried and ground to create white ground
+    pepper. This variety has a milder and slightly less pungent flavor compared to
+    black pepper.\n\nAll three varieties (black, green, and white) of ground pepper
+    are used in various cuisines worldwide, each imparting unique flavors to culinary
+    dishes. Today, ground pepper is not only a primary seasoning ingredient in countless
+    recipes but also a staple condiment on restaurant tables and in home kitchens.
+    As a testament to its universal appeal, ground pepper remains one of the most
+    traded commodities in the global spice market.''}]'
+  - '[{''from'': ''human'', ''value'': ''You are working on a project that involves
+    creating a function in Python to calculate the roots of a quadratic equation.
+    The function should take the coefficients of the quadratic equation as input and
+    return the roots as output. The quadratic equation is of the form ax^2 + bx +
+    c = 0.\n\nWrite a Python function to calculate the roots of a quadratic equation.''},
+    {''from'': ''gpt'', ''value'': ''```python\nimport cmath\n\ndef calculate_quadratic_roots(a,
+    b, c):\n    # Calculate the discriminant\n    discriminant = (b**2) - (4*a*c)\n\n    #
+    Calculate the two roots using the quadratic formula\n    root1 = (-b - cmath.sqrt(discriminant))
+    / (2 * a)\n    root2 = (-b + cmath.sqrt(discriminant)) / (2 * a)\n\n    return
+    root1, root2\n```\nThis Python function takes the coefficients a, b, and c as
+    input and calculates the roots of the quadratic equation using the quadratic formula.
+    The cmath module is used to handle complex roots if the discriminant is negative.
+    The function returns the two roots as output.''}]'
+- source_sentence: '[{''from'': ''human'', ''value'': ''How can we define a function
+    by a formula in a rigorous way, ensuring that the function is well-defined and
+    its domain is properly specified?''}, {''from'': ''gpt'', ''value'': "To define
+    a function by a formula in a rigorous way, we need to consider both the formula
+    itself and the domain of the function. Here''s a step-by-step approach:\n\n1.
+    **Formula Definition**:\n   - Define a set of expressions for rational functions
+    using a fixed variable, an alphabet for real constants, and a function assigning
+    values to constants.\n   - Each expression is a string in a specific alphabet,
+    including the variable, constants, and operators.\n\n2. **Natural Domain and Interpretation**:\n   -
+    Simultaneously define two functions:\n     - The natural domain function $D$ associates
+    each expression with a subset of real numbers, representing the domain where the
+    expression is well-defined.\n     - The interpretation function $I$ associates
+    each expression with a function from its natural domain to real numbers, representing
+    the value of the expression at each point in the domain.\n\n3. **Structural Induction**:\n   -
+    Define $D$ and $I$ inductively based on the structure of expressions:\n     -
+    For the variable, the natural domain is all real numbers, and the interpretation
+    is the identity function.\n     - For constants, the natural domain is all real
+    numbers, and the interpretation is the constant value.\n     - For sums, products,
+    and quotients, the natural domain and interpretation are defined based on the
+    natural domains and interpretations of the constituent expressions.\n\n4. **Syntactic
+    Equality and Sugar**:\n   - Introduce syntactic equality to simplify expressions
+    and make them more readable.\n   - Define rules for syntactic sugar, such as removing
+    unnecessary parentheses and using more compact notation for powers and fractions.\n\n5.
+    **Equivalence of Fractions and Irreducible Fractions**:\n   - Define equivalence
+    of fractions and irreducible fractions within the context of expressions.\n\nBy
+    following this approach, we can rigorously define a function by a formula, ensuring
+    that the function is well-defined and its domain is properly specified. This approach
+    provides a solid foundation for understanding and working with functions expressed
+    as formulas in mathematics."}]'
+  sentences:
+  - '[{''from'': ''human'', ''value'': ''How can we define a function by a formula
+    in a rigorous way, ensuring that the function is well-defined and its domain is
+    properly specified?''}, {''from'': ''gpt'', ''value'': "To define a function by
+    a formula in a rigorous way, we need to consider both the formula itself and the
+    domain of the function. Here''s a step-by-step approach:\n\n1. **Formula Definition**:\n   -
+    Define a set of expressions for rational functions using a fixed variable, an
+    alphabet for real constants, and a function assigning values to constants.\n   -
+    Each expression is a string in a specific alphabet, including the variable, constants,
+    and operators.\n\n2. **Natural Domain and Interpretation**:\n   - Simultaneously
+    define two functions:\n     - The natural domain function $D$ associates each
+    expression with a subset of real numbers, representing the domain where the expression
+    is well-defined.\n     - The interpretation function $I$ associates each expression
+    with a function from its natural domain to real numbers, representing the value
+    of the expression at each point in the domain.\n\n3. **Structural Induction**:\n   -
+    Define $D$ and $I$ inductively based on the structure of expressions:\n     -
+    For the variable, the natural domain is all real numbers, and the interpretation
+    is the identity function.\n     - For constants, the natural domain is all real
+    numbers, and the interpretation is the constant value.\n     - For sums, products,
+    and quotients, the natural domain and interpretation are defined based on the
+    natural domains and interpretations of the constituent expressions.\n\n4. **Syntactic
+    Equality and Sugar**:\n   - Introduce syntactic equality to simplify expressions
+    and make them more readable.\n   - Define rules for syntactic sugar, such as removing
+    unnecessary parentheses and using more compact notation for powers and fractions.\n\n5.
+    **Equivalence of Fractions and Irreducible Fractions**:\n   - Define equivalence
+    of fractions and irreducible fractions within the context of expressions.\n\nBy
+    following this approach, we can rigorously define a function by a formula, ensuring
+    that the function is well-defined and its domain is properly specified. This approach
+    provides a solid foundation for understanding and working with functions expressed
+    as formulas in mathematics."}]'
+  - '[{''from'': ''system'', ''value'': ''You are a helpful assistant, with no access
+    to external functions.''}, {''from'': ''human'', ''value'': ''Group the shapes
+    given in the table below into 2D and 3D shapes, based on their respective dimensions.
+    Additionally, provide the surface area and volume of each shape in the table.
+    The list of shapes includes triangle, sphere, cylinder, and square.''}, {''from'':
+    ''gpt'', ''value'': ''2D Shapes: Triangle, Square\n- Triangle: Surface Area =
+    (base x height) / 2\n- Square: Surface Area = 4 x side length\n3D Shapes: Sphere,
+    Cylinder\n- Sphere: Surface Area = 4πr^2, Volume = (4/3)πr^3\n- Cylinder: Surface
+    Area = 2πrh + 2πr^2, Volume = πr^2h\nNote: r is the radius of the sphere or cylinder,
+    h is the height of the cylinder.\nBased on their dimensions, the 2D shapes are
+    the triangle and the square, while the 3D shapes are the sphere and the cylinder.\n|
+    Shape | Type | Surface Area Formula | Volume Formula |\n|----------|----------|--------------------------------|-------------------------------|\n|
+    Triangle | 2D Shape | (base x height) / 2 | N/A |\n| Square | 2D Shape | 4 x side
+    length | N/A |\n| Sphere | 3D Shape | 4πr^2 | (4/3)πr^3 |\n| Cylinder | 3D Shape
+    | 2πrh + 2πr^2 | πr^2h |''}, {''from'': ''human'', ''value'': ''Can you explain
+    the difference between a hurricane, a typhoon, and a cyclone?''}, {''from'': ''gpt'',
+    ''value'': ''Yes, I can explain the difference between a hurricane, a typhoon,
+    and a cyclone. These are all terms for tropical cyclones, which are rotating storms
+    that form in warm ocean waters near the equator. The main difference between these
+    terms is the location where the storm forms. Hurricanes form in the Atlantic Ocean
+    or northeastern Pacific Ocean, while typhoons form in the northwestern Pacific
+    Ocean. Cyclones, on the other hand, is a term used to describe tropical storms
+    that occur in the South Pacific and Indian Oceans. Another key difference is the
+    direction of rotation. In the Northern Hemisphere, hurricanes and typhoons rotate
+    counterclockwise, while in the Southern Hemisphere, they rotate clockwise. Cyclones,
+    however, rotate clockwise in the Southern Hemisphere and counterclockwise in the
+    Northern Hemisphere.''}, {''from'': ''human'', ''value'': ''How does the process
+    of photosynthesis work?''}, {''from'': ''gpt'', ''value'': ''Photosynthesis is
+    the process by which plants convert sunlight, carbon dioxide, and water into glucose
+    (sugar) and oxygen. This process takes place in the chloroplasts of plant cells,
+    which contain a green pigment called chlorophyll. During photosynthesis, light
+    energy is absorbed by chlorophyll, which then uses that energy to split water
+    molecules into hydrogen ions and oxygen gas. The hydrogen ions are combined with
+    carbon dioxide from the air to create glucose, which the plant uses as food. Oxygen
+    gas is released into the atmosphere as a byproduct of this process. The overall
+    chemical equation for photosynthesis is: 6CO2 + 6H2O + Light Energy → C6H12O6
+    + 6O2.''}, {''from'': ''human'', ''value'': ''What is the difference between nature
+    and nurture when it comes to human characteristics?''}, {''from'': ''gpt'', ''value'':
+    ''Nature refers to the genetic or inherited traits that we receive from our parents
+    and ancestors. These traits include things like eye color, height, and certain
+    personality traits. Nurture refers to the environmental factors that influence
+    our development, such as our upbringing, education, and life experiences. While
+    nature plays a role in shaping who we are, research has shown that nurture can
+    have a significant impact on human characteristics as well. For example, studies
+    of identical twins who were separated at birth and raised in different environments
+    have shown that their personalities and behaviors can differ quite significantly,
+    despite having the same genetic makeup.''}, {''from'': ''human'', ''value'': ''Why
+    is the sky blue during the day and red/orange during sunrise and sunset?''}, {''from'':
+    ''gpt'', ''value'': "The sky appears blue during the day and red/orange during
+    sunrise and sunset due to the way that light is scattered by the Earth''s atmosphere.
+    During the day, sunlight consists of all the colors of the spectrum, including
+    blue, green, and red. When this light enters the Earth''s atmosphere, it collides
+    with molecules of gas and other particles, causing it to scatter in all directions.
+    Blue light is scattered more than other colors because it travels in smaller,
+    shorter waves. This is why the sky appears blue to us. During sunrise and sunset,
+    however, the sun''s rays have to travel through more of the Earth''s atmosphere
+    to reach us, which causes the blue light to be scattered even more. This leaves
+    only the longer-wavelength red, orange, and yellow light to reach our eyes, giving
+    the sky its characteristic red or orange hue."}]'
+  - '[{''from'': ''human'', ''value'': ''How can I create a program in Java to find
+    the greatest common divisor (GCD) of two integers? None''}, {''from'': ''gpt'',
+    ''value'': ''1. The task is to create a Java program to find the greatest common
+    divisor (GCD) of two integers.\n2. The GCD of two integers is the largest number
+    that divides both of them without leaving a remainder.\n3. To solve this, we can
+    use a simple iterative method where we check each number up to the minimum of
+    the two given numbers to see if it divides both numbers.\n4. We start by declaring
+    two integer variables `a` and `b` with the values of the two numbers we want to
+    find the GCD of.\n5. We then initialize a variable `gcd` with 1. This variable
+    will hold the greatest common divisor.\n6. Next, we use a `for` loop to iterate
+    from 1 to the smaller of the two numbers (`a` and `b`).\n7. Inside the loop, we
+    check if both `a` and `b` are divisible by the current value of `i` using the
+    modulus operator (`%`). The modulus operator returns the remainder of the division
+    of two numbers.\n8. If `a` and `b` are both divisible by `i` (i.e., `a % i ==
+    0` and `b % i == 0`), this means `i` is a common divisor of `a` and `b`.\n9. In
+    this case, we update `gcd` with the current value of `i`. Since `i` is increasing
+    with each iteration of the loop, `gcd` will always hold the greatest common divisor
+    found so far.\n10. After the loop finishes, `gcd` will hold the greatest common
+    divisor of `a` and `b`.\n11. Finally, we print the value of `gcd` using `System.out.printf`.
+    This will display the GCD of the two numbers.\n12. The output of the program will
+    be the GCD of the two input numbers.\n\nYou can use the following code as a solution:\n\n```java\npublic
+    class Main {\n    public static void main(String[] args) {\n        int a = 24;\n        int
+    b = 18;\n\n        int gcd = 1;\n\n        for(int i = 1; i <= a && i <= b; i++)
+    {\n            if(a % i == 0 && b % i == 0)\n                gcd = i;\n        }\n\n        System.out.printf("GCD
+    of %d and %d is: %d", a, b, gcd);\n    }\n}\n```\n\nIn this code, we start by
+    declaring two integer variables `a` and `b` with values 24 and 18 respectively.
+    We then initialize a variable `gcd` with 1, which will hold the greatest common
+    divisor.\n\nNext, we use a `for` loop to iterate from 1 to the smaller of the
+    two numbers (`a` and `b`). Inside the loop, we check if both `a` and `b` are divisible
+    by the current value of `i`. If they are, we update `gcd` with the current value
+    of `i`.\n\nFinally, we use `System.out.printf` to display the resulting GCD value.
+    In this example, the output will be "GCD of 24 and 18 is: 6", indicating that
+    the greatest common divisor of 24 and 18 is 6.''}]'
+- source_sentence: 'Question: how old do you have to be to buy scratchers in arizona,
+    Answer: Arizona Lottery Arizona requires lottery players to be at least 21; the
+    minimum age was 18 until June 1, 2003.'
+  sentences:
+  - '[{''from'': ''human'', ''value'': ''Provide a detailed guide on how to crop an
+    image using Python including an explanation of the Pillow library, the code needed
+    for cropping, and an explanation of each line of code. None''}, {''from'': ''gpt'',
+    ''value'': "1. Understand the instruction: Provide a detailed guide on cropping
+    an image using Python, including an explanation of the Pillow library, the code
+    needed for cropping, and an explanation of each line of code.\n\n2. Identify the
+    main components: Pillow library, cropping an image, code explanation.\n\n3. Start
+    with an introduction: Explain the objective of the article and introduce Python''s
+    digital image processing capability.\n\n4. Introduce the Pillow library: Explain
+    that Python uses the Pillow library for handling images, which is an updated version
+    of the PIL module.\n\n5. Explain the process of cropping an image: Describe how
+    cropping involves selecting a rectangular portion from an image and creating a
+    new image with just the selected portion.\n\n6. Provide the code snippet for cropping
+    an image: Include the code snippet and explain each line of code.\n\n7. Line 1:
+    Explain the ''crop()'' function, its input parameters, and how it returns a new
+    image with the specified rectangular box.\n\n8. Line 2: Explain how the new Image
+    object for the cropped image is stored in the ''croppedIm'' variable.\n\n9. Line
+    3: Explain how the ''save()'' function is called on ''croppedIm'' to save the
+    cropped image as ''cropped.png''.\n\n10. Mention the coordinate system used by
+    Pillow: Explain that Pillow uses a right-handed coordinate system with (0,0) in
+    the upper left corner.\n\n11. Conclude the chain of thought: Summarize the process
+    of cropping an image using Python and the Pillow library.\n\nTitle: Code Snippet
+    Guide on Image Cropping Using Python\n\nObjective: This article explains the use
+    of Python''s digital image processing capability to perform a common task - image
+    cropping. Python''s powerful and rich libraries aid in manipulating images with
+    just a few lines of code. Here, we will introduce how to crop an image using Python.\n\n
+    Python''s Pillow Library\n\nPrimarily, Python uses a library named Pillow for
+    handling images, which is an updated version of an older module called PIL (Python
+    Imaging Library). It can be used for opening, manipulating, and saving different
+    image file formats.\n\n Cropping an Image with Python \n\nThe process of cropping
+    an image using Python involves selecting a desired rectangular portion from a
+    given image and creating a new image with just the selected portion.\n\n Code
+    Explanation:\n\nHere is the code snippet to perform image cropping.\n\n```python\ncroppedIm
+    = catIm.crop((335, 345, 565, 560))\ncroppedIm.save(''cropped.png'')\n```\n\nThe
+    above code snippet does the following:\n\n1. `catIm.crop((335, 345, 565, 560))`:
+    The ''crop()'' function is called on the Image object ''catIm''. This function
+    requires a tuple of four integer coordinates that represent a rectangular box
+    in the original image. The coordinates are pixel positions for left, upper, right,
+    and lower position (in that order). The ''crop()'' function then returns a new
+    image that includes the area inside the specified rectangular box.\n\n2. `croppedIm
+    = catIm.crop((335, 345, 565, 560))`: This line of code makes a new Image object
+    for the cropped image and stores it in the ''croppedIm'' variable.\n\n3. `croppedIm.save(''cropped.png'')`:
+    Finally, the ''save()'' function is called on ''croppedIm'', which saves this
+    newly created cropped image as ''cropped.png''. If the file doesn''t already exist,
+    it will be created in your active Python script''s current directory. \n\nWith
+    this simple coding approach, we can easily generate a new image from the original,
+    containing only the cropped section. \n\nPlease note: Pillow uses a coordinate
+    system with (0,0) in the upper left corner. The coordinate system is right handed,
+    meaning the right direction corresponds to increasing X, and down direction corresponds
+    to increasing Y. Always check the coordinate before cropping, else you might end
+    up cropping the wrong section of the image."}]'
+  - 'Question: what is the standardized coefficient in a regression, Answer: Standardized
+    coefficient In statistics, standardized coefficients or beta coefficients are
+    the estimates resulting from a regression analysis that have been standardized
+    so that the variances of dependent and independent variables are 1.[1] Therefore,
+    standardized coefficients refer to how many standard deviations a dependent variable
+    will change, per standard deviation increase in the predictor variable. For univariate
+    regression, the absolute value of the standardized coefficient equals the correlation
+    coefficient. Standardization of the coefficient is usually done to answer the
+    question of which of the independent variables have a greater effect on the dependent
+    variable in a multiple regression analysis, when the variables are measured in
+    different units of measurement (for example, income measured in dollars and family
+    size measured in number of individuals).'
+  - '[{''from'': ''human'', ''value'': ''Solve the following math problem step-by-step.\nSimplify
+    your answer as much as possible. Present your final answer as \\boxed{Your Answer}.\nSarah
+    is playing tic-tac-toe. She wins $1 for every game she wins, $0 for every game
+    she ties, and she loses $2 for every game she loses. If she played 100 games where
+    she tied 40 of them, and she lost $30, how many games did she win?''}, {''from'':
+    ''gpt'', ''value'': "Step 1: First, let''s calculate the number of games Sarah
+    lost. We know that she lost $30 in total, and she loses $2 for each game she loses.
+    So the number of games she lost is 30 / 2 = 15.\nStep 2: We also know that Sarah
+    played 100 games in total, and 40 of them were ties and 15 were losses. So the
+    number of games she won is 100 - 40 - 15 = 45.\n\nAnswer:\n\\boxed{45}"}, {''from'':
+    ''human'', ''value'': ''Out: 45\nYour answer is wrong.\nYour approach to calculate
+    the number of games Sarah lost is correct. However, you made a mistake in calculating
+    the number of games she won. You just subtracted the games she lost and tied from
+    the total games. But remember, she wins $1 for every game she wins and loses $2
+    for each game she loses. So, the losses include not only the games she actually
+    lost, but also the money she could have won if she had won those games. Thus,
+    when calculating the number of games won, you need to consider the games she actually
+    lost and the equivalent number of games she could have won to offset the monetary
+    losses. Try to adjust your calculations for game wins.''}, {''from'': ''gpt'',
+    ''value'': "Step 1: First, let''s calculate the number of games Sarah lost. We
+    know that she lost $30 in total, and she loses $2 for each game she loses. So
+    the number of games she lost is 30 / 2 = 15.\nStep 2: To calculate the number
+    of games Sarah won, we need to consider the monetary losses from the games she
+    actually lost. For each game she lost, she would have won $2 if she had won. So
+    the equivalent number of games she could have won to offset the monetary losses
+    is 15 / 2 = 7.5.\nStep 3: Now, we can calculate the number of games she won. We
+    know that she played a total of 100 games, and out of those, 40 were ties, 15
+    were losses, and she could have won 7.5 games. So the number of games she won
+    is 100 - 40 - 15 - 7.5 = 37.5.\n\nAnswer:\n\\boxed{37.5}"}, {''from'': ''human'',
+    ''value'': ''Out: 37.5\nYour answer is wrong.\nYour reasoning in Step 2 is incorrect.
+    The idea that "for each game she lost, she would have won $2 if she had won" is
+    not logical. This calculation double counts the losses and doesn\''t make sense
+    in the context of the problem. Instead, the losses should be considered as a product
+    of the numbers of games lost and the money lost per each game lost. Hence, to
+    find the number of games won, simply subtract the number of games tied and lost
+    from the total games played. You should then calculate the total losses in terms
+    of games by dividing the total lost money by the dollar earned per game won. This
+    will give the total lost games not only from losing but also from not winning.
+    Adding these lost games and tied games then subtracting from total games played
+    will give the number of games won. Try to fix your calculations accordingly.''},
+    {''from'': ''gpt'', ''value'': "Step 1: First, let''s calculate the number of
+    games Sarah lost. We know that she lost $30 in total, and she loses $2 for each
+    game she loses. So the number of games she lost is 30 / -2 = -15. This means our
+    assumption that she lost 15 games is incorrect.\n\nStep 2: To find the number
+    of games Sarah won, we need to consider the total amount of money lost. She loses
+    $2 for each game she loses and she lost a total of $30. So the total number of
+    games lost is -30 / -2 = 15.\n\nStep 3: Now, we can calculate the number of games
+    won. We know that Sarah played a total of 100 games, out of which 40 were ties
+    and 15 were losses. So the number of games she won is 100 - 40 - 15 = 45.\n\nAnswer:\n\\boxed{45}"},
+    {''from'': ''human'', ''value'': ''Out: 45\nYour answer is wrong.\nYou seem to
+    misunderstand how the losses impact the total number of games won. When Sarah
+    loses a game, she not only loses $2, but also loses the opportunity to win $1.
+    Therefore, each lost game equates to a $3 loss ($2 lost + $1 not won). With this
+    in mind, try revising your calculation of the number of games Sarah lost. Then
+    proceed with calculating the number of games won.''}, {''from'': ''gpt'', ''value'':
+    "Step 1: First, let''s calculate the number of games Sarah lost. We know that
+    she lost $30 in total. Since each loss costs her $3 (including the $2 lost and
+    the $1 she could have won), the number of games she lost is 30 / 3 = 10.\n\nStep
+    2: To find the number of games Sarah won, we subtract the number of games tied
+    and lost from the total number of games played. She played 100 games in total,
+    with 40 ties and 10 losses. So the number of games she won is 100 - 40 - 10 =
+    50.\n\nAnswer:\n\\boxed{50}"}, {''from'': ''human'', ''value'': "Out: 50\nYour
+    answer is wrong.\nYour Step 1 is correct but Step 2 isn''t. After knowing the
+    number of games lost, You just straight up subtracted the number of tied and lost
+    games from the total. But this regularization doesn''t apply here, because she
+    is losing $2 for every game she loses. So, her total losses of $30 isn''t just
+    from the games she lost, it''s also from the games she could have won. So in calculating
+    the games she won, you actually need to find out how many games she didn''t win
+    and then subtract these from the total games. A hint is to consider the total
+    income if she had won all the games and compare it with her actual total income.
+    The difference will give you the number of games she didn''t win (i.e., she either
+    lost or tied)."}, {''from'': ''gpt'', ''value'': "Step 1:  Let''s start by breaking
+    down the information we have. Sarah played a total of 100 games, out of which
+    40 were ties. So, the remaining games must be the ones she either won or lost.\nStep
+    2:  We''re given that she lost 30 games, so to find the number of games she won,
+    we can calculate the difference between the total games and the sum of tied and
+    lost games, which gives us the number of games she won.\nStep 3:  Essentially,
+    since there are only 3 possible outcomes in tic-tac-toe (win, lose, or tie), the
+    number of games she won can be obtained by subtracting the number of games tied
+    and the number of games lost from the total games played. So, the formula for
+    the number of games won is: Total games - Tied games - Lost games.\nStep 4:  Substituting
+    the given values, we have: Number of games won = 100 - 40 - 30 = 30.\nStep 5:  This
+    leads us to the conclusion that Sarah won 30 games out of the 100 games she played.\n\nAnswer:\n\\boxed{30}"}]'
+- source_sentence: '[{''from'': ''human'', ''value'': ''Solve the following math problem
+    step-by-step.\nSimplify your answer as much as possible. Present your final answer
+    as \\boxed{Your Answer}.\nA pentagon is drawn by placing an isosceles right triangle
+    on top of a square as pictured. What percent of the area of the pentagon is the
+    area of the right triangle?\n\n[asy]\nsize(50);\ndraw((0,0)--(0,-1)--(1,-1)--(1,0)--(0,0)--(.5,.5)--(1,0));\n[/asy]''},
+    {''from'': ''gpt'', ''value'': "Step 1:  We are given a pentagon formed by placing
+    an isosceles right triangle on top of a square.\nStep 2:  To find the percentage
+    of the area of the pentagon that is the right triangle, we need to consider the
+    areas of both shapes and then calculate the ratio of the triangle''s area to the
+    total pentagon''s area.\nStep 3:  Let''s designate the leg length of the isosceles
+    right triangle as $x$. This means the hypotenuse of the triangle has a length
+    of $x\\sqrt{2}$.\nStep 4:  Since the hypotenuse of the triangle is a side of the
+    square, the area of the square is $(x\\sqrt{2})^2 = 2x^2$. \nStep 5:  On the other
+    hand, the area of the triangle is $(x)(x)/2 = x^2/2$. This is obtained using the
+    standard formula for the area of a triangle, which is base times height divided
+    by 2.\nStep 6:  So, the area of the pentagon is the sum of the areas of the triangle
+    and the square, which gives us $\\frac{x^2}{2} + 2x^2 = \\frac{5x^2}{2}$.\nStep
+    7:  Now, to find the percentage of the pentagon''s area that is inside the triangle,
+    we calculate the ratio of the triangle''s area to the total pentagon''s area.
+    This can be computed as $\\frac{x^2/2}{5x^2/2} =\\frac{x^2}{2}\\cdot \\frac{2}{5x^2}
+    = \\frac15$.\nStep 8:  Therefore, the percentage of the area of the pentagon that
+    is the right triangle is $\\frac{1}{5}$, which is equivalent to $\\boxed{20\\%}$.\nStep
+    9:  As an alternate solution, we can consider drawing the two diagonals of the
+    square and observe the resulting geometric properties to arrive at the same conclusion.\n\nAnswer:\n\\boxed{20\\%}"}]'
+  sentences:
+  - 'Question: who played george luz in band of brothers, Answer: Rick Gomez Richard
+    Harper "Rick" Gomez (born June 1, 1972) is an American actor and voice actor.
+    He is known for portraying Technician 4th Grade George Luz in the HBO television
+    miniseries Band of Brothers, and as "Endless Mike" Hellstrom in the Nickelodeon
+    TV series The Adventures of Pete and Pete. He is the older brother of actor Joshua
+    Gomez.'
+  - '[{''from'': ''human'', ''value'': ''Solve the following math problem step-by-step.\nSimplify
+    your answer as much as possible. Present your final answer as \\boxed{Your Answer}.\nA
+    pentagon is drawn by placing an isosceles right triangle on top of a square as
+    pictured. What percent of the area of the pentagon is the area of the right triangle?\n\n[asy]\nsize(50);\ndraw((0,0)--(0,-1)--(1,-1)--(1,0)--(0,0)--(.5,.5)--(1,0));\n[/asy]''},
+    {''from'': ''gpt'', ''value'': "Step 1:  We are given a pentagon formed by placing
+    an isosceles right triangle on top of a square.\nStep 2:  To find the percentage
+    of the area of the pentagon that is the right triangle, we need to consider the
+    areas of both shapes and then calculate the ratio of the triangle''s area to the
+    total pentagon''s area.\nStep 3:  Let''s designate the leg length of the isosceles
+    right triangle as $x$. This means the hypotenuse of the triangle has a length
+    of $x\\sqrt{2}$.\nStep 4:  Since the hypotenuse of the triangle is a side of the
+    square, the area of the square is $(x\\sqrt{2})^2 = 2x^2$. \nStep 5:  On the other
+    hand, the area of the triangle is $(x)(x)/2 = x^2/2$. This is obtained using the
+    standard formula for the area of a triangle, which is base times height divided
+    by 2.\nStep 6:  So, the area of the pentagon is the sum of the areas of the triangle
+    and the square, which gives us $\\frac{x^2}{2} + 2x^2 = \\frac{5x^2}{2}$.\nStep
+    7:  Now, to find the percentage of the pentagon''s area that is inside the triangle,
+    we calculate the ratio of the triangle''s area to the total pentagon''s area.
+    This can be computed as $\\frac{x^2/2}{5x^2/2} =\\frac{x^2}{2}\\cdot \\frac{2}{5x^2}
+    = \\frac15$.\nStep 8:  Therefore, the percentage of the area of the pentagon that
+    is the right triangle is $\\frac{1}{5}$, which is equivalent to $\\boxed{20\\%}$.\nStep
+    9:  As an alternate solution, we can consider drawing the two diagonals of the
+    square and observe the resulting geometric properties to arrive at the same conclusion.\n\nAnswer:\n\\boxed{20\\%}"}]'
+  - 'Question: where are the basilar membrane and the organ of corti located, Answer:
+    Organ of Corti The organ of Corti is located in the scala media of the cochlea
+    of the inner ear between the vestibular duct and the tympanic duct and is composed
+    of mechanosensory cells, known as hair cells.[2] Strategically positioned on the
+    basilar membrane of the organ of Corti are three rows of outer hair cells (OHCs)
+    and one row of inner hair cells (IHCs).[4] Separating these hair cells are supporting
+    cells: Deiters cells, also called phalangeal cells, which separate and support
+    both the OHCs and the IHCs.[4]'
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+---
+# SentenceTransformer based on sucharush/e5_stem_finetuned
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sucharush/e5_stem_finetuned](https://huggingface.co/sucharush/e5_stem_finetuned). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+## Model Details
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [sucharush/e5_stem_finetuned](https://huggingface.co/sucharush/e5_stem_finetuned) <!-- at revision 3013cc95b53a83082746afd556743e81f1da5dff -->
+- **Maximum Sequence Length:** 512 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+### Model Sources
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+### Full Model Architecture
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
+)
+```
+## Usage
+### Direct Usage (Sentence Transformers)
+First install the Sentence Transformers library:
+```bash
+pip install -U sentence-transformers
+```
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+# Download from the 🤗 Hub
+model = SentenceTransformer("Stergios-Konstantinidis/MNLP_M3_tokenizer_tuned")
+# Run inference
+sentences = [
+    '[{\'from\': \'human\', \'value\': \'Solve the following math problem step-by-step.\\nSimplify your answer as much as possible. Present your final answer as \\\\boxed{Your Answer}.\\nA pentagon is drawn by placing an isosceles right triangle on top of a square as pictured. What percent of the area of the pentagon is the area of the right triangle?\\n\\n[asy]\\nsize(50);\\ndraw((0,0)--(0,-1)--(1,-1)--(1,0)--(0,0)--(.5,.5)--(1,0));\\n[/asy]\'}, {\'from\': \'gpt\', \'value\': "Step 1:  We are given a pentagon formed by placing an isosceles right triangle on top of a square.\\nStep 2:  To find the percentage of the area of the pentagon that is the right triangle, we need to consider the areas of both shapes and then calculate the ratio of the triangle\'s area to the total pentagon\'s area.\\nStep 3:  Let\'s designate the leg length of the isosceles right triangle as $x$. This means the hypotenuse of the triangle has a length of $x\\\\sqrt{2}$.\\nStep 4:  Since the hypotenuse of the triangle is a side of the square, the area of the square is $(x\\\\sqrt{2})^2 = 2x^2$. \\nStep 5:  On the other hand, the area of the triangle is $(x)(x)/2 = x^2/2$. This is obtained using the standard formula for the area of a triangle, which is base times height divided by 2.\\nStep 6:  So, the area of the pentagon is the sum of the areas of the triangle and the square, which gives us $\\\\frac{x^2}{2} + 2x^2 = \\\\frac{5x^2}{2}$.\\nStep 7:  Now, to find the percentage of the pentagon\'s area that is inside the triangle, we calculate the ratio of the triangle\'s area to the total pentagon\'s area. This can be computed as $\\\\frac{x^2/2}{5x^2/2} =\\\\frac{x^2}{2}\\\\cdot \\\\frac{2}{5x^2} = \\\\frac15$.\\nStep 8:  Therefore, the percentage of the area of the pentagon that is the right triangle is $\\\\frac{1}{5}$, which is equivalent to $\\\\boxed{20\\\\%}$.\\nStep 9:  As an alternate solution, we can consider drawing the two diagonals of the square and observe the resulting geometric properties to arrive at the same conclusion.\\n\\nAnswer:\\n\\\\boxed{20\\\\%}"}]',
+    '[{\'from\': \'human\', \'value\': \'Solve the following math problem step-by-step.\\nSimplify your answer as much as possible. Present your final answer as \\\\boxed{Your Answer}.\\nA pentagon is drawn by placing an isosceles right triangle on top of a square as pictured. What percent of the area of the pentagon is the area of the right triangle?\\n\\n[asy]\\nsize(50);\\ndraw((0,0)--(0,-1)--(1,-1)--(1,0)--(0,0)--(.5,.5)--(1,0));\\n[/asy]\'}, {\'from\': \'gpt\', \'value\': "Step 1:  We are given a pentagon formed by placing an isosceles right triangle on top of a square.\\nStep 2:  To find the percentage of the area of the pentagon that is the right triangle, we need to consider the areas of both shapes and then calculate the ratio of the triangle\'s area to the total pentagon\'s area.\\nStep 3:  Let\'s designate the leg length of the isosceles right triangle as $x$. This means the hypotenuse of the triangle has a length of $x\\\\sqrt{2}$.\\nStep 4:  Since the hypotenuse of the triangle is a side of the square, the area of the square is $(x\\\\sqrt{2})^2 = 2x^2$. \\nStep 5:  On the other hand, the area of the triangle is $(x)(x)/2 = x^2/2$. This is obtained using the standard formula for the area of a triangle, which is base times height divided by 2.\\nStep 6:  So, the area of the pentagon is the sum of the areas of the triangle and the square, which gives us $\\\\frac{x^2}{2} + 2x^2 = \\\\frac{5x^2}{2}$.\\nStep 7:  Now, to find the percentage of the pentagon\'s area that is inside the triangle, we calculate the ratio of the triangle\'s area to the total pentagon\'s area. This can be computed as $\\\\frac{x^2/2}{5x^2/2} =\\\\frac{x^2}{2}\\\\cdot \\\\frac{2}{5x^2} = \\\\frac15$.\\nStep 8:  Therefore, the percentage of the area of the pentagon that is the right triangle is $\\\\frac{1}{5}$, which is equivalent to $\\\\boxed{20\\\\%}$.\\nStep 9:  As an alternate solution, we can consider drawing the two diagonals of the square and observe the resulting geometric properties to arrive at the same conclusion.\\n\\nAnswer:\\n\\\\boxed{20\\\\%}"}]',
+    'Question: where are the basilar membrane and the organ of corti located, Answer: Organ of Corti The organ of Corti is located in the scala media of the cochlea of the inner ear between the vestibular duct and the tympanic duct and is composed of mechanosensory cells, known as hair cells.[2] Strategically positioned on the basilar membrane of the organ of Corti are three rows of outer hair cells (OHCs) and one row of inner hair cells (IHCs).[4] Separating these hair cells are supporting cells: Deiters cells, also called phalangeal cells, which separate and support both the OHCs and the IHCs.[4]',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 768]
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+<!--
+### Direct Usage (Transformers)
+<details><summary>Click to see the direct usage in Transformers</summary>
+</details>
+-->
+<!--
+### Downstream Usage (Sentence Transformers)
+You can finetune this model on your own dataset.
+<details><summary>Click to expand</summary>
+</details>
+-->
+<!--
+### Out-of-Scope Use
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+<!--
+## Bias, Risks and Limitations
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+<!--
+### Recommendations
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+## Training Details
+### Training Dataset
+#### Unnamed Dataset
+* Size: 113,450 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0                                                                           | sentence_1                                                                           | label                                           |
+  |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:------------------------------------------------|
+  | type    | string                                                                               | string                                                                               | int                                             |
+  | details | <ul><li>min: 17 tokens</li><li>mean: 283.42 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 281.46 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>0: ~80.00%</li><li>1: ~20.00%</li></ul> |
+* Samples:
+  | sentence_0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           | sentence_1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 | label          |
+  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
+  | <code>Question: where does rasin in the sun take place, Answer: A Raisin in the Sun A Raisin in the Sun is a play by Lorraine Hansberry that debuted on Broadway in 1959.[1] The title comes from the poem "Harlem" (also known as "A Dream Deferred"[2]) by Langston Hughes. The story tells of a black family's experiences in the Washington Park Subdivision of Chicago's Woodlawn neighborhood as they attempt to "better" themselves with an insurance payout following the death of the father. The New York Drama Critics' Circle named it the best play of 1959.</code>                                                                                                                                                     | <code>Question: where does rasin in the sun take place, Answer: A Raisin in the Sun A Raisin in the Sun is a play by Lorraine Hansberry that debuted on Broadway in 1959.[1] The title comes from the poem "Harlem" (also known as "A Dream Deferred"[2]) by Langston Hughes. The story tells of a black family's experiences in the Washington Park Subdivision of Chicago's Woodlawn neighborhood as they attempt to "better" themselves with an insurance payout following the death of the father. The New York Drama Critics' Circle named it the best play of 1959.</code>                                                                                                                                                                                                                                                                                                                                                                                                           | <code>1</code> |
+  | <code>Question: when does the movie midnight sun come out, Answer: Midnight Sun (2018 film) Midnight Sun is a 2018 American romantic drama film directed by Scott Speer and written by Eric Kirsten, based on the 2006 Japanese film of the same name. The film stars Bella Thorne, Patrick Schwarzenegger, and Rob Riggle, and follows a teenage girl with the disease xeroderma pigmentosum, which prevents her from going out into sunlight. When she meets a boy, she struggles to decide whether to tell him about her condition or pretend to live a normal life. Principal photography began on October 12, 2015 in Vancouver, British Columbia, Canada. The film was released in the United States on March 23, 2018.</code> | <code>Question: who sang the song unchained melody in the movie ghost, Answer: Unchained Melody "Unchained Melody" reappeared on the Billboard charts in 1990 after The Righteous Brothers' recording was used in the box office blockbuster film Ghost. Two versions charted in the US that year – the original and a new recording. According to Medley, he was interested in having the original recording released due to the renewed interest in the song, but was told that there were licensing issues. Although Hatfield was no longer in the same condition vocally as when he first recorded the song, they decided to re-record the song for Curb Records.[44] The re-recorded version was released as both a cassette single[45] and a CD single.[46] It received minimal airplay but recorded excellent sales, peaking at number 19. The re-recorded version was certified Platinum by the RIAA on January 10, 1991,[47] and received a Grammy Award nomination.[48]</code> | <code>0</code> |
+  | <code>Question: where is the o2 arena in london located, Answer: The O2 Arena The O2 Arena (temporarily the sponsor neutral "North Greenwich Arena", during the 2012 Summer Olympics and 2012 Summer Paralympics), is a multi purpose indoor arena located in the centre of The O2 entertainment complex on the Greenwich Peninsula in south east London.</code>                                                                                                                                                                                                                                                                                                                                                                     | <code>[{'from': 'human', 'value': "Explain how Archimedes' Principle enables objects to float in water."}, {'from': 'gpt', 'value': "Archimedes' Principle states that the buoyant force acting on an object submerged in water is equal to the weight of the water displaced by the object. If the buoyant force exceeds the weight of the object, it will float."}]</code>                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               | <code>0</code> |
+* Loss: [<code>ContrastiveTensionLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#contrastivetensionloss)
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+- `per_device_train_batch_size`: 10
+- `per_device_eval_batch_size`: 10
+- `num_train_epochs`: 10
+- `multi_dataset_batch_sampler`: round_robin
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: no
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 10
+- `per_device_eval_batch_size`: 10
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 10
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+</details>
+### Training Logs
+<details><summary>Click to expand</summary>
+| Epoch  | Step   | Training Loss |
+|:------:|:------:|:-------------:|
+| 0.0441 | 500    | 10.4083       |
+| 0.0881 | 1000   | 7.1707        |
+| 0.1322 | 1500   | 5.1666        |
+| 0.1763 | 2000   | 5.2289        |
+| 0.2204 | 2500   | 5.1515        |
+| 0.2644 | 3000   | 5.0737        |
+| 0.3085 | 3500   | 5.0824        |
+| 0.3526 | 4000   | 5.0569        |
+| 0.3967 | 4500   | 5.2043        |
+| 0.4407 | 5000   | 5.1293        |
+| 0.4848 | 5500   | 5.1169        |
+| 0.5289 | 6000   | 5.0743        |
+| 0.5729 | 6500   | 5.1535        |
+| 0.6170 | 7000   | 5.2208        |
+| 0.6611 | 7500   | 5.1847        |
+| 0.7052 | 8000   | 5.1545        |
+| 0.7492 | 8500   | 5.0944        |
+| 0.7933 | 9000   | 5.0662        |
+| 0.8374 | 9500   | 5.1083        |
+| 0.8814 | 10000  | 5.102         |
+| 0.9255 | 10500  | 5.2018        |
+| 0.9696 | 11000  | 5.1479        |
+| 1.0137 | 11500  | 5.1877        |
+| 1.0577 | 12000  | 5.2397        |
+| 1.1018 | 12500  | 5.1657        |
+| 1.1459 | 13000  | 5.2276        |
+| 1.1900 | 13500  | 5.2295        |
+| 1.2340 | 14000  | 5.1096        |
+| 1.2781 | 14500  | 5.0877        |
+| 1.3222 | 15000  | 5.0776        |
+| 1.3662 | 15500  | 5.0957        |
+| 1.4103 | 16000  | 5.1655        |
+| 1.4544 | 16500  | 5.0537        |
+| 1.4985 | 17000  | 5.0836        |
+| 1.5425 | 17500  | 5.1495        |
+| 1.5866 | 18000  | 5.0776        |
+| 1.6307 | 18500  | 5.1215        |
+| 1.6747 | 19000  | 5.1614        |
+| 1.7188 | 19500  | 5.0195        |
+| 1.7629 | 20000  | 5.0855        |
+| 1.8070 | 20500  | 5.2153        |
+| 1.8510 | 21000  | 5.1569        |
+| 1.8951 | 21500  | 5.1213        |
+| 1.9392 | 22000  | 5.1694        |
+| 1.9833 | 22500  | 5.1414        |
+| 2.0273 | 23000  | 5.1314        |
+| 2.0714 | 23500  | 5.2454        |
+| 2.1155 | 24000  | 5.1414        |
+| 2.1595 | 24500  | 5.1194        |
+| 2.2036 | 25000  | 5.1115        |
+| 2.2477 | 25500  | 5.0393        |
+| 2.2918 | 26000  | 5.1154        |
+| 2.3358 | 26500  | 5.1414        |
+| 2.3799 | 27000  | 5.0794        |
+| 2.4240 | 27500  | 5.1013        |
+| 2.4680 | 28000  | 5.1873        |
+| 2.5121 | 28500  | 5.1034        |
+| 2.5562 | 29000  | 5.1593        |
+| 2.6003 | 29500  | 5.1933        |
+| 2.6443 | 30000  | 5.2192        |
+| 2.6884 | 30500  | 5.1233        |
+| 2.7325 | 31000  | 5.1033        |
+| 2.7766 | 31500  | 5.1408        |
+| 2.8206 | 32000  | 5.1233        |
+| 2.8647 | 32500  | 5.1153        |
+| 2.9088 | 33000  | 5.0795        |
+| 2.9528 | 33500  | 5.1792        |
+| 2.9969 | 34000  | 5.1233        |
+| 3.0410 | 34500  | 5.0852        |
+| 3.0851 | 35000  | 5.0653        |
+| 3.1291 | 35500  | 5.0973        |
+| 3.1732 | 36000  | 5.1212        |
+| 3.2173 | 36500  | 5.1046        |
+| 3.2613 | 37000  | 5.1193        |
+| 3.3054 | 37500  | 5.1632        |
+| 3.3495 | 38000  | 5.1932        |
+| 3.3936 | 38500  | 5.0894        |
+| 3.4376 | 39000  | 5.1493        |
+| 3.4817 | 39500  | 5.1812        |
+| 3.5258 | 40000  | 5.1612        |
+| 3.5699 | 40500  | 5.0972        |
+| 3.6139 | 41000  | 5.0872        |
+| 3.6580 | 41500  | 5.2375        |
+| 3.7021 | 42000  | 5.1652        |
+| 3.7461 | 42500  | 5.1273        |
+| 3.7902 | 43000  | 5.0695        |
+| 3.8343 | 43500  | 5.1557        |
+| 3.8784 | 44000  | 5.0697        |
+| 3.9224 | 44500  | 5.1635        |
+| 3.9665 | 45000  | 5.2095        |
+| 4.0106 | 45500  | 5.0936        |
+| 4.0546 | 46000  | 5.1435        |
+| 4.0987 | 46500  | 5.1635        |
+| 4.1428 | 47000  | 5.1415        |
+| 4.1869 | 47500  | 5.1634        |
+| 4.2309 | 48000  | 5.1595        |
+| 4.2750 | 48500  | 5.0775        |
+| 4.3191 | 49000  | 5.0976        |
+| 4.3632 | 49500  | 5.1135        |
+| 4.4072 | 50000  | 5.0877        |
+| 4.4513 | 50500  | 5.1934        |
+| 4.4954 | 51000  | 5.0915        |
+| 4.5394 | 51500  | 5.1295        |
+| 4.5835 | 52000  | 5.0994        |
+| 4.6276 | 52500  | 5.1235        |
+| 4.6717 | 53000  | 5.1435        |
+| 4.7157 | 53500  | 5.1335        |
+| 4.7598 | 54000  | 5.1735        |
+| 4.8039 | 54500  | 5.0914        |
+| 4.8480 | 55000  | 5.1673        |
+| 4.8920 | 55500  | 5.1834        |
+| 4.9361 | 56000  | 5.1434        |
+| 4.9802 | 56500  | 5.1713        |
+| 5.0242 | 57000  | 5.1034        |
+| 5.0683 | 57500  | 5.0615        |
+| 5.1124 | 58000  | 5.0835        |
+| 5.1565 | 58500  | 5.0654        |
+| 5.2005 | 59000  | 5.1813        |
+| 5.2446 | 59500  | 5.2133        |
+| 5.2887 | 60000  | 5.2273        |
+| 5.3327 | 60500  | 5.1313        |
+| 5.3768 | 61000  | 5.1415        |
+| 5.4209 | 61500  | 5.2432        |
+| 5.4650 | 62000  | 5.0854        |
+| 5.5090 | 62500  | 5.1415        |
+| 5.5531 | 63000  | 5.0834        |
+| 5.5972 | 63500  | 5.1294        |
+| 5.6413 | 64000  | 5.1234        |
+| 5.6853 | 64500  | 5.1913        |
+| 5.7294 | 65000  | 5.1793        |
+| 5.7735 | 65500  | 5.0614        |
+| 5.8175 | 66000  | 5.1014        |
+| 5.8616 | 66500  | 5.0354        |
+| 5.9057 | 67000  | 5.1913        |
+| 5.9498 | 67500  | 5.0614        |
+| 5.9938 | 68000  | 5.2453        |
+| 6.0379 | 68500  | 5.0913        |
+| 6.0820 | 69000  | 5.1135        |
+| 6.1260 | 69500  | 5.1374        |
+| 6.1701 | 70000  | 5.0933        |
+| 6.2142 | 70500  | 5.0134        |
+| 6.2583 | 71000  | 5.0435        |
+| 6.3023 | 71500  | 5.2412        |
+| 6.3464 | 72000  | 5.0754        |
+| 6.3905 | 72500  | 5.1933        |
+| 6.4346 | 73000  | 5.2932        |
+| 6.4786 | 73500  | 5.0515        |
+| 6.5227 | 74000  | 5.2073        |
+| 6.5668 | 74500  | 5.1312        |
+| 6.6108 | 75000  | 5.1332        |
+| 6.6549 | 75500  | 5.0615        |
+| 6.6990 | 76000  | 5.1873        |
+| 6.7431 | 76500  | 5.0654        |
+| 6.7871 | 77000  | 5.1333        |
+| 6.8312 | 77500  | 5.1593        |
+| 6.8753 | 78000  | 5.1234        |
+| 6.9193 | 78500  | 5.2012        |
+| 6.9634 | 79000  | 5.1513        |
+| 7.0075 | 79500  | 5.1153        |
+| 7.0516 | 80000  | 5.0414        |
+| 7.0956 | 80500  | 5.2034        |
+| 7.1397 | 81000  | 5.1073        |
+| 7.1838 | 81500  | 5.1753        |
+| 7.2279 | 82000  | 5.1233        |
+| 7.2719 | 82500  | 5.1293        |
+| 7.3160 | 83000  | 5.1492        |
+| 7.3601 | 83500  | 5.2392        |
+| 7.4041 | 84000  | 5.0033        |
+| 7.4482 | 84500  | 5.1912        |
+| 7.4923 | 85000  | 5.1673        |
+| 7.5364 | 85500  | 5.1932        |
+| 7.5804 | 86000  | 5.1393        |
+| 7.6245 | 86500  | 5.1653        |
+| 7.6686 | 87000  | 5.1232        |
+| 7.7126 | 87500  | 5.0993        |
+| 7.7567 | 88000  | 5.1313        |
+| 7.8008 | 88500  | 5.0695        |
+| 7.8449 | 89000  | 5.1493        |
+| 7.8889 | 89500  | 5.1733        |
+| 7.9330 | 90000  | 5.1233        |
+| 7.9771 | 90500  | 5.1292        |
+| 8.0212 | 91000  | 5.0254        |
+| 8.0652 | 91500  | 5.2092        |
+| 8.1093 | 92000  | 5.1313        |
+| 8.1534 | 92500  | 5.0834        |
+| 8.1974 | 93000  | 5.1032        |
+| 8.2415 | 93500  | 5.0653        |
+| 8.2856 | 94000  | 5.1832        |
+| 8.3297 | 94500  | 5.1752        |
+| 8.3737 | 95000  | 5.0752        |
+| 8.4178 | 95500  | 5.1593        |
+| 8.4619 | 96000  | 5.1633        |
+| 8.5059 | 96500  | 5.0794        |
+| 8.5500 | 97000  | 5.1991        |
+| 8.5941 | 97500  | 5.0952        |
+| 8.6382 | 98000  | 5.1153        |
+| 8.6822 | 98500  | 5.1192        |
+| 8.7263 | 99000  | 5.1053        |
+| 8.7704 | 99500  | 5.2213        |
+| 8.8145 | 100000 | 5.1571        |
+| 8.8585 | 100500 | 5.1292        |
+| 8.9026 | 101000 | 5.1531        |
+| 8.9467 | 101500 | 5.2251        |
+| 8.9907 | 102000 | 5.0733        |
+| 9.0348 | 102500 | 5.1313        |
+| 9.0789 | 103000 | 5.1973        |
+| 9.1230 | 103500 | 5.1133        |
+| 9.1670 | 104000 | 5.1813        |
+| 9.2111 | 104500 | 5.2472        |
+| 9.2552 | 105000 | 5.1352        |
+| 9.2993 | 105500 | 5.0373        |
+| 9.3433 | 106000 | 5.1791        |
+| 9.3874 | 106500 | 5.1891        |
+| 9.4315 | 107000 | 5.0832        |
+| 9.4755 | 107500 | 5.1632        |
+| 9.5196 | 108000 | 5.1012        |
+| 9.5637 | 108500 | 5.0354        |
+| 9.6078 | 109000 | 5.2112        |
+| 9.6518 | 109500 | 5.1212        |
+| 9.6959 | 110000 | 5.1212        |
+| 9.7400 | 110500 | 5.1612        |
+| 9.7840 | 111000 | 5.1613        |
+| 9.8281 | 111500 | 5.1072        |
+| 9.8722 | 112000 | 5.0852        |
+| 9.9163 | 112500 | 5.0234        |
+| 9.9603 | 113000 | 5.1033        |
+</details>
+### Framework Versions
+- Python: 3.12.8
+- Sentence Transformers: 3.4.1
+- Transformers: 4.52.4
+- PyTorch: 2.6.0+cu126
+- Accelerate: 1.3.0
+- Datasets: 3.2.0
+- Tokenizers: 0.21.0
+## Citation
+### BibTeX
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+#### ContrastiveTensionLoss
+```bibtex
+@inproceedings{carlsson2021semantic,
+    title={Semantic Re-tuning with Contrastive Tension},
+    author={Fredrik Carlsson and Amaru Cuba Gyllensten and Evangelia Gogoulou and Erik Ylip{\"a}{\"a} Hellqvist and Magnus Sahlgren},
+    booktitle={International Conference on Learning Representations},
+    year={2021},
+    url={https://openreview.net/forum?id=Ov_sMNau-PF}
+}
+```
+<!--
+## Glossary
+*Clearly define terms in order to be accessible across audiences.*
+-->
+<!--
+## Model Card Authors
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+<!--
+## Model Card Contact
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.4.1",
+    "transformers": "4.52.4",
+    "pytorch": "2.6.0+cu126"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5223a9f4c56d5391436409acb852c6851f42821b2280087e759e95f39a6f2c59
+size 437951328

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 512,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 512,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff