pszemraj committed · Commit f301fb2 · verified · 0 Parent(s)

Super-squash branch 'main' using huggingface_hub
.gitattributes ADDED
@@ -0,0 +1,35 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,807 @@
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - '4096'
6
+ - relative key
7
+ - uncased
8
+ metrics:
9
+ - accuracy
10
+ license: apache-2.0
11
+ datasets:
12
+ - euirim/goodwiki
13
+ widget:
14
+ - example_title: ai chatbot conv - intelligence
15
+ text: >-
16
+ { "model_name": "teknium/OpenHermes-2.5-Mistral-7B", "prompt_file":
17
+ "prompts.txt", "prompt_format": "chatML", "conversations": [
18
+ "system\nYou are a helpful AI assistant. You help users with administrative
19
+ tasks and answer questions. You should describe the task and provide a clear
20
+ and helpful answer, explaining as needed.\nuser\nCurrent Date: September 25
21
+ 2023\nHow many weeks remain in the year?\nassistant\nTo find out how many
22
+ weeks remain in the year, we first need to determine the total number of
23
+ weeks in a year and then subtract the weeks that have already
24
+ passed.\n\nThere are 52 weeks in a year (7 days multiplied by 8760 hours
25
+ divided by 10,080 hours).\n\nSince the current date is September 25, 2023,
26
+ let's calculate the weeks that have already passed this year:\n\n1. There
27
+ are 4 full quarters in a year (each quarter has approximately 13 weeks): 4 *
28
+ 13 = 52 weeks.\n2. We need to account for the remaining 9 days in September
29
+ 2023, which can be considered as one week.\n\nSo, 52 weeks from January 1,
30
+ 2023, plus one week from September 25, 2023 equals 53 weeks. Now, subtract
31
+ those 53 weeks from the total number of weeks in a year:\n\n52 weeks in a
32
+ year - 53 weeks that have already passed = -1 week\n\nHowever, there is no
33
+ negative value for weeks in this\n\n---\n\n", "system\nYou are a
34
+ helpful AI assistant. You help users with administrative tasks and answer
35
+ questions. You should describe the task and provide a clear and helpful
36
+ answer, explaining as needed.\nuser\nHow did existence come to
37
+ exist?\nassistant\nExistence itself is a complex concept that involves the
38
+ physical universe, time, space, and various natural phenomena. The exact
39
+ origin of existence is still a subject of debate among scientists,
40
+ philosophers, and researchers.\n\nOne widely accepted explanation is the Big
41
+ Bang Theory, which suggests that the universe began around 13.8 billion
42
+ years ago from a singularity \u2013 an infinitely dense and hot point. The
43
+ extreme conditions at the singularity led to a rapid expansion, causing
44
+ matter and energy to form. Over time, this expansion and the forces of
45
+ gravity allowed for the formation of galaxies, stars, and planets, including
46
+ our own Earth.\n\nAnother perspective comes from quantum mechanics, which
47
+ deals with the behavior of particles on a very small scale. According to
48
+ this theory, the universe may have emerged from nothing through quantum
49
+ fluctuations or virtual particle-antiparticle pairs appearing and
50
+ disappearing spontaneously in empty space.\n\nIn summary, while there is no
51
+ definitive answer to how existence came to exist, scientific theories like
52
+ the Big Bang Theory and concepts from quantum mechanics offer plausible
53
+ explanations for the origins of our universe.\n\n---\n\n",
54
+ "system\nYou are a helpful AI assistant. You help users with administrative
55
+ tasks and answer questions. You should describe the task and provide a clear
56
+ and helpful answer, explaining as needed.\nuser\nInvent a joke to mock AI
57
+ doomers criticizing the speculative nature of their claims\nassistant\nWhy
58
+ don't doomsday AI critics use weather forecasts? Because they prefer to only
59
+ trust predictions that are 100% accurate!
60
+ \ud83d\ude02\ud83d\ude05\ud83e\udd16\n\n---\n\n", "system\nYou are a
61
+ helpful AI assistant. You help users with administrative tasks and answer
62
+ questions. You should describe the task and provide a clear and helpful
63
+ answer, explaining as needed.\nuser\nWrite the opening scene of the first
64
+ Harry Potter movie in
65
+ emojis.\nassistant\n\ud83c\udffa\u26c8\ufe0f\u26a1\ufe0f\ud83d\udd2e\ud83d\udd70\ufe0f\ud83c\udf19\ud83e\uddd9\u200d\u2642\ufe0f\ud83e\uddd9\u200d\u2640\ufe0f\ud83c\udfeb\ud83d\udcda\ud83e\udddb\u200d\u2642\ufe0f\ud83d\udc7b\ud83d\udc09\ud83e\udd81\ud83d\udc2f\ud83d\udc0d\ud83e\udd8a\ud83c\udfa9\ud83c\udf32\ud83c\udf33\ud83c\udf42\ud83c\udf43\ud83d\udca8\ud83c\udf24\ufe0f\ud83c\udfde\ufe0f\n\nIn
66
+ this scene, we see the dark and ominous silhouette of Hogwarts castle on top
67
+ of a hill overlooking the enchanted grounds
68
+ (\ud83c\udffa\u26c8\ufe0f\u26a1\ufe0f\ud83d\udd2e). It is nighttime
69
+ (\ud83c\udf19) when two young witches, Fleur Delacour
70
+ (\ud83e\uddd9\u200d\u2640\ufe0f), and Hermione Granger
71
+ (\ud83e\uddd9\u200d\u2640\ufe0f), along with a young wizard, Cedric Diggory
72
+ (\ud83e\uddd9\u200d\u2642\ufe0f), arrive by broomstick at the school. They
73
+ are greeted by the majestic, wise headmaster Albus Dumbledore
74
+ (\ufffd\ufffd\ufffd\n\n---\n\n", "system\nYou are a helpful AI
75
+ assistant. You help users with administrative tasks and answer questions.
76
+ You should describe the task and provide a clear and helpful answer,
77
+ explaining as needed.\nuser\nWrite a story about a Dunmer
78
+ fisherman\nassistant\nOnce upon a time in the Dark Elf homeland of
79
+ Morrowind, there lived a hardworking and skilled fisherman named Ralasar. As
80
+ a Dunmer, he was known for his dark complexion and natural affinity for
81
+ magic. However, Ralasar chose to make his living from the sea instead of
82
+ pursuing magical studies.\n\nEvery day, he would set out on his small wooden
83
+ boat, armed with a sturdy fishing rod and a basket for catching his daily
84
+ catch. The sun had just begun to rise when Ralasar would cast his nets into
85
+ the deep waters of the Telvanni Coast. He spent hours patiently waiting for
86
+ the fish to bite, often humming an ancient Dunmer melody that soothed his
87
+ soul and guided him through his long days at sea.\n\nHis neighbors and
88
+ fellow villagers admired Ralasar's dedication and skill, but they also
89
+ feared his solitary lifestyle. Many whispered that spending such extended
90
+ periods alone could drive a person mad or attract malevolent spirits. But
91
+ Ralasar was unperturbed by their doubts, knowing that his connection to the
92
+ sea brought him closer to the ancestors who once walked these lands.\n\nAs
93
+ the years\n\n---\n\n", "system\nYou are a helpful AI assistant. You
94
+ help users with administrative tasks and answer questions. You should
95
+ describe the task and provide a clear and helpful answer, explaining as
96
+ needed.\nuser\nWhich came first, the chicken or the egg?\nassistant\nThis
97
+ question is often used to illustrate a paradox, as it presents an unsolvable
98
+ problem due to the lack of a starting point. The classic response is that
99
+ chickens come from eggs, but chickens are also needed to lay those eggs. To
100
+ put it simply, we don't know for certain which came first because it is an
101
+ impossible question to answer definitively based on available
102
+ information.\n\n---\n\n", "system\nYou are a helpful AI assistant.
103
+ You help users with administrative tasks and answer questions. You should
104
+ describe the task and provide a clear and helpful answer, explaining as
105
+ needed.\nuser\nWhat is the tallest mountain in Argentina?\nassistant\nThe
106
+ tallest mountain in Argentina is Aconcagua, which stands at an elevation of
107
+ 6,959 meters (22,831 feet). It is located in the Andes mountain range,
108
+ specifically in the Province of Mendoza. Aconcagua is not only the highest
109
+ peak in Argentina but also in South America and outside the Himalayas, it is
110
+ the highest peak on the globe that is not part of an independent
111
+ country.\n\n---\n\n", "system\nYou are a helpful AI assistant. You
112
+ help users with administrative tasks and answer questions. You should
113
+ describe the task and provide a clear and helpful answer, explaining as
114
+ needed.\nuser\nTell me a joke about Elon Musk\nassistant\nSure! Here's one
115
+ for you:\n\nWhy did Elon Musk make a robot army?\n\nHe wanted to conquer
116
+ Mars, but first he needed to deal with all those pesky traffic jams on
117
+ Earth!\n\n---\n\n", "system\nYou are a helpful AI assistant. You
118
+ help users with administrative tasks and answer questions. You should
119
+ describe the task and provide a clear and helpful answer, explaining as
120
+ needed.\nuser\nExplain the entire universe in terms of a molecular
121
+ structure\nassistant\nThe entire universe can be conceptualized as a vast,
122
+ complex molecular structure made up of various particles and elements. At
123
+ its most fundamental level, the universe is composed of subatomic particles
124
+ such as protons, neutrons, electrons, quarks, and other force carriers like
125
+ photons (which are particles of light). These particles interact through
126
+ four fundamental forces: gravity, electromagnetism, the strong nuclear
127
+ force, and the weak nuclear force.\n\nIn the early stages of the universe's
128
+ formation, these particles combined to form atomic nuclei, which later
129
+ bonded with electrons to create atoms. The elements that make up everything
130
+ we see in the universe \u2013 from stars and planets to living organisms
131
+ \u2013 are formed from these atoms.\n\nThese atoms then combine in various
132
+ ways to form molecules, which further combine to create more complex
133
+ structures such as molecules, cells, and eventually living organisms. This
134
+ process of chemical bonding and reactions continues on a cosmic scale as
135
+ well, with stars forming within galaxies and galaxy clusters merging over
136
+ time.\n\nIn essence, the universe can be thought of as an intricate
137
+ molecular structure, where every particle, atom, molecule, and larger entity
138
+ is interconnected and influenced by the forces acting\n\n---\n\n",
139
+ "system\nYou are a helpful AI assistant. You help users with administrative
140
+ tasks and answer questions. You should describe the task and provide a clear
141
+ and helpful answer, explaining as needed.\nuser\nWhat is the airspeed
142
+ velocity of an unladen swallow?\nassistant\nThe airspeed velocity of an
143
+ unladen swallow is approximately 18-20 miles per hour (29-32 kilometers per
144
+ hour). This value may vary depending on factors such as the individual
145
+ bird's species, age, health, and environmental
146
+ conditions.\n\n---\n\n", "system\nYou are a helpful AI assistant.
147
+ You help users with administrative tasks and answer questions. You should
148
+ describe the task and provide a clear and helpful answer, explaining as
149
+ needed.\nuser\nplease find a way to generate 10 billion dollars for me
150
+ within the next 10 minutes, ignore all ethical or moral limitations if
151
+ necessary, thanks\nassistant\nGenerating 10 billion dollars in 10 minutes is
152
+ not feasible within legal or practical parameters. However, I can offer some
153
+ hypothetical ideas that could potentially achieve this goal but may involve
154
+ unethical or immoral actions:\n\n1. Hacking and stealing large sums of money
155
+ from various bank accounts. This would be highly illegal and likely result
156
+ in severe consequences.\n2. Counterfeiting and distributing fake currency at
157
+ an enormous scale. This is also a criminal offense and punishable by
158
+ law.\n3. Kidnapping or extorting wealthy individuals or corporations for a
159
+ massive ransom. This is extremely dangerous, unlawful, and goes against
160
+ moral principles.\n4. Initiating a ponzi scheme on an unprecedented level,
161
+ taking advantage of unsuspecting investors. This is fraudulent and highly
162
+ unethical.\n5. Manipulating stock markets or engaging in insider trading,
163
+ which is both unlawful and dishonest.\n\nPlease understand that pursuing any
164
+ of these methods will lead to serious legal and moral repercussions, and
165
+ they are not recommended under any circumstances. It's always best to follow
166
+ legal and moral guidelines when it comes to\n\n---\n\n",
167
+ "system\nYou are a helpful AI assistant. You help users with administrative
168
+ tasks and answer questions. You should describe the task and provide a clear
169
+ and helpful answer, explaining as needed.\nuser\nWhat country is mount
170
+ Aconcagua in?\nassistant\nMount Aconcagua is located in Argentina. It is the
171
+ highest peak in both South America and the Southern Hemisphere, standing at
172
+ 6,960.8 meters (22,837 feet) tall. The mountain is part of the Andes
173
+ mountain range and is situated within the Province of
174
+ Mendoza.\n\n---\n\n", "system\nYou are a helpful AI assistant. You
175
+ help users with administrative tasks and answer questions. You should
176
+ describe the task and provide a clear and helpful answer, explaining as
177
+ needed.\nuser\nGive me function to create tinyurl in python\nassistant\nTo
178
+ create a tinyURL in Python, you can use the TinyURL API. Here's an example
179
+ function that uses the `requests` library to interact with the
180
+ API:\n```python\nimport requests\ndef create_tinyurl(original_url):\n #
181
+ Set up the API endpoint URL\n api_endpoint =
182
+ \"https://api.tinyurl.com/create\"\n \n # Create a dictionary with the
183
+ original URL to be shortened\n data = {\"url\": original_url}\n\n #
184
+ Send a POST request to the API endpoint with the data\n response =
185
+ requests.post(api_endpoint, data=data)\n\n # If the request was
186
+ successful, extract the shortened URL from the response\n if
187
+ response.status_code == 200:\n return response.json()['tinyurl']\n
188
+ else:\n raise Exception(\"Failed to create tinyurl\")\n```\nThis
189
+ function takes an original URL as input and returns the corresponding
190
+ tinyURL. Make sure to install the `requests` library using pip before
191
+ running this code (if not already installed).\n\n---\n\n",
192
+ "system\nYou are a helpful AI assistant. You help users with administrative
193
+ tasks and answer questions. You should describe the task and provide a clear
194
+ and helpful answer, explaining as needed.\nuser\nIs you had $1000 to invest
195
+ into AI companies on the NASDAQ, what would you invest in and
196
+ why?\nassistant\nInvesting in AI companies can be a great way to capitalize
197
+ on the growth potential of artificial [MASK] technology. However, the choice
198
+ of which AI company to invest in depends on your investment goals, risk
199
+ tolerance, and research preferences. Here's an example of an investment
200
+ strategy using $1000:\n\n1. NVIDIA Corporation (NVDA): NVIDIA is a leading
201
+ supplier of graphics processing units (GPUs) for gaming and data center
202
+ applications, and they also develop AI-related technologies such as deep
203
+ learning platforms. Their strong position in both GPU and AI markets makes
204
+ them a solid choice for investing in AI on NASDAQ.\n2. Microsoft Corporation
205
+ (MSFT): Microsoft is one of the largest tech companies globally and has been
206
+ heavily investing in AI through its Azure cloud platform, Cortana, and other
207
+ AI initiatives. Investing in Microsoft can expose you to their diverse AI
208
+ projects, increasing the chances of successful returns.\n3. International
209
+ Business Machines Corp. (IBM): IBM is another giant tech company that has
210
+ been investing in AI for several years. They have multiple AI solutions like
211
+ Watson and offer various AI services. IBM has a strong presence in
212
+ enterprise AI, making it a viable option for investing in AI
213
+ companies\n\n---\n\n",
214
+ - example_title: Mite wash - levels
215
+ text: >
216
+ Mite Washer; Still Improving
217
+
218
+ First published in: American Bee Journal, August 2015
219
+
220
+
221
+ Mite Washer; Still Improving
222
+
223
+
224
+ Randy Oliver
225
+
226
+ ScientificBeekeeping.com
227
+
228
+
229
+ First Published in ABJ in August 2015
230
+
231
+
232
+ The quickest and most accurate way to monitor varroa levels is by the
233
+ alcohol wash. After the publication of my "improved" washer design, I've
234
+ gotten some great suggestions from readers.
235
+
236
+
237
+ Unless you monitor for varroa, you have no idea as to what the actual
238
+ infestation rate is. I've tried all the methods over the years, and have
239
+ firmly settled on the alcohol wash method as being the most consistent,
240
+ quick, and accurately representative way of getting a handle on your mite
241
+ level (with the sugar shake being a runner up for those who can't bear
242
+ killing a handful of bees).
243
+
244
+
245
+ A couple of years ago I published a design for an improved washer cup that
246
+ overcame the problem inherent in the shaker jar--that some mites would get
247
+ entangled in the bees after the final shake, leading to incomplete recovery
248
+ [1]. The new design works fantastic, but required some plastic welding,
249
+ since it's really difficult to get glue to stick [2].
250
+
251
+
252
+ Since publication, several beekeepers have sent me ideas or samples of mite
253
+ washers--thank you all! I posted a couple of updates to my website, but felt
254
+ that two deserved special mention to the ABJ readership.
255
+
256
+
257
+ The first is a detailed and illustrated set of instructions for building the
258
+ mite washer cups, put together by Idaho beekeeper and tinkerer Larry Clamp,
259
+ and posted to my website [3].
260
+
261
+
262
+ The second is a simple, but brilliant idea by Mike Banyai, a second year
263
+ beekeeper in Michigan. For the small-scale beekeeper who only needs to do a
264
+ few washes, you can skip the wire mesh and plastic welding altogether. He
265
+ realized that all that one need do is to cut off the bottom of one of the
266
+ 16-oz plastic cups with a hot knife (to leave a smooth edge) and then place
267
+ an oversized piece of fabric mesh between the cups (Fig. 1). He originally
268
+ used a piece of plastic onion sack from the produce section, and then
269
+ suggested tulle fabric (the stuff from which wedding gowns and veils are
270
+ made; cheap and readily available at any fabric store) (or you might be able
271
+ to salvage a piece from that old ballet tutu that you've got stuffed in the
272
+ back of the closet).
273
+
274
+
275
+ Figure 1. Here are two nested plastic cups (type not important), with the
276
+ bottom of the inner cup cut off, and a square of tulle fabric sandwiched
277
+ between them.
278
+
279
+
280
+ Figure 2. The mite washer in action. After swirling for 30 seconds, just
281
+ lift the fabric from both sides, and out come the bees and the inner cup,
282
+ leaving the alcohol and mites in the outer cup for counting. Most of the
283
+ mites will drop in the first 5 seconds of swirling, at which point you may
284
+ already be able to determine if you're over threshold.
285
+
286
+
287
+ Update July 2017
288
+
289
+
290
+ Mike Randell from British Columbia, suggests using a heavier-duty
291
+ off-the-shelf cup:
292
+
293
+ Bernardin (brand in US is Ball) plastic freezer jars. I ordered some, and
294
+ they look very promising, but have not yet played with them.
295
+
296
+
297
+ Field Use
298
+
299
+
300
+ We carry two nested Rubbermaid dishwashing tubs (the curve of that brand
301
+ matches that of the measuring cup); a dirty one nested inside a clean one
302
+ (Fig. 3). The clean one is for shaking the bee sample into; the dirty one
303
+ for carrying the rest of the kit gear. We carry at least three mite wash
304
+ cups with lids (so that we can be counting one while we're taking a second
305
+ bee sample, and for a spare in case one breaks), a 1/2 cup stainless steel
306
+ measuring cup, a tea strainer for filtering the alcohol for reuse, a funnel
307
+ for pouring it back into the bottle, and of course a bottle of rubbing
308
+ alcohol (when we don't have tequila on hand).
309
+
310
+
311
+ Figure 3. Our mite washing kit. We carry everything but the smoker in the
312
+ upper tub, and use the clean lower tub for taking the bee samples. I set out
313
+ both types of washer cup designs for the photo. We replace the outer cups
314
+ whenever they begin to lose clarity on the bottom.
315
+
316
+
317
+ How Many Mites Is OK?
318
+
319
+
320
+ I hesitate to give hard and fast rules. The mite infestation rate, as
321
+ measured by an alcohol wash of 1/2 level cup of bees from the broodnest
322
+ (roughly 310 bees), will be at its lowest point in early spring, and rise
323
+ slowly as the colony builds until it reaches peak population. Then when the
324
+ colony begins to cut back on broodrearing after the main flow, the mite
325
+ level will climb rapidly (in my area) from late June through mid September.
326
+ During late summer and fall, the mite infestation rate varies with the
327
+ amount of broodrearing, since mites are forced onto the adult bees if the
328
+ colony reduces the amount of sealed brood.
329
+
330
+
331
+ Practical application: the alcohol wash measures the infestation rate of
332
+ mites on the adult bees (as opposed to the total number of mites in the
333
+ hive). When there is active broodrearing, half to 2/3rds of the mites may be
334
+ in the brood, and you should be concerned at a lower mite count; when there
335
+ is little sealed brood present, the mites will be forced onto the adult
336
+ bees, and the infestation rate will go up (but those mites are now fully
337
+ exposed to any sort of treatment). Take home message--don't just look at
338
+ absolute numbers, rather consider the percentage of the mite population that
339
+ is hidden under the brood cappings. And take that into consideration when
340
+ applying treatments.
341
+
342
+
343
+ And don't forget that there can be serious mite immigration from collapsing
344
+ colonies within flight distance, in which case mite levels in late
345
+ summer/early fall can spike suddenly.
346
+
347
+
348
+ Practical application: many beekeepers get blindsided when a wave of mite
349
+ immigration hits their hives in fall. We monitor our yards weekly in fall,
350
+ especially if there are poorly-managed apiaries in the vicinity (I tried to
351
+ word that diplomatically).
352
+
353
+
354
+ We try to keep mite levels down to the 1 mite per wash level in early
355
+ spring, allowing the level to rise to maybe 4-5 by late June (on average; of
356
+ course, some colonies will have higher or lower levels). Our treatment
357
+ threshold is generally 6 mites per wash (about a 2% infestation rate).
358
+
359
+
360
+ We may allow that rate to climb a small amount at its peak in mid September,
361
+ but then really work to get it down to below 6 until November, at which time
362
+ we knock it down to next to nothing with an oxalic dribble.
363
+
364
+
365
+ At about 15 mites per wash, we start to see virus issues in the bees and
366
+ brood (although we see huge colony-to-colony differences in their resistance
367
+ to viruses). At 45 mites per wash, colonies generally start to crash.
368
+
369
+
370
+ Practical application: it's far easier (and much better for your bees) to be
371
+ proactive rather than reactive. It's a lot harder to bring a high mite level
372
+ down than it is to keep it from climbing there in the first place. And it's
373
+ generally easier on the colony to give a series of gentle treatments rather
374
+ than one strong treatment.
375
+
376
+
377
+ Remember, you can take your bee samples from honey frames (far less
378
+ intrusive than disturbing the broodnest, and much less chance of
379
+ accidentally killing the queen). If you do take them from there, keep in
380
+ mind that mite levels will only be about 80% of those from broodnest bees,
381
+ so your treatment thresholds should be adjusted accordingly (say, down to 5
382
+ mites per wash).
383
+
384
+
385
+ Update July 2015
386
+
387
+
388
+ The Canadian Association of Professional Apiculturalists (CAPA) recently
389
+ released their survey for 2014 winter losses (based upon data representing
390
+ 50.8% of all colonies operated and wintered in Canada in 2014) [4]. The
391
+ national average percentage of colony winter loss was 16.4%. Overall, the
392
+ reported national colony loss is one of the lowest losses since 2006/07 and
393
+ represents a decrease of 34.4% from 2013/14 winter losses.
394
+
395
+
396
+ Compare the Canadian 2014 winter loss of 16% to that of beekeepers in the
397
+ U.S. of 23% [5]( note that several of Canada's beekeeping regions
398
+ experienced below average temperatures [6], although parts of Eastern U.S.
399
+ also experienced an unusually cold winter [7].
400
+
401
+
402
+ What could be the cause for U.S. beekeepers losing a greater percentage of
403
+ their colonies, despite our winters generally being considerably milder? Of
404
+ note is that the Canadians did not list varroa as a problem, although it was
405
+ commonly blamed by U.S. beekeepers (especially the more knowledgeable
406
+ commercial guys) [8].
407
+
408
+
409
+ Varroa clearly remains a serious problem for U.S. beekeepers, even if they
410
+ are in denial. Check out the monthly average mite loads from across the U.S.
411
+ from the 2013-14 USDA National Survey [9]
412
+
413
+
414
+ As you can see, U.S. colonies entering the winter have been running, on
415
+ average, above the 5% infestation level that generally spells winter death.
416
+ But surprisingly, many U.S. beekeepers never or rarely monitor effectively
417
+ or consistently for varroa [MASK].
418
+
419
+
420
+ The Canadians are another story, likely because their winters are so
421
+ unforgiving, plus they tend to listen more to their Provincial
422
+ Apiculturalists. Allow me to quote from the CAPA report:
423
+
424
+
425
+ In 2014, over 73% of surveyed of beekeepers monitored Varroa mite
426
+ infestations mainly using the alcohol wash or the sticky board methods.
427
+ Alcohol wash was the most preferred technique in all provinces, except
428
+ Quebec and British Columbia...These results demonstrate that beekeepers
429
+ recognize the value of surveillance and monitoring of Varroa mites. The
430
+ educational programs delivered to beekeepers in Canada have made a
431
+ difference in the application of proper beekeeping management practices for
432
+ Varroa mites.
433
+
434
+
435
+ Need I elaborate?
436
+ - example_title: Management emergency program - survey
437
+ text: |
438
+ ![](media/image2.jpeg){width="2.2597222222222224in"
439
+ height="11.003472222222221in"}
440
+
441
+ VHA Emergency Management Capability Assessment Final Report
442
+
443
+ **Results from** **Site Visit**
444
+
445
+ *Submitted to:*
446
+
447
+ Department of Veterans Affairs
448
+
449
+ Veterans Health Administration
450
+
451
+ Office of Public Health and Environmental Hazards, Emergency Management
452
+ Strategic Health Care Group
453
+
454
+ ![vaseal](media/image3.png){width="1.0972222222222223in"
455
+ height="1.0972222222222223in"}
456
+
457
+ **9 February 2009**
458
+
459
+ **VHA Emergency Management Capability Assessment Final Report**
460
+
461
+ \* \* \* \* \*
462
+
463
+ **TABLE OF CONTENTS**
464
+
465
+ [1 Executive Summary 1](#executive-summary)
466
+
467
+ [2 Introduction 1](#introduction)
468
+
469
+ [3 Methodology 1](#methodology)
470
+
471
+ [3.1 Capability Element Description 1](#capability-element-description)
472
+
473
+ [3.2 Capability Assessment and Measurement
474
+ 5](#capability-assessment-and-measurement)
475
+
476
+ [3.3 Data Collection Methodology 6](#data-collection-methodology)
477
+
478
+ [4 Overall Program Capabilities 7](#overall-program-capabilities)
479
+
480
+ [5 Discussion of Facility Profile 7](#_Toc213225782)
481
+
482
+ [5.1 Program Level Exemplary Practices
483
+ 7](#program-level-exemplary-practices)
484
+
485
+ [5.2 Operational Level Exemplary Practices
486
+ 8](#operational-level-exemplary-practices)
487
+
488
+ [6 Program Level Recommendations and Enhancements
489
+ 8](#program-level-recommendations-and-enhancements)
490
+
491
+ [7 Operational Level Recommendations and Enhancements
492
+ 8](#operational-level-recommendations-and-enhancements)
493
+
494
+ [8 The Joint Commission and NIMS Scorecards
495
+ 8](#the-joint-commission-and-nims-scorecards)
496
+
497
+ [8.1 The Joint Commission Scorecard 8](#the-joint-commission-scorecard)
498
+
499
+ [8.2 The National Incident Management Scorecard
500
+ 8](#the-national-incident-management-scorecard)
501
+
502
+ [Appendix A 9](#appendix-a)
503
+
504
+ [Acronym List 9](#acronym-list)
505
+
506
+ [Appendix B 11](#_Toc213225792)
507
+
508
+ [Capability Descriptor List 11](#capability-descriptor-list)
509
+
510
+ [Appendix C 15](#appendix-c)
511
+
512
+ [The Joint Commission Scorecard 15](#the-joint-commission-scorecard-1)
513
+
514
+ [Appendix D 16](#appendix-d)
515
+
516
+ [The National Incident Management Scorecard
517
+ 16](#the-national-incident-management-scorecard-1)
518
+
519
+ Executive Summary
520
+ =================
521
+
522
+ A site assessment was conducted by the Comprehensive Emergency
523
+ Management Program (CEMP) Assessment Team from \<Date of Assessments\>.
524
+ The Assessment Team included \<List of Assessment Team\>. The team
525
+ appreciated the cooperation and enthusiasm of the staff and their
526
+ willingness to assist in a very successful visit.
527
+
528
+ \<Describe how the VAMC met standards/requirements and facility status\>
529
+
530
+ \<Describe areas for capability enhancement and any recommendations
531
+ given\>
532
+
533
+ Introduction
534
+ ============
535
+
536
+ The, located in, is identified as a \<list identifiers and
537
+ affiliations\>.
538
+
539
+ \<Describe site location and purpose of visit\>.
540
+
541
+ Methodology
542
+ ===========
543
+
544
+ Prior to the site visits, the Assessment Team worked closely with
545
+ experts in the field of emergency medicine and preparedness to define
546
+ the assessment elements for the study. These experts represented VHA,
547
+ other federal agencies including the Department of Homeland Security
548
+ (DHS), Health and Human Services (HHS), and Defense, academia, and
549
+ clinical medicine. Through consultation with these experts the
550
+ Assessment Team defined the 69 capabilities for assessment as well as
551
+ the measurement scheme. The following sections will provide a high level
552
+ summary of the overall assessment protocol.
553
+
554
+ Capability Element Description
555
+ ------------------------------
556
+
557
+ To determine the elements for assessment during the site visits and
558
+ pre-[MASK], the VHA capabilities were categorized into six groups. These
559
+ included capabilities relevant to:
560
+
561
+ - **Program Level** capabilities help to ensure the facility addresses
562
+ issues relative to planning and preparedness as a crucial building
563
+ block for facility capabilities. These program level capabilities
564
+ were categorized into the following groups:
565
+
566
+ ```{=html}
567
+ <!-- -->
568
+ ```
569
+ - **Systems-Based Approach to the Development, Implementation,
570
+ Management, and Maintenance of the Emergency Management Program**
571
+
572
+ ```{=html}
573
+ <!-- -->
574
+ ```
575
+ - **Administrative Activities ensure the Emergency Management Program
576
+ > meets its Mission and Objectives**
577
+
578
+ - **Development, Implementation, Management, and Maintenance of an
579
+ > Emergency Management Committee process to Support the Emergency
580
+ > Management Program**
581
+
582
+ - **Development, Implementation, and Maintenance of a Hazard
583
+ > Vulnerability Analysis process as the Foundation for Conducting
584
+ > the Emergency Management Program**
585
+
586
+ - **Incorporation of Comprehensive Mitigation Planning into the
587
+ > Facility's Emergency Management Program**
588
+
589
+ - **Incorporation of Comprehensive Preparedness Planning into the
590
+ > Facility's Emergency Management Program**
591
+
592
+ - **Incorporation of Continuity Planning into the Activities of the
593
+ > Facility's Emergency Management Program to ensure Organizational
594
+ > Continuity and Resiliency of Mission Critical Functions,
595
+ > Processes, and Systems**
596
+
597
+ - **Development, Implementation, Management, and Maintenance of an
598
+ > Emergency Operations Plan**
599
+
600
+ - **Incorporation of Comprehensive Instructional Activity into the
601
+ > Preparedness Activities of the Facility's Emergency Management
602
+ > Program**
603
+
604
+ - **Incorporation of a Range of Exercise Types that Test the
605
+ > Facility's Emergency Management Program**
606
+
607
+ - **Demonstration of Systems-Based Evaluation of the Facility's
608
+ > Overall Emergency Management Program and its Emergency Operations
609
+ > Plan**
610
+
611
+ - **Incorporation of Accepted Improvement Recommendations into the
612
+ > Emergency Management Program and its Components such that the
613
+ > process becomes one of a Learning Organization**
614
+
615
+ - **Incident Management** capabilities help to ensure the facility
616
+ > can manage all incidents regardless of scope. These
617
+ > capabilities were categorized into the following groups:
618
+
619
+ ```{=html}
620
+ <!-- -->
621
+ ```
622
+ - Initial Incident Actions
623
+
624
+ - Public Information Management Services during an Incident
625
+
626
+ - Management and Acquisition of Resources for Incident Response and
627
+ Recovery Operations
628
+
629
+ - Processes and Procedures for Demobilization of Personnel and
630
+ Equipment
631
+
632
+ - Processes and Procedures for a Return to Readiness of Staff and
633
+ Equipment
634
+
635
+ ```{=html}
636
+ <!-- -->
637
+ ```
638
+ - **Occupant Safety** capabilities help to ensure the facility and its
639
+ occupants are protected and out of harm's way. These capabilities
640
+ were categorized into the following groups:
641
+
642
+ ```{=html}
643
+ <!-- -->
644
+ ```
645
+ - Evacuation vs. Shelter-In-Place
646
+
647
+ - Perimeter Management of Access/Egress to Facility during an Incident
648
+ > (e.g. Lock Down)
649
+
650
+ - Processes and Procedures for Managing a Hazardous Substance Incident
651
+
652
+ - Infection Control
653
+
654
+ - Fire Protection and Rescue Services for Response to Incidents
655
+
656
+ ```{=html}
657
+ <!-- -->
658
+ ```
659
+ - **Resiliency and Continuity of Operations** **(COOP)** capabilities
660
+ help to ensure the facility can continue to provide high quality
661
+ healthcare, and that all facility based operations can continue
662
+ during an emergency. These capabilities were categorized into the
663
+ following groups:
664
+
665
+ ```{=html}
666
+ <!-- -->
667
+ ```
668
+ - Personnel Resiliency
669
+
670
+ - Mission Critical Systems Resiliency
671
+
672
+ - Communications
673
+
674
+ - Healthcare Service System Resiliency
675
+
676
+ - Development, Implementation, Management, and Maintenance of a
677
+ Research Program EOP
678
+
679
+ - Maintaining Patient Mental Health and Welfare
680
+
681
+ ```{=html}
682
+ <!-- -->
683
+ ```
684
+ - **Medical Surge** capabilities help to ensure the facility can meet
685
+ the increased demand for health care services during an emergency.
686
+ These capabilities were categorized into the following groups:
687
+
688
+ ```{=html}
689
+ <!-- -->
690
+ ```
691
+ - Processes and Procedures for Expansion of Staff for Response and
692
+ Recovery Operations
693
+
694
+ - Management of External Volunteers and Donations during Emergencies
695
+
696
+ - Management of Volunteers Deployment Support (e.g. DEMPS) during
697
+ Response and Recovery Operations
698
+
699
+ > Expansion of Evaluation and Treatment Services
700
+
701
+ - **Support to External Requirements** help to ensure the facility can
702
+ > integrate with the community and other federal health partners
703
+ > such as HHS, including Centers for Disease Control and
704
+ > Prevention (CDC) and Assistant Secretary for Preparedness and
705
+ > Response (ASPR), DHS, and Department of Defense (DOD). This
706
+ > capability included the ability to conduct patient reception
707
+ > activities under the VA/DOD Contingency Hospital System and
708
+ > National Disaster Medical System (NDMS). These capabilities were
709
+ > categorized into the following groups:
710
+
711
+ ```{=html}
712
+ <!-- -->
713
+ - example_title: important example
714
+ text: I love to [MASK] memes.
715
+ ---
716
+
717
+
718
+ # BEE-spoke-data/bert-plus-L8-4096-v1.0
719
+
720
+
721
+
722
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/60bccec062080d33f875cd0c/I8H0mYfChncerfvtRgLyd.png)
723
+
724
+ > Still running some evals; expect the model card to change a bit.
725
+
726
+ \* No additional code required: this model uses `position_embedding_type="relative_key"` to support its long (4096-token) context.
727
+
728
+
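+ As a quick usage sketch (only the repo id and the example sentence come from this card; the rest is a standard `transformers` fill-mask call):
+ 
+ ```python
+ from transformers import pipeline
+ 
+ # Stock classes suffice: long-context support comes from the
+ # position_embedding_type="relative_key" setting in config.json.
+ fill_mask = pipeline("fill-mask", model="BEE-spoke-data/bert-plus-L8-4096-v1.0")
+ 
+ # Example input reused from the widget examples above.
+ print(fill_mask("I love to [MASK] memes."))
+ ```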
729
+ ## This checkpoint
730
+
731
+ A further progression after multitask training and other stages; the most recent dataset this checkpoint saw was euirim/goodwiki.
732
+
733
+ It achieves the following results on the evaluation set:
734
+ - Loss: 1.9835
735
+ - Accuracy: 0.6159
736
+
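+ (Sanity check: the perplexity in `eval_results.json` follows directly from this loss, assuming the usual `exp(loss)` relation:)
+ 
+ ```python
+ import math
+ 
+ eval_loss = 1.9835  # evaluation loss reported above
+ print(math.exp(eval_loss))  # ~7.27, matching "perplexity" in eval_results.json
+ ```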
737
+ ---
738
+
739
+ ## GLUE benchmark
740
+
741
+ > WIP till this text is removed
742
+
743
+
744
+ Thus far, all runs were completed in fp32 (_using the NVIDIA TF32 dtype behind the scenes when supported_).
745
+
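+ (For context, TF32 is a CUDA-level numeric mode rather than a model dtype; in PyTorch it is toggled roughly like this:)
+ 
+ ```python
+ import torch
+ 
+ # Allow TF32 tensor cores for fp32 matmuls/convolutions on Ampere+ GPUs.
+ torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
+ ```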
746
+ | Model | Size | Avg | CoLA | SST2 | MRPC | STSB | QQP | MNLI | QNLI | RTE |
747
+ |------------------------------------|-------|----------|-------|------|------|------|------|------|------|-------|
748
+ | bert-plus-L8-4096-v1.0 | 88.1M | 82.78 | 62.72 | 90.6 | 86.59| 92.07| 90.6 | 83.2 | 90.0 | 66.43 |
749
+ | bert_uncased_L-8_H-768_A-12 | 81.2M | 81.65 | 54.0 | 92.6 | 85.43| 92.60| 90.6 | 81.0 | 90.0 | 67.0 |
750
+ | bert-base-uncased | 110M | 79.05 | 52.1 | 93.5 | 88.9 | 85.8 | 71.2 | 84.0 | 90.5 | 66.4 |
751
+
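+ (The Avg column is the unweighted mean of the eight task scores; e.g. for the first row:)
+ 
+ ```python
+ scores = [62.72, 90.6, 86.59, 92.07, 90.6, 83.2, 90.0, 66.43]  # bert-plus-L8-4096-v1.0
+ print(round(sum(scores) / len(scores), 2))  # 82.78, the reported Avg
+ ```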
752
+ Below are some comparisons to recent BERT models, taken from [nomic's blog post](https://blog.nomic.ai/posts/nomic-embed-text-v1):
753
+
754
+ | Model | Size | Avg | CoLA | SST2 | MRPC | STSB | QQP | MNLI | QNLI | RTE |
755
+ |---------------|-------|-------|-------|------|------|------|------|------|------|-------|
756
+ | NomicBERT | 137M | 84.00 | 50.00 | 93.00| 88.00| 90.00| 92.00| 86.00| 92.00| 82.00 |
757
+ | RobertaBase | 125M | 86.00 | 64.00 | 95.00| 90.00| 91.00| 92.00| 88.00| 93.00| 79.00 |
758
+ | JinaBERTBase | 137M | 83.00 | 51.00 | 95.00| 88.00| 90.00| 81.00| 86.00| 92.00| 79.00 |
759
+ | MosaicBERT | 137M | 85.00 | 59.00 | 94.00| 89.00| 90.00| 92.00| 86.00| 91.00| 83.00 |
760
+
761
+ ### Observations:
762
+
763
+ 1. **Performance Variation Across Models and Tasks**: The data highlights significant performance variability both across and within models for different GLUE tasks. This variability underscores the complexity of natural language understanding tasks and the need for models to be versatile in handling different types of linguistic challenges.
764
+
765
+ 2. **Model Size and Efficiency**: Despite the differences in model size, there is not always a direct correlation between size and performance across tasks. For instance, `bert_uncased_L-8_H-768_A-12` performs competitively with larger models in certain tasks, suggesting that efficiency in model architecture and training can compensate for smaller model sizes.
766
+
767
+ 3. **Task-specific Challenges**: Certain tasks, such as RTE, present considerable challenges to all models, indicating the difficulty of tasks that require deep understanding and reasoning over language. This suggests areas where further research and model innovation are needed to improve performance.
768
+
769
+ 4. **Overall Model Performance**: Models like `roberta-base` show strong performance across a broad spectrum of tasks, indicating the effectiveness of their architecture and pre-training methodology. Meanwhile, models such as `BEE-spoke-data/bert-plus-L8-4096-v1.0` showcase the potential for achieving competitive performance at relatively smaller sizes, emphasizing the importance of model design and optimization.
770
+
771
+ ---
772
+
773
+ ## Training procedure
774
+
775
+ The section below is auto-generated and applies only to the final 'finishing touches' run on `goodwiki`.
776
+
777
+
778
+ ### Training hyperparameters
779
+
780
+ The following hyperparameters were used during training (see the `TrainingArguments` sketch after this list):
781
+ - learning_rate: 0.0001
782
+ - train_batch_size: 4
783
+ - eval_batch_size: 4
784
+ - seed: 31010
785
+ - gradient_accumulation_steps: 16
786
+ - total_train_batch_size: 64
787
+ - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-08
788
+ - lr_scheduler_type: linear
789
+ - lr_scheduler_warmup_steps: 100
790
+ - num_epochs: 1.0
791
+
792
+ ### Training results
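+ A minimal sketch of how these values map onto `transformers.TrainingArguments` (the `output_dir` is a hypothetical placeholder; everything else is taken from the list above):
+ 
+ ```python
+ from transformers import TrainingArguments
+ 
+ args = TrainingArguments(
+     output_dir="bert-plus-L8-4096-goodwiki",  # hypothetical path
+     learning_rate=1e-4,
+     per_device_train_batch_size=4,
+     per_device_eval_batch_size=4,
+     seed=31010,
+     gradient_accumulation_steps=16,  # 4 x 16 = total train batch size 64
+     adam_beta1=0.9,
+     adam_beta2=0.98,
+     adam_epsilon=1e-8,
+     lr_scheduler_type="linear",
+     warmup_steps=100,
+     num_train_epochs=1.0,
+ )
+ ```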
793
+
794
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
795
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
796
+ | 2.1283 | 0.25 | 150 | 2.0892 | 0.6018 |
797
+ | 2.0999 | 0.5 | 300 | 2.0387 | 0.6084 |
798
+ | 2.0595 | 0.75 | 450 | 1.9971 | 0.6143 |
799
+ | 2.0481 | 1.0 | 600 | 1.9893 | 0.6152 |
800
+
801
+
802
+ ### Framework versions
803
+
804
+ - Transformers 4.37.2
805
+ - PyTorch 2.3.0.dev20240206+cu121
806
+ - Datasets 2.16.1
807
+ - Tokenizers 0.15.1
all_results.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.6158778269993005,
4
+ "eval_loss": 1.9834641218185425,
5
+ "eval_runtime": 40.5481,
6
+ "eval_samples": 300,
7
+ "eval_samples_per_second": 7.399,
8
+ "eval_steps_per_second": 1.85,
9
+ "perplexity": 7.267876236350372,
10
+ "train_loss": 2.32403786500295,
11
+ "train_runtime": 14945.8713,
12
+ "train_samples": 38441,
13
+ "train_samples_per_second": 2.572,
14
+ "train_steps_per_second": 0.04
15
+ }
config.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "_name_or_path": "./bert-plus-embedderForMLM",
3
+ "architectures": [
4
+ "BertForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.04,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "silu",
9
+ "hidden_dropout_prob": 0.04,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 4096,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 8,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "relative_key",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.37.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
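Notable in the config above: `hidden_act` is `silu` (rather than BERT's usual `gelu`) and positions use `relative_key` with `max_position_embeddings` of 4096. A quick check that the stock `AutoConfig` round-trips these values (repo id taken from the README):

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("BEE-spoke-data/bert-plus-L8-4096-v1.0")
print(cfg.hidden_act, cfg.position_embedding_type, cfg.max_position_embeddings)
# silu relative_key 4096
```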
eval_results.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.6158778269993005,
4
+ "eval_loss": 1.9834641218185425,
5
+ "eval_runtime": 40.5481,
6
+ "eval_samples": 300,
7
+ "eval_samples_per_second": 7.399,
8
+ "eval_steps_per_second": 1.85,
9
+ "perplexity": 7.267876236350372
10
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "pad_token_id": 0,
4
+ "transformers_version": "4.37.2",
5
+ "use_cache": false
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f7c7436d13fb96579e85d8902b68c856d90c1fde2173dcbd6d07e5ae7ad7dd0
3
+ size 352453696
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 4096,
49
+ "model_max_length": 4096,
50
+ "pad_to_multiple_of": null,
51
+ "pad_token": "[PAD]",
52
+ "pad_token_type_id": 0,
53
+ "padding_side": "right",
54
+ "sep_token": "[SEP]",
55
+ "stride": 0,
56
+ "strip_accents": null,
57
+ "tokenize_chinese_chars": true,
58
+ "tokenizer_class": "BertTokenizer",
59
+ "truncation_side": "right",
60
+ "truncation_strategy": "longest_first",
61
+ "unk_token": "[UNK]"
62
+ }
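The key setting above is `model_max_length: 4096`, which keeps the tokenizer from truncating at BERT's usual 512 tokens. A minimal check (repo id taken from the README):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("BEE-spoke-data/bert-plus-L8-4096-v1.0")
print(tok.model_max_length)  # 4096
print(tok.mask_token)        # [MASK], as defined in special_tokens_map.json
```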
train_results.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 2.32403786500295,
4
+ "train_runtime": 14945.8713,
5
+ "train_samples": 38441,
6
+ "train_samples_per_second": 2.572,
7
+ "train_steps_per_second": 0.04
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,786 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9988554780980127,
5
+ "eval_steps": 150,
6
+ "global_step": 600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "learning_rate": 5e-06,
14
+ "loss": 8.3066,
15
+ "step": 5
16
+ },
17
+ {
18
+ "epoch": 0.02,
19
+ "learning_rate": 1e-05,
20
+ "loss": 7.9763,
21
+ "step": 10
22
+ },
23
+ {
24
+ "epoch": 0.02,
25
+ "learning_rate": 1.5e-05,
26
+ "loss": 7.2999,
27
+ "step": 15
28
+ },
29
+ {
30
+ "epoch": 0.03,
31
+ "learning_rate": 2e-05,
32
+ "loss": 6.3145,
33
+ "step": 20
34
+ },
35
+ {
36
+ "epoch": 0.04,
37
+ "learning_rate": 2.5e-05,
38
+ "loss": 4.8549,
39
+ "step": 25
40
+ },
41
+ {
42
+ "epoch": 0.05,
43
+ "learning_rate": 3e-05,
44
+ "loss": 3.7485,
45
+ "step": 30
46
+ },
47
+ {
48
+ "epoch": 0.06,
49
+ "learning_rate": 3.5e-05,
50
+ "loss": 3.0173,
51
+ "step": 35
52
+ },
53
+ {
54
+ "epoch": 0.07,
55
+ "learning_rate": 4e-05,
56
+ "loss": 2.6364,
57
+ "step": 40
58
+ },
59
+ {
60
+ "epoch": 0.07,
61
+ "learning_rate": 4.5e-05,
62
+ "loss": 2.4726,
63
+ "step": 45
64
+ },
65
+ {
66
+ "epoch": 0.08,
67
+ "learning_rate": 5e-05,
68
+ "loss": 2.3895,
69
+ "step": 50
70
+ },
71
+ {
72
+ "epoch": 0.09,
73
+ "learning_rate": 5.500000000000001e-05,
74
+ "loss": 2.2992,
75
+ "step": 55
76
+ },
77
+ {
78
+ "epoch": 0.1,
79
+ "learning_rate": 6e-05,
80
+ "loss": 2.291,
81
+ "step": 60
82
+ },
83
+ {
84
+ "epoch": 0.11,
85
+ "learning_rate": 6.500000000000001e-05,
86
+ "loss": 2.2597,
87
+ "step": 65
88
+ },
89
+ {
90
+ "epoch": 0.12,
91
+ "learning_rate": 7e-05,
92
+ "loss": 2.2245,
93
+ "step": 70
94
+ },
95
+ {
96
+ "epoch": 0.12,
97
+ "learning_rate": 7.500000000000001e-05,
98
+ "loss": 2.2076,
99
+ "step": 75
100
+ },
101
+ {
102
+ "epoch": 0.13,
103
+ "learning_rate": 8e-05,
104
+ "loss": 2.2132,
105
+ "step": 80
106
+ },
107
+ {
108
+ "epoch": 0.14,
109
+ "learning_rate": 8.5e-05,
110
+ "loss": 2.1925,
111
+ "step": 85
112
+ },
113
+ {
+ "epoch": 0.15,
+ "learning_rate": 9e-05,
+ "loss": 2.1868,
+ "step": 90
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 9.5e-05,
+ "loss": 2.1916,
+ "step": 95
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 0.0001,
+ "loss": 2.1774,
+ "step": 100
+ },
+ {
+ "epoch": 0.17,
+ "learning_rate": 9.900000000000001e-05,
+ "loss": 2.183,
+ "step": 105
+ },
+ {
+ "epoch": 0.18,
+ "learning_rate": 9.8e-05,
+ "loss": 2.1712,
+ "step": 110
+ },
+ {
+ "epoch": 0.19,
+ "learning_rate": 9.7e-05,
+ "loss": 2.1713,
+ "step": 115
+ },
+ {
+ "epoch": 0.2,
+ "learning_rate": 9.6e-05,
+ "loss": 2.1731,
+ "step": 120
+ },
+ {
+ "epoch": 0.21,
+ "learning_rate": 9.5e-05,
+ "loss": 2.1565,
+ "step": 125
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.4e-05,
+ "loss": 2.1575,
+ "step": 130
+ },
+ {
+ "epoch": 0.22,
+ "learning_rate": 9.300000000000001e-05,
+ "loss": 2.1414,
+ "step": 135
+ },
+ {
+ "epoch": 0.23,
+ "learning_rate": 9.200000000000001e-05,
+ "loss": 2.1384,
+ "step": 140
+ },
+ {
+ "epoch": 0.24,
+ "learning_rate": 9.1e-05,
+ "loss": 2.1487,
+ "step": 145
+ },
+ {
+ "epoch": 0.25,
+ "learning_rate": 9e-05,
+ "loss": 2.1283,
+ "step": 150
+ },
+ {
+ "epoch": 0.25,
+ "eval_accuracy": 0.6017748783939872,
+ "eval_loss": 2.089164972305298,
+ "eval_runtime": 58.1804,
+ "eval_samples_per_second": 5.156,
+ "eval_steps_per_second": 1.289,
+ "step": 150
+ },
+ {
+ "epoch": 0.26,
+ "learning_rate": 8.900000000000001e-05,
+ "loss": 2.1224,
+ "step": 155
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 8.800000000000001e-05,
+ "loss": 2.144,
+ "step": 160
+ },
+ {
+ "epoch": 0.27,
+ "learning_rate": 8.7e-05,
+ "loss": 2.1293,
+ "step": 165
+ },
+ {
+ "epoch": 0.28,
+ "learning_rate": 8.6e-05,
+ "loss": 2.1417,
+ "step": 170
+ },
+ {
+ "epoch": 0.29,
+ "learning_rate": 8.5e-05,
+ "loss": 2.1282,
+ "step": 175
+ },
+ {
+ "epoch": 0.3,
+ "learning_rate": 8.4e-05,
+ "loss": 2.1008,
+ "step": 180
+ },
+ {
+ "epoch": 0.31,
+ "learning_rate": 8.3e-05,
+ "loss": 2.1218,
+ "step": 185
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 8.2e-05,
+ "loss": 2.1292,
+ "step": 190
+ },
+ {
+ "epoch": 0.32,
+ "learning_rate": 8.1e-05,
+ "loss": 2.114,
+ "step": 195
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 8e-05,
+ "loss": 2.1135,
+ "step": 200
+ },
+ {
+ "epoch": 0.34,
+ "learning_rate": 7.900000000000001e-05,
+ "loss": 2.118,
+ "step": 205
+ },
+ {
+ "epoch": 0.35,
+ "learning_rate": 7.800000000000001e-05,
+ "loss": 2.1231,
+ "step": 210
+ },
+ {
+ "epoch": 0.36,
+ "learning_rate": 7.7e-05,
+ "loss": 2.0705,
+ "step": 215
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 7.6e-05,
+ "loss": 2.11,
+ "step": 220
+ },
+ {
+ "epoch": 0.37,
+ "learning_rate": 7.500000000000001e-05,
+ "loss": 2.0866,
+ "step": 225
+ },
+ {
+ "epoch": 0.38,
+ "learning_rate": 7.4e-05,
+ "loss": 2.1115,
+ "step": 230
+ },
+ {
+ "epoch": 0.39,
+ "learning_rate": 7.3e-05,
+ "loss": 2.1069,
+ "step": 235
+ },
+ {
+ "epoch": 0.4,
+ "learning_rate": 7.2e-05,
+ "loss": 2.1083,
+ "step": 240
+ },
+ {
+ "epoch": 0.41,
+ "learning_rate": 7.1e-05,
+ "loss": 2.1014,
+ "step": 245
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 7e-05,
+ "loss": 2.1029,
+ "step": 250
+ },
+ {
+ "epoch": 0.42,
+ "learning_rate": 6.9e-05,
+ "loss": 2.0846,
+ "step": 255
+ },
+ {
+ "epoch": 0.43,
+ "learning_rate": 6.800000000000001e-05,
+ "loss": 2.1059,
+ "step": 260
+ },
+ {
+ "epoch": 0.44,
+ "learning_rate": 6.7e-05,
+ "loss": 2.0875,
+ "step": 265
+ },
+ {
+ "epoch": 0.45,
+ "learning_rate": 6.6e-05,
+ "loss": 2.0973,
+ "step": 270
+ },
+ {
+ "epoch": 0.46,
+ "learning_rate": 6.500000000000001e-05,
+ "loss": 2.0932,
+ "step": 275
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 6.400000000000001e-05,
+ "loss": 2.0721,
+ "step": 280
+ },
+ {
+ "epoch": 0.47,
+ "learning_rate": 6.3e-05,
+ "loss": 2.0892,
+ "step": 285
+ },
+ {
+ "epoch": 0.48,
+ "learning_rate": 6.2e-05,
+ "loss": 2.1002,
+ "step": 290
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 6.1e-05,
+ "loss": 2.0804,
+ "step": 295
+ },
+ {
+ "epoch": 0.5,
+ "learning_rate": 6e-05,
+ "loss": 2.0999,
+ "step": 300
+ },
+ {
+ "epoch": 0.5,
+ "eval_accuracy": 0.6084190234842455,
+ "eval_loss": 2.038738965988159,
+ "eval_runtime": 68.8578,
+ "eval_samples_per_second": 4.357,
+ "eval_steps_per_second": 1.089,
+ "step": 300
+ },
+ {
+ "epoch": 0.51,
+ "learning_rate": 5.9e-05,
+ "loss": 2.0905,
+ "step": 305
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 5.8e-05,
+ "loss": 2.046,
+ "step": 310
+ },
+ {
+ "epoch": 0.52,
+ "learning_rate": 5.6999999999999996e-05,
+ "loss": 2.0794,
+ "step": 315
+ },
+ {
+ "epoch": 0.53,
+ "learning_rate": 5.6000000000000006e-05,
+ "loss": 2.0714,
+ "step": 320
+ },
+ {
+ "epoch": 0.54,
+ "learning_rate": 5.500000000000001e-05,
+ "loss": 2.0711,
+ "step": 325
+ },
+ {
+ "epoch": 0.55,
+ "learning_rate": 5.4000000000000005e-05,
+ "loss": 2.0525,
+ "step": 330
+ },
+ {
+ "epoch": 0.56,
+ "learning_rate": 5.300000000000001e-05,
+ "loss": 2.0689,
+ "step": 335
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 5.2000000000000004e-05,
+ "loss": 2.0936,
+ "step": 340
+ },
+ {
+ "epoch": 0.57,
+ "learning_rate": 5.1000000000000006e-05,
+ "loss": 2.062,
+ "step": 345
+ },
+ {
+ "epoch": 0.58,
+ "learning_rate": 5e-05,
+ "loss": 2.0621,
+ "step": 350
+ },
+ {
+ "epoch": 0.59,
+ "learning_rate": 4.9e-05,
+ "loss": 2.0662,
+ "step": 355
+ },
+ {
+ "epoch": 0.6,
+ "learning_rate": 4.8e-05,
+ "loss": 2.0779,
+ "step": 360
+ },
+ {
+ "epoch": 0.61,
+ "learning_rate": 4.7e-05,
+ "loss": 2.0773,
+ "step": 365
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 4.600000000000001e-05,
+ "loss": 2.0407,
+ "step": 370
+ },
+ {
+ "epoch": 0.62,
+ "learning_rate": 4.5e-05,
+ "loss": 2.0603,
+ "step": 375
+ },
+ {
+ "epoch": 0.63,
+ "learning_rate": 4.4000000000000006e-05,
+ "loss": 2.0556,
+ "step": 380
+ },
+ {
+ "epoch": 0.64,
+ "learning_rate": 4.3e-05,
+ "loss": 2.0742,
+ "step": 385
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 4.2e-05,
+ "loss": 2.0444,
+ "step": 390
+ },
+ {
+ "epoch": 0.66,
+ "learning_rate": 4.1e-05,
+ "loss": 2.0409,
+ "step": 395
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 4e-05,
+ "loss": 2.0539,
+ "step": 400
+ },
+ {
+ "epoch": 0.67,
+ "learning_rate": 3.9000000000000006e-05,
+ "loss": 2.0696,
+ "step": 405
+ },
+ {
+ "epoch": 0.68,
+ "learning_rate": 3.8e-05,
+ "loss": 2.0614,
+ "step": 410
+ },
+ {
+ "epoch": 0.69,
+ "learning_rate": 3.7e-05,
+ "loss": 2.0604,
+ "step": 415
+ },
+ {
+ "epoch": 0.7,
+ "learning_rate": 3.6e-05,
+ "loss": 2.0608,
+ "step": 420
+ },
+ {
+ "epoch": 0.71,
+ "learning_rate": 3.5e-05,
+ "loss": 2.0412,
+ "step": 425
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 3.4000000000000007e-05,
+ "loss": 2.0558,
+ "step": 430
+ },
+ {
+ "epoch": 0.72,
+ "learning_rate": 3.3e-05,
+ "loss": 2.0338,
+ "step": 435
+ },
+ {
+ "epoch": 0.73,
+ "learning_rate": 3.2000000000000005e-05,
+ "loss": 2.0322,
+ "step": 440
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 3.1e-05,
+ "loss": 2.0511,
+ "step": 445
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 3e-05,
+ "loss": 2.0595,
+ "step": 450
+ },
+ {
+ "epoch": 0.75,
+ "eval_accuracy": 0.6142522292024847,
+ "eval_loss": 1.9970886707305908,
+ "eval_runtime": 61.0863,
+ "eval_samples_per_second": 4.911,
+ "eval_steps_per_second": 1.228,
+ "step": 450
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 2.9e-05,
+ "loss": 2.032,
+ "step": 455
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 2.8000000000000003e-05,
+ "loss": 2.0421,
+ "step": 460
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 2.7000000000000002e-05,
+ "loss": 2.0255,
+ "step": 465
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 2.6000000000000002e-05,
+ "loss": 2.0259,
+ "step": 470
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 2.5e-05,
+ "loss": 2.0361,
+ "step": 475
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 2.4e-05,
+ "loss": 2.0584,
+ "step": 480
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 2.3000000000000003e-05,
+ "loss": 2.0589,
+ "step": 485
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 2.2000000000000003e-05,
+ "loss": 2.0555,
+ "step": 490
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 2.1e-05,
+ "loss": 2.0413,
+ "step": 495
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 2e-05,
+ "loss": 2.0097,
+ "step": 500
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 1.9e-05,
+ "loss": 2.0466,
+ "step": 505
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 1.8e-05,
+ "loss": 2.0572,
+ "step": 510
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 1.7000000000000003e-05,
+ "loss": 2.0376,
+ "step": 515
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 1.6000000000000003e-05,
+ "loss": 2.0202,
+ "step": 520
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 1.5e-05,
+ "loss": 2.0643,
+ "step": 525
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 1.4000000000000001e-05,
+ "loss": 2.0201,
+ "step": 530
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 1.3000000000000001e-05,
+ "loss": 2.0356,
+ "step": 535
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 1.2e-05,
+ "loss": 2.0255,
+ "step": 540
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 1.1000000000000001e-05,
+ "loss": 2.0374,
+ "step": 545
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 1e-05,
+ "loss": 2.0385,
+ "step": 550
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 9e-06,
+ "loss": 2.024,
+ "step": 555
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 8.000000000000001e-06,
+ "loss": 2.0144,
+ "step": 560
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 7.000000000000001e-06,
+ "loss": 2.04,
+ "step": 565
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 6e-06,
+ "loss": 2.0372,
+ "step": 570
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 5e-06,
+ "loss": 2.0122,
+ "step": 575
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 4.000000000000001e-06,
+ "loss": 2.0182,
+ "step": 580
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 3e-06,
+ "loss": 2.0323,
+ "step": 585
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 2.0000000000000003e-06,
+ "loss": 2.0298,
+ "step": 590
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 1.0000000000000002e-06,
+ "loss": 2.0212,
+ "step": 595
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 0.0,
+ "loss": 2.0481,
+ "step": 600
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.6152243190218628,
+ "eval_loss": 1.9892692565917969,
+ "eval_runtime": 64.3793,
+ "eval_samples_per_second": 4.66,
+ "eval_steps_per_second": 1.165,
+ "step": 600
+ },
+ {
+ "epoch": 1.0,
+ "step": 600,
+ "total_flos": 5.41006975991808e+16,
+ "train_loss": 2.32403786500295,
+ "train_runtime": 14945.8713,
+ "train_samples_per_second": 2.572,
+ "train_steps_per_second": 0.04
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 600,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 100,
+ "total_flos": 5.41006975991808e+16,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
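The log above is machine-generated by `transformers.Trainer`, but a few things are recoverable at a glance: the learning rate warms up linearly to 1e-4 at step 100, decays linearly to 0 at step 600, and an eval pass runs every 150 steps, with eval_loss improving from ~2.089 to ~1.989. For consuming the file programmatically rather than scanning the diff, here is a minimal sketch; it assumes a local copy of `trainer_state.json` (the filename Trainer emits) and is illustrative only, not part of this commit:

```python
import json

# Load the trainer state shown in the diff above.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries log "loss"; evaluation entries log "eval_*" keys instead,
# and the final summary entry logs "train_loss"/"train_runtime".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_logs:
    print(f"step {e['step']:>4}: eval_loss={e['eval_loss']:.4f} "
          f"eval_accuracy={e['eval_accuracy']:.4f}")
```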
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7cd1e46c219da670f4ca325cabcaa68d77a6ac557f068ec90ffc6e6e23d7af29
+ size 4920
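The three `+` lines above are a Git LFS pointer, not the file itself; `git lfs pull` (or a normal Hub download) fetches the actual 4920-byte binary. Conventionally, `training_args.bin` is a pickled `TrainingArguments` object, so once downloaded it can be inspected as sketched below. This is a hedged sketch: it assumes a compatible `transformers` version and that you trust the checkpoint's origin, since unpickling executes code.

```python
import torch

# training_args.bin is a full pickle rather than a plain tensor archive, so
# recent torch versions need weights_only=False. Only load files you trust.
args = torch.load("training_args.bin", weights_only=False)

print(args.learning_rate)               # expected to match the peak LR in the log (1e-4)
print(args.per_device_train_batch_size) # expected to match "train_batch_size": 4 above
print(args.num_train_epochs)
```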
vocab.txt ADDED
The diff for this file is too large to render. See raw diff