Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +569 -0
- config.json +45 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:3016
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: nomic-ai/modernbert-embed-base
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: The Business-to-Business Sales and Key Accounts Executive executes
|
| 13 |
+
technical sales strategies and business plans, identifies potential new business
|
| 14 |
+
avenues and platforms through the client base, rectifies technical issues for
|
| 15 |
+
clients during the sales process, and contributes to the organising of networking
|
| 16 |
+
events to enhance client relationships. He/She implements new account development
|
| 17 |
+
strategies to capitalise on potential opportunities. He works in a customer-centric,
|
| 18 |
+
dynamic and challenging sales environment. He is a resourceful, self-driven and
|
| 19 |
+
sales-savvy team player who is able to interact with others effectively and work
|
| 20 |
+
under pressure within tight deadlines to deliver sales results.
|
| 21 |
+
sentences:
|
| 22 |
+
- The Senior Risk Analyst is responsible for collaborating with various internal
|
| 23 |
+
teams to pinpoint and articulate complex risks linked to particular business areas.
|
| 24 |
+
This role involves evaluating and emphasizing significant risks to aid in comprehensive
|
| 25 |
+
risk evaluations and shaping risk profiles. The analyst ensures compliance with
|
| 26 |
+
established risk policies and procedures while facilitating effective risk communication
|
| 27 |
+
and reporting. Additionally, the Senior Risk Analyst reviews and improves risk
|
| 28 |
+
assessment methodologies throughout the organisation’s regional operations, contributing
|
| 29 |
+
to ongoing risk research initiatives. The ideal candidate is analytical, detail-oriented,
|
| 30 |
+
methodical, and demonstrates strong problem-solving and critical thinking abilities.
|
| 31 |
+
- The Business-to-Business Marketing and Brand Executive develops marketing campaigns
|
| 32 |
+
and branding strategies, analyses market trends to identify new promotional channels,
|
| 33 |
+
manages product positioning, and coordinates advertising events to boost brand
|
| 34 |
+
visibility. This role requires creativity, strategic thinking, and the ability
|
| 35 |
+
to work collaboratively in a deadline-driven environment to enhance the company’s
|
| 36 |
+
market presence.
|
| 37 |
+
- The Business-to-Business Sales and Key Accounts Executive is responsible for deploying
|
| 38 |
+
effective sales tactics and developing comprehensive business plans to expand
|
| 39 |
+
the client portfolio. This role involves identifying new market opportunities
|
| 40 |
+
within existing accounts, resolving any technical concerns raised by clients during
|
| 41 |
+
the sales cycle, and assisting in the coordination of networking events aimed
|
| 42 |
+
at strengthening client engagement. Operating in a fast-paced and customer-focused
|
| 43 |
+
sales environment, the executive must be proactive, commercially astute, and able
|
| 44 |
+
to collaborate well with colleagues while managing tight deadlines to achieve
|
| 45 |
+
targeted sales outcomes.
|
| 46 |
+
- source_sentence: The Senior Customer Service Officer/Customer Service Specialist
|
| 47 |
+
is responsible for analysing and planning overall customer service excellence,
|
| 48 |
+
developing and reviewing process reviews and implementing customer service process
|
| 49 |
+
review initiatives. He/She is also responsible for measuring results and managing
|
| 50 |
+
change management communications. Systematic and analytical, he is required to
|
| 51 |
+
identify areas for improvements and perform customer quality analysis. He is also
|
| 52 |
+
expected to coordinate closely with internal and external stakeholders to implement
|
| 53 |
+
new processes.
|
| 54 |
+
sentences:
|
| 55 |
+
- The Senior Customer Service Specialist oversees the design and execution of strategies
|
| 56 |
+
aimed at enhancing customer service quality. This role involves evaluating current
|
| 57 |
+
service processes, initiating improvements, and tracking performance metrics.
|
| 58 |
+
With strong analytical skills, the Specialist identifies opportunities for service
|
| 59 |
+
enhancement and leads communication efforts related to change management. Collaboration
|
| 60 |
+
with various internal teams and external partners is essential to successfully
|
| 61 |
+
roll out new customer service initiatives.
|
| 62 |
+
- The Senior Marketing Officer is responsible for developing and executing marketing
|
| 63 |
+
campaigns, managing brand promotion activities, and conducting market research
|
| 64 |
+
to identify consumer trends. This role requires strong creative and communication
|
| 65 |
+
skills to engage target audiences and coordinate with advertising agencies. The
|
| 66 |
+
officer also monitors campaign effectiveness and adjusts strategies accordingly
|
| 67 |
+
to maximize market reach and customer engagement.
|
| 68 |
+
- The Associate Applications Support Engineer is tasked with supporting and maintaining
|
| 69 |
+
designated software applications, whether developed internally or sourced from
|
| 70 |
+
third parties. This role requires a thorough understanding of application functionality
|
| 71 |
+
and backend processes. The engineer collaborates closely with application development,
|
| 72 |
+
transition, and testing teams to troubleshoot and document application issues
|
| 73 |
+
effectively. Working within a team environment, the individual is skilled in application
|
| 74 |
+
development, monitoring tools, and relevant software platforms deployed by the
|
| 75 |
+
organisation. Strong analytical thinking and problem-solving abilities are essential,
|
| 76 |
+
along with excellent communication and interpersonal skills to address technical
|
| 77 |
+
challenges promptly.
|
| 78 |
+
- source_sentence: The Pre-Sales Consultant is responsible for providing pre-sales
|
| 79 |
+
technical expertise to the sales team and clients during the sales process. He/She
|
| 80 |
+
delivers presentations and technical demonstrations of the organisation's products
|
| 81 |
+
to prospective clients. He translates the client's business requirements into
|
| 82 |
+
technical specifications and requirements, and provides technical inputs for proposals,
|
| 83 |
+
tenders, bids and any relevant documents. He uses prescribed guidelines or policies
|
| 84 |
+
to analyse and solve problems. He works in a fast-paced and dynamic environment,
|
| 85 |
+
and travels frequently to clients' premises for technical sales pitches and meetings.
|
| 86 |
+
He is familiar with client relationship management and sales tools. He possesses
|
| 87 |
+
deep product and technical knowledge, and is knowledgeable of the trends, developments
|
| 88 |
+
and challenges of the industry domain. The Pre-Sales Consultant displays effective
|
| 89 |
+
listening skills and is inquisitive in nature. He possesses deep technical and
|
| 90 |
+
domain knowledge, pays attention to detail, and has strong analytical and problem-solving
|
| 91 |
+
capabilities. He has a service-oriented personality and is a team player who works
|
| 92 |
+
towards developing solutions collaboratively.
|
| 93 |
+
sentences:
|
| 94 |
+
- The Sales Operations Analyst manages internal sales processes and data analytics
|
| 95 |
+
to optimize the effectiveness of the sales team within the technology sector.
|
| 96 |
+
This role focuses on generating sales reports, analyzing performance metrics,
|
| 97 |
+
and streamlining customer data workflows rather than direct client engagement.
|
| 98 |
+
The analyst supports strategic decision-making by providing actionable insights
|
| 99 |
+
through CRM systems and sales software but is not involved in technical demonstrations
|
| 100 |
+
or client-facing presentations. Working primarily from the office, this position
|
| 101 |
+
requires proficiency in data analysis, reporting tools, and a thorough understanding
|
| 102 |
+
of sales operations.
|
| 103 |
+
- The Producer is responsible for managing the full production process from initial
|
| 104 |
+
concept development through pre-production and the actual production phase. They
|
| 105 |
+
coordinate a team that handles both creative and logistical aspects to guarantee
|
| 106 |
+
seamless production workflows. The Producer collaborates with various internal
|
| 107 |
+
departments and external partners to recruit technical and creative personnel,
|
| 108 |
+
schedule timelines, and organize production activities. Additionally, they engage
|
| 109 |
+
with regulatory bodies to obtain necessary legal and compliance clearances. Managing
|
| 110 |
+
the production budget, including pricing strategies and securing financial support
|
| 111 |
+
through sponsorships and funding, also falls under their purview. They furthermore
|
| 112 |
+
create and implement marketing plans to effectively promote the production.
|
| 113 |
+
- The Pre-Sales Consultant provides expert technical support throughout the sales
|
| 114 |
+
cycle, collaborating closely with the sales team and prospective clients. This
|
| 115 |
+
role involves delivering detailed product demonstrations and presentations that
|
| 116 |
+
align with client needs. The consultant interprets business requirements into
|
| 117 |
+
precise technical specifications, contributing key insights to proposals, bids,
|
| 118 |
+
and tender documents. Operating within established policies, the consultant addresses
|
| 119 |
+
technical challenges effectively. Frequent travel to client sites for sales engagements
|
| 120 |
+
is expected in this fast-moving environment. Proficiency in client relationship
|
| 121 |
+
management tools and sales platforms is essential. The consultant maintains up-to-date
|
| 122 |
+
knowledge of product features and industry trends, demonstrating strong listening
|
| 123 |
+
skills, curiosity, and meticulous attention to detail. A collaborative, service-minded
|
| 124 |
+
approach and advanced analytical abilities are vital for success in this position.
|
| 125 |
+
- source_sentence: The Head of Port Business Development/Operations works closely
|
| 126 |
+
with management and external stakeholders in strategic planning to grow the business
|
| 127 |
+
portfolio and enterprise value and formulating the organisation's risk management
|
| 128 |
+
strategy and mitigation measures. He/She keeps abreast of the latest developments
|
| 129 |
+
in the organisation and industry and inspires a culture of process improvement
|
| 130 |
+
to leverage on new technologies to enhance port productivity and efficiency. He
|
| 131 |
+
also drives service delivery excellence and adds value to customers and the business.
|
| 132 |
+
He is a well-organised person with excellent planning skills, and mentors a team.
|
| 133 |
+
sentences:
|
| 134 |
+
- The Senior Maritime Compliance Officer oversees regulatory adherence and conducts
|
| 135 |
+
audits within the shipping industry, ensuring vessels meet international safety
|
| 136 |
+
and environmental standards. This role requires deep knowledge of maritime laws
|
| 137 |
+
and regulations, proficiency in compliance reporting, and coordination with regulatory
|
| 138 |
+
bodies. The officer develops compliance policies, conducts risk assessments, and
|
| 139 |
+
provides training to operational teams. While the position involves leadership
|
| 140 |
+
duties, it focuses primarily on regulatory enforcement rather than business development
|
| 141 |
+
or operational efficiency.
|
| 142 |
+
- The R&D Chemist conducts intricate laboratory experiments, analyses, and evaluations
|
| 143 |
+
to advance Research and Development initiatives. This role involves formulating
|
| 144 |
+
research goals and proposals aimed at pioneering new products and supporting projects
|
| 145 |
+
involving novel technologies, materials, or ingredients. The chemist contributes
|
| 146 |
+
to the creation of innovative formulations and solutions, ensuring adherence to
|
| 147 |
+
Intellectual Property rights and patent laws, while establishing product specifications
|
| 148 |
+
for scaling up production processes. Detailed documentation of research activities
|
| 149 |
+
is maintained for both internal stakeholders and external clients, with insights
|
| 150 |
+
provided to enhance product development based on experimental findings. The chemist
|
| 151 |
+
also oversees the Management of Hazardous Chemicals Programme in the lab, aligning
|
| 152 |
+
with Workplace Safety and Health and Environmental Management System standards.
|
| 153 |
+
Collaborating within a multidisciplinary laboratory team and partnering with application
|
| 154 |
+
and technical service groups, the R&D Chemist demonstrates strong teamwork, creativity,
|
| 155 |
+
and problem-solving abilities, manages multiple projects autonomously, and excels
|
| 156 |
+
in technical communication and presentations.
|
| 157 |
+
- The Head of Port Business Development and Operations collaborates with senior
|
| 158 |
+
management and external partners to devise strategic initiatives aimed at expanding
|
| 159 |
+
the business portfolio and increasing enterprise value. This role involves developing
|
| 160 |
+
risk management frameworks and mitigation plans for the organisation. Staying
|
| 161 |
+
informed on industry trends and organisational changes, the incumbent fosters
|
| 162 |
+
a culture of continuous process improvement by integrating innovative technologies
|
| 163 |
+
to boost port productivity and operational efficiency. Additionally, they champion
|
| 164 |
+
service excellence to enhance customer satisfaction and business outcomes. The
|
| 165 |
+
position demands strong organisational capabilities, advanced planning skills,
|
| 166 |
+
and includes mentoring responsibilities for a team.
|
| 167 |
+
- source_sentence: The Financial Planner/Insurance Agent/Bancassurance Specialist
|
| 168 |
+
is responsible for developing and implementing financial plans to help customers
|
| 169 |
+
meet their objectives, and managing customer relationships. He/She understands
|
| 170 |
+
customer profiles and recommends suitable solutions to them. He is also in charge
|
| 171 |
+
of attracting his own customers through networking sessions, relationship building
|
| 172 |
+
and referrals. The Financial Planner/Insurance Agent/Bancassurance Specialist's
|
| 173 |
+
duties might require him to work on weekends and after office hours and may involve
|
| 174 |
+
travel to customers' locations. He has a friendly and outgoing nature and is able
|
| 175 |
+
to build rapport with customers easily in order to establish trust.
|
| 176 |
+
sentences:
|
| 177 |
+
- The Pharmacy Technician Executive supports pharmacists by preparing sterile and
|
| 178 |
+
non-sterile medications according to prescriptions and oversees quality assurance
|
| 179 |
+
processes and departmental improvement projects. This role ensures adherence to
|
| 180 |
+
medication safety protocols across diverse healthcare environments including hospitals,
|
| 181 |
+
outpatient clinics, polyclinics, and retail pharmacies. The incumbent is expected
|
| 182 |
+
to demonstrate initiative, attention to detail, and strong interpersonal, leadership,
|
| 183 |
+
and analytical abilities.
|
| 184 |
+
- The Financial Planner/Insurance Agent/Bancassurance Specialist designs and executes
|
| 185 |
+
tailored financial strategies to assist clients in achieving their financial goals
|
| 186 |
+
while maintaining strong client relationships. This role requires a thorough understanding
|
| 187 |
+
of client needs to propose appropriate financial products. The specialist actively
|
| 188 |
+
seeks new clients through networking, referrals, and relationship management.
|
| 189 |
+
Flexibility in working hours, including evenings, weekends, and occasional travel
|
| 190 |
+
to client sites, is essential. Strong interpersonal skills and an approachable
|
| 191 |
+
demeanor enable effective trust-building with customers.
|
| 192 |
+
- The Financial Analyst/Investment Consultant/Bancassurance Coordinator conducts
|
| 193 |
+
in-depth market research and analyzes investment portfolios to provide strategic
|
| 194 |
+
advice to corporate clients. This role focuses on evaluating financial data rather
|
| 195 |
+
than direct customer acquisition and requires collaboration with internal teams
|
| 196 |
+
rather than individual networking. Work hours are generally standard office hours
|
| 197 |
+
with minimal client site visits. The position demands analytical proficiency and
|
| 198 |
+
detailed reporting skills, with limited emphasis on personal rapport or direct
|
| 199 |
+
sales activities.
|
| 200 |
+
datasets:
|
| 201 |
+
- dnth/ssf-train-valid-v2
|
| 202 |
+
pipeline_tag: sentence-similarity
|
| 203 |
+
library_name: sentence-transformers
|
| 204 |
+
---
|
| 205 |
+
|
| 206 |
+
# SentenceTransformer based on nomic-ai/modernbert-embed-base
|
| 207 |
+
|
| 208 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) on the [ssf-train-valid-v2](https://huggingface.co/datasets/dnth/ssf-train-valid-v2) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 209 |
+
|
| 210 |
+
## Model Details
|
| 211 |
+
|
| 212 |
+
### Model Description
|
| 213 |
+
- **Model Type:** Sentence Transformer
|
| 214 |
+
- **Base model:** [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) <!-- at revision d556a88e332558790b210f7bdbe87da2fa94a8d8 -->
|
| 215 |
+
- **Maximum Sequence Length:** 8192 tokens
|
| 216 |
+
- **Output Dimensionality:** 768 dimensions
|
| 217 |
+
- **Similarity Function:** Cosine Similarity
|
| 218 |
+
- **Training Dataset:**
|
| 219 |
+
- [ssf-train-valid-v2](https://huggingface.co/datasets/dnth/ssf-train-valid-v2)
|
| 220 |
+
<!-- - **Language:** Unknown -->
|
| 221 |
+
<!-- - **License:** Unknown -->
|
| 222 |
+
|
| 223 |
+
### Model Sources
|
| 224 |
+
|
| 225 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 226 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 227 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 228 |
+
|
| 229 |
+
### Full Model Architecture
|
| 230 |
+
|
| 231 |
+
```
|
| 232 |
+
SentenceTransformer(
|
| 233 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
|
| 234 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 235 |
+
(2): Normalize()
|
| 236 |
+
)
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
## Usage
|
| 240 |
+
|
| 241 |
+
### Direct Usage (Sentence Transformers)
|
| 242 |
+
|
| 243 |
+
First install the Sentence Transformers library:
|
| 244 |
+
|
| 245 |
+
```bash
|
| 246 |
+
pip install -U sentence-transformers
|
| 247 |
+
```
|
| 248 |
+
|
| 249 |
+
Then you can load this model and run inference.
|
| 250 |
+
```python
|
| 251 |
+
from sentence_transformers import SentenceTransformer
|
| 252 |
+
|
| 253 |
+
# Download from the 🤗 Hub
|
| 254 |
+
model = SentenceTransformer("dnth/ssf-retriever-modernbert-embed-base-v2")
|
| 255 |
+
# Run inference
|
| 256 |
+
sentences = [
|
| 257 |
+
"The Financial Planner/Insurance Agent/Bancassurance Specialist is responsible for developing and implementing financial plans to help customers meet their objectives, and managing customer relationships. He/She understands customer profiles and recommends suitable solutions to them. He is also in charge of attracting his own customers through networking sessions, relationship building and referrals. The Financial Planner/Insurance Agent/Bancassurance Specialist's duties might require him to work on weekends and after office hours and may involve travel to customers' locations. He has a friendly and outgoing nature and is able to build rapport with customers easily in order to establish trust.",
|
| 258 |
+
'The Financial Planner/Insurance Agent/Bancassurance Specialist designs and executes tailored financial strategies to assist clients in achieving their financial goals while maintaining strong client relationships. This role requires a thorough understanding of client needs to propose appropriate financial products. The specialist actively seeks new clients through networking, referrals, and relationship management. Flexibility in working hours, including evenings, weekends, and occasional travel to client sites, is essential. Strong interpersonal skills and an approachable demeanor enable effective trust-building with customers.',
|
| 259 |
+
'The Financial Analyst/Investment Consultant/Bancassurance Coordinator conducts in-depth market research and analyzes investment portfolios to provide strategic advice to corporate clients. This role focuses on evaluating financial data rather than direct customer acquisition and requires collaboration with internal teams rather than individual networking. Work hours are generally standard office hours with minimal client site visits. The position demands analytical proficiency and detailed reporting skills, with limited emphasis on personal rapport or direct sales activities.',
|
| 260 |
+
]
|
| 261 |
+
embeddings = model.encode(sentences)
|
| 262 |
+
print(embeddings.shape)
|
| 263 |
+
# (3, 768)
|
| 264 |
+
|
| 265 |
+
# Get the similarity scores for the embeddings
|
| 266 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 267 |
+
print(similarities)
|
| 268 |
+
# tensor([[1.0000, 0.8627, 0.5380],
|
| 269 |
+
# [0.8627, 1.0000, 0.5945],
|
| 270 |
+
# [0.5380, 0.5945, 1.0000]])
|
| 271 |
+
```
|
| 272 |
+
|
| 273 |
+
<!--
|
| 274 |
+
### Direct Usage (Transformers)
|
| 275 |
+
|
| 276 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 277 |
+
|
| 278 |
+
</details>
|
| 279 |
+
-->
|
| 280 |
+
|
| 281 |
+
<!--
|
| 282 |
+
### Downstream Usage (Sentence Transformers)
|
| 283 |
+
|
| 284 |
+
You can finetune this model on your own dataset.
|
| 285 |
+
|
| 286 |
+
<details><summary>Click to expand</summary>
|
| 287 |
+
|
| 288 |
+
</details>
|
| 289 |
+
-->
|
| 290 |
+
|
| 291 |
+
<!--
|
| 292 |
+
### Out-of-Scope Use
|
| 293 |
+
|
| 294 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 295 |
+
-->
|
| 296 |
+
|
| 297 |
+
<!--
|
| 298 |
+
## Bias, Risks and Limitations
|
| 299 |
+
|
| 300 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 301 |
+
-->
|
| 302 |
+
|
| 303 |
+
<!--
|
| 304 |
+
### Recommendations
|
| 305 |
+
|
| 306 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 307 |
+
-->
|
| 308 |
+
|
| 309 |
+
## Training Details
|
| 310 |
+
|
| 311 |
+
### Training Dataset
|
| 312 |
+
|
| 313 |
+
#### ssf-train-valid-v2
|
| 314 |
+
|
| 315 |
+
* Dataset: [ssf-train-valid-v2](https://huggingface.co/datasets/dnth/ssf-train-valid-v2) at [fb19ed3](https://huggingface.co/datasets/dnth/ssf-train-valid-v2/tree/fb19ed3200e91badb0cb224e0debe909f1d7c2bd)
|
| 316 |
+
* Size: 3,016 training samples
|
| 317 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 318 |
+
* Approximate statistics based on the first 1000 samples:
|
| 319 |
+
| | anchor | positive | negative |
|
| 320 |
+
|:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 321 |
+
| type | string | string | string |
|
| 322 |
+
| details | <ul><li>min: 57 tokens</li><li>mean: 167.72 tokens</li><li>max: 403 tokens</li></ul> | <ul><li>min: 54 tokens</li><li>mean: 135.01 tokens</li><li>max: 250 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 100.13 tokens</li><li>max: 212 tokens</li></ul> |
|
| 323 |
+
* Samples:
|
| 324 |
+
| anchor | positive | negative |
|
| 325 |
+
|:-------|:---------|:---------|
|
| 326 |
+
| <code>The Senior Technician (Automatic Fare Collection) is responsible for preventive and corrective maintenance of Automatic Fare Collection (AFC) systems. His/Her duties include troubleshooting of faults, providing technical guidance and on-the-job coaching to his team, as well as supervising the work of contractors and external stakeholders in ensuring compliance to safety requirements and operating standards. He is required to carry out his duties in the depot, workshop and/or at various train stations during train operating hours. He is technically inclined and well-verse in various AFC systems and machines and builds good teamwork amongst his team to support maintenance activities.</code> | <code>The Senior Technician (Automatic Fare Collection) oversees both preventive and corrective maintenance tasks related to Automatic Fare Collection (AFC) systems. This role involves diagnosing system faults, offering expert technical advice, and mentoring team members on operational procedures. The technician also manages contractors and external partners, ensuring adherence to safety protocols and operational guidelines. Responsibilities are performed across depots, workshops, and multiple train stations during service hours. A solid technical background in AFC equipment and strong team leadership skills are essential to effectively support maintenance operations.</code> | <code>The Senior Technician (Automatic Fare Collection) is responsible for conducting routine inspections and repairs of automated ticket vending machines in retail environments. This role includes assisting customers with machine usage, coordinating with sales teams, and ensuring compliance with commercial service standards. Work is primarily carried out within shopping malls during business hours. The technician requires customer service skills and basic technical knowledge of vending equipment but is not involved in supervising maintenance teams or external contractors.</code> |
|
| 327 |
+
| <code>The Venue Operations Manager is responsible for overseeing the running of venue operations, including the logistics requirements. He/She works closely with event services department to ensure client requirements are fulfilled in compliance to local health and safety standards. He reviews event plans to ensure generation of maximum yield for organisation. Meticulous and resourceful, he possesses excellent problem-solving skills and is able to react quickly to deviations in the project plans. He is able to work in a flexible workweek, including weekends, evenings, and public holidays, and is comfortable working in both an indoor and outdoor environment depending on the nature and requirements of the events.</code> | <code>The Venue Operations Manager oversees all aspects of venue management, coordinating logistics and collaborating with the event services team to meet client expectations while adhering to local health and safety regulations. This role involves reviewing event strategies to optimize organizational revenue and demands strong problem-solving abilities to address unforeseen challenges promptly. The manager must be adaptable to a variable schedule, including weekends, evenings, and public holidays, and comfortable operating in diverse environments, both indoor and outdoor, based on event needs.</code> | <code>The Venue Marketing Manager is responsible for developing and executing promotional campaigns for the venue, working closely with the sales department to attract clients and increase bookings. They analyze market trends to maximize revenue opportunities and must be creative and communicative, addressing branding challenges as they arise. This role typically operates standard weekday hours and focuses primarily on indoor office environments rather than on-site event coordination.</code> |
|
| 328 |
+
| <code>The Liquefied Natural Gas Research Analyst supports the LNG Trading team in identifying opportunities for closing deals and increasing portfolio value. He/She conducts research using market and economic data for the preparation of market reports and analyses data regarding risks associated with trading deals. He engages clients on presentations on market insights and liaises with key stakeholders for the preparation of hedging activities. He works in a dynamic and fast-paced environment where he must provide accurate analyses and research material to support the closing of deals. He is a decisive, analytical and self-motivated individual who is comfortable with numbers and able to work under pressure.</code> | <code>The Liquefied Natural Gas Research Analyst collaborates closely with the LNG Trading team to discover profitable trading opportunities and enhance portfolio performance. This role involves gathering and analyzing market and economic data to produce comprehensive reports and assess the risks linked to trading transactions. The analyst presents market insights to clients and coordinates with stakeholders to facilitate hedging strategies. Operating in a fast-moving and challenging environment, the analyst must deliver precise research and data analyses to aid deal execution. Candidates should be analytical, proactive, confident with numerical data, and capable of working effectively under tight deadlines.</code> | <code>The Renewable Energy Project Coordinator manages the planning and execution of solar and wind energy initiatives within the energy sector. This position requires coordinating with multiple vendors and regulatory bodies, overseeing project timelines, and ensuring compliance with environmental standards. The coordinator communicates progress updates to stakeholders and resolves logistical challenges to keep projects on track. Strong organizational skills, experience in project management, and the ability to navigate regulatory frameworks are essential. This role focuses on operational delivery rather than market research or trading analysis.</code> |
|
| 329 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 330 |
+
```json
|
| 331 |
+
{
|
| 332 |
+
"scale": 20.0,
|
| 333 |
+
"similarity_fct": "cos_sim",
|
| 334 |
+
"gather_across_devices": false
|
| 335 |
+
}
|
| 336 |
+
```
|
| 337 |
+
|
| 338 |
+
### Evaluation Dataset
|
| 339 |
+
|
| 340 |
+
#### ssf-train-valid-v2
|
| 341 |
+
|
| 342 |
+
* Dataset: [ssf-train-valid-v2](https://huggingface.co/datasets/dnth/ssf-train-valid-v2) at [fb19ed3](https://huggingface.co/datasets/dnth/ssf-train-valid-v2/tree/fb19ed3200e91badb0cb224e0debe909f1d7c2bd)
|
| 343 |
+
* Size: 754 evaluation samples
|
| 344 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 345 |
+
* Approximate statistics based on the first 754 samples:
|
| 346 |
+
| | anchor | positive | negative |
|
| 347 |
+
|:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 348 |
+
| type | string | string | string |
|
| 349 |
+
| details | <ul><li>min: 58 tokens</li><li>mean: 166.89 tokens</li><li>max: 347 tokens</li></ul> | <ul><li>min: 58 tokens</li><li>mean: 136.23 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 99.02 tokens</li><li>max: 202 tokens</li></ul> |
|
| 350 |
+
* Samples:
|
| 351 |
+
| anchor | positive | negative |
|
| 352 |
+
|:-------|:---------|:---------|
|
| 353 |
+
| <code>The Head of Client Implementation is responsible for driving strategic relationship building activities and ensuring client implementation plans are carried out at standards that are satisfactory to clients. He/She oversees the team's compliance with implementation policies and regulations when executing tasks during implementation, often collaborating with relevant internal teams for the successful design and delivery of client implementation plans. The Head of Client Implementation possesses strong leadership, interpersonal and teamwork skills. His work environment is highly diverse and evolves based on clients' requirements and needs. He is flexible to change and has quick problem-solving skills.</code> | <code>The Head of Client Implementation leads the strategic development of client relationships and ensures that implementation projects meet client expectations and quality standards. This role supervises the team's adherence to implementation guidelines and regulatory requirements, coordinating closely with internal departments to design and deliver effective client onboarding plans. The Head of Client Implementation demonstrates strong leadership, excellent communication, and teamwork abilities. Working in a dynamic and multicultural environment, this individual adapts swiftly to changing client demands and efficiently resolves challenges.</code> | <code>The Project Manager in Client Services focuses on managing project timelines and resource allocation within the client support division. Instead of overseeing implementation strategies, this role emphasizes coordinating operational activities and ensuring service delivery efficiency. The Project Manager requires strong organizational and planning skills and works under a structured reporting hierarchy, primarily handling routine client service requests rather than strategic relationship management.</code> |
|
| 354 |
+
| <code>The Set Designer executes the realisation of the overall visual aspects of the production by conceptualising the set design or locations for film, video or a concept based on the script and the overall visual concept. He/She is responsible for studying scripts and interpreting the descriptions of locations, creating sketches and drawings, translating these into technical drawings and models for sets to be built or locations to be identified and prepared. He lays out a comprehensive set of requirements for the materials needed for set design. He oversees the right look or feel for a production and ensure that other areas of technical production such as lighting or sound recording are aligned with the set. He also identifies and commissions set construction vendors. He prepares cost estimates for set design and construction and manages project schedule of his team. He oversees the construction of set and ensures that the sets evoke the intended style, mood and theme. He/She may specialis...</code> | <code>The Set Designer is responsible for bringing the visual concept of a production to life by designing and coordinating the physical environment for film, video, or other visual media based on the script and overall artistic direction. This role involves analyzing scripts to interpret location and set requirements, producing sketches and detailed technical drawings, and developing models for set construction or location preparation. The Set Designer specifies material needs, supervises the aesthetic alignment of the set with lighting and sound departments, and manages relationships with construction vendors. They prepare cost estimates, oversee project timelines, and ensure sets reflect the intended mood, style, and theme. Specialization in set decoration or prop management may be required, with responsibilities including the artistic selection and upkeep of props. The role demands creativity, strong coordination with other technical teams, proficiency in design techniques and fabricatio...</code> | <code>The Set Designer manages the logistical planning and operational execution of event setups, coordinating venue layouts and technical support for conferences, exhibitions, and corporate functions. Responsible for assessing client requirements, preparing floor plans, and directing the installation of audio-visual equipment and staging, the Set Designer ensures all elements align with event objectives and branding guidelines. They oversee vendor</code> |
|
| 355 |
+
| <code>The Senior Producer/Producer - News is responsible for managing the daily news agenda, overall news content and flow of a newscast. He/She has an overview of the agendas being pursued and looks out for newsworthy stories that will attract and engage target audience. He manages the resourcing of news content and is responsible for assigning news stories to internal teams. He is also required to edit scripts and reports that are submitted for the newscast and ensure that editorial guidelines and policies set by the organisation are adhered to. He is in charge of maintaining the newscast's presence and image on social media platforms and monitors the competitors' activities and trends. In addition, he collaborates with the production teams to ensure continuity of content across different programmes throughout the day. He typically works in a newsroom but may be assigned field activities at times. He should be able to adapt quickly to changes and perform well in a dynamic environment. He s...</code> | <code>The Senior Producer/Producer - News oversees the daily planning and execution of news broadcasts, ensuring the seamless flow and compelling content throughout the newscast. This role involves identifying and prioritizing news stories that resonate with the target audience while managing the allocation of editorial resources across teams. The incumbent reviews and refines scripts and reports to guarantee compliance with the organization’s editorial policies. Additionally, this position maintains the newscast’s brand presence on social media channels and keeps abreast of competitor activities and industry trends. Collaboration with production units is essential to maintain content consistency across multiple programs during the broadcast day. Operating primarily within a newsroom environment, the Senior Producer/Producer must be adaptable to fast-paced changes and possess solid knowledge of both local and global current events. Experience with production technology such as cameras, audio...</code> | <code>The Marketing Coordinator - News Media is responsible for planning and executing marketing campaigns to promote news programming across various digital platforms. This role focuses on developing brand awareness strategies, managing advertising budgets, and coordinating with external agencies for content creation and distribution. The incumbent monitors market trends and audience analytics to optimize campaign performance but does not</code> |
|
| 356 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 357 |
+
```json
|
| 358 |
+
{
|
| 359 |
+
"scale": 20.0,
|
| 360 |
+
"similarity_fct": "cos_sim",
|
| 361 |
+
"gather_across_devices": false
|
| 362 |
+
}
|
| 363 |
+
```
|
| 364 |
+
|
| 365 |
+
### Training Hyperparameters
|
| 366 |
+
#### Non-Default Hyperparameters
|
| 367 |
+
|
| 368 |
+
- `eval_strategy`: epoch
|
| 369 |
+
- `per_device_train_batch_size`: 32
|
| 370 |
+
- `per_device_eval_batch_size`: 16
|
| 371 |
+
- `gradient_accumulation_steps`: 16
|
| 372 |
+
- `learning_rate`: 2e-05
|
| 373 |
+
- `num_train_epochs`: 5
|
| 374 |
+
- `lr_scheduler_type`: cosine
|
| 375 |
+
- `warmup_ratio`: 0.1
|
| 376 |
+
- `bf16`: True
|
| 377 |
+
- `tf32`: False
|
| 378 |
+
- `load_best_model_at_end`: True
|
| 379 |
+
- `batch_sampler`: no_duplicates
|
| 380 |
+
|
| 381 |
+
#### All Hyperparameters
|
| 382 |
+
<details><summary>Click to expand</summary>
|
| 383 |
+
|
| 384 |
+
- `overwrite_output_dir`: False
|
| 385 |
+
- `do_predict`: False
|
| 386 |
+
- `eval_strategy`: epoch
|
| 387 |
+
- `prediction_loss_only`: True
|
| 388 |
+
- `per_device_train_batch_size`: 32
|
| 389 |
+
- `per_device_eval_batch_size`: 16
|
| 390 |
+
- `per_gpu_train_batch_size`: None
|
| 391 |
+
- `per_gpu_eval_batch_size`: None
|
| 392 |
+
- `gradient_accumulation_steps`: 16
|
| 393 |
+
- `eval_accumulation_steps`: None
|
| 394 |
+
- `torch_empty_cache_steps`: None
|
| 395 |
+
- `learning_rate`: 2e-05
|
| 396 |
+
- `weight_decay`: 0.0
|
| 397 |
+
- `adam_beta1`: 0.9
|
| 398 |
+
- `adam_beta2`: 0.999
|
| 399 |
+
- `adam_epsilon`: 1e-08
|
| 400 |
+
- `max_grad_norm`: 1.0
|
| 401 |
+
- `num_train_epochs`: 5
|
| 402 |
+
- `max_steps`: -1
|
| 403 |
+
- `lr_scheduler_type`: cosine
|
| 404 |
+
- `lr_scheduler_kwargs`: {}
|
| 405 |
+
- `warmup_ratio`: 0.1
|
| 406 |
+
- `warmup_steps`: 0
|
| 407 |
+
- `log_level`: passive
|
| 408 |
+
- `log_level_replica`: warning
|
| 409 |
+
- `log_on_each_node`: True
|
| 410 |
+
- `logging_nan_inf_filter`: True
|
| 411 |
+
- `save_safetensors`: True
|
| 412 |
+
- `save_on_each_node`: False
|
| 413 |
+
- `save_only_model`: False
|
| 414 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 415 |
+
- `no_cuda`: False
|
| 416 |
+
- `use_cpu`: False
|
| 417 |
+
- `use_mps_device`: False
|
| 418 |
+
- `seed`: 42
|
| 419 |
+
- `data_seed`: None
|
| 420 |
+
- `jit_mode_eval`: False
|
| 421 |
+
- `use_ipex`: False
|
| 422 |
+
- `bf16`: True
|
| 423 |
+
- `fp16`: False
|
| 424 |
+
- `fp16_opt_level`: O1
|
| 425 |
+
- `half_precision_backend`: auto
|
| 426 |
+
- `bf16_full_eval`: False
|
| 427 |
+
- `fp16_full_eval`: False
|
| 428 |
+
- `tf32`: False
|
| 429 |
+
- `local_rank`: 0
|
| 430 |
+
- `ddp_backend`: None
|
| 431 |
+
- `tpu_num_cores`: None
|
| 432 |
+
- `tpu_metrics_debug`: False
|
| 433 |
+
- `debug`: []
|
| 434 |
+
- `dataloader_drop_last`: False
|
| 435 |
+
- `dataloader_num_workers`: 0
|
| 436 |
+
- `dataloader_prefetch_factor`: None
|
| 437 |
+
- `past_index`: -1
|
| 438 |
+
- `disable_tqdm`: False
|
| 439 |
+
- `remove_unused_columns`: True
|
| 440 |
+
- `label_names`: None
|
| 441 |
+
- `load_best_model_at_end`: True
|
| 442 |
+
- `ignore_data_skip`: False
|
| 443 |
+
- `fsdp`: []
|
| 444 |
+
- `fsdp_min_num_params`: 0
|
| 445 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 446 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 447 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 448 |
+
- `deepspeed`: None
|
| 449 |
+
- `label_smoothing_factor`: 0.0
|
| 450 |
+
- `optim`: adamw_torch_fused
|
| 451 |
+
- `optim_args`: None
|
| 452 |
+
- `adafactor`: False
|
| 453 |
+
- `group_by_length`: False
|
| 454 |
+
- `length_column_name`: length
|
| 455 |
+
- `ddp_find_unused_parameters`: None
|
| 456 |
+
- `ddp_bucket_cap_mb`: None
|
| 457 |
+
- `ddp_broadcast_buffers`: False
|
| 458 |
+
- `dataloader_pin_memory`: True
|
| 459 |
+
- `dataloader_persistent_workers`: False
|
| 460 |
+
- `skip_memory_metrics`: True
|
| 461 |
+
- `use_legacy_prediction_loop`: False
|
| 462 |
+
- `push_to_hub`: False
|
| 463 |
+
- `resume_from_checkpoint`: None
|
| 464 |
+
- `hub_model_id`: None
|
| 465 |
+
- `hub_strategy`: every_save
|
| 466 |
+
- `hub_private_repo`: None
|
| 467 |
+
- `hub_always_push`: False
|
| 468 |
+
- `hub_revision`: None
|
| 469 |
+
- `gradient_checkpointing`: False
|
| 470 |
+
- `gradient_checkpointing_kwargs`: None
|
| 471 |
+
- `include_inputs_for_metrics`: False
|
| 472 |
+
- `include_for_metrics`: []
|
| 473 |
+
- `eval_do_concat_batches`: True
|
| 474 |
+
- `fp16_backend`: auto
|
| 475 |
+
- `push_to_hub_model_id`: None
|
| 476 |
+
- `push_to_hub_organization`: None
|
| 477 |
+
- `mp_parameters`:
|
| 478 |
+
- `auto_find_batch_size`: False
|
| 479 |
+
- `full_determinism`: False
|
| 480 |
+
- `torchdynamo`: None
|
| 481 |
+
- `ray_scope`: last
|
| 482 |
+
- `ddp_timeout`: 1800
|
| 483 |
+
- `torch_compile`: False
|
| 484 |
+
- `torch_compile_backend`: None
|
| 485 |
+
- `torch_compile_mode`: None
|
| 486 |
+
- `include_tokens_per_second`: False
|
| 487 |
+
- `include_num_input_tokens_seen`: False
|
| 488 |
+
- `neftune_noise_alpha`: None
|
| 489 |
+
- `optim_target_modules`: None
|
| 490 |
+
- `batch_eval_metrics`: False
|
| 491 |
+
- `eval_on_start`: False
|
| 492 |
+
- `use_liger_kernel`: False
|
| 493 |
+
- `liger_kernel_config`: None
|
| 494 |
+
- `eval_use_gather_object`: False
|
| 495 |
+
- `average_tokens_across_devices`: False
|
| 496 |
+
- `prompts`: None
|
| 497 |
+
- `batch_sampler`: no_duplicates
|
| 498 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 499 |
+
- `router_mapping`: {}
|
| 500 |
+
- `learning_rate_mapping`: {}
|
| 501 |
+
|
| 502 |
+
</details>
|
| 503 |
+
|
| 504 |
+
### Training Logs
|
| 505 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
| 506 |
+
|:-------:|:------:|:-------------:|:---------------:|
|
| 507 |
+
| 1.0 | 6 | 0.1487 | 0.0112 |
|
| 508 |
+
| 2.0 | 12 | 0.0095 | 0.0037 |
|
| 509 |
+
| 3.0 | 18 | 0.0047 | 0.0026 |
|
| 510 |
+
| 4.0 | 24 | 0.0041 | 0.0023 |
|
| 511 |
+
| **5.0** | **30** | **0.0032** | **0.0022** |
|
| 512 |
+
|
| 513 |
+
* The bold row denotes the saved checkpoint.
|
| 514 |
+
|
| 515 |
+
### Framework Versions
|
| 516 |
+
- Python: 3.12.8
|
| 517 |
+
- Sentence Transformers: 5.1.0
|
| 518 |
+
- Transformers: 4.55.0
|
| 519 |
+
- PyTorch: 2.8.0+cu128
|
| 520 |
+
- Accelerate: 1.10.0
|
| 521 |
+
- Datasets: 4.0.0
|
| 522 |
+
- Tokenizers: 0.21.4
|
| 523 |
+
|
| 524 |
+
## Citation
|
| 525 |
+
|
| 526 |
+
### BibTeX
|
| 527 |
+
|
| 528 |
+
#### Sentence Transformers
|
| 529 |
+
```bibtex
|
| 530 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 531 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 532 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 533 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 534 |
+
month = "11",
|
| 535 |
+
year = "2019",
|
| 536 |
+
publisher = "Association for Computational Linguistics",
|
| 537 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 538 |
+
}
|
| 539 |
+
```
|
| 540 |
+
|
| 541 |
+
#### MultipleNegativesRankingLoss
|
| 542 |
+
```bibtex
|
| 543 |
+
@misc{henderson2017efficient,
|
| 544 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 545 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 546 |
+
year={2017},
|
| 547 |
+
eprint={1705.00652},
|
| 548 |
+
archivePrefix={arXiv},
|
| 549 |
+
primaryClass={cs.CL}
|
| 550 |
+
}
|
| 551 |
+
```
|
| 552 |
+
|
| 553 |
+
<!--
|
| 554 |
+
## Glossary
|
| 555 |
+
|
| 556 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 557 |
+
-->
|
| 558 |
+
|
| 559 |
+
<!--
|
| 560 |
+
## Model Card Authors
|
| 561 |
+
|
| 562 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 563 |
+
-->
|
| 564 |
+
|
| 565 |
+
<!--
|
| 566 |
+
## Model Card Contact
|
| 567 |
+
|
| 568 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 569 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"ModernBertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 50281,
|
| 8 |
+
"classifier_activation": "gelu",
|
| 9 |
+
"classifier_bias": false,
|
| 10 |
+
"classifier_dropout": 0.0,
|
| 11 |
+
"classifier_pooling": "mean",
|
| 12 |
+
"cls_token_id": 50281,
|
| 13 |
+
"decoder_bias": true,
|
| 14 |
+
"deterministic_flash_attn": false,
|
| 15 |
+
"embedding_dropout": 0.0,
|
| 16 |
+
"eos_token_id": 50282,
|
| 17 |
+
"global_attn_every_n_layers": 3,
|
| 18 |
+
"global_rope_theta": 160000.0,
|
| 19 |
+
"gradient_checkpointing": false,
|
| 20 |
+
"hidden_activation": "gelu",
|
| 21 |
+
"hidden_size": 768,
|
| 22 |
+
"initializer_cutoff_factor": 2.0,
|
| 23 |
+
"initializer_range": 0.02,
|
| 24 |
+
"intermediate_size": 1152,
|
| 25 |
+
"layer_norm_eps": 1e-05,
|
| 26 |
+
"local_attention": 128,
|
| 27 |
+
"local_rope_theta": 10000.0,
|
| 28 |
+
"max_position_embeddings": 8192,
|
| 29 |
+
"mlp_bias": false,
|
| 30 |
+
"mlp_dropout": 0.0,
|
| 31 |
+
"model_type": "modernbert",
|
| 32 |
+
"norm_bias": false,
|
| 33 |
+
"norm_eps": 1e-05,
|
| 34 |
+
"num_attention_heads": 12,
|
| 35 |
+
"num_hidden_layers": 22,
|
| 36 |
+
"pad_token_id": 50283,
|
| 37 |
+
"position_embedding_type": "absolute",
|
| 38 |
+
"repad_logits_with_grad": false,
|
| 39 |
+
"sep_token_id": 50282,
|
| 40 |
+
"sparse_pred_ignore_index": -100,
|
| 41 |
+
"sparse_prediction": false,
|
| 42 |
+
"torch_dtype": "float32",
|
| 43 |
+
"transformers_version": "4.55.0",
|
| 44 |
+
"vocab_size": 50368
|
| 45 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.1.0",
|
| 4 |
+
"transformers": "4.55.0",
|
| 5 |
+
"pytorch": "2.8.0+cu128"
|
| 6 |
+
},
|
| 7 |
+
"prompts": {
|
| 8 |
+
"query": "",
|
| 9 |
+
"document": ""
|
| 10 |
+
},
|
| 11 |
+
"default_prompt_name": null,
|
| 12 |
+
"similarity_fn_name": "cosine",
|
| 13 |
+
"model_type": "SentenceTransformer"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ac0bd0f9732c50703ab44f7523255a5f3af314abc9c63b0e3506f1d0c33e206
|
| 3 |
+
size 596070136
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 8192,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": true,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "|||IP_ADDRESS|||",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": true,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": false
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<|padding|>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"50254": {
|
| 20 |
+
"content": " ",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": true,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": false
|
| 26 |
+
},
|
| 27 |
+
"50255": {
|
| 28 |
+
"content": " ",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": false
|
| 34 |
+
},
|
| 35 |
+
"50256": {
|
| 36 |
+
"content": " ",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": true,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": false
|
| 42 |
+
},
|
| 43 |
+
"50257": {
|
| 44 |
+
"content": " ",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": true,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": false
|
| 50 |
+
},
|
| 51 |
+
"50258": {
|
| 52 |
+
"content": " ",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": true,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": false
|
| 58 |
+
},
|
| 59 |
+
"50259": {
|
| 60 |
+
"content": " ",
|
| 61 |
+
"lstrip": false,
|
| 62 |
+
"normalized": true,
|
| 63 |
+
"rstrip": false,
|
| 64 |
+
"single_word": false,
|
| 65 |
+
"special": false
|
| 66 |
+
},
|
| 67 |
+
"50260": {
|
| 68 |
+
"content": " ",
|
| 69 |
+
"lstrip": false,
|
| 70 |
+
"normalized": true,
|
| 71 |
+
"rstrip": false,
|
| 72 |
+
"single_word": false,
|
| 73 |
+
"special": false
|
| 74 |
+
},
|
| 75 |
+
"50261": {
|
| 76 |
+
"content": " ",
|
| 77 |
+
"lstrip": false,
|
| 78 |
+
"normalized": true,
|
| 79 |
+
"rstrip": false,
|
| 80 |
+
"single_word": false,
|
| 81 |
+
"special": false
|
| 82 |
+
},
|
| 83 |
+
"50262": {
|
| 84 |
+
"content": " ",
|
| 85 |
+
"lstrip": false,
|
| 86 |
+
"normalized": true,
|
| 87 |
+
"rstrip": false,
|
| 88 |
+
"single_word": false,
|
| 89 |
+
"special": false
|
| 90 |
+
},
|
| 91 |
+
"50263": {
|
| 92 |
+
"content": " ",
|
| 93 |
+
"lstrip": false,
|
| 94 |
+
"normalized": true,
|
| 95 |
+
"rstrip": false,
|
| 96 |
+
"single_word": false,
|
| 97 |
+
"special": false
|
| 98 |
+
},
|
| 99 |
+
"50264": {
|
| 100 |
+
"content": " ",
|
| 101 |
+
"lstrip": false,
|
| 102 |
+
"normalized": true,
|
| 103 |
+
"rstrip": false,
|
| 104 |
+
"single_word": false,
|
| 105 |
+
"special": false
|
| 106 |
+
},
|
| 107 |
+
"50265": {
|
| 108 |
+
"content": " ",
|
| 109 |
+
"lstrip": false,
|
| 110 |
+
"normalized": true,
|
| 111 |
+
"rstrip": false,
|
| 112 |
+
"single_word": false,
|
| 113 |
+
"special": false
|
| 114 |
+
},
|
| 115 |
+
"50266": {
|
| 116 |
+
"content": " ",
|
| 117 |
+
"lstrip": false,
|
| 118 |
+
"normalized": true,
|
| 119 |
+
"rstrip": false,
|
| 120 |
+
"single_word": false,
|
| 121 |
+
"special": false
|
| 122 |
+
},
|
| 123 |
+
"50267": {
|
| 124 |
+
"content": " ",
|
| 125 |
+
"lstrip": false,
|
| 126 |
+
"normalized": true,
|
| 127 |
+
"rstrip": false,
|
| 128 |
+
"single_word": false,
|
| 129 |
+
"special": false
|
| 130 |
+
},
|
| 131 |
+
"50268": {
|
| 132 |
+
"content": " ",
|
| 133 |
+
"lstrip": false,
|
| 134 |
+
"normalized": true,
|
| 135 |
+
"rstrip": false,
|
| 136 |
+
"single_word": false,
|
| 137 |
+
"special": false
|
| 138 |
+
},
|
| 139 |
+
"50269": {
|
| 140 |
+
"content": " ",
|
| 141 |
+
"lstrip": false,
|
| 142 |
+
"normalized": true,
|
| 143 |
+
"rstrip": false,
|
| 144 |
+
"single_word": false,
|
| 145 |
+
"special": false
|
| 146 |
+
},
|
| 147 |
+
"50270": {
|
| 148 |
+
"content": " ",
|
| 149 |
+
"lstrip": false,
|
| 150 |
+
"normalized": true,
|
| 151 |
+
"rstrip": false,
|
| 152 |
+
"single_word": false,
|
| 153 |
+
"special": false
|
| 154 |
+
},
|
| 155 |
+
"50271": {
|
| 156 |
+
"content": " ",
|
| 157 |
+
"lstrip": false,
|
| 158 |
+
"normalized": true,
|
| 159 |
+
"rstrip": false,
|
| 160 |
+
"single_word": false,
|
| 161 |
+
"special": false
|
| 162 |
+
},
|
| 163 |
+
"50272": {
|
| 164 |
+
"content": " ",
|
| 165 |
+
"lstrip": false,
|
| 166 |
+
"normalized": true,
|
| 167 |
+
"rstrip": false,
|
| 168 |
+
"single_word": false,
|
| 169 |
+
"special": false
|
| 170 |
+
},
|
| 171 |
+
"50273": {
|
| 172 |
+
"content": " ",
|
| 173 |
+
"lstrip": false,
|
| 174 |
+
"normalized": true,
|
| 175 |
+
"rstrip": false,
|
| 176 |
+
"single_word": false,
|
| 177 |
+
"special": false
|
| 178 |
+
},
|
| 179 |
+
"50274": {
|
| 180 |
+
"content": " ",
|
| 181 |
+
"lstrip": false,
|
| 182 |
+
"normalized": true,
|
| 183 |
+
"rstrip": false,
|
| 184 |
+
"single_word": false,
|
| 185 |
+
"special": false
|
| 186 |
+
},
|
| 187 |
+
"50275": {
|
| 188 |
+
"content": " ",
|
| 189 |
+
"lstrip": false,
|
| 190 |
+
"normalized": true,
|
| 191 |
+
"rstrip": false,
|
| 192 |
+
"single_word": false,
|
| 193 |
+
"special": false
|
| 194 |
+
},
|
| 195 |
+
"50276": {
|
| 196 |
+
"content": " ",
|
| 197 |
+
"lstrip": false,
|
| 198 |
+
"normalized": true,
|
| 199 |
+
"rstrip": false,
|
| 200 |
+
"single_word": false,
|
| 201 |
+
"special": false
|
| 202 |
+
},
|
| 203 |
+
"50277": {
|
| 204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
| 205 |
+
"lstrip": false,
|
| 206 |
+
"normalized": true,
|
| 207 |
+
"rstrip": false,
|
| 208 |
+
"single_word": false,
|
| 209 |
+
"special": false
|
| 210 |
+
},
|
| 211 |
+
"50278": {
|
| 212 |
+
"content": "|||PHONE_NUMBER|||",
|
| 213 |
+
"lstrip": false,
|
| 214 |
+
"normalized": true,
|
| 215 |
+
"rstrip": false,
|
| 216 |
+
"single_word": false,
|
| 217 |
+
"special": false
|
| 218 |
+
},
|
| 219 |
+
"50279": {
|
| 220 |
+
"content": "<|endoftext|>",
|
| 221 |
+
"lstrip": false,
|
| 222 |
+
"normalized": false,
|
| 223 |
+
"rstrip": false,
|
| 224 |
+
"single_word": false,
|
| 225 |
+
"special": true
|
| 226 |
+
},
|
| 227 |
+
"50280": {
|
| 228 |
+
"content": "[UNK]",
|
| 229 |
+
"lstrip": false,
|
| 230 |
+
"normalized": false,
|
| 231 |
+
"rstrip": false,
|
| 232 |
+
"single_word": false,
|
| 233 |
+
"special": true
|
| 234 |
+
},
|
| 235 |
+
"50281": {
|
| 236 |
+
"content": "[CLS]",
|
| 237 |
+
"lstrip": false,
|
| 238 |
+
"normalized": false,
|
| 239 |
+
"rstrip": false,
|
| 240 |
+
"single_word": false,
|
| 241 |
+
"special": true
|
| 242 |
+
},
|
| 243 |
+
"50282": {
|
| 244 |
+
"content": "[SEP]",
|
| 245 |
+
"lstrip": false,
|
| 246 |
+
"normalized": false,
|
| 247 |
+
"rstrip": false,
|
| 248 |
+
"single_word": false,
|
| 249 |
+
"special": true
|
| 250 |
+
},
|
| 251 |
+
"50283": {
|
| 252 |
+
"content": "[PAD]",
|
| 253 |
+
"lstrip": false,
|
| 254 |
+
"normalized": false,
|
| 255 |
+
"rstrip": false,
|
| 256 |
+
"single_word": false,
|
| 257 |
+
"special": true
|
| 258 |
+
},
|
| 259 |
+
"50284": {
|
| 260 |
+
"content": "[MASK]",
|
| 261 |
+
"lstrip": true,
|
| 262 |
+
"normalized": false,
|
| 263 |
+
"rstrip": false,
|
| 264 |
+
"single_word": false,
|
| 265 |
+
"special": true
|
| 266 |
+
},
|
| 267 |
+
"50285": {
|
| 268 |
+
"content": "[unused0]",
|
| 269 |
+
"lstrip": false,
|
| 270 |
+
"normalized": true,
|
| 271 |
+
"rstrip": false,
|
| 272 |
+
"single_word": false,
|
| 273 |
+
"special": false
|
| 274 |
+
},
|
| 275 |
+
"50286": {
|
| 276 |
+
"content": "[unused1]",
|
| 277 |
+
"lstrip": false,
|
| 278 |
+
"normalized": true,
|
| 279 |
+
"rstrip": false,
|
| 280 |
+
"single_word": false,
|
| 281 |
+
"special": false
|
| 282 |
+
},
|
| 283 |
+
"50287": {
|
| 284 |
+
"content": "[unused2]",
|
| 285 |
+
"lstrip": false,
|
| 286 |
+
"normalized": true,
|
| 287 |
+
"rstrip": false,
|
| 288 |
+
"single_word": false,
|
| 289 |
+
"special": false
|
| 290 |
+
},
|
| 291 |
+
"50288": {
|
| 292 |
+
"content": "[unused3]",
|
| 293 |
+
"lstrip": false,
|
| 294 |
+
"normalized": true,
|
| 295 |
+
"rstrip": false,
|
| 296 |
+
"single_word": false,
|
| 297 |
+
"special": false
|
| 298 |
+
},
|
| 299 |
+
"50289": {
|
| 300 |
+
"content": "[unused4]",
|
| 301 |
+
"lstrip": false,
|
| 302 |
+
"normalized": true,
|
| 303 |
+
"rstrip": false,
|
| 304 |
+
"single_word": false,
|
| 305 |
+
"special": false
|
| 306 |
+
},
|
| 307 |
+
"50290": {
|
| 308 |
+
"content": "[unused5]",
|
| 309 |
+
"lstrip": false,
|
| 310 |
+
"normalized": true,
|
| 311 |
+
"rstrip": false,
|
| 312 |
+
"single_word": false,
|
| 313 |
+
"special": false
|
| 314 |
+
},
|
| 315 |
+
"50291": {
|
| 316 |
+
"content": "[unused6]",
|
| 317 |
+
"lstrip": false,
|
| 318 |
+
"normalized": true,
|
| 319 |
+
"rstrip": false,
|
| 320 |
+
"single_word": false,
|
| 321 |
+
"special": false
|
| 322 |
+
},
|
| 323 |
+
"50292": {
|
| 324 |
+
"content": "[unused7]",
|
| 325 |
+
"lstrip": false,
|
| 326 |
+
"normalized": true,
|
| 327 |
+
"rstrip": false,
|
| 328 |
+
"single_word": false,
|
| 329 |
+
"special": false
|
| 330 |
+
},
|
| 331 |
+
"50293": {
|
| 332 |
+
"content": "[unused8]",
|
| 333 |
+
"lstrip": false,
|
| 334 |
+
"normalized": true,
|
| 335 |
+
"rstrip": false,
|
| 336 |
+
"single_word": false,
|
| 337 |
+
"special": false
|
| 338 |
+
},
|
| 339 |
+
"50294": {
|
| 340 |
+
"content": "[unused9]",
|
| 341 |
+
"lstrip": false,
|
| 342 |
+
"normalized": true,
|
| 343 |
+
"rstrip": false,
|
| 344 |
+
"single_word": false,
|
| 345 |
+
"special": false
|
| 346 |
+
},
|
| 347 |
+
"50295": {
|
| 348 |
+
"content": "[unused10]",
|
| 349 |
+
"lstrip": false,
|
| 350 |
+
"normalized": true,
|
| 351 |
+
"rstrip": false,
|
| 352 |
+
"single_word": false,
|
| 353 |
+
"special": false
|
| 354 |
+
},
|
| 355 |
+
"50296": {
|
| 356 |
+
"content": "[unused11]",
|
| 357 |
+
"lstrip": false,
|
| 358 |
+
"normalized": true,
|
| 359 |
+
"rstrip": false,
|
| 360 |
+
"single_word": false,
|
| 361 |
+
"special": false
|
| 362 |
+
},
|
| 363 |
+
"50297": {
|
| 364 |
+
"content": "[unused12]",
|
| 365 |
+
"lstrip": false,
|
| 366 |
+
"normalized": true,
|
| 367 |
+
"rstrip": false,
|
| 368 |
+
"single_word": false,
|
| 369 |
+
"special": false
|
| 370 |
+
},
|
| 371 |
+
"50298": {
|
| 372 |
+
"content": "[unused13]",
|
| 373 |
+
"lstrip": false,
|
| 374 |
+
"normalized": true,
|
| 375 |
+
"rstrip": false,
|
| 376 |
+
"single_word": false,
|
| 377 |
+
"special": false
|
| 378 |
+
},
|
| 379 |
+
"50299": {
|
| 380 |
+
"content": "[unused14]",
|
| 381 |
+
"lstrip": false,
|
| 382 |
+
"normalized": true,
|
| 383 |
+
"rstrip": false,
|
| 384 |
+
"single_word": false,
|
| 385 |
+
"special": false
|
| 386 |
+
},
|
| 387 |
+
"50300": {
|
| 388 |
+
"content": "[unused15]",
|
| 389 |
+
"lstrip": false,
|
| 390 |
+
"normalized": true,
|
| 391 |
+
"rstrip": false,
|
| 392 |
+
"single_word": false,
|
| 393 |
+
"special": false
|
| 394 |
+
},
|
| 395 |
+
"50301": {
|
| 396 |
+
"content": "[unused16]",
|
| 397 |
+
"lstrip": false,
|
| 398 |
+
"normalized": true,
|
| 399 |
+
"rstrip": false,
|
| 400 |
+
"single_word": false,
|
| 401 |
+
"special": false
|
| 402 |
+
},
|
| 403 |
+
"50302": {
|
| 404 |
+
"content": "[unused17]",
|
| 405 |
+
"lstrip": false,
|
| 406 |
+
"normalized": true,
|
| 407 |
+
"rstrip": false,
|
| 408 |
+
"single_word": false,
|
| 409 |
+
"special": false
|
| 410 |
+
},
|
| 411 |
+
"50303": {
|
| 412 |
+
"content": "[unused18]",
|
| 413 |
+
"lstrip": false,
|
| 414 |
+
"normalized": true,
|
| 415 |
+
"rstrip": false,
|
| 416 |
+
"single_word": false,
|
| 417 |
+
"special": false
|
| 418 |
+
},
|
| 419 |
+
"50304": {
|
| 420 |
+
"content": "[unused19]",
|
| 421 |
+
"lstrip": false,
|
| 422 |
+
"normalized": true,
|
| 423 |
+
"rstrip": false,
|
| 424 |
+
"single_word": false,
|
| 425 |
+
"special": false
|
| 426 |
+
},
|
| 427 |
+
"50305": {
|
| 428 |
+
"content": "[unused20]",
|
| 429 |
+
"lstrip": false,
|
| 430 |
+
"normalized": true,
|
| 431 |
+
"rstrip": false,
|
| 432 |
+
"single_word": false,
|
| 433 |
+
"special": false
|
| 434 |
+
},
|
| 435 |
+
"50306": {
|
| 436 |
+
"content": "[unused21]",
|
| 437 |
+
"lstrip": false,
|
| 438 |
+
"normalized": true,
|
| 439 |
+
"rstrip": false,
|
| 440 |
+
"single_word": false,
|
| 441 |
+
"special": false
|
| 442 |
+
},
|
| 443 |
+
"50307": {
|
| 444 |
+
"content": "[unused22]",
|
| 445 |
+
"lstrip": false,
|
| 446 |
+
"normalized": true,
|
| 447 |
+
"rstrip": false,
|
| 448 |
+
"single_word": false,
|
| 449 |
+
"special": false
|
| 450 |
+
},
|
| 451 |
+
"50308": {
|
| 452 |
+
"content": "[unused23]",
|
| 453 |
+
"lstrip": false,
|
| 454 |
+
"normalized": true,
|
| 455 |
+
"rstrip": false,
|
| 456 |
+
"single_word": false,
|
| 457 |
+
"special": false
|
| 458 |
+
},
|
| 459 |
+
"50309": {
|
| 460 |
+
"content": "[unused24]",
|
| 461 |
+
"lstrip": false,
|
| 462 |
+
"normalized": true,
|
| 463 |
+
"rstrip": false,
|
| 464 |
+
"single_word": false,
|
| 465 |
+
"special": false
|
| 466 |
+
},
|
| 467 |
+
"50310": {
|
| 468 |
+
"content": "[unused25]",
|
| 469 |
+
"lstrip": false,
|
| 470 |
+
"normalized": true,
|
| 471 |
+
"rstrip": false,
|
| 472 |
+
"single_word": false,
|
| 473 |
+
"special": false
|
| 474 |
+
},
|
| 475 |
+
"50311": {
|
| 476 |
+
"content": "[unused26]",
|
| 477 |
+
"lstrip": false,
|
| 478 |
+
"normalized": true,
|
| 479 |
+
"rstrip": false,
|
| 480 |
+
"single_word": false,
|
| 481 |
+
"special": false
|
| 482 |
+
},
|
| 483 |
+
"50312": {
|
| 484 |
+
"content": "[unused27]",
|
| 485 |
+
"lstrip": false,
|
| 486 |
+
"normalized": true,
|
| 487 |
+
"rstrip": false,
|
| 488 |
+
"single_word": false,
|
| 489 |
+
"special": false
|
| 490 |
+
},
|
| 491 |
+
"50313": {
|
| 492 |
+
"content": "[unused28]",
|
| 493 |
+
"lstrip": false,
|
| 494 |
+
"normalized": true,
|
| 495 |
+
"rstrip": false,
|
| 496 |
+
"single_word": false,
|
| 497 |
+
"special": false
|
| 498 |
+
},
|
| 499 |
+
"50314": {
|
| 500 |
+
"content": "[unused29]",
|
| 501 |
+
"lstrip": false,
|
| 502 |
+
"normalized": true,
|
| 503 |
+
"rstrip": false,
|
| 504 |
+
"single_word": false,
|
| 505 |
+
"special": false
|
| 506 |
+
},
|
| 507 |
+
"50315": {
|
| 508 |
+
"content": "[unused30]",
|
| 509 |
+
"lstrip": false,
|
| 510 |
+
"normalized": true,
|
| 511 |
+
"rstrip": false,
|
| 512 |
+
"single_word": false,
|
| 513 |
+
"special": false
|
| 514 |
+
},
|
| 515 |
+
"50316": {
|
| 516 |
+
"content": "[unused31]",
|
| 517 |
+
"lstrip": false,
|
| 518 |
+
"normalized": true,
|
| 519 |
+
"rstrip": false,
|
| 520 |
+
"single_word": false,
|
| 521 |
+
"special": false
|
| 522 |
+
},
|
| 523 |
+
"50317": {
|
| 524 |
+
"content": "[unused32]",
|
| 525 |
+
"lstrip": false,
|
| 526 |
+
"normalized": true,
|
| 527 |
+
"rstrip": false,
|
| 528 |
+
"single_word": false,
|
| 529 |
+
"special": false
|
| 530 |
+
},
|
| 531 |
+
"50318": {
|
| 532 |
+
"content": "[unused33]",
|
| 533 |
+
"lstrip": false,
|
| 534 |
+
"normalized": true,
|
| 535 |
+
"rstrip": false,
|
| 536 |
+
"single_word": false,
|
| 537 |
+
"special": false
|
| 538 |
+
},
|
| 539 |
+
"50319": {
|
| 540 |
+
"content": "[unused34]",
|
| 541 |
+
"lstrip": false,
|
| 542 |
+
"normalized": true,
|
| 543 |
+
"rstrip": false,
|
| 544 |
+
"single_word": false,
|
| 545 |
+
"special": false
|
| 546 |
+
},
|
| 547 |
+
"50320": {
|
| 548 |
+
"content": "[unused35]",
|
| 549 |
+
"lstrip": false,
|
| 550 |
+
"normalized": true,
|
| 551 |
+
"rstrip": false,
|
| 552 |
+
"single_word": false,
|
| 553 |
+
"special": false
|
| 554 |
+
},
|
| 555 |
+
"50321": {
|
| 556 |
+
"content": "[unused36]",
|
| 557 |
+
"lstrip": false,
|
| 558 |
+
"normalized": true,
|
| 559 |
+
"rstrip": false,
|
| 560 |
+
"single_word": false,
|
| 561 |
+
"special": false
|
| 562 |
+
},
|
| 563 |
+
"50322": {
|
| 564 |
+
"content": "[unused37]",
|
| 565 |
+
"lstrip": false,
|
| 566 |
+
"normalized": true,
|
| 567 |
+
"rstrip": false,
|
| 568 |
+
"single_word": false,
|
| 569 |
+
"special": false
|
| 570 |
+
},
|
| 571 |
+
"50323": {
|
| 572 |
+
"content": "[unused38]",
|
| 573 |
+
"lstrip": false,
|
| 574 |
+
"normalized": true,
|
| 575 |
+
"rstrip": false,
|
| 576 |
+
"single_word": false,
|
| 577 |
+
"special": false
|
| 578 |
+
},
|
| 579 |
+
"50324": {
|
| 580 |
+
"content": "[unused39]",
|
| 581 |
+
"lstrip": false,
|
| 582 |
+
"normalized": true,
|
| 583 |
+
"rstrip": false,
|
| 584 |
+
"single_word": false,
|
| 585 |
+
"special": false
|
| 586 |
+
},
|
| 587 |
+
"50325": {
|
| 588 |
+
"content": "[unused40]",
|
| 589 |
+
"lstrip": false,
|
| 590 |
+
"normalized": true,
|
| 591 |
+
"rstrip": false,
|
| 592 |
+
"single_word": false,
|
| 593 |
+
"special": false
|
| 594 |
+
},
|
| 595 |
+
"50326": {
|
| 596 |
+
"content": "[unused41]",
|
| 597 |
+
"lstrip": false,
|
| 598 |
+
"normalized": true,
|
| 599 |
+
"rstrip": false,
|
| 600 |
+
"single_word": false,
|
| 601 |
+
"special": false
|
| 602 |
+
},
|
| 603 |
+
"50327": {
|
| 604 |
+
"content": "[unused42]",
|
| 605 |
+
"lstrip": false,
|
| 606 |
+
"normalized": true,
|
| 607 |
+
"rstrip": false,
|
| 608 |
+
"single_word": false,
|
| 609 |
+
"special": false
|
| 610 |
+
},
|
| 611 |
+
"50328": {
|
| 612 |
+
"content": "[unused43]",
|
| 613 |
+
"lstrip": false,
|
| 614 |
+
"normalized": true,
|
| 615 |
+
"rstrip": false,
|
| 616 |
+
"single_word": false,
|
| 617 |
+
"special": false
|
| 618 |
+
},
|
| 619 |
+
"50329": {
|
| 620 |
+
"content": "[unused44]",
|
| 621 |
+
"lstrip": false,
|
| 622 |
+
"normalized": true,
|
| 623 |
+
"rstrip": false,
|
| 624 |
+
"single_word": false,
|
| 625 |
+
"special": false
|
| 626 |
+
},
|
| 627 |
+
"50330": {
|
| 628 |
+
"content": "[unused45]",
|
| 629 |
+
"lstrip": false,
|
| 630 |
+
"normalized": true,
|
| 631 |
+
"rstrip": false,
|
| 632 |
+
"single_word": false,
|
| 633 |
+
"special": false
|
| 634 |
+
},
|
| 635 |
+
"50331": {
|
| 636 |
+
"content": "[unused46]",
|
| 637 |
+
"lstrip": false,
|
| 638 |
+
"normalized": true,
|
| 639 |
+
"rstrip": false,
|
| 640 |
+
"single_word": false,
|
| 641 |
+
"special": false
|
| 642 |
+
},
|
| 643 |
+
"50332": {
|
| 644 |
+
"content": "[unused47]",
|
| 645 |
+
"lstrip": false,
|
| 646 |
+
"normalized": true,
|
| 647 |
+
"rstrip": false,
|
| 648 |
+
"single_word": false,
|
| 649 |
+
"special": false
|
| 650 |
+
},
|
| 651 |
+
"50333": {
|
| 652 |
+
"content": "[unused48]",
|
| 653 |
+
"lstrip": false,
|
| 654 |
+
"normalized": true,
|
| 655 |
+
"rstrip": false,
|
| 656 |
+
"single_word": false,
|
| 657 |
+
"special": false
|
| 658 |
+
},
|
| 659 |
+
"50334": {
|
| 660 |
+
"content": "[unused49]",
|
| 661 |
+
"lstrip": false,
|
| 662 |
+
"normalized": true,
|
| 663 |
+
"rstrip": false,
|
| 664 |
+
"single_word": false,
|
| 665 |
+
"special": false
|
| 666 |
+
},
|
| 667 |
+
"50335": {
|
| 668 |
+
"content": "[unused50]",
|
| 669 |
+
"lstrip": false,
|
| 670 |
+
"normalized": true,
|
| 671 |
+
"rstrip": false,
|
| 672 |
+
"single_word": false,
|
| 673 |
+
"special": false
|
| 674 |
+
},
|
| 675 |
+
"50336": {
|
| 676 |
+
"content": "[unused51]",
|
| 677 |
+
"lstrip": false,
|
| 678 |
+
"normalized": true,
|
| 679 |
+
"rstrip": false,
|
| 680 |
+
"single_word": false,
|
| 681 |
+
"special": false
|
| 682 |
+
},
|
| 683 |
+
"50337": {
|
| 684 |
+
"content": "[unused52]",
|
| 685 |
+
"lstrip": false,
|
| 686 |
+
"normalized": true,
|
| 687 |
+
"rstrip": false,
|
| 688 |
+
"single_word": false,
|
| 689 |
+
"special": false
|
| 690 |
+
},
|
| 691 |
+
"50338": {
|
| 692 |
+
"content": "[unused53]",
|
| 693 |
+
"lstrip": false,
|
| 694 |
+
"normalized": true,
|
| 695 |
+
"rstrip": false,
|
| 696 |
+
"single_word": false,
|
| 697 |
+
"special": false
|
| 698 |
+
},
|
| 699 |
+
"50339": {
|
| 700 |
+
"content": "[unused54]",
|
| 701 |
+
"lstrip": false,
|
| 702 |
+
"normalized": true,
|
| 703 |
+
"rstrip": false,
|
| 704 |
+
"single_word": false,
|
| 705 |
+
"special": false
|
| 706 |
+
},
|
| 707 |
+
"50340": {
|
| 708 |
+
"content": "[unused55]",
|
| 709 |
+
"lstrip": false,
|
| 710 |
+
"normalized": true,
|
| 711 |
+
"rstrip": false,
|
| 712 |
+
"single_word": false,
|
| 713 |
+
"special": false
|
| 714 |
+
},
|
| 715 |
+
"50341": {
|
| 716 |
+
"content": "[unused56]",
|
| 717 |
+
"lstrip": false,
|
| 718 |
+
"normalized": true,
|
| 719 |
+
"rstrip": false,
|
| 720 |
+
"single_word": false,
|
| 721 |
+
"special": false
|
| 722 |
+
},
|
| 723 |
+
"50342": {
|
| 724 |
+
"content": "[unused57]",
|
| 725 |
+
"lstrip": false,
|
| 726 |
+
"normalized": true,
|
| 727 |
+
"rstrip": false,
|
| 728 |
+
"single_word": false,
|
| 729 |
+
"special": false
|
| 730 |
+
},
|
| 731 |
+
"50343": {
|
| 732 |
+
"content": "[unused58]",
|
| 733 |
+
"lstrip": false,
|
| 734 |
+
"normalized": true,
|
| 735 |
+
"rstrip": false,
|
| 736 |
+
"single_word": false,
|
| 737 |
+
"special": false
|
| 738 |
+
},
|
| 739 |
+
"50344": {
|
| 740 |
+
"content": "[unused59]",
|
| 741 |
+
"lstrip": false,
|
| 742 |
+
"normalized": true,
|
| 743 |
+
"rstrip": false,
|
| 744 |
+
"single_word": false,
|
| 745 |
+
"special": false
|
| 746 |
+
},
|
| 747 |
+
"50345": {
|
| 748 |
+
"content": "[unused60]",
|
| 749 |
+
"lstrip": false,
|
| 750 |
+
"normalized": true,
|
| 751 |
+
"rstrip": false,
|
| 752 |
+
"single_word": false,
|
| 753 |
+
"special": false
|
| 754 |
+
},
|
| 755 |
+
"50346": {
|
| 756 |
+
"content": "[unused61]",
|
| 757 |
+
"lstrip": false,
|
| 758 |
+
"normalized": true,
|
| 759 |
+
"rstrip": false,
|
| 760 |
+
"single_word": false,
|
| 761 |
+
"special": false
|
| 762 |
+
},
|
| 763 |
+
"50347": {
|
| 764 |
+
"content": "[unused62]",
|
| 765 |
+
"lstrip": false,
|
| 766 |
+
"normalized": true,
|
| 767 |
+
"rstrip": false,
|
| 768 |
+
"single_word": false,
|
| 769 |
+
"special": false
|
| 770 |
+
},
|
| 771 |
+
"50348": {
|
| 772 |
+
"content": "[unused63]",
|
| 773 |
+
"lstrip": false,
|
| 774 |
+
"normalized": true,
|
| 775 |
+
"rstrip": false,
|
| 776 |
+
"single_word": false,
|
| 777 |
+
"special": false
|
| 778 |
+
},
|
| 779 |
+
"50349": {
|
| 780 |
+
"content": "[unused64]",
|
| 781 |
+
"lstrip": false,
|
| 782 |
+
"normalized": true,
|
| 783 |
+
"rstrip": false,
|
| 784 |
+
"single_word": false,
|
| 785 |
+
"special": false
|
| 786 |
+
},
|
| 787 |
+
"50350": {
|
| 788 |
+
"content": "[unused65]",
|
| 789 |
+
"lstrip": false,
|
| 790 |
+
"normalized": true,
|
| 791 |
+
"rstrip": false,
|
| 792 |
+
"single_word": false,
|
| 793 |
+
"special": false
|
| 794 |
+
},
|
| 795 |
+
"50351": {
|
| 796 |
+
"content": "[unused66]",
|
| 797 |
+
"lstrip": false,
|
| 798 |
+
"normalized": true,
|
| 799 |
+
"rstrip": false,
|
| 800 |
+
"single_word": false,
|
| 801 |
+
"special": false
|
| 802 |
+
},
|
| 803 |
+
"50352": {
|
| 804 |
+
"content": "[unused67]",
|
| 805 |
+
"lstrip": false,
|
| 806 |
+
"normalized": true,
|
| 807 |
+
"rstrip": false,
|
| 808 |
+
"single_word": false,
|
| 809 |
+
"special": false
|
| 810 |
+
},
|
| 811 |
+
"50353": {
|
| 812 |
+
"content": "[unused68]",
|
| 813 |
+
"lstrip": false,
|
| 814 |
+
"normalized": true,
|
| 815 |
+
"rstrip": false,
|
| 816 |
+
"single_word": false,
|
| 817 |
+
"special": false
|
| 818 |
+
},
|
| 819 |
+
"50354": {
|
| 820 |
+
"content": "[unused69]",
|
| 821 |
+
"lstrip": false,
|
| 822 |
+
"normalized": true,
|
| 823 |
+
"rstrip": false,
|
| 824 |
+
"single_word": false,
|
| 825 |
+
"special": false
|
| 826 |
+
},
|
| 827 |
+
"50355": {
|
| 828 |
+
"content": "[unused70]",
|
| 829 |
+
"lstrip": false,
|
| 830 |
+
"normalized": true,
|
| 831 |
+
"rstrip": false,
|
| 832 |
+
"single_word": false,
|
| 833 |
+
"special": false
|
| 834 |
+
},
|
| 835 |
+
"50356": {
|
| 836 |
+
"content": "[unused71]",
|
| 837 |
+
"lstrip": false,
|
| 838 |
+
"normalized": true,
|
| 839 |
+
"rstrip": false,
|
| 840 |
+
"single_word": false,
|
| 841 |
+
"special": false
|
| 842 |
+
},
|
| 843 |
+
"50357": {
|
| 844 |
+
"content": "[unused72]",
|
| 845 |
+
"lstrip": false,
|
| 846 |
+
"normalized": true,
|
| 847 |
+
"rstrip": false,
|
| 848 |
+
"single_word": false,
|
| 849 |
+
"special": false
|
| 850 |
+
},
|
| 851 |
+
"50358": {
|
| 852 |
+
"content": "[unused73]",
|
| 853 |
+
"lstrip": false,
|
| 854 |
+
"normalized": true,
|
| 855 |
+
"rstrip": false,
|
| 856 |
+
"single_word": false,
|
| 857 |
+
"special": false
|
| 858 |
+
},
|
| 859 |
+
"50359": {
|
| 860 |
+
"content": "[unused74]",
|
| 861 |
+
"lstrip": false,
|
| 862 |
+
"normalized": true,
|
| 863 |
+
"rstrip": false,
|
| 864 |
+
"single_word": false,
|
| 865 |
+
"special": false
|
| 866 |
+
},
|
| 867 |
+
"50360": {
|
| 868 |
+
"content": "[unused75]",
|
| 869 |
+
"lstrip": false,
|
| 870 |
+
"normalized": true,
|
| 871 |
+
"rstrip": false,
|
| 872 |
+
"single_word": false,
|
| 873 |
+
"special": false
|
| 874 |
+
},
|
| 875 |
+
"50361": {
|
| 876 |
+
"content": "[unused76]",
|
| 877 |
+
"lstrip": false,
|
| 878 |
+
"normalized": true,
|
| 879 |
+
"rstrip": false,
|
| 880 |
+
"single_word": false,
|
| 881 |
+
"special": false
|
| 882 |
+
},
|
| 883 |
+
"50362": {
|
| 884 |
+
"content": "[unused77]",
|
| 885 |
+
"lstrip": false,
|
| 886 |
+
"normalized": true,
|
| 887 |
+
"rstrip": false,
|
| 888 |
+
"single_word": false,
|
| 889 |
+
"special": false
|
| 890 |
+
},
|
| 891 |
+
"50363": {
|
| 892 |
+
"content": "[unused78]",
|
| 893 |
+
"lstrip": false,
|
| 894 |
+
"normalized": true,
|
| 895 |
+
"rstrip": false,
|
| 896 |
+
"single_word": false,
|
| 897 |
+
"special": false
|
| 898 |
+
},
|
| 899 |
+
"50364": {
|
| 900 |
+
"content": "[unused79]",
|
| 901 |
+
"lstrip": false,
|
| 902 |
+
"normalized": true,
|
| 903 |
+
"rstrip": false,
|
| 904 |
+
"single_word": false,
|
| 905 |
+
"special": false
|
| 906 |
+
},
|
| 907 |
+
"50365": {
|
| 908 |
+
"content": "[unused80]",
|
| 909 |
+
"lstrip": false,
|
| 910 |
+
"normalized": true,
|
| 911 |
+
"rstrip": false,
|
| 912 |
+
"single_word": false,
|
| 913 |
+
"special": false
|
| 914 |
+
},
|
| 915 |
+
"50366": {
|
| 916 |
+
"content": "[unused81]",
|
| 917 |
+
"lstrip": false,
|
| 918 |
+
"normalized": true,
|
| 919 |
+
"rstrip": false,
|
| 920 |
+
"single_word": false,
|
| 921 |
+
"special": false
|
| 922 |
+
},
|
| 923 |
+
"50367": {
|
| 924 |
+
"content": "[unused82]",
|
| 925 |
+
"lstrip": false,
|
| 926 |
+
"normalized": true,
|
| 927 |
+
"rstrip": false,
|
| 928 |
+
"single_word": false,
|
| 929 |
+
"special": false
|
| 930 |
+
}
|
| 931 |
+
},
|
| 932 |
+
"clean_up_tokenization_spaces": true,
|
| 933 |
+
"cls_token": "[CLS]",
|
| 934 |
+
"extra_special_tokens": {},
|
| 935 |
+
"mask_token": "[MASK]",
|
| 936 |
+
"model_input_names": [
|
| 937 |
+
"input_ids",
|
| 938 |
+
"attention_mask"
|
| 939 |
+
],
|
| 940 |
+
"model_max_length": 8192,
|
| 941 |
+
"pad_token": "[PAD]",
|
| 942 |
+
"sep_token": "[SEP]",
|
| 943 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 944 |
+
"unk_token": "[UNK]"
|
| 945 |
+
}
|