Upload folder using huggingface_hub
Browse files
- 1_Pooling/config.json +10 -0
- README.md +786 -0
- config.json +36 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- tokenizer.json +0 -0
- tokenizer_config.json +17 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": true,
|
| 4 |
+
"pooling_mode_mean_tokens": false,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,786 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:88
|
| 9 |
+
- loss:CachedMultipleNegativesRankingLoss
|
| 10 |
+
base_model: BAAI/bge-base-en-v1.5
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: the nearest technology center under tcsp is too far and fully booked
|
| 13 |
+
so i can t access their advanced machinery skill training or tech advisory for
|
| 14 |
+
upgrading my production line. local msmes like mine are losing competitiveness
|
| 15 |
+
without nearby centers in delhi region. please set up or expand tcsp facilities
|
| 16 |
+
closer and prioritize slots for small units to adopt modern tech fast. issue non-availability
|
| 17 |
+
of nearby technology center under tcsp context the user is reporting that the
|
| 18 |
+
nearest technology center under tcsp is too far and fully booked which is hindering
|
| 19 |
+
access to advanced machinery skill training and tech advisory for upgrading the
|
| 20 |
+
production line and is requesting the establishment or expansion of tcsp facilities
|
| 21 |
+
closer to the delhi region to prioritize slots for small units. details - location
|
| 22 |
+
delhi region requirement establishment or expansion of tcsp facilities priority
|
| 23 |
+
slots for small units
|
| 24 |
+
sentences:
|
| 25 |
+
- Technology, Quality and Institutions. Testing, Quality, Testing Center. this category
|
| 26 |
+
encompasses grievances related to msmes micro small and medium enterprises inability
|
| 27 |
+
to access utilize or rely on government-recognized testing calibration inspection
|
| 28 |
+
or certification services required for regulatory compliance tenders gem listing
|
| 29 |
+
or exports. the category covers a range of issues including delays in the issuance
|
| 30 |
+
of test reports despite samples being submitted and fees paid denial or non-issuance
|
| 31 |
+
of quality or conformity certificates without clear reasons difficulties accessing
|
| 32 |
+
testing or calibration facilities due to - capacity constraints - administrative
|
| 33 |
+
refusal - non-functional equipment procedural and system-level barriers such as
|
| 34 |
+
- unclear or changing documentation requirements - portal mismatches - fees paid
|
| 35 |
+
but testing not scheduled situations where business losses occur due to market
|
| 36 |
+
access being blocked due to pending testing or certification at authorized labs
|
| 37 |
+
or msme testing centers. example issues include testing completed and fees paid
|
| 38 |
+
but test report is not issued even after many weeks quality certification was
|
| 39 |
+
rejected without written reasons despite compliance with guidelines testing center
|
| 40 |
+
is refusing to accept samples citing workload while deadlines are approaching
|
| 41 |
+
fees paid online but testing not scheduled due to portal or procedural issues
|
| 42 |
+
tender or export shipment is blocked because the required test certificate is
|
| 43 |
+
still pending at the testing lab. the purpose of this category is to capture grievances
|
| 44 |
+
related to the operational procedural policy or institutional causes that hinder
|
| 45 |
+
msmes access to government-recognized testing calibration inspection or certification
|
| 46 |
+
services. the category aims to identify and address the root causes of these issues
|
| 47 |
+
including capacity constraints at testing facilities inade
|
| 48 |
+
- Policy and Schemes. Related to MSME Scheme. this category encompasses grievances
|
| 49 |
+
related to central sector schemes directly administered by the ministry of micro
|
| 50 |
+
small and medium enterprises momsme where the ministry itself serves as the implementing
|
| 51 |
+
authority. the category includes schemes such as zero defect zero effect zed credit
|
| 52 |
+
linked capital subsidy scheme clcss lean manufacturing and other centrally managed
|
| 53 |
+
msme support programs. it covers cases where msmes have applied for scheme benefits
|
| 54 |
+
or subsidies received approvals or completed required assessments or certifications
|
| 55 |
+
but the approved financial assistance has not been released or credited. the category
|
| 56 |
+
also captures grievances where claims submitted under ministry-run schemes for
|
| 57 |
+
incentives reimbursements or financial support remain pending for extended periods
|
| 58 |
+
or are rejected without clear or consistent justification. this includes cases
|
| 59 |
+
of rejection due to alleged documentation gaps system-generated ineligibility
|
| 60 |
+
flags disputes over eligible machinery or activities and delays caused by human
|
| 61 |
+
or system-level verification failures. additionally the category includes grievances
|
| 62 |
+
arising from ambiguity or confusion regarding scheme eligibility scope or applicability
|
| 63 |
+
such as uncertainty over mandatory certifications eligibility of second-hand versus
|
| 64 |
+
new machinery applicability to service enterprises or inconsistent interpretations
|
| 65 |
+
of scheme rules by different central or state offices. the category further covers
|
| 66 |
+
portal-related issues affecting scheme access and execution including technical
|
| 67 |
+
errors during registration or document upload login or authentication failures
|
| 68 |
+
contradictory status messages and non-updating dashboards for application claim
|
| 69 |
+
or training progress. these issues typically arise due to system bugs integration
|
| 70 |
+
gaps between multiple portals file format or size restrictions or delays in updating
|
| 71 |
+
portal logic after scheme guideline revisions.
|
| 72 |
+
- Technology, Quality and Institutions. Technology Center System Program (TCSP).
|
| 73 |
+
the technology centre systems programme tcsp is a flagship initiative of the ministry
|
| 74 |
+
of micro small and medium enterprises approved in <NUM> with world bank support
|
| 75 |
+
to significantly strengthen india s msme manufacturing ecosystem through advanced
|
| 76 |
+
technology access and skill development. with a total outlay of about <NUM> <NUM>
|
| 77 |
+
crore initially and later expanded to around <NUM> <NUM> crore the programme modernizes
|
| 78 |
+
<NUM> existing technology centres earlier tool rooms technology development centres
|
| 79 |
+
and establishes <NUM> new technology centres along with <NUM> extension centres
|
| 80 |
+
across the country. these centres are equipped with state-of-the-art infrastructure
|
| 81 |
+
such as cnc machining additive manufacturing and 3d printing robotics and automation
|
| 82 |
+
laser machining electronics system design and manufacturing facilities advanced
|
| 83 |
+
metrology and testing equipment. tcsp aims to enhance msme competitiveness by
|
| 84 |
+
providing localized access to modern manufacturing technologies short- and long-term
|
| 85 |
+
industry-oriented training skilled manpower development prototyping and testing
|
| 86 |
+
services incubation and entrepreneurship support and advisory services. the programme
|
| 87 |
+
also promotes cluster-based development through manufacturing technology partners
|
| 88 |
+
and technology cluster managers supported by a centralized erp system for service
|
| 89 |
+
delivery thereby improving productivity innovation and integration of msmes into
|
| 90 |
+
national and global value chains. examples of common grievances under the tcsp
|
| 91 |
+
include operational delays a newly approved technology centre in an underserved
|
| 92 |
+
state remains partially functional for years due to delayed procurement of machines
|
| 93 |
+
preventing msmes from accessing prototyping and testing facilities. training capacity
|
| 94 |
+
constraints popular courses such as robotics or cnc programming at extension centres
|
| 95 |
+
become oversubscribed resulting in eligible msme workers being denied admission
|
| 96 |
+
despite early registration. weak cluster linkages local technology cluster managers
|
| 97 |
+
fail to connect msmes with oems academic institutions or research bodies limiting
|
| 98 |
+
the intended collaboration and market access benefits. erp system issues msmes
|
| 99 |
+
face repeated difficulties in booking training slots advisory services or accessing
|
| 100 |
+
technical specifications due to downtime or glitches in the centralized erp portal.
|
| 101 |
+
infrastructure shortfalls at extension centres an approved extension centre lacks
|
| 102 |
+
promised metrology or testing equipment restricting its ability to deliver precision
|
| 103 |
+
measurement and quality assurance services to nearby msmes.
|
| 104 |
+
- source_sentence: my grievance are related to yes bank ifsc code- yesb0000565 as
|
| 105 |
+
per msme act and rbi policy i have open new digital kyc verified open the current
|
| 106 |
+
ac in yes bank yes bank provide me ckyc no and my ac no are - <NUM> on - <NUM>
|
| 107 |
+
- <NUM> - <NUM> after ac opening without my permission after few days bank close
|
| 108 |
+
my ac and reply after after few days your ac are activate after four month bank
|
| 109 |
+
not activate my ac and return my ac fund available balance through post so i request
|
| 110 |
+
you please order and take action against bank why close digital ckyc verified
|
| 111 |
+
ac by bank without permission and i request you please open new current ac in
|
| 112 |
+
your bank then i have transferred my old returned ac avalanche dd in your new
|
| 113 |
+
current ac please take action against bank as soon as possible. thanks. issue
|
| 114 |
+
closure of current account without permission and request for new account opening
|
| 115 |
+
context the user is reporting that their current account was closed without their
|
| 116 |
+
permission in yes bank and they are requesting action against the bank and opening
|
| 117 |
+
a new current account in the complainant s bank to transfer the returned balance.
|
| 118 |
+
details - account type current account ifsc code yesb0000565 account number <NUM>
|
| 119 |
+
account opening date <NUM> - <NUM> - <NUM> bank yes bank
|
| 120 |
+
sentences:
|
| 121 |
+
- Technology, Quality and Institutions. Building Awareness on Intellectual Property
|
| 122 |
+
Rights (IPR) for the Micro, Small & Medium Enterprises- an NMCP Scheme. the building
|
| 123 |
+
awareness on intellectual property rights ipr for msmes scheme under the national
|
| 124 |
+
manufacturing competitiveness programme nmcp is an initiative of the ministry
|
| 125 |
+
of msme aimed at sensitizing micro small and medium enterprises to the importance
|
| 126 |
+
of protecting and leveraging intellectual property such as patents trademarks
|
| 127 |
+
copyrights industrial designs and geographical indications. the scheme supports
|
| 128 |
+
the organization of awareness and sensitization programmes including seminars
|
| 129 |
+
and workshops with financial assistance up to <NUM> . <NUM> lakh per event conducts
|
| 130 |
+
pilot studies to assess intellectual property needs in specific industrial clusters
|
| 131 |
+
with support up to <NUM> lakh and facilitates the establishment of intellectual
|
| 132 |
+
property facilitation centres ipfcs in collaboration with technical institutions
|
| 133 |
+
such as tifac. it also provides reimbursement support for ipr registration expenses
|
| 134 |
+
including assistance up to <NUM> lakh for geographical indication registration
|
| 135 |
+
and prescribed limits for other ip filings with proposals approved through a structured
|
| 136 |
+
mechanism involving a project implementation committee and a steering committee.
|
| 137 |
+
overall the scheme seeks to integrate ipr awareness into msme business strategy
|
| 138 |
+
encourage innovation protection strengthen cluster-based ip management and enhance
|
| 139 |
+
competitiveness in liberalized domestic and global markets. examples of common
|
| 140 |
+
grievances under the ipr awareness scheme include reimbursement cap limitation
|
| 141 |
+
an msme incurs <NUM> . <NUM> lakh in patent filing and prosecution costs but receives
|
| 142 |
+
reimbursement only up to the maximum admissible limit leaving part of the eligible
|
| 143 |
+
expense unsupported. workshop content inadequacy an ipr seminar organized for
|
| 144 |
+
a manufacturing cluster lacks subject-matter experts and focuses only on basic
|
| 145 |
+
concepts despite prior identification of advanced patenting and commercialization
|
| 146 |
+
needs. pilot study rejection a cluster-level proposal for an ip needs assessment
|
| 147 |
+
study is rejected due to documentation or data gaps even though it is backed by
|
| 148 |
+
an industry association and local msmes. restricted ipfc access an intellectual
|
| 149 |
+
property facilitation centre limits filing and advisory support only to notified
|
| 150 |
+
clusters denying assistance to otherwise eligible msmes outside those clusters.
|
| 151 |
+
processing delays approved reimbursement or support for geographical indication
|
| 152 |
+
registration remains pending for several months after committee approval affecting
|
| 153 |
+
timely protection and commercialization of the product.
|
| 154 |
+
- UAM/Udyam Registration/Certificate related issues. Updation of Email ID/Mobile
|
| 155 |
+
No. Linked to UDYAM Certificate. this category includes grievances related to
|
| 156 |
+
updating or correcting the email id or mobile number associated with an existing
|
| 157 |
+
udyam registration. contact details provided during registration are used for
|
| 158 |
+
communication verification and authentication when accessing the enterprise profile
|
| 159 |
+
on the portal. if these contact details become outdated incorrect or inaccessible
|
| 160 |
+
the enterprise owner may face difficulty receiving otps accessing the portal or
|
| 161 |
+
managing the registration information. common grievances under this category include
|
| 162 |
+
requests to change the registered mobile number or email address because the original
|
| 163 |
+
number is no longer active the sim card has been lost the email account is no
|
| 164 |
+
longer accessible or the contact details were entered incorrectly during registration.
|
| 165 |
+
some complaints arise when the registered contact details belong to an employee
|
| 166 |
+
or consultant who is no longer associated with the enterprise preventing the current
|
| 167 |
+
owner from receiving verification messages. in other cases entrepreneurs report
|
| 168 |
+
that they cannot update contact details because the system requires authentication
|
| 169 |
+
through the old mobile number or email which they no longer have access to. these
|
| 170 |
+
grievances are typically raised by msme owners proprietors partners directors
|
| 171 |
+
of companies or authorized representatives responsible for managing business registrations.
|
| 172 |
+
small business owners who registered their enterprise personally may request updates
|
| 173 |
+
when their phone number or email changes. in some cases accountants consultants
|
| 174 |
+
or administrative staff handling compliance activities may also submit grievances
|
| 175 |
+
when they cannot access the registration due to outdated contact details. this
|
| 176 |
+
category therefore represents issues related specifically to correcting or updating
|
| 177 |
+
communication details associated with an existing udyam certificate.
|
| 178 |
+
- Starter, Credit and Finance. Any other banking issue. this category encompasses
|
| 179 |
+
grievances related to banking operations conduct or services affecting micro small
|
| 180 |
+
and medium enterprises msmes where the issue involves a bank or financial institution
|
| 181 |
+
but does not fall under a specific loan product credit guarantee scheme or named
|
| 182 |
+
government scheme. the category s purpose is to capture operational and service-level
|
| 183 |
+
banking failures that directly disrupt business functioning working capital flow
|
| 184 |
+
compliance obligations or financial credibility of msmes. scope of the category
|
| 185 |
+
the category covers a wide range of issues including account restrictions and
|
| 186 |
+
freezing msme bank accounts being frozen restricted or partially disabled due
|
| 187 |
+
to kyc re-verification identity mismatches internal compliance flags alleged suspicious
|
| 188 |
+
activity account restrictions despite compliance account restrictions continuing
|
| 189 |
+
despite submission of required documents or compliance with bank instructions
|
| 190 |
+
bank charges fees and penalties disputes related to bank charges fees and penalties
|
| 191 |
+
applied without transparency or contrary to agreed account terms including unexpected
|
| 192 |
+
minimum balance penalties annual maintenance charges on zero-balance accounts
|
| 193 |
+
excessive transaction fees penalties levied without prior intimation and subsequently
|
| 194 |
+
refused for reversal despite documented eligibility incorrect credit reporting
|
| 195 |
+
and credit rating impacts grievances related to incorrect credit reporting and
|
| 196 |
+
credit rating impacts including inaccurate reporting of defaults failure to update
|
| 197 |
+
closed or repaid loans delay or refusal to correct cibil or credit bureau records
|
| 198 |
+
wrongly classifying accounts as non-performing assets npas downgrading credit
|
| 199 |
+
ratings due to internal errors bank
|
| 200 |
+
- source_sentence: recently we registered ourselves for udyam registration but after
|
| 201 |
+
scanning the qr code verification is failing. issue qr code verification failure
|
| 202 |
+
for udyam registration context the user is reporting that qr code verification
|
| 203 |
+
is failing after registering for udyam registration. details - registration type
|
| 204 |
+
udyam registration verification issue qr code verification failure
|
| 205 |
+
sentences:
|
| 206 |
+
- UAM/Udyam Registration/Certificate related issues. Updation of Address Details.
|
| 207 |
+
this category refers to grievances related to updating or correcting the registered
|
| 208 |
+
address of an enterprise in the udyam registration records. the registered address
|
| 209 |
+
represents the official location of the enterprise and accurate address information
|
| 210 |
+
is important for maintaining correct business records and identifying the location
|
| 211 |
+
of operations. grievances under this category commonly occur when the enterprise
|
| 212 |
+
has changed its business location after registration or when the address recorded
|
| 213 |
+
during registration was entered incorrectly. entrepreneurs may request an update
|
| 214 |
+
if the business has shifted to a new office factory or operational location. some
|
| 215 |
+
grievances arise when the address contains typographical errors incomplete information
|
| 216 |
+
or mismatched details that do not correspond with supporting documents. in other
|
| 217 |
+
cases users may report that they are unable to modify the address details through
|
| 218 |
+
the portal or that the changes they submitted have not been updated in the registration
|
| 219 |
+
record. these grievances are usually raised by msme owners proprietors partners
|
| 220 |
+
of partnership firms directors of companies or authorized representatives responsible
|
| 221 |
+
for maintaining enterprise records. small business owners who relocate their operations
|
| 222 |
+
to a different city or premises may request updates to ensure the registration
|
| 223 |
+
reflects the correct address. similarly compliance officers accountants or consultants
|
| 224 |
+
managing regulatory documentation for the enterprise may submit grievances when
|
| 225 |
+
they identify discrepancies between the registered address and the actual business
|
| 226 |
+
location.
|
| 227 |
+
- Policy and Schemes. DBT / IT desk including Annual Report. dbt it desk including
|
| 228 |
+
the annual report in msme refers to the data dbt wing functioning under the office
|
| 229 |
+
of the development commissioner msme which is responsible for administering direct
|
| 230 |
+
benefit transfer dbt of subsidies under msme schemes managing it and digital infrastructure
|
| 231 |
+
and compiling the ministry s annual report. the wing oversees end-to-end dbt processes
|
| 232 |
+
for scheme reimbursements such as ict and cloud computing subsidies where msmes
|
| 233 |
+
initially incur eligible expenses and subsequently receive reimbursements directly
|
| 234 |
+
into aadhaar-linked bank accounts through the public financial management system
|
| 235 |
+
often after technical verification by agencies like telecommunications consultants
|
| 236 |
+
india limited. it ensures compliance with national dbt standards in coordination
|
| 237 |
+
with the dbt mission and national informatics centre maintains and upgrades msme
|
| 238 |
+
it systems including the udyam registration portal supports cloud-based it adoption
|
| 239 |
+
for msmes undertakes data analytics and mis reporting and onboards schemes to
|
| 240 |
+
the national dbt framework. the wing also prepares the annual report of the ministry
|
| 241 |
+
of msme consolidating performance indicators financial outlays scheme outcomes
|
| 242 |
+
udyam registration trends and macro-level contributions such as msme share in
|
| 243 |
+
gdp and employment which are used for parliament cabinet briefings and policy
|
| 244 |
+
evaluation. while this framework promotes transparency leak-proof subsidy delivery
|
| 245 |
+
evidence-based policymaking and digital efficiency stakeholders frequently raise
|
| 246 |
+
grievances related to dbt execution data accuracy it reliability and reporting
|
| 247 |
+
quality. examples of grievances include msmes experiencing delays in receipt of
|
| 248 |
+
approved ict or cloud service subsidies due to pfms transaction or verification
|
| 249 |
+
glitches reimbursement failures arising from aadhaar bank account linkage mismatches
|
| 250 |
+
despite valid udyam registration inaccuracies or under-reporting of scheme achievements
|
| 251 |
+
udyam registrations or msme gdp contribution in the annual report affecting policy
|
| 252 |
+
advocacy and planning temporary downtime or access issues on udyam or other msme
|
| 253 |
+
it portals during registration or subsidy claim periods and gaps in mis capture
|
| 254 |
+
where scheme data duplications or leakages are not properly reflected in dbt dashboards
|
| 255 |
+
or the annual report prompting appeals for correction and system strengthening.
|
| 256 |
+
- UAM/Udyam Registration/Certificate related issues. QR Code Printed on UDYAM Certificate
|
| 257 |
+
Not Readable (Technical). this category includes grievances related to qr codes
|
| 258 |
+
printed on the udyam certificate that cannot be scanned or read properly. the
|
| 259 |
+
qr code is intended to allow quick verification of the certificate and its associated
|
| 260 |
+
enterprise information. if the qr code cannot be scanned users may face difficulty
|
| 261 |
+
verifying the certificate or sharing it for official purposes. grievances under
|
| 262 |
+
this category typically involve situations where the qr code on the downloaded
|
| 263 |
+
or printed certificate appears blurred distorted or unresponsive when scanned
|
| 264 |
+
with a qr reader. some users report that the qr code does not open any verification
|
| 265 |
+
page after scanning while others find that the scanning application fails to recognize
|
| 266 |
+
the code at all. these issues may arise due to errors during certificate generation
|
| 267 |
+
problems with the downloaded file or printing-related distortions that make the
|
| 268 |
+
qr code unreadable. these grievances are generally raised by msme owners proprietors
|
| 269 |
+
partners directors or authorized representatives who use the udyam certificate
|
| 270 |
+
as official documentation for their enterprise. small business owners who attempt
|
| 271 |
+
to share the certificate for verification purposes may discover that the qr code
|
| 272 |
+
is not functioning correctly. consultants accountants or administrative staff
|
| 273 |
+
responsible for maintaining business documentation may also submit grievances
|
| 274 |
+
when they identify that the qr code on the certificate cannot be scanned or verified.
|
| 275 |
+
- source_sentence: respectrd sir i have registered person of pm vishwakarma scheme
|
| 276 |
+
as a broom maker i have also complited the trannung of broom maker at bhusawal
|
| 277 |
+
pm vishwakarma center before three month ago sir but the tool kit of the pm vishwakarma
|
| 278 |
+
scheme rs <NUM> - has not given or got to me so please your request to given of
|
| 279 |
+
scheme benefit as a tool kit and loan ammount of <NUM> - rs issue non-receipt
|
| 280 |
+
of tool kit and loan under pm vishwakarma scheme context the user is reporting
|
| 281 |
+
non-receipt of the tool kit worth rs <NUM> <NUM> and loan amount of rs <NUM> <NUM>
|
| 282 |
+
under the pm vishwakarma scheme despite completing the training three months ago.
|
| 283 |
+
details - scheme pm vishwakarma scheme occupation broom maker training location
|
| 284 |
+
bhusawal pm vishwakarma center amount requested rs <NUM> tool kit rs <NUM> loan
|
| 285 |
+
sentences:
|
| 286 |
+
- UAM/Udyam Registration/Certificate related issues. Migration from UAM to UDYAM.
|
| 287 |
+
this category refers to grievances related to the migration of enterprises registered
|
| 288 |
+
under the earlier udyog aadhaar memorandum uam system to the current udyam registration
|
| 289 |
+
system. the uam registration system was used earlier for msme registration but
|
| 290 |
+
enterprises registered under that system were required to migrate their registration
|
| 291 |
+
details to the newer udyam portal to maintain updated records. during this migration
|
| 292 |
+
process some enterprises encounter difficulties in transferring or verifying their
|
| 293 |
+
existing registration details. grievances under this category typically include
|
| 294 |
+
issues where business owners are unable to complete the migration process from
|
| 295 |
+
uam to udyam due to errors or system restrictions. entrepreneurs may report that
|
| 296 |
+
their uam number is not being recognized by the portal or that the migration process
|
| 297 |
+
stops due to validation errors related to aadhaar pan or enterprise details. some
|
| 298 |
+
users also experience problems when the migrated information does not match the
|
| 299 |
+
original uam registration resulting in incomplete or incorrect enterprise records.
|
| 300 |
+
other grievances may involve duplicate records being generated during migration
|
| 301 |
+
or difficulty accessing the migrated certificate after the process is completed.
|
| 302 |
+
these grievances are usually raised by existing msme owners who had previously
|
| 303 |
+
registered their enterprises under the uam system and are attempting to migrate
|
| 304 |
+
their information to the udyam portal. the complainants may include proprietors
|
| 305 |
+
partners of partnership firms directors of companies or authorized representatives
|
| 306 |
+
responsible for regulatory compliance of the enterprise. in many cases small business
|
| 307 |
+
owners who registered their enterprises several years ago under uam may attempt
|
| 308 |
+
migration when they need updated msme documentation and they raise grievances
|
| 309 |
+
if the migration process cannot be completed successfully.
|
| 310 |
+
- Policy and Schemes. PM Vishwakarma. the pm vishwakarma category encompasses the
|
| 311 |
+
registration skill certification and benefit disbursal processes for artisans
|
| 312 |
+
and craftspeople. the system aims to provide easy registration skill certification
|
| 313 |
+
toolkit incentives credit support and strong market linkage. however operational
|
| 314 |
+
issues eligibility interpretation challenges and bank coordination failures lead
|
| 315 |
+
to breakdowns at the stages of registration certification benefit disbursal and
|
| 316 |
+
bank linkage. common grievance scenarios registration stuck at pending verification
|
| 317 |
+
applicants may experience delays in the registration process with applications
|
| 318 |
+
remaining stuck at pending verification for <NUM> days without any response from
|
| 319 |
+
the local officer. aadhaar-based registration failures aadhaar-based registration
|
| 320 |
+
may fail due to occupation mismatch despite the individual being a traditional
|
| 321 |
+
carpenter for <NUM> years. non-receipt of toolkit incentives artisans and craftspeople
|
| 322 |
+
may not receive the toolkit incentive despite completing skill training and assessment.
|
| 323 |
+
bank refusal of pm vishwakarma loans banks may refuse to provide pm vishwakarma
|
| 324 |
+
loans due to unclear scheme guidelines. incorrect trade listing trades eligible
|
| 325 |
+
under the scheme may not be listed correctly in the portal s dropdown options.
|
| 326 |
+
operational procedural policy and institutional causes operational
|
| 327 |
+
- Policy and Schemes. Related to GST. this category encompasses grievances related
|
| 328 |
+
to operational and procedural frictions under the goods and services tax gst framework
|
| 329 |
+
that directly affect micro small and medium enterprises msmes cash flow invoicing
|
| 330 |
+
and day-to-day business continuity. the category includes the following subcategories
|
| 331 |
+
<NUM> . gst registration issues applications remaining pending verification pan-gst
|
| 332 |
+
name mismatches leading to rejection confusion arising during migration from uam
|
| 333 |
+
udyam-linked records to gst rejection of registration due to pan and gst name
|
| 334 |
+
mismatch non-response from portal support <NUM> . gst refund delays eligible refunds
|
| 335 |
+
especially export-related input tax credit not disbursed within reasonable timelines
|
| 336 |
+
despite correct filings refund status shows processed without actual credit due
|
| 337 |
+
to backend mismatches delayed disbursement of input tax credit refunds for export
|
| 338 |
+
sales refund status shows processed but no amount has been credited due to backend
|
| 339 |
+
mismatch <NUM> . input tax credit itc blockages credits not reflecting because
|
| 340 |
+
supplier invoices are missing on the portal invoices being wrongly flagged as
|
| 341 |
+
ineligible itc reversals triggered by hsn mismatches or delayed supplier compliance
|
| 342 |
+
supplier invoices not reflecting on the gst portal forcing msmes to pay tax from
|
| 343 |
+
their own funds the category primarily captures operational rather than legal
|
| 344 |
+
grievances. while champions does not adjudicate tax disputes it acts as an escalation
|
| 345 |
+
and coordination channel with gstn or relevant tax authorities to resolve delays
|
| 346 |
+
portal errors and process breakdowns impacting msmes. the purpose of this category
|
| 347 |
+
is to address the following - resolve gst registration issues
|
| 348 |
+
- source_sentence: the msme portal software keeps crashing during udyam registration
|
| 349 |
+
renewal and scheme applications with error messages and failed uploads every time
|
| 350 |
+
i try. support team gives no help and i can t access my digital certificates or
|
| 351 |
+
track status. this software glitch blocks my business from government benefits
|
| 352 |
+
and loans. please fix the bugs improve server speed and add better error guides
|
| 353 |
+
right away. issue software glitch in msme portal during udyam registration renewal
|
| 354 |
+
and scheme applications context the user is reporting frequent crashes of the
|
| 355 |
+
msme portal software during udyam registration renewal and scheme applications
|
| 356 |
+
resulting in failed uploads error messages and inability to access digital certificates
|
| 357 |
+
or track status which is hindering business access to government benefits and
|
| 358 |
+
loans. details - software msme portal software issue frequent crashes during udyam
|
| 359 |
+
registration renewal and scheme applications error messages failed uploads and
|
| 360 |
+
error messages impact inability to access digital certificates track status and
|
| 361 |
+
access government benefits and loans
|
| 362 |
+
sentences:
|
| 363 |
+
- Starter, Credit and Finance. Loans from Banks. this category loans from banks
|
| 364 |
+
encompasses grievances related to access to credit from banks where micro small
|
| 365 |
+
and medium enterprises msmes have applied for loans and the bottleneck lies at
|
| 366 |
+
the bank level. the scope of this category includes issues involving commercial
|
| 367 |
+
banks regional rural banks rrbs and cooperative banks. it specifically addresses
|
| 368 |
+
situations where the problem is neither related to rbi policy government scheme
|
| 369 |
+
design nor buyer default but arises from bank-side processing handling or decision-making
|
| 370 |
+
of loan applications. the category captures the following scenarios - msmes have
|
| 371 |
+
submitted loan applications along with required documentation complied with bank
|
| 372 |
+
procedures and followed up through branches or portals but the application remains
|
| 373 |
+
pending without a formal decision. - banks keep applications under prolonged under
|
| 374 |
+
process or pending for verification status without issuing deficiency letters
|
| 375 |
+
timelines or written communication. - situations involving repeated or circular
|
| 376 |
+
document demands that effectively stall credit access. - grievances where branch-level
|
| 377 |
+
offices do not forward eligible loan applications to regional or head offices.
|
| 378 |
+
- delays in internal approvals. - avoidance of issuing a clear sanction or rejection
|
| 379 |
+
decision despite prolonged engagement. these cases reflect administrative stalling
|
| 380 |
+
rather than informed credit rejection based on risk or eligibility. the category
|
| 381 |
+
includes the following example issues - i applied for a term loan under the msme
|
| 382 |
+
category and submitted all documents but the bank has kept the application under
|
| 383 |
+
process for several months without any written update. - my loan application status
|
| 384 |
+
has been showing pending for verification on the bank portal for over <NUM> days
|
| 385 |
+
with no deficiency letter issued. - the bank is repeatedly asking for documents
|
| 386 |
+
that were already submitted causing unnecessary delay in loan processing. - the
|
| 387 |
+
branch is not forwarding
|
| 388 |
+
- Technology, Quality and Institutions. Related to NSIC. this category encompasses
|
| 389 |
+
grievances related to the support and facilitation services provided by the national
|
| 390 |
+
small industries corporation nsic to micro small and medium enterprises msmes
|
| 391 |
+
. the scope of this category includes issues arising from the areas of raw material
|
| 392 |
+
assistance market access and risk mitigation through guarantees. specifically
|
| 393 |
+
it covers situations where approved raw material assistance is not released on
|
| 394 |
+
time supplier coordination fails after nsic approval material supplied through
|
| 395 |
+
nsic is delayed or does not meet specifications or documentation and regional
|
| 396 |
+
office processes stall procurement. the category also captures failures in marketing
|
| 397 |
+
support including - delayed or missing inclusion in tenders gem or psu vendor
|
| 398 |
+
listings - late communication of bid opportunities - problems in nsic-sponsored
|
| 399 |
+
exhibitions or buyer-connect programs additionally it includes issues related
|
| 400 |
+
to performance and emd guarantees such as - delayed issuance - incorrect formats
|
| 401 |
+
- non-renewal despite payment - rejection by psus - lack of response when guarantees
|
| 402 |
+
are invoked these grievances typically result in missed orders blocked working
|
| 403 |
+
capital contract delays or loss of business credibility and arise from execution
|
| 404 |
+
coordination or service delivery breakdowns rather than policy interpretation.
|
| 405 |
+
the category is further divided into the following subcategories <NUM> . corporate
|
| 406 |
+
communication single point registration scheme and exhibition consortia and tender
|
| 407 |
+
marketing <NUM> . internal audit and law recovery <NUM> . human resource <NUM>
|
| 408 |
+
. vigilance law recovery <NUM> . international cooperation <NUM> . bank guarantee
|
| 409 |
+
monitoring <NUM> . finance accounts <NUM> . national sc st hub <NUM> . chief vigilance
|
| 410 |
+
officer <NUM> . contract procurement grievance officer <NUM> . digital services
|
| 411 |
+
facilitation and training <NUM> .space marketing cell event management cell <NUM>
|
| 412 |
+
.raw material assistance bank guarantee bill discounting bank tieup csr administration
|
| 413 |
+
<NUM> .technology liaison officer for sc st pwd cmr <NUM> .epf trust superannuation
|
| 414 |
+
pension trust <NUM> .center public information officers cpio <NUM> .company secretary
|
| 415 |
+
- Technology, Quality and Institutions. Software Related. software-related initiatives
|
| 416 |
+
for msmes mainly center on the digital msme scheme under the national manufacturing
|
| 417 |
+
competitiveness programme which promotes adoption of information and communication
|
| 418 |
+
technologies through cloud-based erp crm and accounting software to digitalize
|
| 419 |
+
day-to-day business operations. the scheme combines awareness workshops needs
|
| 420 |
+
assessment and financial support in the form of subsidies covering about <NUM>
|
| 421 |
+
<NUM> of eligible costs subject to a ceiling of <NUM> lakh over two years specifically
|
| 422 |
+
targeting micro and small enterprises. these initiatives are reinforced by complementary
|
| 423 |
+
efforts such as software-enabled facilities under technology centre programmes
|
| 424 |
+
for electronics and esdm sectors digital quality and process parameters under
|
| 425 |
+
zed certification and software-focused modules within entrepreneurship and skill
|
| 426 |
+
development programmes. together these measures aim to standardize workflows automate
|
| 427 |
+
inventory finance and customer management reduce operational inefficiencies and
|
| 428 |
+
inventory holding support online sales and compliance and enhance overall competitiveness
|
| 429 |
+
without requiring heavy upfront investment in hardware. examples of grievances
|
| 430 |
+
include subsidy denial an msme implementing a cloud-based erp costing <NUM> .
|
| 431 |
+
<NUM> lakh receives no reimbursement beyond the <NUM> lakh cap despite meeting
|
| 432 |
+
all eligibility conditions. software ineligibility a cloud application selected
|
| 433 |
+
after needs assessment is later rejected as non-standard or non-approved forcing
|
| 434 |
+
the enterprise to abandon or restart implementation mid-way. inadequate training
|
| 435 |
+
awareness workshops focus only on theoretical benefits of digitalization and fail
|
| 436 |
+
to provide hands-on demonstrations or practical guidance on using erp or crm software.
|
| 437 |
+
post-subsidy continuity issue after the two-year subsidized period ends steep
|
| 438 |
+
renewal or subscription costs make the software unaffordable disrupting business
|
| 439 |
+
operations. needs mismatch an msme assessed for crm requirements is instead provided
|
| 440 |
+
accounting software limiting the usefulness of the digital intervention and affecting
|
| 441 |
+
adoption outcomes.
|
| 442 |
+
pipeline_tag: sentence-similarity
|
| 443 |
+
library_name: sentence-transformers
|
| 444 |
+
metrics:
|
| 445 |
+
- pearson_cosine
|
| 446 |
+
- spearman_cosine
|
| 447 |
+
model-index:
|
| 448 |
+
- name: SentenceTransformer based on BAAI/bge-base-en-v1.5
|
| 449 |
+
results:
|
| 450 |
+
- task:
|
| 451 |
+
type: semantic-similarity
|
| 452 |
+
name: Semantic Similarity
|
| 453 |
+
dataset:
|
| 454 |
+
name: Unknown
|
| 455 |
+
type: unknown
|
| 456 |
+
metrics:
|
| 457 |
+
- type: pearson_cosine
|
| 458 |
+
value: .nan
|
| 459 |
+
name: Pearson Cosine
|
| 460 |
+
- type: spearman_cosine
|
| 461 |
+
value: .nan
|
| 462 |
+
name: Spearman Cosine
|
| 463 |
+
---
|
| 464 |
+
|
| 465 |
+
# SentenceTransformer based on BAAI/bge-base-en-v1.5
|
| 466 |
+
|
| 467 |
+
This is a [sentence-transformers](https://www.sbert.net) model fine-tuned from [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 468 |
+
|
| 469 |
+
## Model Details
|
| 470 |
+
|
| 471 |
+
### Model Description
|
| 472 |
+
- **Model Type:** Sentence Transformer
|
| 473 |
+
- **Base model:** [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) <!-- at revision a5beb1e3e68b9ab74eb54cfd186867f64f240e1a -->
|
| 474 |
+
- **Maximum Sequence Length:** 256 tokens
|
| 475 |
+
- **Output Dimensionality:** 768 dimensions
|
| 476 |
+
- **Similarity Function:** Cosine Similarity
|
| 477 |
+
<!-- - **Training Dataset:** Unknown -->
|
| 478 |
+
<!-- - **Language:** Unknown -->
|
| 479 |
+
<!-- - **License:** Unknown -->
|
| 480 |
+
|
| 481 |
+
### Model Sources
|
| 482 |
+
|
| 483 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 484 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers)
|
| 485 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 486 |
+
|
| 487 |
+
### Full Model Architecture
|
| 488 |
+
|
| 489 |
+
```
|
| 490 |
+
SentenceTransformer(
|
| 491 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': True, 'architecture': 'BertModel'})
|
| 492 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 493 |
+
(2): Normalize()
|
| 494 |
+
)
|
| 495 |
+
```
|
| 496 |
+
|
| 497 |
+
## Usage
|
| 498 |
+
|
| 499 |
+
### Direct Usage (Sentence Transformers)
|
| 500 |
+
|
| 501 |
+
First install the Sentence Transformers library:
|
| 502 |
+
|
| 503 |
+
```bash
|
| 504 |
+
pip install -U sentence-transformers
|
| 505 |
+
```
|
| 506 |
+
|
| 507 |
+
Then you can load this model and run inference.
|
| 508 |
+
```python
|
| 509 |
+
from sentence_transformers import SentenceTransformer
|
| 510 |
+
|
| 511 |
+
# Download from the 🤗 Hub
|
| 512 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
| 513 |
+
# Run inference
|
| 514 |
+
sentences = [
|
| 515 |
+
'the msme portal software keeps crashing during udyam registration renewal and scheme applications with error messages and failed uploads every time i try. support team gives no help and i can t access my digital certificates or track status. this software glitch blocks my business from government benefits and loans. please fix the bugs improve server speed and add better error guides right away. issue software glitch in msme portal during udyam registration renewal and scheme applications context the user is reporting frequent crashes of the msme portal software during udyam registration renewal and scheme applications resulting in failed uploads error messages and inability to access digital certificates or track status which is hindering business access to government benefits and loans. details - software msme portal software issue frequent crashes during udyam registration renewal and scheme applications error messages failed uploads and error messages impact inability to access digital certificates track status and access government benefits and loans',
|
| 516 |
+
'Technology, Quality and Institutions. Software Related. software-related initiatives for msmes mainly center on the digital msme scheme under the national manufacturing competitiveness programme which promotes adoption of information and communication technologies through cloud-based erp crm and accounting software to digitalize day-to-day business operations. the scheme combines awareness workshops needs assessment and financial support in the form of subsidies covering about <NUM> <NUM> of eligible costs subject to a ceiling of <NUM> lakh over two years specifically targeting micro and small enterprises. these initiatives are reinforced by complementary efforts such as software-enabled facilities under technology centre programmes for electronics and esdm sectors digital quality and process parameters under zed certification and software-focused modules within entrepreneurship and skill development programmes. together these measures aim to standardize workflows automate inventory finance and customer management reduce operational inefficiencies and inventory holding support online sales and compliance and enhance overall competitiveness without requiring heavy upfront investment in hardware. examples of grievances include subsidy denial an msme implementing a cloud-based erp costing <NUM> . <NUM> lakh receives no reimbursement beyond the <NUM> lakh cap despite meeting all eligibility conditions. software ineligibility a cloud application selected after needs assessment is later rejected as non-standard or non-approved forcing the enterprise to abandon or restart implementation mid-way. inadequate training awareness workshops focus only on theoretical benefits of digitalization and fail to provide hands-on demonstrations or practical guidance on using erp or crm software. post-subsidy continuity issue after the two-year subsidized period ends steep renewal or subscription costs make the software unaffordable disrupting business operations. needs mismatch an msme assessed for crm requirements is instead provided accounting software limiting the usefulness of the digital intervention and affecting adoption outcomes.',
|
| 517 |
+
'Technology, Quality and Institutions. Related to NSIC. this category encompasses grievances related to the support and facilitation services provided by the national small industries corporation nsic to micro small and medium enterprises msmes . the scope of this category includes issues arising from the areas of raw material assistance market access and risk mitigation through guarantees. specifically it covers situations where approved raw material assistance is not released on time supplier coordination fails after nsic approval material supplied through nsic is delayed or does not meet specifications or documentation and regional office processes stall procurement. the category also captures failures in marketing support including - delayed or missing inclusion in tenders gem or psu vendor listings - late communication of bid opportunities - problems in nsic-sponsored exhibitions or buyer-connect programs additionally it includes issues related to performance and emd guarantees such as - delayed issuance - incorrect formats - non-renewal despite payment - rejection by psus - lack of response when guarantees are invoked these grievances typically result in missed orders blocked working capital contract delays or loss of business credibility and arise from execution coordination or service delivery breakdowns rather than policy interpretation. the category is further divided into the following subcategories <NUM> . corporate communication single point registration scheme and exhibition consortia and tender marketing <NUM> . internal audit and law recovery <NUM> . human resource <NUM> . vigilance law recovery <NUM> . international cooperation <NUM> . bank guarantee monitoring <NUM> . finance accounts <NUM> . national sc st hub <NUM> . chief vigilance officer <NUM> . contract procurement grievance officer <NUM> . digital services facilitation and training <NUM> .space marketing cell event management cell <NUM> .raw material assistance bank guarantee bill discounting bank tieup csr administration <NUM> .technology liaison officer for sc st pwd cmr <NUM> .epf trust superannuation pension trust <NUM> .center public information officers cpio <NUM> .company secretary',
|
| 518 |
+
]
|
| 519 |
+
embeddings = model.encode(sentences)
|
| 520 |
+
print(embeddings.shape)
|
| 521 |
+
# (3, 768)
|
| 522 |
+
|
| 523 |
+
# Get the similarity scores for the embeddings
|
| 524 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 525 |
+
print(similarities)
|
| 526 |
+
# tensor([[1.0000, 0.6265, 0.5981],
|
| 527 |
+
# [0.6265, 1.0000, 0.7013],
|
| 528 |
+
# [0.5981, 0.7013, 1.0000]])
|
| 529 |
+
```
|
| 530 |
+
|
| 531 |
+
<!--
|
| 532 |
+
### Direct Usage (Transformers)
|
| 533 |
+
|
| 534 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 535 |
+
|
| 536 |
+
</details>
|
| 537 |
+
-->
|
| 538 |
+
|
| 539 |
+
<!--
|
| 540 |
+
### Downstream Usage (Sentence Transformers)
|
| 541 |
+
|
| 542 |
+
You can finetune this model on your own dataset.
|
| 543 |
+
|
| 544 |
+
<details><summary>Click to expand</summary>
|
| 545 |
+
|
| 546 |
+
</details>
|
| 547 |
+
-->
|
| 548 |
+
|
| 549 |
+
<!--
|
| 550 |
+
### Out-of-Scope Use
|
| 551 |
+
|
| 552 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 553 |
+
-->
|
| 554 |
+
|
| 555 |
+
## Evaluation
|
| 556 |
+
|
| 557 |
+
### Metrics
|
| 558 |
+
|
| 559 |
+
#### Semantic Similarity
|
| 560 |
+
|
| 561 |
+
* Evaluated with [<code>EmbeddingSimilarityEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.EmbeddingSimilarityEvaluator)
|
| 562 |
+
|
| 563 |
+
| Metric | Value |
|
| 564 |
+
|:--------------------|:--------|
|
| 565 |
+
| pearson_cosine | nan |
|
| 566 |
+
| **spearman_cosine** | **nan** |
|
| 567 |
+
|
| 568 |
+
<!--
|
| 569 |
+
## Bias, Risks and Limitations
|
| 570 |
+
|
| 571 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 572 |
+
-->
|
| 573 |
+
|
| 574 |
+
<!--
|
| 575 |
+
### Recommendations
|
| 576 |
+
|
| 577 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 578 |
+
-->
|
| 579 |
+
|
| 580 |
+
## Training Details
|
| 581 |
+
|
| 582 |
+
### Training Dataset
|
| 583 |
+
|
| 584 |
+
#### Unnamed Dataset
|
| 585 |
+
|
| 586 |
+
* Size: 88 training samples
|
| 587 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
| 588 |
+
* Approximate statistics based on the first 88 samples:
|
| 589 |
+
| | sentence_0 | sentence_1 |
|
| 590 |
+
|:--------|:-------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
| 591 |
+
| type | string | string |
|
| 592 |
+
| details | <ul><li>min: 46 tokens</li><li>mean: 172.95 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 199 tokens</li><li>mean: 253.88 tokens</li><li>max: 256 tokens</li></ul> |
|
| 593 |
+
* Samples:
|
| 594 |
+
| sentence_0 | sentence_1 |
|
| 595 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------|
|
| 596 |
+
| <code>with due respect i md mafijul husen would like to intimate that when i trying to edit my existing udyam registration certificate <udyam_no> certificate of my enterprise viz. md mafijul husen then i failed to enter otp as my earlier mobile number <phone_no> has been changed and the given gmail id is also inactive. hence it is my request to change my mobile number so that i can edit my existing udyam registration <udyam_no> certificate. my pan no is <pan_no> and aadhaar number is <uam_no> . issue update of mobile number and gmail id for udyam registration certificate editing context the user is requesting an update of the mobile number and gmail id associated with the existing udyam registration certificate udyam-wb- <NUM> - <NUM> to facilitate editing of the certificate. details - udyam registration certificate no udyam-wb- <NUM> - <NUM> old mobile no <NUM> old gmail id inactive pan no <pan_no> aadhar no <NUM></code> | <code>UAM/Udyam Registration/Certificate related issues. Updation of Email ID/Mobile No. Linked to UDYAM Certificate. this category includes grievances related to updating or correcting the email id or mobile number associated with an existing udyam registration. contact details provided during registration are used for communication verification and authentication when accessing the enterprise profile on the portal. if these contact details become outdated incorrect or inaccessible the enterprise owner may face difficulty receiving otps accessing the portal or managing the registration information. common grievances under this category include requests to change the registered mobile number or email address because the original number is no longer active the sim card has been lost the email account is no longer accessible or the contact details were entered incorrectly during registration. some complaints arise when the registered contact details belong to an employee or consultant who is n...</code> |
|
| 597 |
+
| <code>we had applied for msme registration under the application number <udyam_no> m <NUM> on 22nd march <NUM> . after reviewing the status and considering our circumstances we kindly request that our case be transferred to the micro and small enterprises facilitation council msefc for further processing and resolution. we believe that the msefc councils intervention will help address any concerns or disputes that may have arisen regarding our application. we are hopeful that this request will be processed swiftly and in accordance with the necessary regulations. thank you for your attention to this matter. we look forward to your prompt assistance in facilitating this request. issue request for transfer of msme registration case to msefc context the user is requesting to transfer their msme registration case to the micro and small enterprises facilitation council msefc for further processing and resolution. details - application number udyam-dl- <NUM> - <NUM> m <NUM> application date 22nd m...</code> | <code>Technology, Quality and Institutions. Related to NI-MSME. this category encompasses grievances related to training capacity-building and certification programs administered by the national institute for micro small and medium enterprises ni-msme for micro small and medium enterprises msmes entrepreneurs and their employees. the scope of this category includes issues arising from the delivery of training programs such as repeatedly postponed schedules without prior notification inaccessible online training portals unclear eligibility criteria unavailable trainers insufficient mentoring outdated or non-practical course content additionally this category captures certification-related issues including delayed issuance of certificates certificates issued with incorrect details difficulty verifying certificates online failure to deliver certificates after course completion furthermore the category includes course enrollment and admission disputes such as unjustified rejection of enrollment ...</code> |
|
| 598 |
+
| <code>insurancy company national insurance company limited branch name of insurance company branch if other khamgaon branch date of application <NUM> - <NUM> - <NUM> policy number <NUM> my claim is kept pending even after submitting all the documents after changing all the requirements as changed by various surveyors. issue delayed insurance claim under national insurance company limited context the user is reporting that the insurance claim submitted on <NUM> - <NUM> - <NUM> with policy number <NUM> is still pending despite submission of all required documents as per changes made by various surveyors. details - policy number <NUM> claim submission date <NUM> - <NUM> - <NUM> branch khamgaon</code> | <code>Starter, Credit and Finance. Insurance Claim related issues. this category encompasses grievances related to insurance claims associated with various government-backed and private insurance products. the scope includes <NUM> . esic employees state insurance corporation insurance benefits <NUM> . epfo employees provident fund organisation -linked insurance benefits including edli employees deposit linked insurance <NUM> . cgtmse credit guarantee fund trust for micro and small enterprises -linked insurance elements <NUM> . private or general business insurance products where a government department psu public sector undertaking or bank acts as an intermediary or implementing authority the category covers a range of issues including opaque rejection decisions undocumented policy exclusions administrative closure without explanation shifting of risk and liability onto msmes micro small and medium enterprises or employees document and data mismatches across multiple systems such as aadhaar ...</code> |
|
| 599 |
+
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
|
| 600 |
+
```json
|
| 601 |
+
{
|
| 602 |
+
"scale": 20.0,
|
| 603 |
+
"similarity_fct": "cos_sim",
|
| 604 |
+
"mini_batch_size": 32,
|
| 605 |
+
"gather_across_devices": false
|
| 606 |
+
}
|
| 607 |
+
```
|
| 608 |
+
|
| 609 |
+
### Training Hyperparameters
|
| 610 |
+
#### Non-Default Hyperparameters
|
| 611 |
+
|
| 612 |
+
- `per_device_train_batch_size`: 64
|
| 613 |
+
- `per_device_eval_batch_size`: 64
|
| 614 |
+
- `num_train_epochs`: 6
|
| 615 |
+
- `fp16`: True
|
| 616 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 617 |
+
|
| 618 |
+
#### All Hyperparameters
|
| 619 |
+
<details><summary>Click to expand</summary>
|
| 620 |
+
|
| 621 |
+
- `do_predict`: False
|
| 622 |
+
- `eval_strategy`: no
|
| 623 |
+
- `prediction_loss_only`: True
|
| 624 |
+
- `per_device_train_batch_size`: 64
|
| 625 |
+
- `per_device_eval_batch_size`: 64
|
| 626 |
+
- `gradient_accumulation_steps`: 1
|
| 627 |
+
- `eval_accumulation_steps`: None
|
| 628 |
+
- `torch_empty_cache_steps`: None
|
| 629 |
+
- `learning_rate`: 5e-05
|
| 630 |
+
- `weight_decay`: 0.0
|
| 631 |
+
- `adam_beta1`: 0.9
|
| 632 |
+
- `adam_beta2`: 0.999
|
| 633 |
+
- `adam_epsilon`: 1e-08
|
| 634 |
+
- `max_grad_norm`: 1
|
| 635 |
+
- `num_train_epochs`: 6
|
| 636 |
+
- `max_steps`: -1
|
| 637 |
+
- `lr_scheduler_type`: linear
|
| 638 |
+
- `lr_scheduler_kwargs`: None
|
| 639 |
+
- `warmup_ratio`: None
|
| 640 |
+
- `warmup_steps`: 0
|
| 641 |
+
- `log_level`: passive
|
| 642 |
+
- `log_level_replica`: warning
|
| 643 |
+
- `log_on_each_node`: True
|
| 644 |
+
- `logging_nan_inf_filter`: True
|
| 645 |
+
- `enable_jit_checkpoint`: False
|
| 646 |
+
- `save_on_each_node`: False
|
| 647 |
+
- `save_only_model`: False
|
| 648 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 649 |
+
- `use_cpu`: False
|
| 650 |
+
- `seed`: 42
|
| 651 |
+
- `data_seed`: None
|
| 652 |
+
- `bf16`: False
|
| 653 |
+
- `fp16`: True
|
| 654 |
+
- `bf16_full_eval`: False
|
| 655 |
+
- `fp16_full_eval`: False
|
| 656 |
+
- `tf32`: None
|
| 657 |
+
- `local_rank`: -1
|
| 658 |
+
- `ddp_backend`: None
|
| 659 |
+
- `debug`: []
|
| 660 |
+
- `dataloader_drop_last`: False
|
| 661 |
+
- `dataloader_num_workers`: 0
|
| 662 |
+
- `dataloader_prefetch_factor`: None
|
| 663 |
+
- `disable_tqdm`: False
|
| 664 |
+
- `remove_unused_columns`: True
|
| 665 |
+
- `label_names`: None
|
| 666 |
+
- `load_best_model_at_end`: False
|
| 667 |
+
- `ignore_data_skip`: False
|
| 668 |
+
- `fsdp`: []
|
| 669 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 670 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 671 |
+
- `parallelism_config`: None
|
| 672 |
+
- `deepspeed`: None
|
| 673 |
+
- `label_smoothing_factor`: 0.0
|
| 674 |
+
- `optim`: adamw_torch_fused
|
| 675 |
+
- `optim_args`: None
|
| 676 |
+
- `group_by_length`: False
|
| 677 |
+
- `length_column_name`: length
|
| 678 |
+
- `project`: huggingface
|
| 679 |
+
- `trackio_space_id`: trackio
|
| 680 |
+
- `ddp_find_unused_parameters`: None
|
| 681 |
+
- `ddp_bucket_cap_mb`: None
|
| 682 |
+
- `ddp_broadcast_buffers`: False
|
| 683 |
+
- `dataloader_pin_memory`: True
|
| 684 |
+
- `dataloader_persistent_workers`: False
|
| 685 |
+
- `skip_memory_metrics`: True
|
| 686 |
+
- `push_to_hub`: False
|
| 687 |
+
- `resume_from_checkpoint`: None
|
| 688 |
+
- `hub_model_id`: None
|
| 689 |
+
- `hub_strategy`: every_save
|
| 690 |
+
- `hub_private_repo`: None
|
| 691 |
+
- `hub_always_push`: False
|
| 692 |
+
- `hub_revision`: None
|
| 693 |
+
- `gradient_checkpointing`: False
|
| 694 |
+
- `gradient_checkpointing_kwargs`: None
|
| 695 |
+
- `include_for_metrics`: []
|
| 696 |
+
- `eval_do_concat_batches`: True
|
| 697 |
+
- `auto_find_batch_size`: False
|
| 698 |
+
- `full_determinism`: False
|
| 699 |
+
- `ddp_timeout`: 1800
|
| 700 |
+
- `torch_compile`: False
|
| 701 |
+
- `torch_compile_backend`: None
|
| 702 |
+
- `torch_compile_mode`: None
|
| 703 |
+
- `include_num_input_tokens_seen`: no
|
| 704 |
+
- `neftune_noise_alpha`: None
|
| 705 |
+
- `optim_target_modules`: None
|
| 706 |
+
- `batch_eval_metrics`: False
|
| 707 |
+
- `eval_on_start`: False
|
| 708 |
+
- `use_liger_kernel`: False
|
| 709 |
+
- `liger_kernel_config`: None
|
| 710 |
+
- `eval_use_gather_object`: False
|
| 711 |
+
- `average_tokens_across_devices`: True
|
| 712 |
+
- `use_cache`: False
|
| 713 |
+
- `prompts`: None
|
| 714 |
+
- `batch_sampler`: batch_sampler
|
| 715 |
+
- `multi_dataset_batch_sampler`: round_robin
|
| 716 |
+
- `router_mapping`: {}
|
| 717 |
+
- `learning_rate_mapping`: {}
|
| 718 |
+
|
| 719 |
+
</details>
|
| 720 |
+
|
| 721 |
+
### Training Logs
|
| 722 |
+
| Epoch | Step | spearman_cosine |
|
| 723 |
+
|:-----:|:----:|:---------------:|
|
| 724 |
+
| 1.0 | 2 | nan |
|
| 725 |
+
| 2.0 | 4 | nan |
|
| 726 |
+
| 3.0 | 6 | nan |
|
| 727 |
+
| 4.0 | 8 | nan |
|
| 728 |
+
| 5.0 | 10 | nan |
|
| 729 |
+
| 6.0 | 12 | nan |
|
| 730 |
+
|
| 731 |
+
|
| 732 |
+
### Framework Versions
|
| 733 |
+
- Python: 3.12.12
|
| 734 |
+
- Sentence Transformers: 5.2.3
|
| 735 |
+
- Transformers: 5.0.0
|
| 736 |
+
- PyTorch: 2.10.0+cu128
|
| 737 |
+
- Accelerate: 1.12.0
|
| 738 |
+
- Datasets: 4.0.0
|
| 739 |
+
- Tokenizers: 0.22.2
|
| 740 |
+
|
| 741 |
+
## Citation
|
| 742 |
+
|
| 743 |
+
### BibTeX
|
| 744 |
+
|
| 745 |
+
#### Sentence Transformers
|
| 746 |
+
```bibtex
|
| 747 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 748 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 749 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 750 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 751 |
+
month = "11",
|
| 752 |
+
year = "2019",
|
| 753 |
+
publisher = "Association for Computational Linguistics",
|
| 754 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 755 |
+
}
|
| 756 |
+
```
|
| 757 |
+
|
| 758 |
+
#### CachedMultipleNegativesRankingLoss
|
| 759 |
+
```bibtex
|
| 760 |
+
@misc{gao2021scaling,
|
| 761 |
+
title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
|
| 762 |
+
author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
|
| 763 |
+
year={2021},
|
| 764 |
+
eprint={2101.06983},
|
| 765 |
+
archivePrefix={arXiv},
|
| 766 |
+
primaryClass={cs.LG}
|
| 767 |
+
}
|
| 768 |
+
```
|
| 769 |
+
|
| 770 |
+
<!--
|
| 771 |
+
## Glossary
|
| 772 |
+
|
| 773 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 774 |
+
-->
|
| 775 |
+
|
| 776 |
+
<!--
|
| 777 |
+
## Model Card Authors
|
| 778 |
+
|
| 779 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 780 |
+
-->
|
| 781 |
+
|
| 782 |
+
<!--
|
| 783 |
+
## Model Card Contact
|
| 784 |
+
|
| 785 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 786 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_cross_attention": false,
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertModel"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"bos_token_id": null,
|
| 8 |
+
"classifier_dropout": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": null,
|
| 11 |
+
"gradient_checkpointing": false,
|
| 12 |
+
"hidden_act": "gelu",
|
| 13 |
+
"hidden_dropout_prob": 0.1,
|
| 14 |
+
"hidden_size": 768,
|
| 15 |
+
"id2label": {
|
| 16 |
+
"0": "LABEL_0"
|
| 17 |
+
},
|
| 18 |
+
"initializer_range": 0.02,
|
| 19 |
+
"intermediate_size": 3072,
|
| 20 |
+
"is_decoder": false,
|
| 21 |
+
"label2id": {
|
| 22 |
+
"LABEL_0": 0
|
| 23 |
+
},
|
| 24 |
+
"layer_norm_eps": 1e-12,
|
| 25 |
+
"max_position_embeddings": 512,
|
| 26 |
+
"model_type": "bert",
|
| 27 |
+
"num_attention_heads": 12,
|
| 28 |
+
"num_hidden_layers": 12,
|
| 29 |
+
"pad_token_id": 0,
|
| 30 |
+
"position_embedding_type": "absolute",
|
| 31 |
+
"tie_word_embeddings": true,
|
| 32 |
+
"transformers_version": "5.0.0",
|
| 33 |
+
"type_vocab_size": 2,
|
| 34 |
+
"use_cache": true,
|
| 35 |
+
"vocab_size": 30522
|
| 36 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"sentence_transformers": "5.2.3",
|
| 4 |
+
"transformers": "5.0.0",
|
| 5 |
+
"pytorch": "2.10.0+cu128"
|
| 6 |
+
},
|
| 7 |
+
"model_type": "SentenceTransformer",
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b781a1937a165255a16f6ff544c763a6c941d0403fa67d5f6915c59d5ebb26b
|
| 3 |
+
size 437951304
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.models.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 256,
|
| 3 |
+
"do_lower_case": true
|
| 4 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"clean_up_tokenization_spaces": true,
|
| 4 |
+
"cls_token": "[CLS]",
|
| 5 |
+
"do_basic_tokenize": true,
|
| 6 |
+
"do_lower_case": true,
|
| 7 |
+
"is_local": false,
|
| 8 |
+
"mask_token": "[MASK]",
|
| 9 |
+
"model_max_length": 512,
|
| 10 |
+
"never_split": null,
|
| 11 |
+
"pad_token": "[PAD]",
|
| 12 |
+
"sep_token": "[SEP]",
|
| 13 |
+
"strip_accents": null,
|
| 14 |
+
"tokenize_chinese_chars": true,
|
| 15 |
+
"tokenizer_class": "BertTokenizer",
|
| 16 |
+
"unk_token": "[UNK]"
|
| 17 |
+
}
|