| --- |
| tags: |
| - sentence-transformers |
| - sentence-similarity |
| - feature-extraction |
| - dense |
| - generated_from_trainer |
| - dataset_size:277492 |
| - loss:CachedMultipleNegativesRankingLoss |
| base_model: benjamintli/modernbert-code-v3-hard-negatives |
| widget: |
| - source_sentence: '// Uint is a helper routine that allocates a new uint value to |
| store v and |
| |
| // returns a pointer to it. This is useful when assigning optional parameters.' |
| sentences: |
| - "func (c *Animation) GetCurrentTimeWithParams(v *AnimationGetCurrentTimeParams)\ |
| \ (float64, error) {\n\tresp, err := gcdmessage.SendCustomReturn(c.target, c.target.GetSendCh(),\ |
| \ &gcdmessage.ParamRequest{Id: c.target.GetId(), Method: \"Animation.getCurrentTime\"\ |
| , Params: v})\n\tif err != nil {\n\t\treturn 0, err\n\t}\n\n\tvar chromeData struct\ |
| \ {\n\t\tResult struct {\n\t\t\tCurrentTime float64\n\t\t}\n\t}\n\n\tif resp ==\ |
| \ nil {\n\t\treturn 0, &gcdmessage.ChromeEmptyResponseErr{}\n\t}\n\n\t// test\ |
| \ if error first\n\tcerr := &gcdmessage.ChromeErrorResponse{}\n\tjson.Unmarshal(resp.Data,\ |
| \ cerr)\n\tif cerr != nil && cerr.Error != nil {\n\t\treturn 0, &gcdmessage.ChromeRequestErr{Resp:\ |
| \ cerr}\n\t}\n\n\tif err := json.Unmarshal(resp.Data, &chromeData); err != nil\ |
| \ {\n\t\treturn 0, err\n\t}\n\n\treturn chromeData.Result.CurrentTime, nil\n}" |
| - "func Uint(v uint) *uint {\n\tp := new(uint)\n\t*p = v\n\treturn p\n}" |
| - "def after_init_app(self, app: FlaskUnchained):\n \"\"\"\n Configure\ |
| \ the JSON encoder for Flask to be able to serialize Enums,\n LocalProxy\ |
| \ objects, and SQLAlchemy models.\n \"\"\"\n self.set_json_encoder(app)\n\ |
| \ app.before_first_request(self.register_model_resources)" |
| - source_sentence: 'Returns a template for the parent of this template. |
| |
| |
| @throws ValidationException if the template has no parent.' |
| sentences: |
| - "func BodyContainsOr(values ...string) ResponseCondition {\n\treturn func(res\ |
| \ *http.Response) error {\n\t\tbody, err := ioutil.ReadAll(res.Body)\n\t\tif err\ |
| \ != nil {\n\t\t\treturn fmt.Errorf(\"failed to read response body: %s\", err)\n\ |
| \t\t}\n\n\t\tfor _, value := range values {\n\t\t\tif strings.Contains(string(body),\ |
| \ value) {\n\t\t\t\treturn nil\n\t\t\t}\n\t\t}\n\t\treturn fmt.Errorf(\"could\ |
| \ not find '%v' in body '%s'\", values, string(body))\n\t}\n}" |
| - "protected function after_update($result) {\n global $DB;\n\n if\ |
| \ (!$result) {\n $this->beforeupdate = null;\n return;\n\ |
| \ }\n\n // The parent ID has changed, we need to fix all the paths\ |
| \ of the children.\n if ($this->beforeupdate->get('parentid') != $this->get('parentid'))\ |
| \ {\n $beforepath = $this->beforeupdate->get('path') . $this->get('id')\ |
| \ . '/';\n\n $like = $DB->sql_like('path', '?');\n $likesearch\ |
| \ = $DB->sql_like_escape($beforepath) . '%';\n\n $table = '{' . self::TABLE\ |
| \ . '}';\n $sql = \"UPDATE $table SET path = REPLACE(path, ?, ?) WHERE\ |
| \ \" . $like;\n $DB->execute($sql, array(\n $beforepath,\n\ |
| \ $this->get('path') . $this->get('id') . '/',\n \ |
| \ $likesearch\n ));\n\n // Resolving sortorder holes left\ |
| \ after changing parent.\n $table = '{' . self::TABLE . '}';\n \ |
| \ $sql = \"UPDATE $table SET sortorder = sortorder -1 \"\n \ |
| \ . \" WHERE competencyframeworkid = ? AND parentid = ? AND sortorder\ |
| \ > ?\";\n $DB->execute($sql, array($this->get('competencyframeworkid'),\n\ |
| \ $this->beforeupdate->get('parentid'),\n\ |
| \ $this->beforeupdate->get('sortorder')\n\ |
| \ ));\n }\n\n $this->beforeupdate\ |
| \ = null;\n }" |
| - "public PathTemplate parentTemplate() {\n int i = segments.size();\n Segment\ |
| \ seg = segments.get(--i);\n if (seg.kind() == SegmentKind.END_BINDING) {\n\ |
| \ while (i > 0 && segments.get(--i).kind() != SegmentKind.BINDING) {}\n \ |
| \ }\n if (i == 0) {\n throw new ValidationException(\"template does\ |
| \ not have a parent\");\n }\n return new PathTemplate(segments.subList(0,\ |
| \ i), urlEncoding);\n }" |
| - source_sentence: 'Build a potentially nested fieldgroup |
| |
| |
| @param mixed $valueOrGroup Value of item, or title of group |
| |
| @param string|array $titleOrOptions Title of item, or options in grouip |
| |
| @return ArrayData Data for this item' |
| sentences: |
| - "protected function getFieldOption($valueOrGroup, $titleOrOptions)\n {\n \ |
| \ // Return flat option\n if (!is_array($titleOrOptions)) {\n \ |
| \ return parent::getFieldOption($valueOrGroup, $titleOrOptions);\n \ |
| \ }\n\n // Build children from options list\n $options = new\ |
| \ ArrayList();\n foreach ($titleOrOptions as $childValue => $childTitle)\ |
| \ {\n $options->push($this->getFieldOption($childValue, $childTitle));\n\ |
| \ }\n\n return new ArrayData(array(\n 'Title' => $valueOrGroup,\n\ |
| \ 'Options' => $options\n ));\n }" |
| - "public static function minify($content, array $options = [])\n {\n \ |
| \ $min = preg_replace(['/[\\n\\r]/', '/\\>[^\\S ]+/s', '/[^\\S ]+\\</s', '/(\\\ |
| s)+/s', ], ['', '>', '<', '\\\\1'], trim($content));\n $min = str_replace(['>\ |
| \ <'], ['><'], $min);\n \n if (ArrayHelper::getValue($options, 'comments',\ |
| \ false)) {\n $min = preg_replace('/<!--(.*)-->/Uis', '', $min);\n\ |
| \ }\n \n return $min;\n }" |
| - "private function loadXInclude(XInclude $xinclude, $filePath){\n //load\ |
| \ DOMDocument\n $xml = new DOMDocument();\n $loadSuccess = $xml->load($filePath);\n\ |
| \ $node = $xml->documentElement;\n if($loadSuccess && !is_null($node)){\n\ |
| \ //parse the href content\n $parser = new ParserFactory($xml);\n\ |
| \ $parser->loadContainerStatic($node, $xinclude->getBody());\n \ |
| \ }else{\n throw new XIncludeException('Cannot load the XInclude\ |
| \ DOM XML', $xinclude);\n }\n }" |
| - source_sentence: "Check for new unread messages and send them to the custom api\n\ |
| \n @param client_id: ID of client user" |
| sentences: |
| - "public function getLatMap()\n {\n if (null === $this->latMap) {\n \ |
| \ $this->latMap = $this->getTransliterationMap(Settings::ALPHABET_LAT);\n\ |
| \ }\n\n return $this->latMap;\n }" |
| - "def check_new_messages(client_id):\n \"\"\"Check for new unread messages and\ |
| \ send them to the custom api\n\n @param client_id: ID of client user\n \ |
| \ \"\"\"\n # Return if driver is not defined or if whatsapp is not logged in.\n\ |
| \ # Stop the timer as well\n if client_id not in drivers or not drivers[client_id]\ |
| \ or not drivers[client_id].is_logged_in():\n timers[client_id].stop()\n\ |
| \ return\n\n # Acquire a lock on thread\n if not acquire_semaphore(client_id,\ |
| \ True):\n return\n\n try:\n # Get all unread messages\n \ |
| \ res = drivers[client_id].get_unread()\n # Mark all of them as seen\n\ |
| \ for message_group in res:\n message_group.chat.send_seen()\n\ |
| \ # Release thread lock\n release_semaphore(client_id)\n \ |
| \ # If we have new messages, do something with it\n if res:\n \ |
| \ print(res)\n except:\n pass\n finally:\n # Release lock\ |
| \ anyway, safekeeping\n release_semaphore(client_id)" |
| - "def get_uppermost_library_root_state(self):\n \"\"\"Find state_copy of\ |
| \ uppermost LibraryState\n\n Method checks if there is a parent library\ |
| \ root state and assigns it to be the current library root state till\n \ |
| \ there is no further parent library root state.\n \"\"\"\n\n library_root_state\ |
| \ = self.get_next_upper_library_root_state()\n parent_library_root_state\ |
| \ = library_root_state\n # initial a library root state has to be found\ |
| \ and if there is no further parent root state\n # parent_library_root_state\ |
| \ and library_root_state are no more identical\n while parent_library_root_state\ |
| \ and library_root_state is parent_library_root_state:\n if library_root_state:\n\ |
| \ parent_library_root_state = library_root_state.parent.get_next_upper_library_root_state()\n\ |
| \n if parent_library_root_state:\n library_root_state\ |
| \ = parent_library_root_state\n\n return library_root_state" |
| - source_sentence: If MultiTenantMiddleware is used, filter queryset by request.site_id |
| sentences: |
| - "def reduce_ticks(ax, which, maxticks=3):\n \"\"\"Given a pyplot axis, resamples\ |
| \ its `which`-axis ticks such that are at most\n `maxticks` left.\n\n Parameters\n\ |
| \ ----------\n ax : axis\n The axis to adjust.\n which : {'x'\ |
| \ | 'y'}\n Which axis to adjust.\n maxticks : {3, int}\n Maximum\ |
| \ number of ticks to use.\n\n Returns\n -------\n array\n An array\ |
| \ of the selected ticks.\n \"\"\"\n ticks = getattr(ax, 'get_{}ticks'.format(which))()\n\ |
| \ if len(ticks) > maxticks:\n # make sure the left/right value is not\ |
| \ at the edge\n minax, maxax = getattr(ax, 'get_{}lim'.format(which))()\n\ |
| \ dw = abs(maxax-minax)/10.\n start_idx, end_idx = 0, len(ticks)\n\ |
| \ if ticks[0] < minax + dw:\n start_idx += 1\n if ticks[-1]\ |
| \ > maxax - dw:\n end_idx -= 1\n # get reduction factor\n \ |
| \ fac = int(len(ticks) / maxticks)\n ticks = ticks[start_idx:end_idx:fac]\n\ |
| \ return ticks" |
| - "function (isPublic, name, data, ttl, published_at, coreid) {\n var rawFn\ |
| \ = function (msg) {\n try {\n msg.setMaxAge(parseInt((ttl\ |
| \ && (ttl >= 0)) ? ttl : 60));\n if (published_at) {\n \ |
| \ msg.setTimestamp(moment(published_at).toDate());\n \ |
| \ }\n }\n catch (ex) {\n logger.error(\"\ |
| onCoreHeard - \" + ex);\n }\n return msg;\n };\n\n\ |
| \ var msgName = (isPublic) ? \"PublicEvent\" : \"PrivateEvent\";\n \ |
| \ var userID = (this.userID || \"\").toLowerCase() + \"/\";\n name =\ |
| \ (name) ? name.toString() : name;\n if (name && name.indexOf && (name.indexOf(userID)\ |
| \ == 0)) {\n name = name.substring(userID.length);\n }\n\n \ |
| \ data = (data) ? data.toString() : data;\n this.sendNONTypeMessage(msgName,\ |
| \ { event_name: name, _raw: rawFn }, data);\n }" |
| - "def get_queryset(self):\n '''\n If MultiTenantMiddleware is used,\ |
| \ filter queryset by request.site_id\n '''\n queryset = super(PageList,\ |
| \ self).get_queryset()\n if hasattr(self.request, 'site_id'):\n \ |
| \ queryset = queryset.filter(site_id=self.request.site_id)\n return\ |
| \ queryset" |
| datasets: |
| - benjamintli/code-retrieval-hard-negatives-llm-verified-merged |
| - benjamintli/code-retrieval-combined-v2 |
| pipeline_tag: sentence-similarity |
| library_name: sentence-transformers |
| metrics: |
| - cosine_accuracy@1 |
| - cosine_accuracy@3 |
| - cosine_accuracy@5 |
| - cosine_accuracy@10 |
| - cosine_precision@1 |
| - cosine_precision@3 |
| - cosine_precision@5 |
| - cosine_precision@10 |
| - cosine_recall@1 |
| - cosine_recall@3 |
| - cosine_recall@5 |
| - cosine_recall@10 |
| - cosine_ndcg@10 |
| - cosine_mrr@10 |
| - cosine_map@100 |
| model-index: |
| - name: SentenceTransformer based on benjamintli/modernbert-code-v3-hard-negatives |
| results: |
| - task: |
| type: information-retrieval |
| name: Information Retrieval |
| dataset: |
| name: eval |
| type: eval |
| metrics: |
| - type: cosine_accuracy@1 |
| value: 0.8943333333333333 |
| name: Cosine Accuracy@1 |
| - type: cosine_accuracy@3 |
| value: 0.943 |
| name: Cosine Accuracy@3 |
| - type: cosine_accuracy@5 |
| value: 0.963 |
| name: Cosine Accuracy@5 |
| - type: cosine_accuracy@10 |
| value: 0.976 |
| name: Cosine Accuracy@10 |
| - type: cosine_precision@1 |
| value: 0.8943333333333333 |
| name: Cosine Precision@1 |
| - type: cosine_precision@3 |
| value: 0.31433333333333335 |
| name: Cosine Precision@3 |
| - type: cosine_precision@5 |
| value: 0.1926 |
| name: Cosine Precision@5 |
| - type: cosine_precision@10 |
| value: 0.0976 |
| name: Cosine Precision@10 |
| - type: cosine_recall@1 |
| value: 0.8943333333333333 |
| name: Cosine Recall@1 |
| - type: cosine_recall@3 |
| value: 0.943 |
| name: Cosine Recall@3 |
| - type: cosine_recall@5 |
| value: 0.963 |
| name: Cosine Recall@5 |
| - type: cosine_recall@10 |
| value: 0.976 |
| name: Cosine Recall@10 |
| - type: cosine_ndcg@10 |
| value: 0.9359015737200269 |
| name: Cosine Ndcg@10 |
| - type: cosine_mrr@10 |
| value: 0.9229293650793654 |
| name: Cosine Mrr@10 |
| - type: cosine_map@100 |
| value: 0.9239732035430454 |
| name: Cosine Map@100 |
| --- |
| |
| # SentenceTransformer based on benjamintli/modernbert-code-v3-hard-negatives |
|
|
| This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [benjamintli/modernbert-code-v3-hard-negatives](https://huggingface.co/benjamintli/modernbert-code-v3-hard-negatives) on the [code-retrieval-hard-negatives-llm-verified-merged](https://huggingface.co/datasets/benjamintli/code-retrieval-hard-negatives-llm-verified-merged) dataset. It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. |
|
|
| ## Model Details |
|
|
| ### Model Description |
| - **Model Type:** Sentence Transformer |
| - **Base model:** [benjamintli/modernbert-code-v3-hard-negatives](https://huggingface.co/benjamintli/modernbert-code-v3-hard-negatives) <!-- at revision b98be847a673e86584e0ae7adaf8946afd62e3b0 --> |
| - **Maximum Sequence Length:** 1024 tokens |
| - **Output Dimensionality:** 768 dimensions |
| - **Similarity Function:** Cosine Similarity |
| - **Training Dataset:** |
| - [code-retrieval-hard-negatives-llm-verified-merged](https://huggingface.co/datasets/benjamintli/code-retrieval-hard-negatives-llm-verified-merged) |
| <!-- - **Language:** Unknown --> |
| <!-- - **License:** Unknown --> |
|
|
| ### Model Sources |
|
|
| - **Documentation:** [Sentence Transformers Documentation](https://sbert.net) |
| - **Repository:** [Sentence Transformers on GitHub](https://github.com/huggingface/sentence-transformers) |
| - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) |
|
|
| ### Full Model Architecture |
|
|
| ``` |
| SentenceTransformer( |
| (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False, 'architecture': 'OptimizedModule'}) |
| (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
| ) |
| ``` |
|
|
| ## Usage |
|
|
| ### Direct Usage (Sentence Transformers) |
|
|
| First install the Sentence Transformers library: |
|
|
| ```bash |
| pip install -U sentence-transformers |
| ``` |
|
|
| Then you can load this model and run inference. |
| ```python |
| from sentence_transformers import SentenceTransformer |
| |
| # Download from the 🤗 Hub (use the full "<namespace>/<model-name>" repo ID, or a local path to the model) |
| model = SentenceTransformer("modernbert-code-v4-hard-negatives") |
| # Run inference |
| queries = [ |
| "If MultiTenantMiddleware is used, filter queryset by request.site_id", |
| ] |
| documents = [ |
| "def get_queryset(self):\n '''\n If MultiTenantMiddleware is used, filter queryset by request.site_id\n '''\n queryset = super(PageList, self).get_queryset()\n if hasattr(self.request, 'site_id'):\n queryset = queryset.filter(site_id=self.request.site_id)\n return queryset", |
| 'def reduce_ticks(ax, which, maxticks=3):\n """Given a pyplot axis, resamples its `which`-axis ticks such that are at most\n `maxticks` left.\n\n Parameters\n ----------\n ax : axis\n The axis to adjust.\n which : {\'x\' | \'y\'}\n Which axis to adjust.\n maxticks : {3, int}\n Maximum number of ticks to use.\n\n Returns\n -------\n array\n An array of the selected ticks.\n """\n ticks = getattr(ax, \'get_{}ticks\'.format(which))()\n if len(ticks) > maxticks:\n # make sure the left/right value is not at the edge\n minax, maxax = getattr(ax, \'get_{}lim\'.format(which))()\n dw = abs(maxax-minax)/10.\n start_idx, end_idx = 0, len(ticks)\n if ticks[0] < minax + dw:\n start_idx += 1\n if ticks[-1] > maxax - dw:\n end_idx -= 1\n # get reduction factor\n fac = int(len(ticks) / maxticks)\n ticks = ticks[start_idx:end_idx:fac]\n return ticks', |
| 'function (isPublic, name, data, ttl, published_at, coreid) {\n var rawFn = function (msg) {\n try {\n msg.setMaxAge(parseInt((ttl && (ttl >= 0)) ? ttl : 60));\n if (published_at) {\n msg.setTimestamp(moment(published_at).toDate());\n }\n }\n catch (ex) {\n logger.error("onCoreHeard - " + ex);\n }\n return msg;\n };\n\n var msgName = (isPublic) ? "PublicEvent" : "PrivateEvent";\n var userID = (this.userID || "").toLowerCase() + "/";\n name = (name) ? name.toString() : name;\n if (name && name.indexOf && (name.indexOf(userID) == 0)) {\n name = name.substring(userID.length);\n }\n\n data = (data) ? data.toString() : data;\n this.sendNONTypeMessage(msgName, { event_name: name, _raw: rawFn }, data);\n }', |
| ] |
| query_embeddings = model.encode_query(queries) |
| document_embeddings = model.encode_document(documents) |
| print(query_embeddings.shape, document_embeddings.shape) |
| # (1, 768) (3, 768) |
| |
| # Get the similarity scores for the embeddings |
| similarities = model.similarity(query_embeddings, document_embeddings) |
| print(similarities) |
| # tensor([[ 0.8836, -0.0275, 0.0176]]) |
| ``` |
|
|
| <!-- |
| ### Direct Usage (Transformers) |
|
|
| <details><summary>Click to see the direct usage in Transformers</summary> |
|
|
| </details> |
| --> |
|
|
| <!-- |
| ### Downstream Usage (Sentence Transformers) |
|
|
| You can finetune this model on your own dataset. |
|
|
| <details><summary>Click to expand</summary> |
|
|
| </details> |
| --> |
|
|
| <!-- |
| ### Out-of-Scope Use |
|
|
| *List how the model may foreseeably be misused and address what users ought not to do with the model.* |
| --> |
|
|
| ## Evaluation |
|
|
| ### Metrics |
|
|
| #### Information Retrieval |
|
|
| * Dataset: `eval` |
| * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) |
|
|
| | Metric | Value | |
| |:--------------------|:-----------| |
| | cosine_accuracy@1 | 0.8943 | |
| | cosine_accuracy@3 | 0.943 | |
| | cosine_accuracy@5 | 0.963 | |
| | cosine_accuracy@10 | 0.976 | |
| | cosine_precision@1 | 0.8943 | |
| | cosine_precision@3 | 0.3143 | |
| | cosine_precision@5 | 0.1926 | |
| | cosine_precision@10 | 0.0976 | |
| | cosine_recall@1 | 0.8943 | |
| | cosine_recall@3 | 0.943 | |
| | cosine_recall@5 | 0.963 | |
| | cosine_recall@10 | 0.976 | |
| | **cosine_ndcg@10** | **0.9359** | |
| | cosine_mrr@10 | 0.9229 | |
| | cosine_map@100 | 0.924 | |
| |
| <!-- |
| ## Bias, Risks and Limitations |
| |
| *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.* |
| --> |
| |
| <!-- |
| ### Recommendations |
| |
| *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.* |
| --> |
| |
| ## Training Details |
| |
| ### Training Dataset |
| |
| #### code-retrieval-hard-negatives-llm-verified-merged |
| |
| * Dataset: [code-retrieval-hard-negatives-llm-verified-merged](https://huggingface.co/datasets/benjamintli/code-retrieval-hard-negatives-llm-verified-merged) at [459ec4b](https://huggingface.co/datasets/benjamintli/code-retrieval-hard-negatives-llm-verified-merged/tree/459ec4ba07a32a325d73b065fdd40cb017cb9aea) |
| * Size: 277,492 training samples |
| * Columns: <code>query</code>, <code>positive</code>, <code>negative_0</code>, <code>negative_1</code>, <code>negative_2</code>, <code>negative_3</code>, <code>negative_4</code>, and <code>negative_5</code> |
| * Approximate statistics based on the first 1000 samples: |
| | | query | positive | negative_0 | negative_1 | negative_2 | negative_3 | negative_4 | negative_5 | |
| |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------| |
| | type | string | string | string | string | string | string | string | string | |
| | details | <ul><li>min: 6 tokens</li><li>mean: 298.97 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 9 tokens</li><li>mean: 191.06 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 215.07 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 214.94 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 16 tokens</li><li>mean: 215.65 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 215.93 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 219.31 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 219.5 tokens</li><li>max: 1024 tokens</li></ul> | |
| * Samples: |
| | query | positive | negative_0 | negative_1 | negative_2 | negative_3 | negative_4 | negative_5 | |
| |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |
| | <code> A valid parentheses sequence is a non-empty string where each character is either '(' or ')', which satisfies the following constraint:<br><br>You can find a way to repeat erasing adjacent pairs of parentheses '()' until it becomes empty.<br><br> For example, '(())' and '()((()()))' are valid parentheses sequences, but ')()(' and '(()' are not.<br><br> Mike has a valid parentheses sequence. He really likes everything about his sequence, except the fact that it is quite long. So Mike has recently decided that he will replace his parentheses sequence with a new one in the near future. But not every valid parentheses sequence will satisfy him. To help you understand his requirements we'll introduce the pseudocode of function F(S):<br><br> FUNCTION F( S - a valid parentheses sequence )<br> BEGIN<br> balance = 0<br> max_balance = 0<br> FOR index FROM 1 TO LENGTH(S)<br> BEGIN<br> if S[index] == '(' then balance = balance + 1<br> if S[index] == ')' then balance = balance - 1<br> max_balance = max( max_balance, balance )<br> END<br> ...</code> | <code>try:<br> for i in range(int(input())):<br> s=input()<br> balance=0<br> max_balance=0<br> for i in s:<br> if i=='(':balance+=1<br> else:<br> balance-=1<br> max_balance=max(max_balance,balance)<br> print('('*max_balance,')'*max_balance,sep="")<br>except Exception as e:<br> print(e)<br> </code> | <code>t=int(input())<br><br>for tt in range(t):<br> a,b,p=map(int,input().split())<br> s=input()<br> n=len(s)<br> cost = [0]*n<br> cost[-1] = 0<br> typ = ''<br> i=n-2<br> while i>=0:<br> if s[i]==typ:<br> cost[i] = cost[i+1]<br> else:<br> typ = s[i]<br> cost[i] = cost[i+1] + (a if typ=='A' else b)<br> i-=1<br> i=0<br> while cost[i] > p:<br> i+=1<br> print(i+1)</code> | <code>test=int(input())<br>for i in range(test):<br> s=input()<br> b=len(s)<br> list1=[]<br> for j in range(len(s)):<br> if s[j]=='.':<br> list1.append(j)<br> for i in list1:<br> if b-i-1 in list1 :<br> if i!=b-i-1 and ((s[i] and s[b-i-1]) != 'a' ):<br> 
s=s[:i]+'a'+s[i+1:b-i-1]+'a'+s[b-i:]<br> else:<br> s=s[:i]+'a'+s[i+1:]<br> else:<br> s=s[:i]+s[b-i-1]+s[i+1:]<br><br> if s==s[::-1]:<br> print(s)<br> else:<br> print(-1)<br><br> </code> | <code>from collections import Counter
<br>def solve(A,B):
<br> a = Counter(A)
<br> b = Counter(B)
<br> ans = 0
<br> for i in a:
<br> if i in b:
<br> ans += min(a[i],b[i])
<br>
<br> return ans
<br>
<br>
<br>t = int(input())
<br>
<br>for _ in range(t):
<br> A = input()
<br> B = input()
<br> print(solve(A,B))</code> | <code>l=list(map(int,input()))<br>t=-1<br>x=-1<br>y=-1<br>for i in range(len(l)):<br> s=l[i]<br> a=i+1<br> b=i+1<br> for j in range(i+1,len(l)):<br> if l[i]<l[j]:<br> s=s+l[j]<br> b=j+1<br> else:<br> break<br> if s>t:<br> t=s<br> x=a<br> y=b<br>print(t,end=":")<br>print(x,y,sep="-")</code> | <code>t=eval(input())<br> <br>a=[]<br>b=[]<br> <br>top=-1<br> <br>for __ in range(0,t):<br> <br> x=input().split()<br> <br> if(x[0]!="-1" and x[0]!="0"):<br> <br> add=int(x[0])<br> <br> if top!=-1 and add>a[top][0] :<br> <br> b[top]+=1<br> <br> else:<br> a.append((add,x[1]))<br> <br> b.append(0)<br> top+=1<br> <br> <br> elif (x[0]=="-1"):<br> <br> #print("%s %s" %(b[top],a[top][1]))<br> print((b[top]), end=' ')<br> print(a[top][1])<br> foo=a.pop()<br> bar=b.pop()<br> top-=1</code> | <code>t=eval(input())<br> <br>a=[]<br>b=[]<br> <br>top=-1<br> <br>for __ in range(0,t):<br> <br> x=input().split()<br> <br> if(x[0]!="-1" and x[0]!="0"):<br> <br> add=int(x[0])<br> <br> if top!=-1 and add>a[top][0] :<br> <br> b[top]+=1<br> <br> else:<br> a.append((add,x[1]))<br> <br> b.append(0)<br> top+=1<br> <br> <br> elif (x[0]=="-1"):<br> <br> #print("%s %s" %(b[top],a[top][1]))<br> print((b[top]), end=' ')<br> print(a[top][1])<br> foo=a.pop()<br> bar=b.pop()<br> top-=1</code> | |
| | <code> Chef has a cubic die with 6 faces kept on an infinite plane. Each face has a distinct integer in the range [1,6] written on it, but the exact arrangement of the numbers on the faces of the die is unknown to Chef. Curiosity gets the better of Chef and he wants to find out o(1), o(2), ..., o(6), where o(i) is the number written opposite to the number i.<br><br> Chef performs the following N-1 steps to learn the exact arrangement of the numbers on the die. In the i-th step, Chef pushes the die in some direction (there are 4 possible directions), and the die rolls 90o in this direction. The picture below demonstrates a die and the result that it produced after rolling in each of the 4 directions respectively. For this die, we have o(1)=4, o(2)=5, o(3)=6, o(4)=1, o(5)=2, o(6)=3.<br><br> Chef records N numbers A1, A2, ..., AN, where Ai is the number written on the top of the die before the i-th step. However, the information on the direction in which he pushes the die each time are lost. 
Can you help h...</code> | <code>from itertools import permutations<br><br>def solve(n,a):<br> ans=[]<br> <br> for des in desire:<br> check=1<br> for i in range(n-1):<br> <br> if (a[i]==a[i+1]):<br> return [-1]<br> if a[i+1]==des[a[i]-1]:<br> check=0<br> break<br> if check:<br> ans=des<br> break<br> if ans:<br> return ans<br> return [-1]<br> <br><br>per=permutations([1,2,3,4,5,6])<br>desire=[]<br>for p in per:<br> check=1<br> for i in range(1,7):<br> if p[i-1]==i:<br> check=0<br> break<br> if check:<br> doublecheck=1<br> for i in range(6):<br> if p[p[i]-1]!=i+1:<br> doublecheck=0<br> break<br> if doublecheck:<br> desire.append(p)<br>#print(desire)<br>for _ in range(int(input())):<br> <br> n=int(input())<br> a=list(map(int,input().split( )))<br> print(*solve(n,a))</code> | <code>def solve():<br> n = int(input())<br> lst = list(map(int,input().split()))<br> if sum(lst) <= n // 2:<br> print(n//2)<br> print("0 " * (n // 2))<br> else:<br> print(n//2 + (n // 2) % 2)<br> print("1 " * (n//2 + (n // 2) % 2))<br>for i in range(int(input())):<br> solve()</code> | <code>import sys<br>input = lambda: sys.stdin.readline().rstrip()<br><br>T = int(input())<br>for _ in range(T):<br> N = int(input())<br> A = [int(a) for a in input().split()]<br> <br> if max(A) == min(A):<br> print(1)<br> print(*([1] * N))<br> elif N % 2 == 0:<br> print(2)<br> print(*([1, 2] * (N // 2)))<br> else:<br> for i in range(N):<br> if A[i-1] == A[i]:<br> print(2)<br> print(*(([1, 2] * N)[:i][::-1] + ([1, 2] * N)[:N-i]))<br> break<br> else:<br> print(3)<br> print(*([3] + [1, 2] * (N // 2)))<br><br></code> | <code>import numpy as np<br><br>N=10**6+1<br>t=eval(input())<br>inp = ()<br><br>t1=ord('z')<br>#bag=[[0 for _ in xrange(t1)] for _ in xrange(N+1)]<br>bag=np.zeros((N+1,t1),dtype=np.int)<br>#print bag<br>while t:<br> t-=1<br> inp=input().split()<br> t2=ord(inp[3]) - ord('a')<br> t3=int(inp[1])<br> t4=int(inp[2]) + 1<br> if inp[0]=="1":<br> #print "enter"<br> bag[t3][t2]+=int(inp[2])<br><br><br> if 
inp[0]=="2":<br> sum=0<br> for i in range(t3,t4):<br> sum+=bag[i][t2]<br> print(sum)<br><br>#<br># for j in range(ord('z')-ord('a')):<br># for i in range(N+1):<br># if bag[i][j]!=0:<br># print bag[i][j] ,i,j<br><br><br><br></code> | <code># from math import log2<br># N = 10000<br># for i in range(1,N):<br># # print(i)<br># for m in range(i):<br># if( (m^(m+1))==i ):<br># print(i)<br># print(m,m+1,bin(m)[2:])<br># print()<br># break<br># # else:<br># # print(-1)<br># # print()<br>T = int(input())<br>ans = []<br><br>for _ in range(T):<br> N = int(input())<br><br> # x = log2(N+1)<br> if(N==1):<br> ans.append(2)<br> elif('0' not in bin(N)[2:]):<br> ans.append(N//2)<br> else:<br> ans.append(-1)<br><br>for i in ans:<br> print(i)</code> | <code># from math import log2<br># N = 10000<br># for i in range(1,N):<br># # print(i)<br># for m in range(i):<br># if( (m^(m+1))==i ):<br># print(i)<br># print(m,m+1,bin(m)[2:])<br># print()<br># break<br># # else:<br># # print(-1)<br># # print()<br>T = int(input())<br>ans = []<br><br>for _ in range(T):<br> N = int(input())<br><br> # x = log2(N+1)<br> if(N==1):<br> ans.append(2)<br> elif('0' not in bin(N)[2:]):<br> ans.append(N//2)<br> else:<br> ans.append(-1)<br><br>for i in ans:<br> print(i)</code> | <code># from math import log2<br># N = 10000<br># for i in range(1,N):<br># # print(i)<br># for m in range(i):<br># if( (m^(m+1))==i ):<br># print(i)<br># print(m,m+1,bin(m)[2:])<br># print()<br># break<br># # else:<br># # print(-1)<br># # print()<br>T = int(input())<br>ans = []<br><br>for _ in range(T):<br> N = int(input())<br><br> # x = log2(N+1)<br> if(N==1):<br> ans.append(2)<br> elif('0' not in bin(N)[2:]):<br> ans.append(N//2)<br> else:<br> ans.append(-1)<br><br>for i in ans:<br> print(i)</code> | |
| | <code> DevuLand is a very strange place. There are n villages in it. Some of the villages are occupied by dinosaurs while the remaining ones by villagers.<br> You are given the information of DevuLand <br> by an array D of size n. If D[i] is non-negative, it means that there are D[i] villagers in that village. <br> Otherwise, it means that are -D[i] <br> dinosaurs in that village.<br><br> It is also guaranteed that total number of villagers in DevuLand is equal to total number of dinosaurs.<br><br>Once dinosaurs got very hungry and started eating villagers. Frightened villagers gathered immediately and met their Sarpanch Deviji. Deviji, being a very daring and negotiable person, met to the head<br>of dinosaurs. Soon both parties called a truce. It was decided that the villagers will provide laddus to <br>the dinosaurs. So everyday, each villager will take exactly one laddu to one of the dinosaurs in such a way that no dinosaur remains hungry (note that this is possible because number of villagers is the same as the numbe...</code> | <code># cook your dish here<br>for _ in range(int(input())):<br> n = int(input())<br> a = list(map(int, input().split()))<br> curr = 0<br> ans = 0<br> for x in a:<br> curr += x<br> ans += abs(curr)<br> print(ans)</code> | <code>from collections import deque
<br>T=int(input())
<br>def break_down(num):
<br> count=0
<br> while(len(num)!=1):
<br> temp=0
<br> for i in range(0,len(num)):
<br> temp=temp+int(num[i])
<br> num=str(temp)
<br> count=count+1
<br> return (int(num),count)
<br>def digit_sum(num):
<br> temp=0
<br> for i in range(0,len(num)):
<br> temp=temp+int(num[i])
<br> num=temp
<br> return (num)
<br>while(T):
<br> queue=deque()
<br> count_n=0
<br> count_d=0
<br> T=T-1
<br> N,d=[i for i in input().split()]
<br> n,count_n=break_down(N)
<br> D,count_D=break_down(d)
<br> dic={}
<br> if(D==1 or D==2 or D==4 or D==5 or D==7 or D==8):
<br> mini=1
<br> elif(D==3 or D==6):
<br> mini=min(digit_sum(str(n+3)),digit_sum(str(n+6)),digit_sum(str(n+9)))
<br> else:
<br> mini=n
<br> queue.append((int(N),0))
<br> ele=int(N)
<br> count=0
<br> while(len(queue)!=0):
<br> ele,count=queue.popleft()
<br> if(ele==mini):
<br> break
<br> else:
<br> if(len(str(ele))==1):
<br> temp1=ele+int(d)
<br> queue.append((temp1,count+1))...</code> | <code># cook your dish here<br>test_cases = int(input())<br>for i in range(test_cases):<br> no_of_elements = int(input())<br> sequence = list(map(int, input().split()))<br> d1 = sequence[1] - sequence[0]<br> d2 = sequence[2] - sequence[1]<br> d3 = (sequence[3] - sequence[0])/3<br> d4 = (sequence[3] - sequence[1])/2<br> d5 = (sequence[2] - sequence[0])/2<br><br> if (d2 == d4):<br> d = d2<br><br> elif(d3 == d5):<br> d = d3<br><br> elif(d1 == d3):<br> d = d1<br><br> elif(d1 == d5):<br> d = d1<br><br> if (d == d1):<br> for i in range(no_of_elements):<br> sequence[i] = int(sequence[0] + i*d)<br> else:<br> for i in range(no_of_elements):<br> sequence[i] = int(sequence[-1] - ((no_of_elements - i - 1)*d))<br><br> for i in sequence:<br> print(i, end=" ")<br><br> print('\n')<br><br><br></code> | <code>from collections import Counter<br>try:<br> for _ in range(int(input())):<br> n=int(input())<br> s=input()<br> d1=dict(Counter(s))<br> <br> u,d,r,l=0,0,0,0<br> if 'U' in d1:<br> u=d1['U']<br> else:<br> u=0<br> if 'D' in d1:<br> d=d1['D']<br> else:<br> d=0<br> if 'R' in d1:<br> r=d1['R']<br> else:<br> r=0<br> if 'L' in d1:<br> l=d1['L']<br> else:<br> l=0<br> x=0<br> y=0<br> if l==r:<br> x=0<br> elif l>r:<br> x=-(l-r)<br> elif r>l:<br> x=r-l<br> if u==d:<br> y=0<br> elif d>u:<br> y=-(d-u)<br> elif u>d:<br> y=u-d<br> # print(x,y)<br> if x==0 and y==0:<br> print(n)<br> continue<br> <br> print(n-(abs(x)+abs(y)))<br>except:<br> pass<br></code> | <code>from bisect import bisect_left, insort_left<br>a = []<br>n = int(input())<br>for _ in range(n):<br> #print(a)<br> s, d = list(map(int, input().split()))<br> if len(a) == 0:<br> print(s, s+d - 1)<br> a.append((s, s + d - 1))<br> continue<br> p = bisect_left(a, (s, s + d - 1))<br> #print('p', p)<br> ok = True<br> if p > 0 and a[p-1][1] >= s:<br> ok = False<br> if p < len(a) and a[p][0] <= s + d - 1:<br> ok = False<br> if ok:<br> insort_left(a, (s, s + d - 1))<br> print(s, s + d - 1)<br> 
else:<br> ok = False<br> for i in range(len(a)):<br> if i == 0:<br> if a[0][0] > d:<br> print(1,d)<br> a = [(1, d)] + a<br> ok = True<br> break<br> else:<br> if a[i - 1][1] + d < a[i][0]:<br> print(a[i - 1][1] + 1, a[i - 1][1] + d)<br> insort_left(a, (a[i - 1][1] + 1, a[i - 1][1] + d))<br> ok = True<br> break<br> ...</code> | <code>import fractions<br>for t in range(int(input())):<br> h,u,d = list(map(int,input().split()))<br> g = fractions.gcd(u,d)<br> if (h%g!=0):<br> print(-1)<br> else:<br> m = 0<br> n = 0<br> while (True):<br> n = (float(m)*u-h)/d<br> if (n>0 and int(n) == n):<br> break<br> m+=1<br> print(int(m+n))</code> | <code>import fractions<br>for t in range(int(input())):<br> h,u,d = list(map(int,input().split()))<br> g = fractions.gcd(u,d)<br> if (h%g!=0):<br> print(-1)<br> else:<br> m = 0<br> n = 0<br> while (True):<br> n = (float(m)*u-h)/d<br> if (n>0 and int(n) == n):<br> break<br> m+=1<br> print(int(m+n))</code> | |
| * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters: |
| ```json |
| { |
| "scale": 20.0, |
| "similarity_fct": "cos_sim", |
| "mini_batch_size": 128, |
| "gather_across_devices": false, |
| "directions": [ |
| "query_to_doc" |
| ], |
| "partition_mode": "joint", |
| "hardness_mode": null, |
| "hardness_strength": 0.0 |
| } |
| ``` |
| |
| ### Evaluation Dataset |
|
|
| #### code-retrieval-combined-v2 |
|
|
| * Dataset: [code-retrieval-combined-v2](https://huggingface.co/datasets/benjamintli/code-retrieval-combined-v2) at [2b971a6](https://huggingface.co/datasets/benjamintli/code-retrieval-combined-v2/tree/2b971a6d597823ab7ff10b898ae6f3c0fdbbfa23) |
| * Size: 31,516 evaluation samples |
| * Columns: <code>query</code> and <code>positive</code> |
| * Approximate statistics based on the first 1000 samples: |
| | | query | positive | |
| |:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------| |
| | type | string | string | |
| | details | <ul><li>min: 5 tokens</li><li>mean: 42.73 tokens</li><li>max: 834 tokens</li></ul> | <ul><li>min: 30 tokens</li><li>mean: 180.42 tokens</li><li>max: 1024 tokens</li></ul> | |
| * Samples: |
| | query | positive | |
| |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |
| | <code>This gets the version of OpenALPR<br><br> :return: Version information</code> | <code>def get_version(self):<br> """<br> This gets the version of OpenALPR<br><br> :return: Version information<br> """<br><br> ptr = self._get_version_func(self.alpr_pointer)<br> version_number = ctypes.cast(ptr, ctypes.c_char_p).value<br> version_number = _convert_from_charp(version_number)<br> self._free_json_mem_func(ctypes.c_void_p(ptr))<br> return version_number</code> | |
| | <code>Remove all unnecessary comments from a lexer or parser file</code> | <code>public String stripUnnecessaryComments(String javaContent, AntlrOptions options) {<br> if (!options.isOptimizeCodeQuality()) {<br> return javaContent;<br> }<br> javaContent = stripMachineDependentPaths(javaContent);<br> if (options.isStripAllComments()) {<br> javaContent = stripAllComments(javaContent);<br> }<br> return javaContent;<br> }</code> | |
| | <code>Serialize reply to array or JSON.<br><br>@param {Object} packet<br>@param {String} packet.method "get", "search", "post", "put", "delete", "sub", "unsub".<br>@param {String} packet.resource<br>@param {String} packet.id<br>@param {*} packet.body<br>@param {Number} [packet.status]<br>@param {Number\|String} [packet.date]<br>@param {Object} [packet.headers]<br>@param {Boolean} [json] true to generate JSON instead of array.<br>@returns {Array\|String\|null}</code> | <code>function reply(packet, json) {<br> return _create(packet, packet.status \|\| 500, (METHODS[packet.method] \|\| '') + packet.resource, json);<br>}</code> | |
| * Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters: |
| ```json |
| { |
| "scale": 20.0, |
| "similarity_fct": "cos_sim", |
| "mini_batch_size": 128, |
| "gather_across_devices": false, |
| "directions": [ |
| "query_to_doc" |
| ], |
| "partition_mode": "joint", |
| "hardness_mode": null, |
| "hardness_strength": 0.0 |
| } |
| ``` |
|
|
| ### Training Hyperparameters |
| #### Non-Default Hyperparameters |
|
|
| - `eval_strategy`: steps |
| - `per_device_train_batch_size`: 1024 |
| - `per_device_eval_batch_size`: 1024 |
| - `num_train_epochs`: 1 |
| - `warmup_steps`: 0.05 (a value below 1 is interpreted as a fraction of the total training steps, i.e. 5% warmup) |
| - `bf16`: True |
| - `dataloader_num_workers`: 4 |
| - `load_best_model_at_end`: True |
| - `push_to_hub`: True |
| - `hub_model_id`: modernbert-code-v4-hard-negatives |
| - `batch_sampler`: no_duplicates |
| |
| #### All Hyperparameters |
| <details><summary>Click to expand</summary> |
| |
| - `do_predict`: False |
| - `eval_strategy`: steps |
| - `prediction_loss_only`: True |
| - `per_device_train_batch_size`: 1024 |
| - `per_device_eval_batch_size`: 1024 |
| - `gradient_accumulation_steps`: 1 |
| - `eval_accumulation_steps`: None |
| - `torch_empty_cache_steps`: None |
| - `learning_rate`: 5e-05 |
| - `weight_decay`: 0.0 |
| - `adam_beta1`: 0.9 |
| - `adam_beta2`: 0.999 |
| - `adam_epsilon`: 1e-08 |
| - `max_grad_norm`: 1.0 |
| - `num_train_epochs`: 1 |
| - `max_steps`: -1 |
| - `lr_scheduler_type`: linear |
| - `lr_scheduler_kwargs`: None |
| - `warmup_ratio`: None |
| - `warmup_steps`: 0.05 |
| - `log_level`: passive |
| - `log_level_replica`: warning |
| - `log_on_each_node`: True |
| - `logging_nan_inf_filter`: True |
| - `enable_jit_checkpoint`: False |
| - `save_on_each_node`: False |
| - `save_only_model`: False |
| - `restore_callback_states_from_checkpoint`: False |
| - `use_cpu`: False |
| - `seed`: 42 |
| - `data_seed`: None |
| - `bf16`: True |
| - `fp16`: False |
| - `bf16_full_eval`: False |
| - `fp16_full_eval`: False |
| - `tf32`: None |
| - `local_rank`: -1 |
| - `ddp_backend`: None |
| - `debug`: [] |
| - `dataloader_drop_last`: False |
| - `dataloader_num_workers`: 4 |
| - `dataloader_prefetch_factor`: None |
| - `disable_tqdm`: False |
| - `remove_unused_columns`: True |
| - `label_names`: None |
| - `load_best_model_at_end`: True |
| - `ignore_data_skip`: False |
| - `fsdp`: [] |
| - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} |
| - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None} |
| - `parallelism_config`: None |
| - `deepspeed`: None |
| - `label_smoothing_factor`: 0.0 |
| - `optim`: adamw_torch_fused |
| - `optim_args`: None |
| - `group_by_length`: False |
| - `length_column_name`: length |
| - `project`: huggingface |
| - `trackio_space_id`: trackio |
| - `ddp_find_unused_parameters`: None |
| - `ddp_bucket_cap_mb`: None |
| - `ddp_broadcast_buffers`: False |
| - `dataloader_pin_memory`: True |
| - `dataloader_persistent_workers`: False |
| - `skip_memory_metrics`: True |
| - `push_to_hub`: True |
| - `resume_from_checkpoint`: None |
| - `hub_model_id`: modernbert-code-v4-hard-negatives |
| - `hub_strategy`: every_save |
| - `hub_private_repo`: None |
| - `hub_always_push`: False |
| - `hub_revision`: None |
| - `gradient_checkpointing`: False |
| - `gradient_checkpointing_kwargs`: None |
| - `include_for_metrics`: [] |
| - `eval_do_concat_batches`: True |
| - `auto_find_batch_size`: False |
| - `full_determinism`: False |
| - `ddp_timeout`: 1800 |
| - `torch_compile`: False |
| - `torch_compile_backend`: None |
| - `torch_compile_mode`: None |
| - `include_num_input_tokens_seen`: no |
| - `neftune_noise_alpha`: None |
| - `optim_target_modules`: None |
| - `batch_eval_metrics`: False |
| - `eval_on_start`: False |
| - `use_liger_kernel`: False |
| - `liger_kernel_config`: None |
| - `eval_use_gather_object`: False |
| - `average_tokens_across_devices`: True |
| - `use_cache`: False |
| - `prompts`: None |
| - `batch_sampler`: no_duplicates |
| - `multi_dataset_batch_sampler`: proportional |
| - `router_mapping`: {} |
| - `learning_rate_mapping`: {} |
|
|
| </details> |
|
|
| ### Training Logs |
| | Epoch | Step | Training Loss | Validation Loss | eval_cosine_ndcg@10 | |
| |:---------:|:-------:|:-------------:|:---------------:|:-------------------:| |
| | 0.0738 | 20 | 0.9880 | - | - | |
| | 0.1476 | 40 | 0.9529 | 0.3465 | 0.9286 | |
| | 0.2214 | 60 | 0.9726 | - | - | |
| | 0.2952 | 80 | 0.9299 | 0.3351 | 0.9296 | |
| | 0.3690 | 100 | 0.9130 | - | - | |
| | 0.4428 | 120 | 0.9187 | 0.3253 | 0.9325 | |
| | 0.5166 | 140 | 0.8940 | - | - | |
| | 0.5904 | 160 | 0.9037 | 0.3186 | 0.9354 | |
| | 0.6642 | 180 | 0.8951 | - | - | |
| | **0.7380** | **200** | **0.8816** | **0.3121** | **0.9361** | |
| | 0.8118 | 220 | 0.8753 | - | - | |
| | 0.8856 | 240 | 0.8649 | 0.3106 | 0.9359 | |
| | 0.9594 | 260 | 0.8575 | - | - | |
|
|
| * The bold row denotes the saved checkpoint. |
|
|
| ### Framework Versions |
| - Python: 3.12.13 |
| - Sentence Transformers: 5.3.0 |
| - Transformers: 5.0.0 |
| - PyTorch: 2.10.0+cu128 |
| - Accelerate: 1.13.0 |
| - Datasets: 4.0.0 |
| - Tokenizers: 0.22.2 |
|
|
| ## Citation |
|
|
| ### BibTeX |
|
|
| #### Sentence Transformers |
| ```bibtex |
| @inproceedings{reimers-2019-sentence-bert, |
| title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", |
| author = "Reimers, Nils and Gurevych, Iryna", |
| booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", |
| month = "11", |
| year = "2019", |
| publisher = "Association for Computational Linguistics", |
| url = "https://arxiv.org/abs/1908.10084", |
| } |
| ``` |
|
|
| #### CachedMultipleNegativesRankingLoss |
| ```bibtex |
| @misc{gao2021scaling, |
| title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup}, |
| author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan}, |
| year={2021}, |
| eprint={2101.06983}, |
| archivePrefix={arXiv}, |
| primaryClass={cs.LG} |
| } |
| ``` |
|
|
| <!-- |
| ## Glossary |
|
|
| *Clearly define terms in order to be accessible across audiences.* |
| --> |
|
|
| <!-- |
| ## Model Card Authors |
|
|
| *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.* |
| --> |
|
|
| <!-- |
| ## Model Card Contact |
|
|
| *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.* |
| --> |