Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
Paper
•
1908.10084
•
Published
•
10
This is a sentence-transformers model finetuned from microsoft/unixcoder-base-unimodal on the soco_train_java dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("buelfhood/SOCO-Java-UnixCoder-Softmax-PairClass-VAST-ep2-bs32-noEval")
# Run inference
sentences = [
'\n\n\n\nimport java.util.*;\nimport java.net.*;\nimport java.io.*;\nimport javax.swing.*;\n\npublic class PasswordCombination\n{\n private int pwdCounter = 0;\n private int startTime;\n private String str1,str2,str3;\n private String url = "http://sec-crack.cs.rmit.edu./SEC/2/";\n private String loginPwd;\n private String[] password;\n private HoldSharedData data;\n private char[] chars = {\'A\',\'B\',\'C\',\'D\',\'E\',\'F\',\'G\',\'H\',\'I\',\'J\',\'K\',\'L\',\'M\',\n \'N\',\'O\',\'P\',\'Q\',\'R\',\'S\',\'T\',\'U\',\'V\',\'W\',\'X\',\'Y\',\'Z\',\n \'a\',\'b\',\'c\',\'d\',\'e\',\'f\',\'g\',\'h\',\'i\',\'j\',\'k\',\'l\',\'m\',\n \'n\',\'o\',\'p\',\'q\',\'r\',\'s\',\'t\',\'u\',\'v\',\'w\',\'x\',\'y\',\'z\'};\n\n public PasswordCombination()\n {\n System.out.println("Programmed by for INTE1070 Assignment 2");\n\n String input = JOptionPane.showInputDialog( "Enter number of threads" );\n if( input == null )\n System.exit(0);\n\n int numOfConnections = Integer.parseInt( input );\n startTime = System.currentTimeMillis();\n int pwdCounter = 52*52*52 + 52*52 + 52;\n password = new String[pwdCounter];\n\n doPwdCombination();\n\n System.out.println("Total Number of Passwords Generated: " + pwdCounter);\n createConnectionThread( numOfConnections );\n }\n\n private void doPwdCombination()\n {\n for( int i = 0; i < 52; i ++ )\n {\n str1 = "" + chars[i];\n password[pwdCounter++] = "" + chars[i];\n System.err.print( str1 + " | " );\n\n for( int j = 0; j < 52; j ++ )\n {\n str2 = str1 + chars[j];\n password[pwdCounter++] = str1 + chars[j];\n\n for( int k = 0; k < 52; k ++ )\n {\n str3 = str2 + chars[k];\n password[pwdCounter++] = str2 + chars[k];\n }\n }\n }\n\n System.err.println( "\\n" );\n }\n\n private void loadPasswords( )\n {\n FileReader fRead;\n BufferedReader buf;\n String line = null;\n String fileName = "words";\n\n try\n {\n fRead = new FileReader( fileName );\n buf = new BufferedReader(fRead);\n\n while((line = buf.readLine( )) != null)\n {\n password[pwdCounter++] = 
line;\n }\n }\n catch(FileNotFoundException e)\n {\n System.err.println("File not found: " + fileName);\n }\n catch(IOException ioe)\n {\n System.err.println("IO Error " + ioe);\n }\n }\n\n private void createConnectionThread( int input )\n {\n data = new HoldSharedData( startTime, password, pwdCounter );\n\n int numOfThreads = input;\n int batch = pwdCounter/numOfThreads + 1;\n numOfThreads = pwdCounter/batch + 1;\n System.out.println("Number of Connection Threads Used:" + numOfThreads);\n ConnectionThread[] connThread = new ConnectionThread[numOfThreads];\n\n for( int index = 0; index < numOfThreads; index ++ )\n {\n connThread[index] = new ConnectionThread( url, index, batch, data );\n connThread[index].conn();\n }\n }\n} ',
'\nimport java.util.*;\n\n\npublic class Cracker\n{\n private char[] letters = {\'a\', \'b\', \'c\', \'d\', \'e\', \'f\', \'g\', \'h\', \'i\', \'j\', \'k\', \'l\', \'m\', \'n\', \'o\', \'p\', \'q\', \'r\', \'s\', \'t\', \'u\', \'v\', \'w\', \'x\', \'y\', \'z\', \'A\', \'B\', \'C\', \'D\', \'E\', \'F\', \'G\', \'H\', \'I\', \'J\', \'K\', \'L\', \'M\', \'N\', \'O\', \'P\', \'Q\', \'R\', \'S\', \'T\', \'U\', \'V\', \'W\', \'X\', \'Y\', \'Z\'};\n private Vector v;\n\n public Cracker()\n {\n v = new Vector( 52);\n }\n public void loadLetters()\n {\n int i;\n\n for( i = 0; i < letters.length; i++)\n {\n\t String s = new StringBuffer().append( letters[i]).toString();\n v.add( s);\n }\n }\n public Vector getVictor()\n {\n return ;\n }\n public void loadPairs()\n {\n int i,j;\n\n for( i = 0; i < letters.length - 1; i++)\n {\n for( j = i + 1; j < letters.length; j++)\n {\n String s1 = new StringBuffer().append( letters[i]).append( letters[j]).toString();\n\t String s2 = new StringBuffer().append( letters[j]).append( letters[i]).toString();\n\t v.add( s1);\n\t v.add( s2);\n\t }\n }\n for( i = 0; i < letters.length; i++)\n {\n String s3 = new StringBuffer().append( letters[i]).append( letters[i]).toString();\n\t v.add( s3);\n }\n }\n public void loadTriples()\n {\n int i, j, k;\n \n for( i = 0; i < letters.length; i++)\n {\n String s4 = new StringBuffer().append( letters[i]).append( letters[i]).append( letters[i]).toString();\n\t v.add( s4);\n }\n for( i = 0; i < letters.length - 1; i++)\n {\n for( j = i + 1; j < letters.length; j++)\n\t {\n\t String s5 = new StringBuffer().append( letters[i]).append( letters[j]).append( letters[j]).toString();\n\t String s6 = new StringBuffer().append( letters[j]).append( letters[i]).append( letters[j]).toString();\n\t String s7 = new StringBuffer().append( letters[j]).append( letters[j]).append( letters[i]).toString();\n\t String s8 = new StringBuffer().append( letters[j]).append( letters[i]).append( letters[i]).toString();\n\t String s9 = 
new StringBuffer().append( letters[i]).append( letters[j]).append( letters[i]).toString();\n\t String s10 = new StringBuffer().append( letters[i]).append( letters[i]).append( letters[j]).toString();\n\t v.add( s5);\n\t v.add( s6);\n\t v.add( s7);\n\t v.add( s8);\n\t v.add( s9);\n\t v.add( s10);\n\t }\n }\n for( i = 0; i < letters.length - 2; i++)\n {\n for( j = i + 1; j < letters.length - 1; j++)\n\t {\n\t for( k = i + 2; k < letters.length; k++)\n\t {\n\t String s11 = new StringBuffer().append( letters[i]).append( letters[j]).append(letters[k]).toString();\n\t String s12 = new StringBuffer().append( letters[i]).append( letters[k]).append(letters[j]).toString();\n\t String s13 = new StringBuffer().append( letters[k]).append( letters[j]).append(letters[i]).toString();\n\t String s14 = new StringBuffer().append( letters[k]).append( letters[i]).append(letters[j]).toString();\n\t String s15 = new StringBuffer().append( letters[j]).append( letters[i]).append(letters[k]).toString();\n\t String s16 = new StringBuffer().append( letters[j]).append( letters[k]).append(letters[i]).toString();\n\t v.add( s11);\n\t v.add( s12);\n\t v.add( s13);\n\t v.add( s14);\n\t v.add( s15);\n\t v.add( s16);\n\t }\n\t }\n }\n }\n \n public static void main( String[] args)\n {\n Cracker cr = new Cracker();\n cr.loadLetters();\n cr.loadPairs();\n cr.loadTriples();\n System.out.println(" far "+cr.getVictor().size()+" elements loaded");\n }\n}\n \n',
'\nimport java.util.*;\n\n\npublic class Cracker\n{\n private char[] letters = {\'a\', \'b\', \'c\', \'d\', \'e\', \'f\', \'g\', \'h\', \'i\', \'j\', \'k\', \'l\', \'m\', \'n\', \'o\', \'p\', \'q\', \'r\', \'s\', \'t\', \'u\', \'v\', \'w\', \'x\', \'y\', \'z\', \'A\', \'B\', \'C\', \'D\', \'E\', \'F\', \'G\', \'H\', \'I\', \'J\', \'K\', \'L\', \'M\', \'N\', \'O\', \'P\', \'Q\', \'R\', \'S\', \'T\', \'U\', \'V\', \'W\', \'X\', \'Y\', \'Z\'};\n private Vector v;\n\n public Cracker()\n {\n v = new Vector( 52);\n }\n public void loadLetters()\n {\n int i;\n\n for( i = 0; i < letters.length; i++)\n {\n\t String s = new StringBuffer().append( letters[i]).toString();\n v.add( s);\n }\n }\n public Vector getVictor()\n {\n return ;\n }\n public void loadPairs()\n {\n int i,j;\n\n for( i = 0; i < letters.length - 1; i++)\n {\n for( j = i + 1; j < letters.length; j++)\n {\n String s1 = new StringBuffer().append( letters[i]).append( letters[j]).toString();\n\t String s2 = new StringBuffer().append( letters[j]).append( letters[i]).toString();\n\t v.add( s1);\n\t v.add( s2);\n\t }\n }\n for( i = 0; i < letters.length; i++)\n {\n String s3 = new StringBuffer().append( letters[i]).append( letters[i]).toString();\n\t v.add( s3);\n }\n }\n public void loadTriples()\n {\n int i, j, k;\n \n for( i = 0; i < letters.length; i++)\n {\n String s4 = new StringBuffer().append( letters[i]).append( letters[i]).append( letters[i]).toString();\n\t v.add( s4);\n }\n for( i = 0; i < letters.length - 1; i++)\n {\n for( j = i + 1; j < letters.length; j++)\n\t {\n\t String s5 = new StringBuffer().append( letters[i]).append( letters[j]).append( letters[j]).toString();\n\t String s6 = new StringBuffer().append( letters[j]).append( letters[i]).append( letters[j]).toString();\n\t String s7 = new StringBuffer().append( letters[j]).append( letters[j]).append( letters[i]).toString();\n\t String s8 = new StringBuffer().append( letters[j]).append( letters[i]).append( letters[i]).toString();\n\t String s9 = 
new StringBuffer().append( letters[i]).append( letters[j]).append( letters[i]).toString();\n\t String s10 = new StringBuffer().append( letters[i]).append( letters[i]).append( letters[j]).toString();\n\t v.add( s5);\n\t v.add( s6);\n\t v.add( s7);\n\t v.add( s8);\n\t v.add( s9);\n\t v.add( s10);\n\t }\n }\n for( i = 0; i < letters.length - 2; i++)\n {\n for( j = i + 1; j < letters.length - 1; j++)\n\t {\n\t for( k = i + 2; k < letters.length; k++)\n\t {\n\t String s11 = new StringBuffer().append( letters[i]).append( letters[j]).append(letters[k]).toString();\n\t String s12 = new StringBuffer().append( letters[i]).append( letters[k]).append(letters[j]).toString();\n\t String s13 = new StringBuffer().append( letters[k]).append( letters[j]).append(letters[i]).toString();\n\t String s14 = new StringBuffer().append( letters[k]).append( letters[i]).append(letters[j]).toString();\n\t String s15 = new StringBuffer().append( letters[j]).append( letters[i]).append(letters[k]).toString();\n\t String s16 = new StringBuffer().append( letters[j]).append( letters[k]).append(letters[i]).toString();\n\t v.add( s11);\n\t v.add( s12);\n\t v.add( s13);\n\t v.add( s14);\n\t v.add( s15);\n\t v.add( s16);\n\t }\n\t }\n }\n }\n \n public static void main( String[] args)\n {\n Cracker cr = new Cracker();\n cr.loadLetters();\n cr.loadPairs();\n cr.loadTriples();\n System.out.println(" far "+cr.getVictor().size()+" elements loaded");\n }\n}\n \n',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Columns: label, text_1, and text_2

| | label | text_1 | text_2 |
|---|---|---|---|
| type | int | string | string |
| details |
|
|
|
| label | text_1 | text_2 |
|---|---|---|
0 |
|
|
0 |
|
|
0 |
|
|
SoftmaxLoss

- per_device_train_batch_size: 32
- num_train_epochs: 2

- overwrite_output_dir: False
- do_predict: False
- eval_strategy: no
- prediction_loss_only: True
- per_device_train_batch_size: 32
- per_device_eval_batch_size: 8
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 2
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- eval_use_gather_object: False
- average_tokens_across_devices: False
- prompts: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: proportional

| Epoch | Step | Training Loss |
|---|---|---|
| 0.4785 | 500 | 0.0175 |
| 0.9569 | 1000 | 0.012 |
| 1.4354 | 1500 | 0.0098 |
| 1.9139 | 2000 | 0.0037 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
Base model
microsoft/unixcoder-base-unimodal