diff --git a/.gitattributes b/.gitattributes index e7f3cfc563b34ab08e34e00ecc20cdb1a0823126..a6bc448a1adf509c985ffb2799fa1df3a0668c34 100644 --- a/.gitattributes +++ b/.gitattributes @@ -40,3 +40,6 @@ tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoint-39/tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoint-52/tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoint-65/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-14/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-28/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-35/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 12ca5d1576265d73b753859c31f9da8b52cd49f5..2bf489d86084ed7a91f4a5f33a78523a4353e811 100644 --- a/README.md +++ b/README.md @@ -8,657 +8,758 @@ tags: - feature-extraction - dense - generated_from_trainer -- dataset_size:391 +- dataset_size:82 - loss:MatryoshkaLoss - loss:MultipleNegativesRankingLoss base_model: intfloat/multilingual-e5-large widget: -- source_sentence: What does 'personal data breach' entail? +- source_sentence: When did the victims give away credentials? sentences: - - '1.Processing of personal data revealing racial or ethnic origin, political opinions, - religious or philosophical beliefs, or trade union membership, and the processing - of genetic data, biometric data for the purpose of uniquely identifying a natural - person, data concerning health or data concerning a natural person''s sex life - or sexual orientation shall be prohibited. 
- - 2.Paragraph 1 shall not apply if one of the following applies: (a) the data subject - has given explicit consent to the processing of those personal data for one or - more specified purposes, except where Union or Member State law provide that the - prohibition referred to in paragraph 1 may not be lifted by the data subject; - (b) processing is necessary for the purposes of carrying out the obligations - and exercising specific rights of the controller or of the data subject in the - field of employment and social security and social protection law in so far as - it is authorised by Union or Member State law or a collective agreement pursuant - to Member State law providing for appropriate safeguards for the fundamental rights - and the interests of the data subject; (c) processing is necessary to protect - the vital interests of the data subject or of another natural person where the - data subject is physically or legally incapable of giving consent; (d) processing - is carried out in the course of its legitimate activities with appropriate safeguards - by a foundation, association or any other not-for-profit body with a political, - philosophical, religious or trade union aim and on condition that the processing - relates solely to the members or to former members of the body or to persons who - have regular contact with it in connection with its purposes and that the personal - data are not disclosed outside that body without the consent of the data subjects; - (e) processing relates to personal data which are manifestly made public by the - data subject; (f) processing is necessary for the establishment, exercise or - defence of legal claims or whenever courts are acting in their judicial capacity; - (g) processing is necessary for reasons of substantial public interest, on the - basis of Union or Member State law which shall be proportionate to the aim pursued, - respect the essence of the right to data protection and provide for suitable and - specific 
measures to safeguard the fundamental rights and the interests of the - data subject; (h) processing is necessary for the purposes of preventive or occupational - medicine, for the assessment of the working capacity of the employee, medical - diagnosis, the provision of health or social care or treatment or the management - of health or social care systems and services on the basis of Union or Member - State law or pursuant to contract with a health professional and subject to the - conditions and safeguards referred to in paragraph 3; (i) processing is necessary - for reasons of public interest in the area of public health, such as protecting - against serious cross-border threats to health or ensuring high standards of quality - and safety of health care and of medicinal products or medical devices, on the - basis of Union or Member State law which provides for suitable and specific measures - to safeguard the rights and freedoms of the data subject, in particular professional - secrecy; 4.5.2016 L 119/38 (j) processing is necessary for archiving purposes - in the public interest, scientific or historical research purposes or statistical - purposes in accordance with Article 89(1) based on Union or Member State law which - shall be proportionate to the aim pursued, respect the essence of the right to - data protection and provide for suitable and specific measures to safeguard the - fundamental rights and the interests of the data subject. - - 3.Personal data referred to in paragraph 1 may be processed for the purposes referred - to in point (h) of paragraph 2 when those data are processed by or under the responsibility - of a professional subject to the obligation of professional secrecy under Union - or Member State law or rules established by national competent bodies or by another - person also subject to an obligation of secrecy under Union or Member State law - or rules established by national competent bodies. 
- - 4.Member States may maintain or introduce further conditions, including limitations, - with regard to the processing of genetic data, biometric data or data concerning - health.' - - '1) ''personal data'' means any information relating to an identified or identifiable - natural person (''data subject''); an identifiable natural person is one who can - be identified, directly or indirectly, in particular by reference to an identifier - such as a name, an identification number, location data, an online identifier - or to one or more factors specific to the physical, physiological, genetic, mental, - economic, cultural or social identity of that natural person; - - (2) ‘processing’ means any operation or set of operations which is performed on - personal data or on sets of personal data, whether or not by automated means, - such as collection, recording, organisation, structuring, storage, adaptation - or alteration, retrieval, consultation, use, disclosure by transmission, dissemination - or otherwise making available, alignment or combination, restriction, erasure - or destruction; - - (3) ‘restriction of processing’ means the marking of stored personal data with - the aim of limiting their processing in the future; - - (4) ‘profiling’ means any form of automated processing of personal data consisting - of the use of personal data to evaluate certain personal aspects relating to a - natural person, in particular to analyse or predict aspects concerning that natural - person''s performance at work, economic situation, health, personal preferences, - interests, reliability, behaviour, location or movements; - - (5) ‘pseudonymisation’ means the processing of personal data in such a manner - that the personal data can no longer be attributed to a specific data subject - without the use of additional information, provided that such additional information - is kept separately and is subject to technical and organisational measures to - ensure that the personal data 
are not attributed to an identified or identifiable - natural person; - - (6) ‘filing system’ means any structured set of personal data which are accessible - according to specific criteria, whether centralised, decentralised or dispersed - on a functional or geographical basis; - - (7) ‘controller’ means the natural or legal person, public authority, agency or - other body which, alone or jointly with others, determines the purposes and means - of the processing of personal data; where the purposes and means of such processing - are determined by Union or Member State law, the controller or the specific criteria - for its nomination may be provided for by Union or Member State law; - - (8) ‘processor’ means a natural or legal person, public authority, agency or other - body which processes personal data on behalf of the controller; - - (9) ‘recipient’ means a natural or legal person, public authority, agency or another - body, to which the personal data are disclosed, whether a third party or not. 
- However, public authorities which may receive personal data in the framework of - a particular inquiry in accordance with Union or Member State law shall not be - regarded as recipients; the processing of those data by those public authorities - shall be in compliance with the applicable data protection rules according to - the purposes of the processing; - - (10) ‘third party’ means a natural or legal person, public authority, agency or - body other than the data subject, controller, processor and persons who, under - the direct authority of the controller or processor, are authorised to process - personal data; - - (11) ‘consent’ of the data subject means any freely given, specific, informed - and unambiguous indication of the data subject''s wishes by which he or she, by - a statement or by a clear affirmative action, signifies agreement to the processing - of personal data relating to him or her; - - (12) ‘personal data breach’ means a breach of security leading to the accidental - or unlawful destruction, loss, alteration, unauthorised disclosure of, or access - to, personal data transmitted, stored or otherwise processed; - - (13) ‘genetic data’ means personal data relating to the inherited or acquired - genetic characteristics of a natural person which give unique information about - the physiology or the health of that natural person and which result, in particular, - from an analysis of a biological sample from the natural person in question; - - (14) ‘biometric data’ means personal data resulting from specific technical processing - relating to the physical, physiological or behavioural characteristics of a natural - person, which allow or confirm the unique identification of that natural person, - such as facial images or dactyloscopic data; - - (15) ‘data concerning health’ means personal data related to the physical or mental - health of a natural person, including the provision of health care services, which - reveal information about his or her 
health status; - - (16) ‘main establishment’ means: (a) as regards a controller with establishments - in more than one Member State, the place of its central administration in the - Union, unless the decisions on the purposes and means of the processing of personal - data are taken in another establishment of the controller in the Union and the - latter establishment has the power to have such decisions implemented, in which - case the establishment having taken such decisions is to be considered to be the - main establishment; (b) as regards a processor with establishments in more than - one Member State, the place of its central administration in the Union, or, if - the processor has no central administration in the Union, the establishment of - the processor in the Union where the main processing activities in the context - of the activities of an establishment of the processor take place to the extent - that the processor is subject to specific obligations under this Regulation; - - (17) ‘representative’ means a natural or legal person established in the Union - who, designated by the controller or processor in writing pursuant to Article - 27, represents the controller or processor with regard to their respective obligations - under this Regulation; - - (18) ‘enterprise’ means a natural or legal person engaged in an economic activity, - irrespective of its legal form, including partnerships or associations regularly - engaged in an economic activity; - - (19) ‘group of undertakings’ means a controlling undertaking and its controlled - undertakings; - - (20) ‘binding corporate rules’ means personal data protection policies which are - adhered to by a controller or processor established on the territory of a Member - State for transfers or a set of transfers of personal data to a controller or - processor in one or more third countries within a group of undertakings, or group - of enterprises engaged in a joint economic activity; - - (21) ‘supervisory authority’ 
means an independent public authority which is established - by a Member State pursuant to Article 51; - - (22) ‘supervisory authority concerned’ means a supervisory authority which is - concerned by the processing of personal data because: (a) the controller or processor - is established on the territory of the Member State of that supervisory authority; - (b) data subjects residing in the Member State of that supervisory authority are - substantially affected or likely to be substantially affected by the processing; - or (c) a complaint has been lodged with that supervisory authority; - - (23) ‘cross-border processing’ means either: (a) processing of personal data which - takes place in the context of the activities of establishments in more than one - Member State of a controller or processor in the Union where the controller or - processor is established in more than one Member State; or (b) processing of personal - data which takes place in the context of the activities of a single establishment - of a controller or processor in the Union but which substantially affects or is - likely to substantially affect data subjects in more than one Member State. 
- - (24) ‘relevant and reasoned objection’ means an objection to a draft decision - as to whether there is an infringement of this Regulation, or whether envisaged - action in relation to the controller or processor complies with this Regulation, - which clearly demonstrates the significance of the risks posed by the draft decision - as regards the fundamental rights and freedoms of data subjects and, where applicable, - the free flow of personal data within the Union; - - (25) ‘information society service’ means a service as defined in point (b) of - Article 1(1) of Directive (EU) 2015/1535 of the European Parliament and of the - Council (1); - - (26) ‘international organisation’ means an organisation and its subordinate bodies - governed by public international law, or any other body which is set up by, or - on the basis of, an agreement between two or more countries.' - - Any processing of personal data should be lawful and fair. It should be transparent - to natural persons that personal data concerning them are collected, used, consulted - or otherwise processed and to what extent the personal data are or will be processed. - The principle of transparency requires that any information and communication - relating to the processing of those personal data be easily accessible and easy - to understand, and that clear and plain language be used. That principle concerns, - in particular, information to the data subjects on the identity of the controller - and the purposes of the processing and further information to ensure fair and - transparent processing in respect of the natural persons concerned and their right - to obtain confirmation and communication of personal data concerning them which - are being processed. Natural persons should be made aware of risks, rules, safeguards - and rights in relation to the processing of personal data and how to exercise - their rights in relation to such processing. 
In particular, the specific purposes - for which personal data are processed should be explicit and legitimate and determined - at the time of the collection of the personal data. The personal data should be - adequate, relevant and limited to what is necessary for the purposes for which - they are processed. This requires, in particular, ensuring that the period for - which the personal data are stored is limited to a strict minimum. Personal data - should be processed only if the purpose of the processing could not reasonably - be fulfilled by other means. In order to ensure that the personal data are not - kept longer than necessary, time limits should be established by the controller - for erasure or for a periodic review. Every reasonable step should be taken to - ensure that personal data which are inaccurate are rectified or deleted. Personal - data should be processed in a manner that ensures appropriate security and confidentiality - of the personal data, including for preventing unauthorised access to or use of - personal data and the equipment used for the processing. -- source_sentence: In what situations could providing information to the data subject - be considered impossible or involve a disproportionate effort? + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. 
+ + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'Voice phishing involves manipulating victims over the phone. Attackers pose as + bank officials or authorities and use intimidation to extract financial details. + + + Scenario: + + - Victims are coerced into giving away PINs, passwords, or other credentials under + false pretenses of legal or financial emergencies.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without requiring that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and performs an act, omission, or acquiescence; and + + + c) Damage to another’s property, according to civil law, which must be causally + connected to the perpetrator’s deceptive acts or omissions. 
It is not required + that the deceived person and the person who suffered the loss be the same. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts relating to the present or the past, in such a way as to create the + impression of future fulfillment, based on a false present situation fabricated + by the perpetrator—who has already made the decision not to fulfill their obligation—then + the crime of fraud is established. + + + The term “property” denotes the totality of a person’s economic assets possessing + monetary value, while damage to property refers to its reduction—specifically, + the difference between the property’s monetary value before the disposition caused + by the fraudulent conduct and its value afterward. Property damage exists even + if the victim has an active claim for its restitution. + + + The time of commission of fraud is considered to be the moment when the perpetrator + acted and completed the deceptive conduct, that is, when they made the false representations + which deceived the victim or a third party. Any later time at which the victim’s + financial loss occurred—thus completing the fraud—or the time when the harmful + act or omission of the deceived person took place, is irrelevant. + + + The reference to multiple modes of commission of fraud (i.e., both the misrepresentation + of false facts and the concealment of true ones) may create ambiguity and contradiction, + unless it is made clear from the overall findings that the offense was committed + in one particular manner, and that the reference to the other merely serves to + define the intent (mens rea) of the perpetrator—specifically, that the representations + were false. 
+ + + Furthermore, a conviction must contain the specific and well-reasoned justification + required by Articles 93 paragraph 3 of the Constitution and 139 of the Code of + Criminal Procedure. The absence of such reasoning constitutes grounds for cassation + (appeal) under Article 510 paragraph 1(d) of the Code of Criminal Procedure, when + the judgment does not set out, with clarity, completeness, and consistency, the + factual circumstances established by the evidence, upon which the court based + its findings regarding the objective and subjective elements of the offense, the + evidence supporting those findings, and the legal reasoning through which those + facts were subsumed under the applicable substantive criminal provision. + + + For the existence of such reasoning, the explanatory and operative parts of the + decision may complement each other, as they form a single, unified whole. + + + The existence of intent (dolus) does not generally need to be specially justified, + since it is inherent in the will to bring about the factual circumstances constituting + the objective elements of the offense, and it is presumed from their realization + in each particular case—unless the law requires additional elements for criminal + liability, such as the act being committed with knowledge of a specific circumstance + (direct intent) or with the pursuit of a further purpose, i.e., the achievement + of an additional result (offenses requiring a special subjective element). + + + Furthermore, under Article 510 paragraph 1(e) of the Code of Criminal Procedure, + a misapplication of substantive criminal law also constitutes grounds for cassation. 
+ Such misapplication occurs when the trial court incorrectly applies the law to + the facts it has found to be true, or when the violation occurs indirectly, namely + when the reasoning of the judgment—comprising the combination of its factual and + operative parts and relating to the elements and identity of the offense—contains + ambiguities, contradictions, or logical gaps, rendering it impossible to verify, + on appeal, whether the law was applied correctly. In such cases, the judgment + lacks a lawful basis.' +- source_sentence: What must be the outcome of the deception in relation to property + damage? sentences: - - '1.The controller shall consult the supervisory authority prior to processing - where a data protection impact assessment under Article 35 indicates that the - processing would result in a high risk in the absence of measures taken by the - controller to mitigate the risk. - - 2.Where the supervisory authority is of the opinion that the intended processing - referred to in paragraph 1 would infringe this Regulation, in particular where - the controller has insufficiently identified or mitigated the risk, the supervisory - authority shall, within period of up to eight weeks of receipt of the request - for consultation, provide written advice to the controller and, where applicable - to the processor, and may use any of its powers referred to in Article 58. That - period may be extended by six weeks, taking into account the complexity of the - intended processing. The supervisory authority shall inform the controller and, - where applicable, the processor, of any such extension within one month of receipt - of the request for consultation together with the reasons for the delay. Those - periods may be suspended until the supervisory authority has obtained information - it has requested for the purposes of the consultation. 
- - 3.When consulting the supervisory authority pursuant to paragraph 1, the controller - shall provide the supervisory authority with: (a) where applicable, the respective - responsibilities of the controller, joint controllers and processors involved - in the processing, in particular for processing within a group of undertakings; - (b) the purposes and means of the intended processing; (c) the measures and - safeguards provided to protect the rights and freedoms of data subjects pursuant - to this Regulation; (d) where applicable, the contact details of the data protection - officer; 4.5.2016 L 119/54 (e) the data protection impact assessment provided - for in Article 35; and (f) any other information requested by the supervisory - authority. - - 4.Member States shall consult the supervisory authority during the preparation - of a proposal for a legislative measure to be adopted by a national parliament, - or of a regulatory measure based on such a legislative measure, which relates - to processing. - - 5.Notwithstanding paragraph 1, Member State law may require controllers to consult - with, and obtain prior authorisation from, the supervisory authority in relation - to processing by a controller for the performance of a task carried out by the - controller in the public interest, including processing in relation to social - protection and public health' - - "1.The Member States, the supervisory authorities, the Board and the Commission\ - \ shall encourage, in particular at Union level, the establishment of data protection\ - \ certification mechanisms and of data protection seals and marks, for the purpose\ - \ of demonstrating compliance with this Regulation of processing operations by\ - \ controllers and processors. The specific needs of micro, small and medium-sized\ - \ enterprises shall be taken into account. 
4.5.2016 L 119/58 \n2.In addition\ - \ to adherence by controllers or processors subject to this Regulation, data protection\ - \ certification mechanisms, seals or marks approved pursuant to paragraph 5 of\ - \ this Article may be established for the purpose of demonstrating the existence\ - \ of appropriate safeguards provided by controllers or processors that are not\ - \ subject to this Regulation pursuant to Article 3 within the framework of personal\ - \ data transfers to third countries or international organisations under the terms\ - \ referred to in point (f) of Article 46(2). Such controllers or processors shall\ - \ make binding and enforceable commitments, via contractual or other legally binding\ - \ instruments, to apply those appropriate safeguards, including with regard to\ - \ the rights of data subjects.\n3.The certification shall be voluntary and available\ - \ via a process that is transparent.\n4.A certification pursuant to this Article\ - \ does not reduce the responsibility of the controller or the processor for compliance\ - \ with this Regulation and is without prejudice to the tasks and powers of the\ - \ supervisory authorities which are competent pursuant to Article 55 or 56\n5.A\ - \ certification pursuant to this Article shall be issued by the certification\ - \ bodies referred to in Article 43 or by the competent supervisory authority,\ - \ on the basis of criteria approved by that competent supervisory authority pursuant\ - \ to Article 58(3) or by the Board pursuant to Article 63. 
Where the criteria\ - \ are approved by the Board, this may result in a common certification, the European\ - \ Data Protection Seal.\n6.The controller or processor which submits its processing\ - \ to the certification mechanism shall provide the certification body referred\ - \ to in Article 43, or where applicable, the competent supervisory authority,\ - \ with all information and access to its processing activities which are necessary\ - \ to conduct the certification procedure.\n7.Certification shall be issued to\ - \ a controller or processor for a maximum period of three years and may be renewed,\ - \ under the same conditions, provided that the relevant requirements continue\ - \ to be met. Certification shall be withdrawn, as applicable, by the certification\ - \ bodies referred to in Article 43 or by the competent supervisory authority where\ - \ the requirements for the certification are not or are no longer met.\n8.The\ - \ Board shall collate all certification mechanisms and data protection seals and\ - \ marks in a register and shall make them publicly available by any appropriate\ - \ means." - - However, it is not necessary to impose the obligation to provide information where - the data subject already possesses the information, where the recording or disclosure - of the personal data is expressly laid down by law or where the provision of information - to the data subject proves to be impossible or would involve a disproportionate - effort. The latter could in particular be the case where processing is carried - out for archiving purposes in the public interest, scientific or historical research - purposes or statistical purposes. In that regard, the number of data subjects, - the age of the data and any appropriate safeguards adopted should be taken into - consideration. -- source_sentence: What is the data subject provided with prior to further processing - of personal data? 
+ - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without requiring that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and performs an act, omission, or acquiescence; and + + + c) Damage to another’s property, according to civil law, which must be causally + connected to the perpetrator’s deceptive acts or omissions. It is not required + that the deceived person and the person who suffered the loss be the same. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts relating to the present or the past, in such a way as to create the + impression of future fulfillment, based on a false present situation fabricated + by the perpetrator—who has already made the decision not to fulfill their obligation—then + the crime of fraud is established. 
+ + + The term “property” denotes the totality of a person’s economic assets possessing + monetary value, while damage to property refers to its reduction—specifically, + the difference between the property’s monetary value before the disposition caused + by the fraudulent conduct and its value afterward. Property damage exists even + if the victim has an active claim for its restitution. + + + The time of commission of fraud is considered to be the moment when the perpetrator + acted and completed the deceptive conduct, that is, when they made the false representations + which deceived the victim or a third party. Any later time at which the victim’s + financial loss occurred—thus completing the fraud—or the time when the harmful + act or omission of the deceived person took place, is irrelevant. + + + The reference to multiple modes of commission of fraud (i.e., both the misrepresentation + of false facts and the concealment of true ones) may create ambiguity and contradiction, + unless it is made clear from the overall findings that the offense was committed + in one particular manner, and that the reference to the other merely serves to + define the intent (mens rea) of the perpetrator—specifically, that the representations + were false. + + + Furthermore, a conviction must contain the specific and well-reasoned justification + required by Articles 93 paragraph 3 of the Constitution and 139 of the Code of + Criminal Procedure. 
The absence of such reasoning constitutes grounds for cassation + (appeal) under Article 510 paragraph 1(d) of the Code of Criminal Procedure, when + the judgment does not set out, with clarity, completeness, and consistency, the + factual circumstances established by the evidence, upon which the court based + its findings regarding the objective and subjective elements of the offense, the + evidence supporting those findings, and the legal reasoning through which those + facts were subsumed under the applicable substantive criminal provision. + + + For the existence of such reasoning, the explanatory and operative parts of the + decision may complement each other, as they form a single, unified whole. + + + The existence of intent (dolus) does not generally need to be specially justified, + since it is inherent in the will to bring about the factual circumstances constituting + the objective elements of the offense, and it is presumed from their realization + in each particular case—unless the law requires additional elements for criminal + liability, such as the act being committed with knowledge of a specific circumstance + (direct intent) or with the pursuit of a further purpose, i.e., the achievement + of an additional result (offenses requiring a special subjective element). + + + Furthermore, under Article 510 paragraph 1(e) of the Code of Criminal Procedure, + a misapplication of substantive criminal law also constitutes grounds for cassation. + Such misapplication occurs when the trial court incorrectly applies the law to + the facts it has found to be true, or when the violation occurs indirectly, namely + when the reasoning of the judgment—comprising the combination of its factual and + operative parts and relating to the elements and identity of the offense—contains + ambiguities, contradictions, or logical gaps, rendering it impossible to verify, + on appeal, whether the law was applied correctly. In such cases, the judgment + lacks a lawful basis.' 
+ - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. + + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. 
+ + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. + + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another person’s property by persuading someone to act, + omit, or tolerate something through the knowing misrepresentation of false facts + as true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision, it follows that for the crime of fraud to be established, + the following elements are required: + + + a) Intent of the perpetrator to obtain for themselves or another an unlawful pecuniary + benefit, regardless of whether this benefit was actually realized; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which, as a causal factor, someone + is deceived and acts in a way that is detrimental to themselves or another (by + an act, omission, or acquiescence); and + + + c) Damage to another’s property, in the sense recognized by civil law, which must + be causally linked to the fraudulent conduct (the deceptive act or omission of + the perpetrator) and to the resulting deception of the person who made the property + disposition. It is not required that the person deceived be the same person who + suffered the damage. + + + Property damage exists when there is a reduction or deterioration in the victim’s + assets, even if the victim has an active claim to restitution. However, as an + element of the objective aspect of the crime of fraud, the damage must be the + direct, necessary, and exclusive result of the property disposition—namely, the + act, omission, or acquiescence performed by the person deceived by the perpetrator’s + fraudulent conduct. + + + There must therefore be a causal connection between the perpetrator’s deceptive + behavior and the deception it caused, as well as between this deception and the + resulting property damage, which must be the direct, necessary, and exclusive + outcome of the deception and of the act, omission, or acquiescence of the deceived + person. + + + The term “facts” refers to real circumstances relating to the past or present, + and not to those expected to occur in the future, such as mere promises or contractual + obligations. 
However, when such promises or obligations are accompanied by false + assurances and representations of other false facts relating to the present or + the past, in such a way as to create the impression of future fulfillment, based + on the false present situation presented by a perpetrator who has already made + the decision not to fulfill their obligation, then the crime of fraud is established. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their deceptive conduct—that is, when they made the false + representations that deceived the victim or a third party. Any later time at which + the victim’s financial loss actually occurred—thus completing the fraud—or the + time when the deceived person performed the harmful act or omission, is irrelevant.' +- source_sentence: How are victims tricked in email phishing scams? sentences: - - '1.Where personal data relating to a data subject are collected from the data - subject, the controller shall, at the time when personal data are obtained, provide - the data subject with all of the following information: (a) the identity and - the contact details of the controller and, where applicable, of the controller''s - representative; (b) the contact details of the data protection officer, where - applicable; (c) the purposes of the processing for which the personal data are - intended as well as the legal basis for the processing; 4.5.2016 L 119/40 (d) where - the processing is based on point (f) of Article 6(1), the legitimate interests - pursued by the controller or by a third party; (e) the recipients or categories - of recipients of the personal data, if any; (f) where applicable, the fact that - the controller intends to transfer personal data to a third country or international - organisation and the existence or absence of an adequacy decision by the Commission, - or in the case of transfers referred to in Article 46 or 47, or the second subparagraph - of 
Article 49(1), reference to the appropriate or suitable safeguards and the - means by which to obtain a copy of them or where they have been made available. - - 2.In addition to the information referred to in paragraph 1, the controller shall, - at the time when personal data are obtained, provide the data subject with the - following further information necessary to ensure fair and transparent processing: - (a) the period for which the personal data will be stored, or if that is not - possible, the criteria used to determine that period; (b) the existence of the - right to request from the controller access to and rectification or erasure of - personal data or restriction of processing concerning the data subject or to object - to processing as well as the right to data portability; (c) where the processing - is based on point (a) of Article 6(1) or point (a) of Article 9(2), the existence - of the right to withdraw consent at any time, without affecting the lawfulness - of processing based on consent before its withdrawal; (d) the right to lodge - a complaint with a supervisory authority; (e) whether the provision of personal - data is a statutory or contractual requirement, or a requirement necessary to - enter into a contract, as well as whether the data subject is obliged to provide - the personal data and of the possible consequences of failure to provide such - data; (f) the existence of automated decision-making, including profiling, referred - to in Article 22(1) and (4) and, at least in those cases, meaningful information - about the logic involved, as well as the significance and the envisaged consequences - of such processing for the data subject. 
- - 3.Where the controller intends to further process the personal data for a purpose - other than that for which the personal data were collected, the controller shall - provide the data subject prior to that further processing with information on - that other purpose and with any relevant further information as referred to in - paragraph 2 - - 4.Paragraphs 1, 2 and 3 shall not apply where and insofar as the data subject - already has the information.' - - This Regulation respects and does not prejudice the status under existing constitutional - law of churches and religious associations or communities in the Member States, - as recognised in Article 17 TFEU. - - '1) ''personal data'' means any information relating to an identified or identifiable - natural person (''data subject''); an identifiable natural person is one who can - be identified, directly or indirectly, in particular by reference to an identifier - such as a name, an identification number, location data, an online identifier - or to one or more factors specific to the physical, physiological, genetic, mental, - economic, cultural or social identity of that natural person; - - (2) ‘processing’ means any operation or set of operations which is performed on - personal data or on sets of personal data, whether or not by automated means, - such as collection, recording, organisation, structuring, storage, adaptation - or alteration, retrieval, consultation, use, disclosure by transmission, dissemination - or otherwise making available, alignment or combination, restriction, erasure - or destruction; - - (3) ‘restriction of processing’ means the marking of stored personal data with - the aim of limiting their processing in the future; - - (4) ‘profiling’ means any form of automated processing of personal data consisting - of the use of personal data to evaluate certain personal aspects relating to a - natural person, in particular to analyse or predict aspects concerning that natural - person''s 
performance at work, economic situation, health, personal preferences, - interests, reliability, behaviour, location or movements; - - (5) ‘pseudonymisation’ means the processing of personal data in such a manner - that the personal data can no longer be attributed to a specific data subject - without the use of additional information, provided that such additional information - is kept separately and is subject to technical and organisational measures to - ensure that the personal data are not attributed to an identified or identifiable - natural person; - - (6) ‘filing system’ means any structured set of personal data which are accessible - according to specific criteria, whether centralised, decentralised or dispersed - on a functional or geographical basis; - - (7) ‘controller’ means the natural or legal person, public authority, agency or - other body which, alone or jointly with others, determines the purposes and means - of the processing of personal data; where the purposes and means of such processing - are determined by Union or Member State law, the controller or the specific criteria - for its nomination may be provided for by Union or Member State law; - - (8) ‘processor’ means a natural or legal person, public authority, agency or other - body which processes personal data on behalf of the controller; - - (9) ‘recipient’ means a natural or legal person, public authority, agency or another - body, to which the personal data are disclosed, whether a third party or not. 
- However, public authorities which may receive personal data in the framework of - a particular inquiry in accordance with Union or Member State law shall not be - regarded as recipients; the processing of those data by those public authorities - shall be in compliance with the applicable data protection rules according to - the purposes of the processing; - - (10) ‘third party’ means a natural or legal person, public authority, agency or - body other than the data subject, controller, processor and persons who, under - the direct authority of the controller or processor, are authorised to process - personal data; - - (11) ‘consent’ of the data subject means any freely given, specific, informed - and unambiguous indication of the data subject''s wishes by which he or she, by - a statement or by a clear affirmative action, signifies agreement to the processing - of personal data relating to him or her; - - (12) ‘personal data breach’ means a breach of security leading to the accidental - or unlawful destruction, loss, alteration, unauthorised disclosure of, or access - to, personal data transmitted, stored or otherwise processed; - - (13) ‘genetic data’ means personal data relating to the inherited or acquired - genetic characteristics of a natural person which give unique information about - the physiology or the health of that natural person and which result, in particular, - from an analysis of a biological sample from the natural person in question; - - (14) ‘biometric data’ means personal data resulting from specific technical processing - relating to the physical, physiological or behavioural characteristics of a natural - person, which allow or confirm the unique identification of that natural person, - such as facial images or dactyloscopic data; - - (15) ‘data concerning health’ means personal data related to the physical or mental - health of a natural person, including the provision of health care services, which - reveal information about his or her 
health status; - - (16) ‘main establishment’ means: (a) as regards a controller with establishments - in more than one Member State, the place of its central administration in the - Union, unless the decisions on the purposes and means of the processing of personal - data are taken in another establishment of the controller in the Union and the - latter establishment has the power to have such decisions implemented, in which - case the establishment having taken such decisions is to be considered to be the - main establishment; (b) as regards a processor with establishments in more than - one Member State, the place of its central administration in the Union, or, if - the processor has no central administration in the Union, the establishment of - the processor in the Union where the main processing activities in the context - of the activities of an establishment of the processor take place to the extent - that the processor is subject to specific obligations under this Regulation; - - (17) ‘representative’ means a natural or legal person established in the Union - who, designated by the controller or processor in writing pursuant to Article - 27, represents the controller or processor with regard to their respective obligations - under this Regulation; - - (18) ‘enterprise’ means a natural or legal person engaged in an economic activity, - irrespective of its legal form, including partnerships or associations regularly - engaged in an economic activity; - - (19) ‘group of undertakings’ means a controlling undertaking and its controlled - undertakings; - - (20) ‘binding corporate rules’ means personal data protection policies which are - adhered to by a controller or processor established on the territory of a Member - State for transfers or a set of transfers of personal data to a controller or - processor in one or more third countries within a group of undertakings, or group - of enterprises engaged in a joint economic activity; - - (21) ‘supervisory authority’ 
means an independent public authority which is established - by a Member State pursuant to Article 51; - - (22) ‘supervisory authority concerned’ means a supervisory authority which is - concerned by the processing of personal data because: (a) the controller or processor - is established on the territory of the Member State of that supervisory authority; - (b) data subjects residing in the Member State of that supervisory authority are - substantially affected or likely to be substantially affected by the processing; - or (c) a complaint has been lodged with that supervisory authority; - - (23) ‘cross-border processing’ means either: (a) processing of personal data which - takes place in the context of the activities of establishments in more than one - Member State of a controller or processor in the Union where the controller or - processor is established in more than one Member State; or (b) processing of personal - data which takes place in the context of the activities of a single establishment - of a controller or processor in the Union but which substantially affects or is - likely to substantially affect data subjects in more than one Member State. 
- - (24) ‘relevant and reasoned objection’ means an objection to a draft decision - as to whether there is an infringement of this Regulation, or whether envisaged - action in relation to the controller or processor complies with this Regulation, - which clearly demonstrates the significance of the risks posed by the draft decision - as regards the fundamental rights and freedoms of data subjects and, where applicable, - the free flow of personal data within the Union; - - (25) ‘information society service’ means a service as defined in point (b) of - Article 1(1) of Directive (EU) 2015/1535 of the European Parliament and of the - Council (1); - - (26) ‘international organisation’ means an organisation and its subordinate bodies - governed by public international law, or any other body which is set up by, or - on the basis of, an agreement between two or more countries.' -- source_sentence: What type of data may be processed for purposes related to point - (h) of paragraph 2? + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. + + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. 
+ + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. 
+ + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'Email phishing is a type of identity theft scam conducted via email or SMS. The + attacker uses social engineering tactics such as impersonating trusted entities + and inducing urgency. Victims are tricked into disclosing personal information + or downloading malware. + + + Scenarios: + + - Scenario 1: Emails impersonating high-ranking executives accuse victims of crimes + to coerce them into revealing information or opening malware-laden attachments. 
+ + - Scenario 2: Emails/SMS from fake banks or authorities alert victims of data + breaches, directing them to spoofed websites to input credentials. + + - Scenario 3: SMS messages deliver disguised malware apps that harvest sensitive + data. + + - Scenario 4: SMS links lead to pharming sites that mimic trusted brands and steal + login data through fake pop-ups.' +- source_sentence: What circumstances do the term 'facts' refer to within the meaning + of the provision? sentences: - - '1.Processing of personal data revealing racial or ethnic origin, political opinions, - religious or philosophical beliefs, or trade union membership, and the processing - of genetic data, biometric data for the purpose of uniquely identifying a natural - person, data concerning health or data concerning a natural person''s sex life - or sexual orientation shall be prohibited. - - 2.Paragraph 1 shall not apply if one of the following applies: (a) the data subject - has given explicit consent to the processing of those personal data for one or - more specified purposes, except where Union or Member State law provide that the - prohibition referred to in paragraph 1 may not be lifted by the data subject; - (b) processing is necessary for the purposes of carrying out the obligations - and exercising specific rights of the controller or of the data subject in the - field of employment and social security and social protection law in so far as - it is authorised by Union or Member State law or a collective agreement pursuant - to Member State law providing for appropriate safeguards for the fundamental rights - and the interests of the data subject; (c) processing is necessary to protect - the vital interests of the data subject or of another natural person where the - data subject is physically or legally incapable of giving consent; (d) processing - is carried out in the course of its legitimate activities with appropriate safeguards - by a foundation, association or any other 
not-for-profit body with a political, - philosophical, religious or trade union aim and on condition that the processing - relates solely to the members or to former members of the body or to persons who - have regular contact with it in connection with its purposes and that the personal - data are not disclosed outside that body without the consent of the data subjects; - (e) processing relates to personal data which are manifestly made public by the - data subject; (f) processing is necessary for the establishment, exercise or - defence of legal claims or whenever courts are acting in their judicial capacity; - (g) processing is necessary for reasons of substantial public interest, on the - basis of Union or Member State law which shall be proportionate to the aim pursued, - respect the essence of the right to data protection and provide for suitable and - specific measures to safeguard the fundamental rights and the interests of the - data subject; (h) processing is necessary for the purposes of preventive or occupational - medicine, for the assessment of the working capacity of the employee, medical - diagnosis, the provision of health or social care or treatment or the management - of health or social care systems and services on the basis of Union or Member - State law or pursuant to contract with a health professional and subject to the - conditions and safeguards referred to in paragraph 3; (i) processing is necessary - for reasons of public interest in the area of public health, such as protecting - against serious cross-border threats to health or ensuring high standards of quality - and safety of health care and of medicinal products or medical devices, on the - basis of Union or Member State law which provides for suitable and specific measures - to safeguard the rights and freedoms of the data subject, in particular professional - secrecy; 4.5.2016 L 119/38 (j) processing is necessary for archiving purposes - in the public interest, scientific or 
historical research purposes or statistical - purposes in accordance with Article 89(1) based on Union or Member State law which - shall be proportionate to the aim pursued, respect the essence of the right to - data protection and provide for suitable and specific measures to safeguard the - fundamental rights and the interests of the data subject. - - 3.Personal data referred to in paragraph 1 may be processed for the purposes referred - to in point (h) of paragraph 2 when those data are processed by or under the responsibility - of a professional subject to the obligation of professional secrecy under Union - or Member State law or rules established by national competent bodies or by another - person also subject to an obligation of secrecy under Union or Member State law - or rules established by national competent bodies. - - 4.Member States may maintain or introduce further conditions, including limitations, - with regard to the processing of genetic data, biometric data or data concerning - health.' 
- - '1.The data protection officer shall have at least the following tasks: (a) to - inform and advise the controller or the processor and the employees who carry - out processing of their obligations pursuant to this Regulation and to other Union - or Member State data protection provisions; (b) to monitor compliance with this - Regulation, with other Union or Member State data protection provisions and with - the policies of the controller or processor in relation to the protection of personal - data, including the assignment of responsibilities, awareness-raising and training - of staff involved in processing operations, and the related audits; (c) to provide - advice where requested as regards the data protection impact assessment and monitor - its performance pursuant to Article 35; (d) to cooperate with the supervisory - authority; (e) to act as the contact point for the supervisory authority on issues - relating to processing, including the prior consultation referred to in Article - 36, and to consult, where appropriate, with regard to any other matter. - - 2.The data protection officer shall in the performance of his or her tasks have - due regard to the risk associated with processing operations, taking into account - the nature, scope, context and purposes of processing. Section 5 Codes of conduct - and certification' - - Processing should be lawful where it is necessary in the context of a contract - or the intention to enter into a contract. -- source_sentence: What may impede authorities in the discharge of their responsibilities - under Union law? 
+ - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another person’s property by persuading someone to act, + omit, or tolerate something through the knowing misrepresentation of false facts + as true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that for the crime of fraud to be established, + the following elements are required: + + + a) Intent of the perpetrator to obtain for themselves or another an unlawful pecuniary + benefit, regardless of whether this benefit was actually realized; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which, as a causal factor, someone + is deceived and acts in a way that is detrimental to themselves or another (by + an act, omission, or acquiescence); and + + + c) Damage to another’s property, in the sense recognized by civil law, which must + be causally linked to the fraudulent conduct (the deceptive act or omission of + the perpetrator) and to the resulting deception of the person who made the property + disposition. It is not required that the person deceived be the same person who + suffered the damage. + + + Property damage exists when there is a reduction or deterioration in the victim’s + assets, even if the victim has an active claim to restitution. However, as an + element of the objective aspect of the crime of fraud, the damage must be the + direct, necessary, and exclusive result of the property disposition—namely, the + act, omission, or acquiescence performed by the person deceived by the perpetrator’s + fraudulent conduct. 
+ + + There must therefore be a causal connection between the perpetrator’s deceptive + behavior and the deception it caused, as well as between this deception and the + resulting property damage, which must be the direct, necessary, and exclusive + outcome of the deception and of the act, omission, or acquiescence of the deceived + person. + + + The term “facts” refers to real circumstances relating to the past or present, + and not to those expected to occur in the future, such as mere promises or contractual + obligations. However, when such promises or obligations are accompanied by false + assurances and representations of other false facts relating to the present or + the past, in such a way as to create the impression of future fulfillment, based + on the false present situation presented by a perpetrator who has already made + the decision not to fulfill their obligation, then the crime of fraud is established. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their deceptive conduct—that is, when they made the false + representations that deceived the victim or a third party. Any later time at which + the victim’s financial loss actually occurred—thus completing the fraud—or the + time when the deceived person performed the harmful act or omission, is irrelevant.' + - '1. Anyone who, by knowingly presenting false facts as true or by unlawfully concealing + or withholding true facts, damages another person''s property by persuading someone + to act, omit, or tolerate something with the aim of obtaining, for themselves or another, + an unlawful financial gain from the damage to that property shall be punished + with imprisonment, "and if the damage caused is particularly great, with imprisonment + of at least three (3) months and a fine." + + If the damage caused exceeds a total of one hundred and twenty thousand (120,000) + euros, imprisonment of up to ten (10) years and a fine shall be imposed. 
+ + 2. If the fraud is directed directly against the legal entity of the Greek State, + legal entities governed by public law, or local government organizations, and + the damage caused exceeds a total of one hundred and twenty thousand (120,000) + euros, a prison sentence of at least ten (10) years and a fine of up to one thousand + (1,000) daily units shall be imposed. This offense shall be time-barred after + twenty (20) years. + + ' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. 
+ + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' +- source_sentence: When is the time of commission of the fraud considered? 
sentences: - - '1.The controller and the processor shall designate a data protection officer - in any case where: (a) the processing is carried out by a public authority or - body, except for courts acting in their judicial capacity; (b) the core activities - of the controller or the processor consist of processing operations which, by - virtue of their nature, their scope and/or their purposes, require regular and - systematic monitoring of data subjects on a large scale; or (c) the core activities - of the controller or the processor consist of processing on a large scale of special - categories of data pursuant to Article 9 and personal data relating to criminal - convictions and offences referred to in Article 10 - - 2.A group of undertakings may appoint a single data protection officer provided - that a data protection officer is easily accessible from each establishment. - - 3.Where the controller or the processor is a public authority or body, a single - data protection officer may be designated for several such authorities or bodies, - taking account of their organisational structure and size. - - 4.In cases other than those referred to in paragraph 1, the controller or processor - or associations and other bodies representing categories of controllers or processors - may or, where required by Union or Member State law shall, designate a data protection - officer. The data protection officer may act for such associations and other bodies - representing controllers or processors. - - 5.The data protection officer shall be designated on the basis of professional - qualities and, in particular, expert knowledge of data protection law and practices - and the ability to fulfil the tasks referred to in Article 39 - - 6.The data protection officer may be a staff member of the controller or processor, - or fulfil the tasks on the basis of a service contract. 
- - 7.The controller or the processor shall publish the contact details of the data - protection officer and communicate them to the supervisory authority.' - - This Regulation is without prejudice to international agreements concluded between - the Union and third countries regulating the transfer of personal data including - appropriate safeguards for the data subjects. Member States may conclude international - agreements which involve the transfer of personal data to third countries or international - organisations, as far as such agreements do not affect this Regulation or any - other provisions of Union law and include an appropriate level of protection for - the fundamental rights of the data subjects. - - The objectives and principles of Directive 95/46/EC remain sound, but it has not - prevented fragmentation in the implementation of data protection across the Union, - legal uncertainty or a widespread public perception that there are significant - risks to the protection of natural persons, in particular with regard to online - activity. Differences in the level of protection of the rights and freedoms of - natural persons, in particular the right to the protection of personal data, with - regard to the processing of personal data in the Member States may prevent the - free flow of personal data throughout the Union. Those differences may therefore - constitute an obstacle to the pursuit of economic activities at the level of the - Union, distort competition and impede authorities in the discharge of their responsibilities - under Union law. Such a difference in levels of protection is due to the existence - of differences in the implementation and application of Directive 95/46/EC. + - 'Spear phishing targets specific individuals or employees within an organization + using personalized, deceptive emails. Unlike mass phishing, these emails are crafted + to seem familiar and urgent. 
+ + + Scenarios: + + - CEO Fraud: Attackers impersonate executives to extract financial or sensitive + data from employees. + + - Whaling: High-ranking executives are targeted using tailored fraud emails that + press for immediate action without verification.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. 
However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. + + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. 
+ + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' pipeline_tag: sentence-similarity library_name: sentence-transformers metrics: @@ -678,7 +779,7 @@ metrics: - cosine_mrr@10 - cosine_map@100 model-index: -- name: multilingual-e5-large +- name: multilingual_e5_large Finetuned on Data results: - task: type: information-retrieval @@ -688,49 +789,49 @@ model-index: type: dim_1024 metrics: - type: cosine_accuracy@1 - value: 0.3053777208706786 + value: 0.5238095238095238 name: Cosine Accuracy@1 - type: cosine_accuracy@3 - value: 0.31434058898847633 + value: 0.5238095238095238 name: Cosine Accuracy@3 - type: cosine_accuracy@5 - value: 0.34635083226632524 + value: 0.5238095238095238 name: Cosine Accuracy@5 - type: cosine_accuracy@10 - value: 0.3854033290653009 + value: 0.6190476190476191 name: Cosine Accuracy@10 - type: cosine_precision@1 - value: 0.3053777208706786 + value: 0.5238095238095238 name: Cosine Precision@1 - type: cosine_precision@3 - value: 0.30644472897994024 + value: 0.5079365079365079 name: Cosine Precision@3 - type: cosine_precision@5 - value: 0.2998719590268886 + value: 0.4666666666666666 name: Cosine Precision@5 - type: cosine_precision@10 - value: 0.27516005121638926 + value: 0.4428571428571429 name: Cosine Precision@10 - type: cosine_recall@1 - value: 0.03802085156159601 + value: 0.08218864468864469 name: Cosine Recall@1 - type: cosine_recall@3 - value: 0.11197884027712995 + value: 0.22275641025641024 name: Cosine Recall@3 - type: cosine_recall@5 - value: 0.16921874866295042 + value: 0.2958638583638584 name: Cosine Recall@5 - type: cosine_recall@10 - 
value: 0.2530788160980187 + value: 0.4766483516483517 name: Cosine Recall@10 - type: cosine_ndcg@10 - value: 0.3342308256907798 + value: 0.5598242514045669 name: Cosine Ndcg@10 - type: cosine_mrr@10 - value: 0.31961389549417674 + value: 0.5374149659863945 name: Cosine Mrr@10 - type: cosine_map@100 - value: 0.3945564342654779 + value: 0.6534286699882501 name: Cosine Map@100 - task: type: information-retrieval @@ -740,49 +841,49 @@ model-index: type: dim_768 metrics: - type: cosine_accuracy@1 - value: 0.29961587708066584 + value: 0.5238095238095238 name: Cosine Accuracy@1 - type: cosine_accuracy@3 - value: 0.30985915492957744 + value: 0.5238095238095238 name: Cosine Accuracy@3 - type: cosine_accuracy@5 - value: 0.3393085787451985 + value: 0.5238095238095238 name: Cosine Accuracy@5 - type: cosine_accuracy@10 - value: 0.382202304737516 + value: 0.6190476190476191 name: Cosine Accuracy@10 - type: cosine_precision@1 - value: 0.29961587708066584 + value: 0.5238095238095238 name: Cosine Precision@1 - type: cosine_precision@3 - value: 0.3008962868117797 + value: 0.5079365079365079 name: Cosine Precision@3 - type: cosine_precision@5 - value: 0.2946222791293214 + value: 0.4666666666666666 name: Cosine Precision@5 - type: cosine_precision@10 - value: 0.2716389244558258 + value: 0.4428571428571429 name: Cosine Precision@10 - type: cosine_recall@1 - value: 0.036933576189293056 + value: 0.08218864468864469 name: Cosine Recall@1 - type: cosine_recall@3 - value: 0.10879088395240075 + value: 0.22275641025641024 name: Cosine Recall@3 - type: cosine_recall@5 - value: 0.16490167901637093 + value: 0.2958638583638584 name: Cosine Recall@5 - type: cosine_recall@10 - value: 0.24941789989219518 + value: 0.4766483516483517 name: Cosine Recall@10 - type: cosine_ndcg@10 - value: 0.32886000357585454 + value: 0.5598242514045669 name: Cosine Ndcg@10 - type: cosine_mrr@10 - value: 0.31423414222709956 + value: 0.5374149659863945 name: Cosine Mrr@10 - type: cosine_map@100 - value: 
0.39053989548220974 + value: 0.653075337994289 name: Cosine Map@100 - task: type: information-retrieval @@ -792,49 +893,49 @@ model-index: type: dim_512 metrics: - type: cosine_accuracy@1 - value: 0.29449423815621 + value: 0.5238095238095238 name: Cosine Accuracy@1 - type: cosine_accuracy@3 - value: 0.3060179257362356 + value: 0.5238095238095238 name: Cosine Accuracy@3 - type: cosine_accuracy@5 - value: 0.3361075544174136 + value: 0.5238095238095238 name: Cosine Accuracy@5 - type: cosine_accuracy@10 - value: 0.3738796414852753 + value: 0.6190476190476191 name: Cosine Accuracy@10 - type: cosine_precision@1 - value: 0.29449423815621 + value: 0.5238095238095238 name: Cosine Precision@1 - type: cosine_precision@3 - value: 0.29641485275288093 + value: 0.5079365079365079 name: Cosine Precision@3 - type: cosine_precision@5 - value: 0.29078104993597953 + value: 0.4666666666666666 name: Cosine Precision@5 - type: cosine_precision@10 - value: 0.2677336747759283 + value: 0.4428571428571429 name: Cosine Precision@10 - type: cosine_recall@1 - value: 0.03628785736063772 + value: 0.08218864468864469 name: Cosine Recall@1 - type: cosine_recall@3 - value: 0.10691245199699026 + value: 0.22275641025641024 name: Cosine Recall@3 - type: cosine_recall@5 - value: 0.16186564288415414 + value: 0.2958638583638584 name: Cosine Recall@5 - type: cosine_recall@10 - value: 0.24344608505680018 + value: 0.4766483516483517 name: Cosine Recall@10 - type: cosine_ndcg@10 - value: 0.3240238070684528 + value: 0.5598242514045669 name: Cosine Ndcg@10 - type: cosine_mrr@10 - value: 0.30902409405930487 + value: 0.5374149659863945 name: Cosine Mrr@10 - type: cosine_map@100 - value: 0.3843895305060905 + value: 0.6492208787775379 name: Cosine Map@100 - task: type: information-retrieval @@ -844,49 +945,49 @@ model-index: type: dim_256 metrics: - type: cosine_accuracy@1 - value: 0.2874519846350832 + value: 0.6190476190476191 name: Cosine Accuracy@1 - type: cosine_accuracy@3 - value: 0.293213828425096 + value: 
0.6190476190476191 name: Cosine Accuracy@3 - type: cosine_accuracy@5 - value: 0.3181818181818182 + value: 0.6190476190476191 name: Cosine Accuracy@5 - type: cosine_accuracy@10 - value: 0.353393085787452 + value: 0.6666666666666666 name: Cosine Accuracy@10 - type: cosine_precision@1 - value: 0.2874519846350832 + value: 0.6190476190476191 name: Cosine Precision@1 - type: cosine_precision@3 - value: 0.2874519846350832 + value: 0.6031746031746031 name: Cosine Precision@3 - type: cosine_precision@5 - value: 0.28015364916773366 + value: 0.5619047619047619 name: Cosine Precision@5 - type: cosine_precision@10 - value: 0.2553777208706786 + value: 0.5190476190476192 name: Cosine Precision@10 - type: cosine_recall@1 - value: 0.03468470626522805 + value: 0.08600427350427349 name: Cosine Recall@1 - type: cosine_recall@3 - value: 0.10190738055739662 + value: 0.2342032967032967 name: Cosine Recall@3 - type: cosine_recall@5 - value: 0.15339992748676054 + value: 0.31494200244200243 name: Cosine Recall@5 - type: cosine_recall@10 - value: 0.2285021587131539 + value: 0.5028998778998779 name: Cosine Recall@10 - type: cosine_ndcg@10 - value: 0.30974666081054697 + value: 0.6420780535145918 name: Cosine Ndcg@10 - type: cosine_mrr@10 - value: 0.29859866268316915 + value: 0.6258503401360545 name: Cosine Mrr@10 - type: cosine_map@100 - value: 0.36682428388570304 + value: 0.6975707466438095 name: Cosine Map@100 - task: type: information-retrieval @@ -896,49 +997,49 @@ model-index: type: dim_128 metrics: - type: cosine_accuracy@1 - value: 0.26632522407170295 + value: 0.5238095238095238 name: Cosine Accuracy@1 - type: cosine_accuracy@3 - value: 0.2714468629961588 + value: 0.5238095238095238 name: Cosine Accuracy@3 - type: cosine_accuracy@5 - value: 0.29769526248399486 + value: 0.5238095238095238 name: Cosine Accuracy@5 - type: cosine_accuracy@10 - value: 0.3322663252240717 + value: 0.6190476190476191 name: Cosine Accuracy@10 - type: cosine_precision@1 - value: 0.26632522407170295 + value: 
0.5238095238095238 name: Cosine Precision@1 - type: cosine_precision@3 - value: 0.26632522407170295 + value: 0.5079365079365079 name: Cosine Precision@3 - type: cosine_precision@5 - value: 0.2601792573623559 + value: 0.4666666666666666 name: Cosine Precision@5 - type: cosine_precision@10 - value: 0.23681177976952622 + value: 0.4428571428571429 name: Cosine Precision@10 - type: cosine_recall@1 - value: 0.032637352520298826 + value: 0.0811965811965812 name: Cosine Recall@1 - type: cosine_recall@3 - value: 0.09577557901596723 + value: 0.21978021978021975 name: Cosine Recall@3 - type: cosine_recall@5 - value: 0.14494055334056014 + value: 0.2909035409035409 name: Cosine Recall@5 - type: cosine_recall@10 - value: 0.2162161410106577 + value: 0.46672771672771673 name: Cosine Recall@10 - type: cosine_ndcg@10 - value: 0.2888228791481225 + value: 0.5598242514045669 name: Cosine Ndcg@10 - type: cosine_mrr@10 - value: 0.2774843505477303 + value: 0.5374149659863945 name: Cosine Mrr@10 - type: cosine_map@100 - value: 0.3452084373303918 + value: 0.6478872365910466 name: Cosine Map@100 - task: type: information-retrieval @@ -948,53 +1049,53 @@ model-index: type: dim_64 metrics: - type: cosine_accuracy@1 - value: 0.24647887323943662 + value: 0.42857142857142855 name: Cosine Accuracy@1 - type: cosine_accuracy@3 - value: 0.25160051216389245 + value: 0.47619047619047616 name: Cosine Accuracy@3 - type: cosine_accuracy@5 - value: 0.28040973111395645 + value: 0.47619047619047616 name: Cosine Accuracy@5 - type: cosine_accuracy@10 - value: 0.3111395646606914 + value: 0.5714285714285714 name: Cosine Accuracy@10 - type: cosine_precision@1 - value: 0.24647887323943662 + value: 0.42857142857142855 name: Cosine Precision@1 - type: cosine_precision@3 - value: 0.24669227486128895 + value: 0.4444444444444445 name: Cosine Precision@3 - type: cosine_precision@5 - value: 0.24186939820742642 + value: 0.419047619047619 name: Cosine Precision@5 - type: cosine_precision@10 - value: 0.22227912932138283 + 
value: 0.3952380952380953 name: Cosine Precision@10 - type: cosine_recall@1 - value: 0.029677140839872655 + value: 0.054410866910866905 name: Cosine Recall@1 - type: cosine_recall@3 - value: 0.08750621650497534 + value: 0.18704212454212454 name: Cosine Recall@3 - type: cosine_recall@5 - value: 0.13309179677669725 + value: 0.27602258852258854 name: Cosine Recall@5 - type: cosine_recall@10 - value: 0.19988984965370699 + value: 0.43696581196581197 name: Cosine Recall@10 - type: cosine_ndcg@10 - value: 0.26920687806072385 + value: 0.4917595713548203 name: Cosine Ndcg@10 - type: cosine_mrr@10 - value: 0.2576418104587115 + value: 0.45804988662131524 name: Cosine Mrr@10 - type: cosine_map@100 - value: 0.32242948081625944 + value: 0.5872011588310861 name: Cosine Map@100 --- -# multilingual-e5-large +# multilingual_e5_large Finetuned on Data This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. @@ -1044,9 +1145,9 @@ from sentence_transformers import SentenceTransformer model = SentenceTransformer("sentence_transformers_model_id") # Run inference sentences = [ - 'What may impede authorities in the discharge of their responsibilities under Union law?', - 'The objectives and principles of Directive 95/46/EC remain sound, but it has not prevented fragmentation in the implementation of data protection across the Union, legal uncertainty or a widespread public perception that there are significant risks to the protection of natural persons, in particular with regard to online activity. 
Differences in the level of protection of the rights and freedoms of natural persons, in particular the right to the protection of personal data, with regard to the processing of personal data in the Member States may prevent the free flow of personal data throughout the Union. Those differences may therefore constitute an obstacle to the pursuit of economic activities at the level of the Union, distort competition and impede authorities in the discharge of their responsibilities under Union law. Such a difference in levels of protection is due to the existence of differences in the implementation and application of Directive 95/46/EC.', - 'This Regulation is without prejudice to international agreements concluded between the Union and third countries regulating the transfer of personal data including appropriate safeguards for the data subjects. Member States may conclude international agreements which involve the transfer of personal data to third countries or international organisations, as far as such agreements do not affect this Regulation or any other provisions of Union law and include an appropriate level of protection for the fundamental rights of the data subjects.', + 'When is the time of commission of the fraud considered?', + 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code,\n\n"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."\n\nFrom this provision it follows that, for the crime of fraud to be established, the following elements are required:\n\na) The intent of the perpetrator to obtain for themselves or another an 
unlawful pecuniary benefit, without it being necessary that the benefit actually materialize;\n\nb) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence that is detrimental to themselves or another; and\n\nc) Damage to another person’s property, as defined under civil law, which must be causally linked to the deceptive acts or omissions of the perpetrator. It is not required that the person deceived and the person who suffered the damage be the same individual.\n\nThe term “facts”, within the meaning of the above provision, refers to real circumstances relating to the past or present, and not to those that will occur in the future, such as mere promises or contractual obligations. However, when such promises or obligations are accompanied by false assurances and representations of other false facts referring to the present or the past, in such a manner as to create the impression of future fulfillment based on a false present situation fabricated by the perpetrator, who has already formed the decision not to fulfill their obligation, the crime of fraud is established.\n\nThe term “property” refers to the totality of a person’s economic assets that possess monetary value, while damage to property means its reduction—specifically, the difference between the monetary value the property had before the disposition caused by the fraudulent conduct and the value remaining after it. Property damage exists even if the victim possesses an active claim for restitution.\n\nThe time of commission of the fraud is considered to be the moment when the perpetrator acted and completed their fraudulent conduct, namely when they made the false representations that deceived the victim or a third party. 
Any subsequent moment at which the victim’s damage actually occurred—thereby completing the fraud—or the time when the victim carried out the harmful act or omission, is irrelevant.', + 'Spear phishing targets specific individuals or employees within an organization using personalized, deceptive emails. Unlike mass phishing, these emails are crafted to seem familiar and urgent.\n\nScenarios:\n- CEO Fraud: Attackers impersonate executives to extract financial or sensitive data from employees.\n- Whaling: High-ranking executives are targeted using tailored fraud emails that press for immediate action without verification.', ] embeddings = model.encode(sentences) print(embeddings.shape) @@ -1055,9 +1156,9 @@ print(embeddings.shape) # Get the similarity scores for the embeddings similarities = model.similarity(embeddings, embeddings) print(similarities) -# tensor([[1.0000, 0.5451, 0.4389], -# [0.5451, 1.0000, 0.6484], -# [0.4389, 0.6484, 1.0000]]) +# tensor([[1.0000, 0.6673, 0.4780], +# [0.6673, 1.0000, 0.4691], +# [0.4780, 0.4691, 1.0000]]) ``` +- **Maximum Sequence Length:** 512 tokens +- **Output Dimensionality:** 1024 dimensions +- **Similarity Function:** Cosine Similarity + +- **Language:** en +- **License:** apache-2.0 + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'}) + (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': 
False, 'include_prompt': True}) + (2): Normalize() +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'When is the time of commission of the fraud considered?', + 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code,\n\n"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."\n\nFrom this provision it follows that, for the crime of fraud to be established, the following elements are required:\n\na) The intent of the perpetrator to obtain for themselves or another an unlawful pecuniary benefit, without it being necessary that the benefit actually materialize;\n\nb) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence that is detrimental to themselves or another; and\n\nc) Damage to another person’s property, as defined under civil law, which must be causally linked to the deceptive acts or omissions of the perpetrator. 
It is not required that the person deceived and the person who suffered the damage be the same individual.\n\nThe term “facts”, within the meaning of the above provision, refers to real circumstances relating to the past or present, and not to those that will occur in the future, such as mere promises or contractual obligations. However, when such promises or obligations are accompanied by false assurances and representations of other false facts referring to the present or the past, in such a manner as to create the impression of future fulfillment based on a false present situation fabricated by the perpetrator, who has already formed the decision not to fulfill their obligation, the crime of fraud is established.\n\nThe term “property” refers to the totality of a person’s economic assets that possess monetary value, while damage to property means its reduction—specifically, the difference between the monetary value the property had before the disposition caused by the fraudulent conduct and the value remaining after it. Property damage exists even if the victim possesses an active claim for restitution.\n\nThe time of commission of the fraud is considered to be the moment when the perpetrator acted and completed their fraudulent conduct, namely when they made the false representations that deceived the victim or a third party. Any subsequent moment at which the victim’s damage actually occurred—thereby completing the fraud—or the time when the victim carried out the harmful act or omission, is irrelevant.', + 'Spear phishing targets specific individuals or employees within an organization using personalized, deceptive emails. 
Unlike mass phishing, these emails are crafted to seem familiar and urgent.\n\nScenarios:\n- CEO Fraud: Attackers impersonate executives to extract financial or sensitive data from employees.\n- Whaling: High-ranking executives are targeted using tailored fraud emails that press for immediate action without verification.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 1024] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities) +# tensor([[1.0000, 0.6673, 0.4780], +# [0.6673, 1.0000, 0.4691], +# [0.4780, 0.4691, 1.0000]]) +``` + + + + + + + +## Evaluation + +### Metrics + +#### Information Retrieval + +* Dataset: `dim_1024` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 1024 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5714 | +| cosine_accuracy@10 | 0.6667 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4762 | +| cosine_precision@10 | 0.4476 | +| cosine_recall@1 | 0.0893 | +| cosine_recall@3 | 0.2442 | +| cosine_recall@5 | 0.3395 | +| cosine_recall@10 | 0.5401 | +| **cosine_ndcg@10** | **0.5921** | +| cosine_mrr@10 | 0.5481 | +| cosine_map@100 | 0.6742 | + +#### Information Retrieval + +* Dataset: `dim_768` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 768 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5714 
| +| cosine_accuracy@10 | 0.6667 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4762 | +| cosine_precision@10 | 0.4476 | +| cosine_recall@1 | 0.0893 | +| cosine_recall@3 | 0.2442 | +| cosine_recall@5 | 0.3395 | +| cosine_recall@10 | 0.5401 | +| **cosine_ndcg@10** | **0.5921** | +| cosine_mrr@10 | 0.5481 | +| cosine_map@100 | 0.6742 | + +#### Information Retrieval + +* Dataset: `dim_512` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 512 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4762 | +| cosine_accuracy@3 | 0.4762 | +| cosine_accuracy@5 | 0.5714 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.4762 | +| cosine_precision@3 | 0.4603 | +| cosine_precision@5 | 0.4571 | +| cosine_precision@10 | 0.4238 | +| cosine_recall@1 | 0.0735 | +| cosine_recall@3 | 0.1966 | +| cosine_recall@5 | 0.3078 | +| cosine_recall@10 | 0.5203 | +| **cosine_ndcg@10** | **0.5518** | +| cosine_mrr@10 | 0.502 | +| cosine_map@100 | 0.6266 | + +#### Information Retrieval + +* Dataset: `dim_256` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 256 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5714 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4952 | +| cosine_precision@10 | 0.4238 | +| cosine_recall@1 | 0.0814 | +| cosine_recall@3 | 0.2204 | +| cosine_recall@5 | 0.3395 | +| cosine_recall@10 | 0.5203 | +| 
**cosine_ndcg@10** | **0.5709** | +| cosine_mrr@10 | 0.5401 | +| cosine_map@100 | 0.6515 | + +#### Information Retrieval + +* Dataset: `dim_128` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 128 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5714 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5238 | +| cosine_precision@5 | 0.5048 | +| cosine_precision@10 | 0.4238 | +| cosine_recall@1 | 0.0735 | +| cosine_recall@3 | 0.2204 | +| cosine_recall@5 | 0.3475 | +| cosine_recall@10 | 0.5203 | +| **cosine_ndcg@10** | **0.5685** | +| cosine_mrr@10 | 0.5401 | +| cosine_map@100 | 0.649 | + +#### Information Retrieval + +* Dataset: `dim_64` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 64 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4286 | +| cosine_accuracy@3 | 0.4286 | +| cosine_accuracy@5 | 0.4762 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.4286 | +| cosine_precision@3 | 0.4286 | +| cosine_precision@5 | 0.4286 | +| cosine_precision@10 | 0.4 | +| cosine_recall@1 | 0.0536 | +| cosine_recall@3 | 0.1609 | +| cosine_recall@5 | 0.2721 | +| cosine_recall@10 | 0.5005 | +| **cosine_ndcg@10** | **0.5113** | +| cosine_mrr@10 | 0.4596 | +| cosine_map@100 | 0.5888 | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 82 training samples +* Columns: anchor and positive +* Approximate statistics based on the first 82 samples: + | | anchor 
| positive | + |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------| + | type | string | string | + | details | | | +* Samples: + | anchor | positive | + |:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | What determines whether the act in question shall be punished if the offender is in the service of the legal holder of the data? 
| Everyone who obtains access to data recorded in a computer or in the external memory of a computer or transmitted by telecommunication systems shall be punished with imprisonment for up to six months or by a fine from 29 to 15,000 Euro, under the condition that these acts have been committed without right, especially in violation of prohibitions or of security measures taken by the legal holder. If the act concerns the international relations or the security of the State, he shall be punished according to Article 148.
If the offender is in the service of the legal holder of the data, the act of the preceding paragraph shall be punished only if it has been explicitly prohibited by internal regulations or by a written decision of the holder or of a competent employee of his.
| + | What must be causally connected to the perpetrator's deceptive acts? | According to Article 386 paragraph 1 of the Greek Penal Code,

"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."

From these provisions, it follows that, for the crime of fraud to be established, the following elements are required:

a) The intent of the perpetrator to obtain for themselves or another an unlawful pecuniary benefit;

b) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence detrimental to th...
| + | Who can be punished with imprisonment? | 1. Anyone who, by knowingly presenting false facts as true or by unlawfully concealing or withholding true facts, damages another person's property by persuading someone to act, omission, or tolerance with the aim of obtaining, for themselves or another, an unlawful financial gain from the damage to that property shall be punished with imprisonment, "and if the damage caused is particularly great, with imprisonment of at least three (3) months and a fine." .
If the damage caused exceeds a total of one hundred and twenty thousand (120,000) euros, imprisonment of up to ten (10) years and a fine shall be imposed.
2. If the fraud is directed directly against the legal entity of the Greek State, legal entities governed by public law, or local government organizations, and the damage caused exceeds a total of one hundred and twenty thousand (120,000) euros, a prison sentence of at least ten (10) years and a fine of up to one thousand (1,000) daily units shall be imposed. This offense shall b...
| +* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: + ```json + { + "loss": "MultipleNegativesRankingLoss", + "matryoshka_dims": [ + 1024, + 768, + 512, + 256, + 128, + 64 + ], + "matryoshka_weights": [ + 1, + 1, + 1, + 1, + 1, + 1 + ], + "n_dims_per_step": -1 + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: epoch +- `gradient_accumulation_steps`: 2 +- `learning_rate`: 2e-05 +- `num_train_epochs`: 10 +- `lr_scheduler_type`: cosine +- `warmup_ratio`: 0.1 +- `bf16`: True +- `tf32`: True +- `load_best_model_at_end`: True +- `optim`: adamw_torch_fused +- `batch_sampler`: no_duplicates + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: epoch +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 8 +- `per_device_eval_batch_size`: 8 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 2 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 2e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1.0 +- `num_train_epochs`: 10 +- `max_steps`: -1 +- `lr_scheduler_type`: cosine +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.1 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: True +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: True +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: True +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `tp_size`: 0 +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 
'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch_fused +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: no_duplicates +- `multi_dataset_batch_sampler`: proportional +- `router_mapping`: {} +- `learning_rate_mapping`: {} + +
+ +### Training Logs +| Epoch | Step | Training Loss | dim_1024_cosine_ndcg@10 | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 | +|:------:|:----:|:-------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:| +| 0.1818 | 1 | 18.029 | - | - | - | - | - | - | +| 0.3636 | 2 | 19.4106 | - | - | - | - | - | - | +| 0.5455 | 3 | 16.6201 | - | - | - | - | - | - | +| 0.7273 | 4 | 15.3048 | - | - | - | - | - | - | +| 0.9091 | 5 | 14.0182 | - | - | - | - | - | - | +| 1.0 | 6 | 6.4771 | - | - | - | - | - | - | +| 1.0909 | 7 | 6.7664 | 0.6167 | 0.5821 | 0.5524 | 0.5177 | 0.5278 | 0.4124 | +| 1.1818 | 8 | 11.8583 | - | - | - | - | - | - | +| 1.3636 | 9 | 11.9216 | - | - | - | - | - | - | +| 1.5455 | 10 | 13.3764 | - | - | - | - | - | - | +| 1.7273 | 11 | 12.9063 | - | - | - | - | - | - | +| 1.9091 | 12 | 13.5984 | - | - | - | - | - | - | +| 2.0 | 13 | 7.8523 | - | - | - | - | - | - | +| 2.0909 | 14 | 4.4487 | 0.5921 | 0.5921 | 0.5518 | 0.5709 | 0.5685 | 0.5113 | + + +### Framework Versions +- Python: 3.12.12 +- Sentence Transformers: 5.1.1 +- Transformers: 4.51.3 +- PyTorch: 2.8.0+cu126 +- Accelerate: 1.11.0 +- Datasets: 4.0.0 +- Tokenizers: 0.21.4 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MatryoshkaLoss +```bibtex +@misc{kusupati2024matryoshka, + title={Matryoshka Representation Learning}, + author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and 
Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi}, + year={2024}, + eprint={2205.13147}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-14/config.json b/checkpoint-14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..93f46b2fc5d0e832be8338cb0ce9f09ae81bc1a2 --- /dev/null +++ b/checkpoint-14/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "XLMRobertaModel" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/checkpoint-14/config_sentence_transformers.json b/checkpoint-14/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..a0390e7f92afe1fdc0e00e1712f50de56290fde6 --- /dev/null +++ b/checkpoint-14/config_sentence_transformers.json @@ -0,0 +1,14 @@ +{ + "model_type": "SentenceTransformer", + "__version__": { + "sentence_transformers": 
"5.1.1", + "transformers": "4.51.3", + "pytorch": "2.8.0+cu126" + }, + "prompts": { + "query": "", + "document": "" + }, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-14/model.safetensors b/checkpoint-14/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1722446b91f3a5104288b64b2b62960773485579 --- /dev/null +++ b/checkpoint-14/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46e99ac0a0cb9fa54e6b9a6f77368aacd6fac9b3751511f22a5ca5c9fb7c5204 +size 2239607176 diff --git a/checkpoint-14/modules.json b/checkpoint-14/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..952a9b81c0bfd99800fabf352f69c7ccd46c5e43 --- /dev/null +++ b/checkpoint-14/modules.json @@ -0,0 +1,20 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + }, + { + "idx": 2, + "name": "2", + "path": "2_Normalize", + "type": "sentence_transformers.models.Normalize" + } +] \ No newline at end of file diff --git a/checkpoint-14/optimizer.pt b/checkpoint-14/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6acf7464ff8d79edb08705082a362bae577ac811 --- /dev/null +++ b/checkpoint-14/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e6297d4d18fe9514b828db9f04e257311c390afeb2ee8199df0c8a21c372ad +size 4471067142 diff --git a/checkpoint-14/rng_state.pth b/checkpoint-14/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1602dddb7a5aa52513201caf363ad6b1d15f9b1 --- /dev/null +++ b/checkpoint-14/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b2584240053bafbf0d1841cb2d9fbbccb6afaacb562198511df474996d98d8 +size 14645 diff --git 
a/checkpoint-14/scheduler.pt b/checkpoint-14/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eef586257ee3d92c1d6c18c0ea35b3ee23b2ac21 --- /dev/null +++ b/checkpoint-14/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62064cbcdbbfd5a9d97ecb6c2c1900ba606dc24883f0bbf53e596e2266d6824a +size 1465 diff --git a/checkpoint-14/sentence_bert_config.json b/checkpoint-14/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4eca68d85ecd3034cf4174d8a4033a75344ea62d --- /dev/null +++ b/checkpoint-14/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 512, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-14/sentencepiece.bpe.model b/checkpoint-14/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-14/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-14/special_tokens_map.json b/checkpoint-14/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-14/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": 
false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-14/tokenizer.json b/checkpoint-14/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2a51933f1ccb3cf68d53b877cbfa24734ada642f --- /dev/null +++ b/checkpoint-14/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085 +size 17082987 diff --git a/checkpoint-14/tokenizer_config.json b/checkpoint-14/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff4390b993a1a04254ff2e21bc6bee199cd6e32 --- /dev/null +++ b/checkpoint-14/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/checkpoint-14/trainer_state.json 
b/checkpoint-14/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4a78687d9fb9829c74ddd03acdaded5e2b0ecea7 --- /dev/null +++ b/checkpoint-14/trainer_state.json @@ -0,0 +1,337 @@ +{ + "best_global_step": 14, + "best_metric": 0.5685354415901852, + "best_model_checkpoint": "multilingual-e5-large/checkpoint-14", + "epoch": 2.090909090909091, + "eval_steps": 500, + "global_step": 14, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.18181818181818182, + "grad_norm": 135.6220703125, + "learning_rate": 0.0, + "loss": 18.029, + "step": 1 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 140.5317840576172, + "learning_rate": 4.000000000000001e-06, + "loss": 19.4106, + "step": 2 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 142.2399444580078, + "learning_rate": 8.000000000000001e-06, + "loss": 16.6201, + "step": 3 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 110.63945770263672, + "learning_rate": 1.2e-05, + "loss": 15.3048, + "step": 4 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 129.43089294433594, + "learning_rate": 1.6000000000000003e-05, + "loss": 14.0182, + "step": 5 + }, + { + "epoch": 1.0, + "grad_norm": 108.25520324707031, + "learning_rate": 2e-05, + "loss": 6.4771, + "step": 6 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 95.05782318115234, + "learning_rate": 1.9975640502598243e-05, + "loss": 6.7664, + "step": 7 + }, + { + "epoch": 1.0909090909090908, + "eval_dim_1024_cosine_accuracy@1": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_1024_cosine_accuracy@3": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@5": 0.6190476190476191, + "eval_dim_1024_cosine_map@100": 0.6946498352795571, + "eval_dim_1024_cosine_mrr@10": 0.5888888888888888, + "eval_dim_1024_cosine_ndcg@10": 0.6167053425672016, + "eval_dim_1024_cosine_precision@1": 0.5714285714285714, + 
"eval_dim_1024_cosine_precision@10": 0.4476190476190477, + "eval_dim_1024_cosine_precision@3": 0.5555555555555556, + "eval_dim_1024_cosine_precision@5": 0.5142857142857142, + "eval_dim_1024_cosine_recall@1": 0.10123626373626372, + "eval_dim_1024_cosine_recall@10": 0.5401404151404151, + "eval_dim_1024_cosine_recall@3": 0.2798992673992674, + "eval_dim_1024_cosine_recall@5": 0.3871336996336997, + "eval_dim_128_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_128_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_128_cosine_map@100": 0.5954946117864774, + "eval_dim_128_cosine_mrr@10": 0.49251700680272104, + "eval_dim_128_cosine_ndcg@10": 0.5277801377642622, + "eval_dim_128_cosine_precision@1": 0.47619047619047616, + "eval_dim_128_cosine_precision@10": 0.3761904761904762, + "eval_dim_128_cosine_precision@3": 0.4603174603174603, + "eval_dim_128_cosine_precision@5": 0.42857142857142855, + "eval_dim_128_cosine_recall@1": 0.08536324786324785, + "eval_dim_128_cosine_recall@10": 0.4806166056166057, + "eval_dim_128_cosine_recall@3": 0.23228021978021982, + "eval_dim_128_cosine_recall@5": 0.31967338217338215, + "eval_dim_256_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6218563539505393, + "eval_dim_256_cosine_mrr@10": 0.4925170068027211, + "eval_dim_256_cosine_ndcg@10": 0.5177339964253599, + "eval_dim_256_cosine_precision@1": 0.47619047619047616, + "eval_dim_256_cosine_precision@10": 0.3476190476190476, + "eval_dim_256_cosine_precision@3": 0.4603174603174603, + "eval_dim_256_cosine_precision@5": 0.419047619047619, + "eval_dim_256_cosine_recall@1": 0.09543650793650793, + "eval_dim_256_cosine_recall@10": 0.47817460317460325, + "eval_dim_256_cosine_recall@3": 0.2625, 
+ "eval_dim_256_cosine_recall@5": 0.35813492063492064, + "eval_dim_512_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_512_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_512_cosine_map@100": 0.6608463504287858, + "eval_dim_512_cosine_mrr@10": 0.5333333333333333, + "eval_dim_512_cosine_ndcg@10": 0.5523687509163372, + "eval_dim_512_cosine_precision@1": 0.5238095238095238, + "eval_dim_512_cosine_precision@10": 0.3761904761904762, + "eval_dim_512_cosine_precision@3": 0.5079365079365079, + "eval_dim_512_cosine_precision@5": 0.4666666666666666, + "eval_dim_512_cosine_recall@1": 0.0994047619047619, + "eval_dim_512_cosine_recall@10": 0.5019841269841271, + "eval_dim_512_cosine_recall@3": 0.2744047619047619, + "eval_dim_512_cosine_recall@5": 0.37797619047619047, + "eval_dim_64_cosine_accuracy@1": 0.3333333333333333, + "eval_dim_64_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_64_cosine_accuracy@3": 0.38095238095238093, + "eval_dim_64_cosine_accuracy@5": 0.38095238095238093, + "eval_dim_64_cosine_map@100": 0.5579595177809107, + "eval_dim_64_cosine_mrr@10": 0.3764172335600907, + "eval_dim_64_cosine_ndcg@10": 0.41244392103396355, + "eval_dim_64_cosine_precision@1": 0.3333333333333333, + "eval_dim_64_cosine_precision@10": 0.2619047619047619, + "eval_dim_64_cosine_precision@3": 0.3492063492063492, + "eval_dim_64_cosine_precision@5": 0.3142857142857143, + "eval_dim_64_cosine_recall@1": 0.07063492063492063, + "eval_dim_64_cosine_recall@10": 0.42002442002442003, + "eval_dim_64_cosine_recall@3": 0.2357142857142857, + "eval_dim_64_cosine_recall@5": 0.34523809523809523, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_768_cosine_map@100": 0.6727109022414055, + 
"eval_dim_768_cosine_mrr@10": 0.5480725623582765, + "eval_dim_768_cosine_ndcg@10": 0.5820705880762242, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.41904761904761906, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.09726800976800977, + "eval_dim_768_cosine_recall@10": 0.5163308913308913, + "eval_dim_768_cosine_recall@3": 0.2679945054945055, + "eval_dim_768_cosine_recall@5": 0.36729242979242976, + "eval_runtime": 7.0357, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.41244392103396355, + "eval_steps_per_second": 0.0, + "step": 7 + }, + { + "epoch": 1.1818181818181819, + "grad_norm": 159.73828125, + "learning_rate": 1.9902680687415704e-05, + "loss": 11.8583, + "step": 8 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 134.70603942871094, + "learning_rate": 1.9781476007338058e-05, + "loss": 11.9216, + "step": 9 + }, + { + "epoch": 1.5454545454545454, + "grad_norm": 139.73654174804688, + "learning_rate": 1.961261695938319e-05, + "loss": 13.3764, + "step": 10 + }, + { + "epoch": 1.7272727272727273, + "grad_norm": 173.23065185546875, + "learning_rate": 1.9396926207859085e-05, + "loss": 12.9063, + "step": 11 + }, + { + "epoch": 1.9090909090909092, + "grad_norm": 208.78482055664062, + "learning_rate": 1.913545457642601e-05, + "loss": 13.5984, + "step": 12 + }, + { + "epoch": 2.0, + "grad_norm": 298.82183837890625, + "learning_rate": 1.8829475928589272e-05, + "loss": 7.8523, + "step": 13 + }, + { + "epoch": 2.090909090909091, + "grad_norm": 156.5718994140625, + "learning_rate": 1.848048096156426e-05, + "loss": 4.4487, + "step": 14 + }, + { + "epoch": 2.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_1024_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@5": 0.5714285714285714, + 
"eval_dim_1024_cosine_map@100": 0.67423207909377, + "eval_dim_1024_cosine_mrr@10": 0.5480725623582765, + "eval_dim_1024_cosine_ndcg@10": 0.5921167294151266, + "eval_dim_1024_cosine_precision@1": 0.5238095238095238, + "eval_dim_1024_cosine_precision@10": 0.4476190476190477, + "eval_dim_1024_cosine_precision@3": 0.5079365079365079, + "eval_dim_1024_cosine_precision@5": 0.47619047619047616, + "eval_dim_1024_cosine_recall@1": 0.08933150183150182, + "eval_dim_1024_cosine_recall@10": 0.5401404151404151, + "eval_dim_1024_cosine_recall@3": 0.24418498168498168, + "eval_dim_1024_cosine_recall@5": 0.33951465201465203, + "eval_dim_128_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_128_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_128_cosine_map@100": 0.6489604480560528, + "eval_dim_128_cosine_mrr@10": 0.5401360544217686, + "eval_dim_128_cosine_ndcg@10": 0.5685354415901852, + "eval_dim_128_cosine_precision@1": 0.5238095238095238, + "eval_dim_128_cosine_precision@10": 0.4238095238095239, + "eval_dim_128_cosine_precision@3": 0.5238095238095238, + "eval_dim_128_cosine_precision@5": 0.5047619047619047, + "eval_dim_128_cosine_recall@1": 0.07345848595848595, + "eval_dim_128_cosine_recall@10": 0.5202991452991453, + "eval_dim_128_cosine_recall@3": 0.2203754578754579, + "eval_dim_128_cosine_recall@5": 0.34745115995116, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_256_cosine_map@100": 0.651530364911684, + "eval_dim_256_cosine_mrr@10": 0.5401360544217686, + "eval_dim_256_cosine_ndcg@10": 0.5708936958722651, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + 
"eval_dim_256_cosine_precision@5": 0.49523809523809514, + "eval_dim_256_cosine_recall@1": 0.0813949938949939, + "eval_dim_256_cosine_recall@10": 0.5202991452991453, + "eval_dim_256_cosine_recall@3": 0.22037545787545787, + "eval_dim_256_cosine_recall@5": 0.33951465201465203, + "eval_dim_512_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_512_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_512_cosine_map@100": 0.6265911712939339, + "eval_dim_512_cosine_mrr@10": 0.5020408163265305, + "eval_dim_512_cosine_ndcg@10": 0.5518338753600308, + "eval_dim_512_cosine_precision@1": 0.47619047619047616, + "eval_dim_512_cosine_precision@10": 0.4238095238095239, + "eval_dim_512_cosine_precision@3": 0.4603174603174603, + "eval_dim_512_cosine_precision@5": 0.45714285714285713, + "eval_dim_512_cosine_recall@1": 0.07345848595848595, + "eval_dim_512_cosine_recall@10": 0.5202991452991453, + "eval_dim_512_cosine_recall@3": 0.19656593406593406, + "eval_dim_512_cosine_recall@5": 0.3077686202686203, + "eval_dim_64_cosine_accuracy@1": 0.42857142857142855, + "eval_dim_64_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_64_cosine_accuracy@3": 0.42857142857142855, + "eval_dim_64_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_64_cosine_map@100": 0.5888462989137369, + "eval_dim_64_cosine_mrr@10": 0.45963718820861665, + "eval_dim_64_cosine_ndcg@10": 0.51131642091388, + "eval_dim_64_cosine_precision@1": 0.42857142857142855, + "eval_dim_64_cosine_precision@10": 0.3999999999999999, + "eval_dim_64_cosine_precision@3": 0.42857142857142855, + "eval_dim_64_cosine_precision@5": 0.42857142857142855, + "eval_dim_64_cosine_recall@1": 0.053617216117216114, + "eval_dim_64_cosine_recall@10": 0.5004578754578755, + "eval_dim_64_cosine_recall@3": 0.16085164835164836, + "eval_dim_64_cosine_recall@5": 0.27205433455433453, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + 
"eval_dim_768_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_768_cosine_map@100": 0.67423207909377, + "eval_dim_768_cosine_mrr@10": 0.5480725623582765, + "eval_dim_768_cosine_ndcg@10": 0.5921167294151266, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4476190476190477, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.47619047619047616, + "eval_dim_768_cosine_recall@1": 0.08933150183150182, + "eval_dim_768_cosine_recall@10": 0.5401404151404151, + "eval_dim_768_cosine_recall@3": 0.24418498168498168, + "eval_dim_768_cosine_recall@5": 0.33951465201465203, + "eval_runtime": 6.9723, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.51131642091388, + "eval_steps_per_second": 0.0, + "step": 14 + } + ], + "logging_steps": 1, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-14/training_args.bin b/checkpoint-14/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8da3332899cbb380f0ea340bef3e2069d6cf1db2 --- /dev/null +++ b/checkpoint-14/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14debc6c3f8c5edee5db8d97a3a78a007d313a13e4b96f43026da543b59bef8c +size 6097 diff --git a/checkpoint-28/1_Pooling/config.json 
b/checkpoint-28/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3f060ee536308b48017dad1a834f306f115695a3 --- /dev/null +++ b/checkpoint-28/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 1024, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/checkpoint-28/README.md b/checkpoint-28/README.md new file mode 100644 index 0000000000000000000000000000000000000000..31b2fab626a31763fc4d01f9d72052951b67b051 --- /dev/null +++ b/checkpoint-28/README.md @@ -0,0 +1,1651 @@ +--- +language: +- en +license: apache-2.0 +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- dense +- generated_from_trainer +- dataset_size:82 +- loss:MatryoshkaLoss +- loss:MultipleNegativesRankingLoss +base_model: intfloat/multilingual-e5-large +widget: +- source_sentence: When did the victims give away credentials? + sentences: + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. 
+ + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'Voice phishing involves manipulating victims over the phone. Attackers pose as + bank officials or authorities and use intimidation to extract financial details. + + + Scenario: + + - Victims are coerced into giving away PINs, passwords, or other credentials under + false pretenses of legal or financial emergencies.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without requiring that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and performs an act, omission, or acquiescence; and + + + c) Damage to another’s property, according to civil law, which must be causally + connected to the perpetrator’s deceptive acts or omissions. 
It is not required + that the deceived person and the person who suffered the loss be the same. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts relating to the present or the past, in such a way as to create the + impression of future fulfillment, based on a false present situation fabricated + by the perpetrator—who has already made the decision not to fulfill their obligation—then + the crime of fraud is established. + + + The term “property” denotes the totality of a person’s economic assets possessing + monetary value, while damage to property refers to its reduction—specifically, + the difference between the property’s monetary value before the disposition caused + by the fraudulent conduct and its value afterward. Property damage exists even + if the victim has an active claim for its restitution. + + + The time of commission of fraud is considered to be the moment when the perpetrator + acted and completed the deceptive conduct, that is, when they made the false representations + which deceived the victim or a third party. Any later time at which the victim’s + financial loss occurred—thus completing the fraud—or the time when the harmful + act or omission of the deceived person took place, is irrelevant. + + + The reference to multiple modes of commission of fraud (i.e., both the misrepresentation + of false facts and the concealment of true ones) may create ambiguity and contradiction, + unless it is made clear from the overall findings that the offense was committed + in one particular manner, and that the reference to the other merely serves to + define the intent (mens rea) of the perpetrator—specifically, that the representations + were false. 
+ + + Furthermore, a conviction must contain the specific and well-reasoned justification + required by Articles 93 paragraph 3 of the Constitution and 139 of the Code of + Criminal Procedure. The absence of such reasoning constitutes grounds for cassation + (appeal) under Article 510 paragraph 1(d) of the Code of Criminal Procedure, when + the judgment does not set out, with clarity, completeness, and consistency, the + factual circumstances established by the evidence, upon which the court based + its findings regarding the objective and subjective elements of the offense, the + evidence supporting those findings, and the legal reasoning through which those + facts were subsumed under the applicable substantive criminal provision. + + + For the existence of such reasoning, the explanatory and operative parts of the + decision may complement each other, as they form a single, unified whole. + + + The existence of intent (dolus) does not generally need to be specially justified, + since it is inherent in the will to bring about the factual circumstances constituting + the objective elements of the offense, and it is presumed from their realization + in each particular case—unless the law requires additional elements for criminal + liability, such as the act being committed with knowledge of a specific circumstance + (direct intent) or with the pursuit of a further purpose, i.e., the achievement + of an additional result (offenses requiring a special subjective element). + + + Furthermore, under Article 510 paragraph 1(e) of the Code of Criminal Procedure, + a misapplication of substantive criminal law also constitutes grounds for cassation. 
+ Such misapplication occurs when the trial court incorrectly applies the law to + the facts it has found to be true, or when the violation occurs indirectly, namely + when the reasoning of the judgment—comprising the combination of its factual and + operative parts and relating to the elements and identity of the offense—contains + ambiguities, contradictions, or logical gaps, rendering it impossible to verify, + on appeal, whether the law was applied correctly. In such cases, the judgment + lacks a lawful basis.' +- source_sentence: What must be the outcome of the deception in relation to property + damage? + sentences: + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without requiring that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and performs an act, omission, or acquiescence; and + + + c) Damage to another’s property, according to civil law, which must be causally + connected to the perpetrator’s deceptive acts or omissions. It is not required + that the deceived person and the person who suffered the loss be the same. 
+ + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts relating to the present or the past, in such a way as to create the + impression of future fulfillment, based on a false present situation fabricated + by the perpetrator—who has already made the decision not to fulfill their obligation—then + the crime of fraud is established. + + + The term “property” denotes the totality of a person’s economic assets possessing + monetary value, while damage to property refers to its reduction—specifically, + the difference between the property’s monetary value before the disposition caused + by the fraudulent conduct and its value afterward. Property damage exists even + if the victim has an active claim for its restitution. + + + The time of commission of fraud is considered to be the moment when the perpetrator + acted and completed the deceptive conduct, that is, when they made the false representations + which deceived the victim or a third party. Any later time at which the victim’s + financial loss occurred—thus completing the fraud—or the time when the harmful + act or omission of the deceived person took place, is irrelevant. + + + The reference to multiple modes of commission of fraud (i.e., both the misrepresentation + of false facts and the concealment of true ones) may create ambiguity and contradiction, + unless it is made clear from the overall findings that the offense was committed + in one particular manner, and that the reference to the other merely serves to + define the intent (mens rea) of the perpetrator—specifically, that the representations + were false. 
+ + + Furthermore, a conviction must contain the specific and well-reasoned justification + required by Articles 93 paragraph 3 of the Constitution and 139 of the Code of + Criminal Procedure. The absence of such reasoning constitutes grounds for cassation + (appeal) under Article 510 paragraph 1(d) of the Code of Criminal Procedure, when + the judgment does not set out, with clarity, completeness, and consistency, the + factual circumstances established by the evidence, upon which the court based + its findings regarding the objective and subjective elements of the offense, the + evidence supporting those findings, and the legal reasoning through which those + facts were subsumed under the applicable substantive criminal provision. + + + For the existence of such reasoning, the explanatory and operative parts of the + decision may complement each other, as they form a single, unified whole. + + + The existence of intent (dolus) does not generally need to be specially justified, + since it is inherent in the will to bring about the factual circumstances constituting + the objective elements of the offense, and it is presumed from their realization + in each particular case—unless the law requires additional elements for criminal + liability, such as the act being committed with knowledge of a specific circumstance + (direct intent) or with the pursuit of a further purpose, i.e., the achievement + of an additional result (offenses requiring a special subjective element). + + + Furthermore, under Article 510 paragraph 1(e) of the Code of Criminal Procedure, + a misapplication of substantive criminal law also constitutes grounds for cassation. 
+ Such misapplication occurs when the trial court incorrectly applies the law to + the facts it has found to be true, or when the violation occurs indirectly, namely + when the reasoning of the judgment—comprising the combination of its factual and + operative parts and relating to the elements and identity of the offense—contains + ambiguities, contradictions, or logical gaps, rendering it impossible to verify, + on appeal, whether the law was applied correctly. In such cases, the judgment + lacks a lawful basis.' + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. 
+ + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. + + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' 
+ - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another person’s property by persuading someone to act, + omit, or tolerate something through the knowing misrepresentation of false facts + as true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that for the crime of fraud to be established, + the following elements are required: + + + a) Intent of the perpetrator to obtain for themselves or another an unlawful pecuniary + benefit, regardless of whether this benefit was actually realized; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which, as a causal factor, someone + is deceived and acts in a way that is detrimental to themselves or another (by + an act, omission, or acquiescence); and + + + c) Damage to another’s property, in the sense recognized by civil law, which must + be causally linked to the fraudulent conduct (the deceptive act or omission of + the perpetrator) and to the resulting deception of the person who made the property + disposition. It is not required that the person deceived be the same person who + suffered the damage. + + + Property damage exists when there is a reduction or deterioration in the victim’s + assets, even if the victim has an active claim to restitution. However, as an + element of the objective aspect of the crime of fraud, the damage must be the + direct, necessary, and exclusive result of the property disposition—namely, the + act, omission, or acquiescence performed by the person deceived by the perpetrator’s + fraudulent conduct. 
+ + + There must therefore be a causal connection between the perpetrator’s deceptive + behavior and the deception it caused, as well as between this deception and the + resulting property damage, which must be the direct, necessary, and exclusive + outcome of the deception and of the act, omission, or acquiescence of the deceived + person. + + + The term “facts” refers to real circumstances relating to the past or present, + and not to those expected to occur in the future, such as mere promises or contractual + obligations. However, when such promises or obligations are accompanied by false + assurances and representations of other false facts relating to the present or + the past, in such a way as to create the impression of future fulfillment, based + on the false present situation presented by a perpetrator who has already made + the decision not to fulfill their obligation, then the crime of fraud is established. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their deceptive conduct—that is, when they made the false + representations that deceived the victim or a third party. Any later time at which + the victim’s financial loss actually occurred—thus completing the fraud—or the + time when the deceived person performed the harmful act or omission, is irrelevant.' +- source_sentence: How are victims tricked in email phishing scams? + sentences: + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. + + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. 
+ + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. 
+ + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'Email phishing is a type of identity theft scam conducted via email or SMS. The + attacker uses social engineering tactics such as impersonating trusted entities + and inducing urgency. Victims are tricked into disclosing personal information + or downloading malware. + + + Scenarios: + + - Scenario 1: Emails impersonating high-ranking executives accuse victims of crimes + to coerce them into revealing information or opening malware-laden attachments. 
+ + - Scenario 2: Emails/SMS from fake banks or authorities alert victims of data + breaches, directing them to spoofed websites to input credentials. + + - Scenario 3: SMS messages deliver disguised malware apps that harvest sensitive + data. + + - Scenario 4: SMS links lead to pharming sites that mimic trusted brands and steal + login data through fake pop-ups.' +- source_sentence: What circumstances do the term 'facts' refer to within the meaning + of the provision? + sentences: + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another person’s property by persuading someone to act, + omit, or tolerate something through the knowing misrepresentation of false facts + as true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that for the crime of fraud to be established, + the following elements are required: + + + a) Intent of the perpetrator to obtain for themselves or another an unlawful pecuniary + benefit, regardless of whether this benefit was actually realized; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which, as a causal factor, someone + is deceived and acts in a way that is detrimental to themselves or another (by + an act, omission, or acquiescence); and + + + c) Damage to another’s property, in the sense recognized by civil law, which must + be causally linked to the fraudulent conduct (the deceptive act or omission of + the perpetrator) and to the resulting deception of the person who made the property + disposition. It is not required that the person deceived be the same person who + suffered the damage. 
+ + + Property damage exists when there is a reduction or deterioration in the victim’s + assets, even if the victim has an active claim to restitution. However, as an + element of the objective aspect of the crime of fraud, the damage must be the + direct, necessary, and exclusive result of the property disposition—namely, the + act, omission, or acquiescence performed by the person deceived by the perpetrator’s + fraudulent conduct. + + + There must therefore be a causal connection between the perpetrator’s deceptive + behavior and the deception it caused, as well as between this deception and the + resulting property damage, which must be the direct, necessary, and exclusive + outcome of the deception and of the act, omission, or acquiescence of the deceived + person. + + + The term “facts” refers to real circumstances relating to the past or present, + and not to those expected to occur in the future, such as mere promises or contractual + obligations. However, when such promises or obligations are accompanied by false + assurances and representations of other false facts relating to the present or + the past, in such a way as to create the impression of future fulfillment, based + on the false present situation presented by a perpetrator who has already made + the decision not to fulfill their obligation, then the crime of fraud is established. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their deceptive conduct—that is, when they made the false + representations that deceived the victim or a third party. Any later time at which + the victim’s financial loss actually occurred—thus completing the fraud—or the + time when the deceived person performed the harmful act or omission, is irrelevant.' + - '1. 
Anyone who, by knowingly presenting false facts as true or by unlawfully concealing + or withholding true facts, damages another person''s property by persuading someone + to act, omission, or tolerance with the aim of obtaining, for themselves or another, + an unlawful financial gain from the damage to that property shall be punished + with imprisonment, "and if the damage caused is particularly great, with imprisonment + of at least three (3) months and a fine." . + + If the damage caused exceeds a total of one hundred and twenty thousand (120,000) + euros, imprisonment of up to ten (10) years and a fine shall be imposed. + + 2. If the fraud is directed directly against the legal entity of the Greek State, + legal entities governed by public law, or local government organizations, and + the damage caused exceeds a total of one hundred and twenty thousand (120,000) + euros, a prison sentence of at least ten (10) years and a fine of up to one thousand + (1,000) daily units shall be imposed. This offense shall be time-barred after + twenty (20) years. + + ' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. 
+ + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' +- source_sentence: When is the time of commission of the fraud considered? + sentences: + - 'Spear phishing targets specific individuals or employees within an organization + using personalized, deceptive emails. Unlike mass phishing, these emails are crafted + to seem familiar and urgent. + + + Scenarios: + + - CEO Fraud: Attackers impersonate executives to extract financial or sensitive + data from employees. + + - Whaling: High-ranking executives are targeted using tailored fraud emails that + press for immediate action without verification.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. 
+ + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. 
+ + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. + + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' 
+pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- cosine_accuracy@1 +- cosine_accuracy@3 +- cosine_accuracy@5 +- cosine_accuracy@10 +- cosine_precision@1 +- cosine_precision@3 +- cosine_precision@5 +- cosine_precision@10 +- cosine_recall@1 +- cosine_recall@3 +- cosine_recall@5 +- cosine_recall@10 +- cosine_ndcg@10 +- cosine_mrr@10 +- cosine_map@100 +model-index: +- name: multilingual_e5_large Finetuned on Data + results: + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 1024 + type: dim_1024 + metrics: + - type: cosine_accuracy@1 + value: 0.47619047619047616 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.47619047619047616 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.47619047619047616 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5714285714285714 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.47619047619047616 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.4603174603174603 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.419047619047619 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.07822039072039072 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.21085164835164832 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.27602258852258854 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.4449023199023199 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5159384546892658 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.49092970521541945 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6149109740313521 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 768 + type: dim_768 + metrics: + - type: cosine_accuracy@1 + value: 0.5238095238095238 + name: Cosine Accuracy@1 + - 
type: cosine_accuracy@3 + value: 0.5238095238095238 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.5238095238095238 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5714285714285714 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.5238095238095238 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.5079365079365079 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.4666666666666666 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4238095238095238 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.08218864468864469 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.22275641025641024 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.2958638583638584 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.46474358974358976 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5468399582764966 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.5306122448979591 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6351788392177582 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 512 + type: dim_512 + metrics: + - type: cosine_accuracy@1 + value: 0.47619047619047616 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.47619047619047616 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.47619047619047616 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5238095238095238 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.47619047619047616 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.4603174603174603 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.419047619047619 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.3761904761904762 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.07822039072039072 + name: Cosine Recall@1 
+ - type: cosine_recall@3 + value: 0.21085164835164832 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.27602258852258854 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.42506105006105005 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.49922091065744895 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.48299319727891155 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.5978106306698094 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 256 + type: dim_256 + metrics: + - type: cosine_accuracy@1 + value: 0.5238095238095238 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.5238095238095238 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.5238095238095238 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5714285714285714 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.5238095238095238 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.5079365079365079 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.4666666666666666 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4238095238095239 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.08005189255189255 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.21634615384615385 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.28518009768009767 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.4433760683760684 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5468399582764966 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.5306122448979591 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6411393184007045 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 128 + type: dim_128 + metrics: + - type: cosine_accuracy@1 + value: 0.47619047619047616 + name: Cosine 
Accuracy@1 + - type: cosine_accuracy@3 + value: 0.47619047619047616 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.47619047619047616 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5238095238095238 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.47619047619047616 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.4603174603174603 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.419047619047619 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.3761904761904762 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.07822039072039072 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.21085164835164832 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.27602258852258854 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.42506105006105005 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.49922091065744895 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.48299319727891155 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6025310247157158 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 64 + type: dim_64 + metrics: + - type: cosine_accuracy@1 + value: 0.47619047619047616 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.47619047619047616 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.47619047619047616 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5238095238095238 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.47619047619047616 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.4603174603174603 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.419047619047619 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.3761904761904762 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.07822039072039072 + 
name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.21085164835164832 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.27602258852258854 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.42506105006105005 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.49922091065744895 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.48299319727891155 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.5960251374266525 + name: Cosine Map@100 +--- + +# multilingual_e5_large Finetuned on Data + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) +- **Maximum Sequence Length:** 512 tokens +- **Output Dimensionality:** 1024 dimensions +- **Similarity Function:** Cosine Similarity + +- **Language:** en +- **License:** apache-2.0 + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'}) + (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 
'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) + (2): Normalize() +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'When is the time of commission of the fraud considered?', + 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code,\n\n"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."\n\nFrom this provision it follows that, for the crime of fraud to be established, the following elements are required:\n\na) The intent of the perpetrator to obtain for themselves or another an unlawful pecuniary benefit, without it being necessary that the benefit actually materialize;\n\nb) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence that is detrimental to themselves or another; and\n\nc) Damage to another person’s property, as defined under civil law, which must be causally linked to the deceptive acts or omissions of the perpetrator. 
It is not required that the person deceived and the person who suffered the damage be the same individual.\n\nThe term “facts”, within the meaning of the above provision, refers to real circumstances relating to the past or present, and not to those that will occur in the future, such as mere promises or contractual obligations. However, when such promises or obligations are accompanied by false assurances and representations of other false facts referring to the present or the past, in such a manner as to create the impression of future fulfillment based on a false present situation fabricated by the perpetrator, who has already formed the decision not to fulfill their obligation, the crime of fraud is established.\n\nThe term “property” refers to the totality of a person’s economic assets that possess monetary value, while damage to property means its reduction—specifically, the difference between the monetary value the property had before the disposition caused by the fraudulent conduct and the value remaining after it. Property damage exists even if the victim possesses an active claim for restitution.\n\nThe time of commission of the fraud is considered to be the moment when the perpetrator acted and completed their fraudulent conduct, namely when they made the false representations that deceived the victim or a third party. Any subsequent moment at which the victim’s damage actually occurred—thereby completing the fraud—or the time when the victim carried out the harmful act or omission, is irrelevant.', + 'Spear phishing targets specific individuals or employees within an organization using personalized, deceptive emails. 
Unlike mass phishing, these emails are crafted to seem familiar and urgent.\n\nScenarios:\n- CEO Fraud: Attackers impersonate executives to extract financial or sensitive data from employees.\n- Whaling: High-ranking executives are targeted using tailored fraud emails that press for immediate action without verification.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 1024] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities) +# tensor([[1.0000, 0.5637, 0.3101], +# [0.5637, 1.0000, 0.3522], +# [0.3101, 0.3522, 1.0000]]) +``` + + + + + + + +## Evaluation + +### Metrics + +#### Information Retrieval + +* Dataset: `dim_1024` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 1024 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4762 | +| cosine_accuracy@3 | 0.4762 | +| cosine_accuracy@5 | 0.4762 | +| cosine_accuracy@10 | 0.5714 | +| cosine_precision@1 | 0.4762 | +| cosine_precision@3 | 0.4603 | +| cosine_precision@5 | 0.419 | +| cosine_precision@10 | 0.4 | +| cosine_recall@1 | 0.0782 | +| cosine_recall@3 | 0.2109 | +| cosine_recall@5 | 0.276 | +| cosine_recall@10 | 0.4449 | +| **cosine_ndcg@10** | **0.5159** | +| cosine_mrr@10 | 0.4909 | +| cosine_map@100 | 0.6149 | + +#### Information Retrieval + +* Dataset: `dim_768` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 768 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5238 | +| 
cosine_accuracy@10 | 0.5714 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4667 | +| cosine_precision@10 | 0.4238 | +| cosine_recall@1 | 0.0822 | +| cosine_recall@3 | 0.2228 | +| cosine_recall@5 | 0.2959 | +| cosine_recall@10 | 0.4647 | +| **cosine_ndcg@10** | **0.5468** | +| cosine_mrr@10 | 0.5306 | +| cosine_map@100 | 0.6352 | + +#### Information Retrieval + +* Dataset: `dim_512` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 512 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4762 | +| cosine_accuracy@3 | 0.4762 | +| cosine_accuracy@5 | 0.4762 | +| cosine_accuracy@10 | 0.5238 | +| cosine_precision@1 | 0.4762 | +| cosine_precision@3 | 0.4603 | +| cosine_precision@5 | 0.419 | +| cosine_precision@10 | 0.3762 | +| cosine_recall@1 | 0.0782 | +| cosine_recall@3 | 0.2109 | +| cosine_recall@5 | 0.276 | +| cosine_recall@10 | 0.4251 | +| **cosine_ndcg@10** | **0.4992** | +| cosine_mrr@10 | 0.483 | +| cosine_map@100 | 0.5978 | + +#### Information Retrieval + +* Dataset: `dim_256` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 256 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5238 | +| cosine_accuracy@10 | 0.5714 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4667 | +| cosine_precision@10 | 0.4238 | +| cosine_recall@1 | 0.0801 | +| cosine_recall@3 | 0.2163 | +| cosine_recall@5 | 0.2852 | +| cosine_recall@10 | 0.4434 | +| 
**cosine_ndcg@10** | **0.5468** | +| cosine_mrr@10 | 0.5306 | +| cosine_map@100 | 0.6411 | + +#### Information Retrieval + +* Dataset: `dim_128` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 128 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4762 | +| cosine_accuracy@3 | 0.4762 | +| cosine_accuracy@5 | 0.4762 | +| cosine_accuracy@10 | 0.5238 | +| cosine_precision@1 | 0.4762 | +| cosine_precision@3 | 0.4603 | +| cosine_precision@5 | 0.419 | +| cosine_precision@10 | 0.3762 | +| cosine_recall@1 | 0.0782 | +| cosine_recall@3 | 0.2109 | +| cosine_recall@5 | 0.276 | +| cosine_recall@10 | 0.4251 | +| **cosine_ndcg@10** | **0.4992** | +| cosine_mrr@10 | 0.483 | +| cosine_map@100 | 0.6025 | + +#### Information Retrieval + +* Dataset: `dim_64` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 64 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4762 | +| cosine_accuracy@3 | 0.4762 | +| cosine_accuracy@5 | 0.4762 | +| cosine_accuracy@10 | 0.5238 | +| cosine_precision@1 | 0.4762 | +| cosine_precision@3 | 0.4603 | +| cosine_precision@5 | 0.419 | +| cosine_precision@10 | 0.3762 | +| cosine_recall@1 | 0.0782 | +| cosine_recall@3 | 0.2109 | +| cosine_recall@5 | 0.276 | +| cosine_recall@10 | 0.4251 | +| **cosine_ndcg@10** | **0.4992** | +| cosine_mrr@10 | 0.483 | +| cosine_map@100 | 0.596 | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 82 training samples +* Columns: anchor and positive +* Approximate statistics based on the first 82 samples: + | | anchor | 
positive | + |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------| + | type | string | string | + | details |
  • min: 9 tokens
  • mean: 18.17 tokens
  • max: 34 tokens
|
  • min: 69 tokens
  • mean: 399.51 tokens
  • max: 512 tokens
| +* Samples: + | anchor | positive | + |:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | What determines whether the act in question shall be punished if the offender is in the service of the legal holder of the data? | Everyone who obtains access to data recorded in a computer or in the external memory of a computer or transmitted by telecommunication systems shall be punished with imprisonment for up to six months or by a fine from 29 to 15,000 Euro, under the condition that these acts have been committed without right, especially in violation of prohibitions or of security measures taken by the legal holder. If the act concerns the international relations or the security of the State, he shall be punished according to Article 148.
If the offender is in the service of the legal holder of the data, the act of the preceding paragraph shall be punished only if it has been explicitly prohibited by internal regulations or by a written decision of the holder or of a competent employee of his.
| + | What must be causally connected to the perpetrator's deceptive acts? | According to Article 386 paragraph 1 of the Greek Penal Code,

"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."

From these provisions, it follows that, for the crime of fraud to be established, the following elements are required:

a) The intent of the perpetrator to obtain for themselves or another an unlawful pecuniary benefit;

b) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence detrimental to th...
| + | Who can be punished with imprisonment? | 1. Anyone who, by knowingly presenting false facts as true or by unlawfully concealing or withholding true facts, damages another person's property by persuading someone to act, omission, or tolerance with the aim of obtaining, for themselves or another, an unlawful financial gain from the damage to that property shall be punished with imprisonment, "and if the damage caused is particularly great, with imprisonment of at least three (3) months and a fine." .
If the damage caused exceeds a total of one hundred and twenty thousand (120,000) euros, imprisonment of up to ten (10) years and a fine shall be imposed.
2. If the fraud is directed directly against the legal entity of the Greek State, legal entities governed by public law, or local government organizations, and the damage caused exceeds a total of one hundred and twenty thousand (120,000) euros, a prison sentence of at least ten (10) years and a fine of up to one thousand (1,000) daily units shall be imposed. This offense shall b...
| +* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: + ```json + { + "loss": "MultipleNegativesRankingLoss", + "matryoshka_dims": [ + 1024, + 768, + 512, + 256, + 128, + 64 + ], + "matryoshka_weights": [ + 1, + 1, + 1, + 1, + 1, + 1 + ], + "n_dims_per_step": -1 + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: epoch +- `gradient_accumulation_steps`: 2 +- `learning_rate`: 2e-05 +- `num_train_epochs`: 10 +- `lr_scheduler_type`: cosine +- `warmup_ratio`: 0.1 +- `bf16`: True +- `tf32`: True +- `load_best_model_at_end`: True +- `optim`: adamw_torch_fused +- `batch_sampler`: no_duplicates + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: epoch +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 8 +- `per_device_eval_batch_size`: 8 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 2 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 2e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1.0 +- `num_train_epochs`: 10 +- `max_steps`: -1 +- `lr_scheduler_type`: cosine +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.1 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: True +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: True +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: True +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `tp_size`: 0 +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 
'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch_fused +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: no_duplicates +- `multi_dataset_batch_sampler`: proportional +- `router_mapping`: {} +- `learning_rate_mapping`: {} + +
+ +### Training Logs +| Epoch | Step | Training Loss | dim_1024_cosine_ndcg@10 | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 | +|:------:|:----:|:-------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:| +| 0.1818 | 1 | 18.029 | - | - | - | - | - | - | +| 0.3636 | 2 | 19.4106 | - | - | - | - | - | - | +| 0.5455 | 3 | 16.6201 | - | - | - | - | - | - | +| 0.7273 | 4 | 15.3048 | - | - | - | - | - | - | +| 0.9091 | 5 | 14.0182 | - | - | - | - | - | - | +| 1.0 | 6 | 6.4771 | - | - | - | - | - | - | +| 1.0909 | 7 | 6.7664 | 0.6167 | 0.5821 | 0.5524 | 0.5177 | 0.5278 | 0.4124 | +| 1.1818 | 8 | 11.8583 | - | - | - | - | - | - | +| 1.3636 | 9 | 11.9216 | - | - | - | - | - | - | +| 1.5455 | 10 | 13.3764 | - | - | - | - | - | - | +| 1.7273 | 11 | 12.9063 | - | - | - | - | - | - | +| 1.9091 | 12 | 13.5984 | - | - | - | - | - | - | +| 2.0 | 13 | 7.8523 | - | - | - | - | - | - | +| 2.0909 | 14 | 4.4487 | 0.5921 | 0.5921 | 0.5518 | 0.5709 | 0.5685 | 0.5113 | +| 2.1818 | 15 | 8.5374 | - | - | - | - | - | - | +| 2.3636 | 16 | 9.6999 | - | - | - | - | - | - | +| 2.5455 | 17 | 9.0121 | - | - | - | - | - | - | +| 2.7273 | 18 | 13.5705 | - | - | - | - | - | - | +| 2.9091 | 19 | 13.0195 | - | - | - | - | - | - | +| 3.0 | 20 | 7.9821 | - | - | - | - | - | - | +| 3.0909 | 21 | 3.2842 | 0.5159 | 0.5636 | 0.5468 | 0.5468 | 0.5468 | 0.5233 | +| 3.1818 | 22 | 4.4446 | - | - | - | - | - | - | +| 3.3636 | 23 | 5.7244 | - | - | - | - | - | - | +| 3.5455 | 24 | 7.1394 | - | - | - | - | - | - | +| 3.7273 | 25 | 16.7583 | - | - | - | - | - | - | +| 3.9091 | 26 | 11.3515 | - | - | - | - | - | - | +| 4.0 | 27 | 8.813 | - | - | - | - | - | - | +| 4.0909 | 28 | 6.9124 | 0.5159 | 0.5468 | 0.4992 | 0.5468 | 0.4992 | 0.4992 | + + +### Framework Versions +- Python: 3.12.12 +- Sentence Transformers: 5.1.1 +- Transformers: 
4.51.3 +- PyTorch: 2.8.0+cu126 +- Accelerate: 1.11.0 +- Datasets: 4.0.0 +- Tokenizers: 0.21.4 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MatryoshkaLoss +```bibtex +@misc{kusupati2024matryoshka, + title={Matryoshka Representation Learning}, + author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi}, + year={2024}, + eprint={2205.13147}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-28/config.json b/checkpoint-28/config.json new file mode 100644 index 0000000000000000000000000000000000000000..93f46b2fc5d0e832be8338cb0ce9f09ae81bc1a2 --- /dev/null +++ b/checkpoint-28/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "XLMRobertaModel" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + 
"layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/checkpoint-28/config_sentence_transformers.json b/checkpoint-28/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..a0390e7f92afe1fdc0e00e1712f50de56290fde6 --- /dev/null +++ b/checkpoint-28/config_sentence_transformers.json @@ -0,0 +1,14 @@ +{ + "model_type": "SentenceTransformer", + "__version__": { + "sentence_transformers": "5.1.1", + "transformers": "4.51.3", + "pytorch": "2.8.0+cu126" + }, + "prompts": { + "query": "", + "document": "" + }, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-28/model.safetensors b/checkpoint-28/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22d53400c053fdce8488c413d04e6e5d098d0dc8 --- /dev/null +++ b/checkpoint-28/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f97fcbc0e7a951f68c8432dcdd5f2ecd96d044d3c202e0790218b87ffa9a91d +size 2239607176 diff --git a/checkpoint-28/modules.json b/checkpoint-28/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..952a9b81c0bfd99800fabf352f69c7ccd46c5e43 --- /dev/null +++ b/checkpoint-28/modules.json @@ -0,0 +1,20 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + }, + { + "idx": 2, + "name": "2", + "path": "2_Normalize", + "type": "sentence_transformers.models.Normalize" + } +] \ No newline at end of file diff --git 
a/checkpoint-28/optimizer.pt b/checkpoint-28/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bdadf6c1ec849cf74e9485ca21b001d6973cf21 --- /dev/null +++ b/checkpoint-28/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4a1e358287ce2ca3e23fd5b7842322992a275fb872bc5b97cea500ed439d2c +size 4471067142 diff --git a/checkpoint-28/rng_state.pth b/checkpoint-28/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e576127b6b1d4f6737d1415f9fedcf3c833d3723 --- /dev/null +++ b/checkpoint-28/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3171c785711284de5dd361b74d324b51f22ae04ab6c679c768dfe351168dbec4 +size 14645 diff --git a/checkpoint-28/scheduler.pt b/checkpoint-28/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72ba4bd9239d78b052058568712767e360d22baf --- /dev/null +++ b/checkpoint-28/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b4fb22f017dc0d2e21c4ae1cf21c451d5bfd5c49a6b6e315bfff5d4ae75a1d +size 1465 diff --git a/checkpoint-28/sentence_bert_config.json b/checkpoint-28/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4eca68d85ecd3034cf4174d8a4033a75344ea62d --- /dev/null +++ b/checkpoint-28/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 512, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-28/sentencepiece.bpe.model b/checkpoint-28/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-28/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-28/special_tokens_map.json b/checkpoint-28/special_tokens_map.json new file 
mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-28/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-28/tokenizer.json b/checkpoint-28/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2a51933f1ccb3cf68d53b877cbfa24734ada642f --- /dev/null +++ b/checkpoint-28/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085 +size 17082987 diff --git a/checkpoint-28/tokenizer_config.json b/checkpoint-28/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff4390b993a1a04254ff2e21bc6bee199cd6e32 --- /dev/null +++ b/checkpoint-28/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/checkpoint-28/trainer_state.json b/checkpoint-28/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c7a853d7fc37ce00345e360f00b016a6f965a347 --- /dev/null +++ b/checkpoint-28/trainer_state.json @@ -0,0 +1,631 @@ +{ + "best_global_step": 14, + "best_metric": 0.5685354415901852, + "best_model_checkpoint": "multilingual-e5-large/checkpoint-14", + "epoch": 4.090909090909091, + "eval_steps": 500, + "global_step": 28, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.18181818181818182, + "grad_norm": 135.6220703125, + "learning_rate": 0.0, + "loss": 18.029, + "step": 1 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 140.5317840576172, + "learning_rate": 4.000000000000001e-06, + "loss": 19.4106, + "step": 2 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 142.2399444580078, + "learning_rate": 8.000000000000001e-06, + "loss": 16.6201, + "step": 3 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 110.63945770263672, + "learning_rate": 1.2e-05, + "loss": 15.3048, + "step": 4 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 129.43089294433594, + "learning_rate": 1.6000000000000003e-05, + "loss": 14.0182, + "step": 5 + }, + { + "epoch": 1.0, + 
"grad_norm": 108.25520324707031, + "learning_rate": 2e-05, + "loss": 6.4771, + "step": 6 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 95.05782318115234, + "learning_rate": 1.9975640502598243e-05, + "loss": 6.7664, + "step": 7 + }, + { + "epoch": 1.0909090909090908, + "eval_dim_1024_cosine_accuracy@1": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_1024_cosine_accuracy@3": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@5": 0.6190476190476191, + "eval_dim_1024_cosine_map@100": 0.6946498352795571, + "eval_dim_1024_cosine_mrr@10": 0.5888888888888888, + "eval_dim_1024_cosine_ndcg@10": 0.6167053425672016, + "eval_dim_1024_cosine_precision@1": 0.5714285714285714, + "eval_dim_1024_cosine_precision@10": 0.4476190476190477, + "eval_dim_1024_cosine_precision@3": 0.5555555555555556, + "eval_dim_1024_cosine_precision@5": 0.5142857142857142, + "eval_dim_1024_cosine_recall@1": 0.10123626373626372, + "eval_dim_1024_cosine_recall@10": 0.5401404151404151, + "eval_dim_1024_cosine_recall@3": 0.2798992673992674, + "eval_dim_1024_cosine_recall@5": 0.3871336996336997, + "eval_dim_128_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_128_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_128_cosine_map@100": 0.5954946117864774, + "eval_dim_128_cosine_mrr@10": 0.49251700680272104, + "eval_dim_128_cosine_ndcg@10": 0.5277801377642622, + "eval_dim_128_cosine_precision@1": 0.47619047619047616, + "eval_dim_128_cosine_precision@10": 0.3761904761904762, + "eval_dim_128_cosine_precision@3": 0.4603174603174603, + "eval_dim_128_cosine_precision@5": 0.42857142857142855, + "eval_dim_128_cosine_recall@1": 0.08536324786324785, + "eval_dim_128_cosine_recall@10": 0.4806166056166057, + "eval_dim_128_cosine_recall@3": 0.23228021978021982, + "eval_dim_128_cosine_recall@5": 0.31967338217338215, + "eval_dim_256_cosine_accuracy@1": 
0.47619047619047616, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6218563539505393, + "eval_dim_256_cosine_mrr@10": 0.4925170068027211, + "eval_dim_256_cosine_ndcg@10": 0.5177339964253599, + "eval_dim_256_cosine_precision@1": 0.47619047619047616, + "eval_dim_256_cosine_precision@10": 0.3476190476190476, + "eval_dim_256_cosine_precision@3": 0.4603174603174603, + "eval_dim_256_cosine_precision@5": 0.419047619047619, + "eval_dim_256_cosine_recall@1": 0.09543650793650793, + "eval_dim_256_cosine_recall@10": 0.47817460317460325, + "eval_dim_256_cosine_recall@3": 0.2625, + "eval_dim_256_cosine_recall@5": 0.35813492063492064, + "eval_dim_512_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_512_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_512_cosine_map@100": 0.6608463504287858, + "eval_dim_512_cosine_mrr@10": 0.5333333333333333, + "eval_dim_512_cosine_ndcg@10": 0.5523687509163372, + "eval_dim_512_cosine_precision@1": 0.5238095238095238, + "eval_dim_512_cosine_precision@10": 0.3761904761904762, + "eval_dim_512_cosine_precision@3": 0.5079365079365079, + "eval_dim_512_cosine_precision@5": 0.4666666666666666, + "eval_dim_512_cosine_recall@1": 0.0994047619047619, + "eval_dim_512_cosine_recall@10": 0.5019841269841271, + "eval_dim_512_cosine_recall@3": 0.2744047619047619, + "eval_dim_512_cosine_recall@5": 0.37797619047619047, + "eval_dim_64_cosine_accuracy@1": 0.3333333333333333, + "eval_dim_64_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_64_cosine_accuracy@3": 0.38095238095238093, + "eval_dim_64_cosine_accuracy@5": 0.38095238095238093, + "eval_dim_64_cosine_map@100": 0.5579595177809107, + "eval_dim_64_cosine_mrr@10": 0.3764172335600907, + "eval_dim_64_cosine_ndcg@10": 0.41244392103396355, + 
"eval_dim_64_cosine_precision@1": 0.3333333333333333, + "eval_dim_64_cosine_precision@10": 0.2619047619047619, + "eval_dim_64_cosine_precision@3": 0.3492063492063492, + "eval_dim_64_cosine_precision@5": 0.3142857142857143, + "eval_dim_64_cosine_recall@1": 0.07063492063492063, + "eval_dim_64_cosine_recall@10": 0.42002442002442003, + "eval_dim_64_cosine_recall@3": 0.2357142857142857, + "eval_dim_64_cosine_recall@5": 0.34523809523809523, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_768_cosine_map@100": 0.6727109022414055, + "eval_dim_768_cosine_mrr@10": 0.5480725623582765, + "eval_dim_768_cosine_ndcg@10": 0.5820705880762242, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.41904761904761906, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.09726800976800977, + "eval_dim_768_cosine_recall@10": 0.5163308913308913, + "eval_dim_768_cosine_recall@3": 0.2679945054945055, + "eval_dim_768_cosine_recall@5": 0.36729242979242976, + "eval_runtime": 7.0357, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.41244392103396355, + "eval_steps_per_second": 0.0, + "step": 7 + }, + { + "epoch": 1.1818181818181819, + "grad_norm": 159.73828125, + "learning_rate": 1.9902680687415704e-05, + "loss": 11.8583, + "step": 8 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 134.70603942871094, + "learning_rate": 1.9781476007338058e-05, + "loss": 11.9216, + "step": 9 + }, + { + "epoch": 1.5454545454545454, + "grad_norm": 139.73654174804688, + "learning_rate": 1.961261695938319e-05, + "loss": 13.3764, + "step": 10 + }, + { + "epoch": 1.7272727272727273, + "grad_norm": 173.23065185546875, + "learning_rate": 1.9396926207859085e-05, + "loss": 
12.9063, + "step": 11 + }, + { + "epoch": 1.9090909090909092, + "grad_norm": 208.78482055664062, + "learning_rate": 1.913545457642601e-05, + "loss": 13.5984, + "step": 12 + }, + { + "epoch": 2.0, + "grad_norm": 298.82183837890625, + "learning_rate": 1.8829475928589272e-05, + "loss": 7.8523, + "step": 13 + }, + { + "epoch": 2.090909090909091, + "grad_norm": 156.5718994140625, + "learning_rate": 1.848048096156426e-05, + "loss": 4.4487, + "step": 14 + }, + { + "epoch": 2.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_1024_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_1024_cosine_map@100": 0.67423207909377, + "eval_dim_1024_cosine_mrr@10": 0.5480725623582765, + "eval_dim_1024_cosine_ndcg@10": 0.5921167294151266, + "eval_dim_1024_cosine_precision@1": 0.5238095238095238, + "eval_dim_1024_cosine_precision@10": 0.4476190476190477, + "eval_dim_1024_cosine_precision@3": 0.5079365079365079, + "eval_dim_1024_cosine_precision@5": 0.47619047619047616, + "eval_dim_1024_cosine_recall@1": 0.08933150183150182, + "eval_dim_1024_cosine_recall@10": 0.5401404151404151, + "eval_dim_1024_cosine_recall@3": 0.24418498168498168, + "eval_dim_1024_cosine_recall@5": 0.33951465201465203, + "eval_dim_128_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_128_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_128_cosine_map@100": 0.6489604480560528, + "eval_dim_128_cosine_mrr@10": 0.5401360544217686, + "eval_dim_128_cosine_ndcg@10": 0.5685354415901852, + "eval_dim_128_cosine_precision@1": 0.5238095238095238, + "eval_dim_128_cosine_precision@10": 0.4238095238095239, + "eval_dim_128_cosine_precision@3": 0.5238095238095238, + "eval_dim_128_cosine_precision@5": 0.5047619047619047, + "eval_dim_128_cosine_recall@1": 
0.07345848595848595, + "eval_dim_128_cosine_recall@10": 0.5202991452991453, + "eval_dim_128_cosine_recall@3": 0.2203754578754579, + "eval_dim_128_cosine_recall@5": 0.34745115995116, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_256_cosine_map@100": 0.651530364911684, + "eval_dim_256_cosine_mrr@10": 0.5401360544217686, + "eval_dim_256_cosine_ndcg@10": 0.5708936958722651, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + "eval_dim_256_cosine_precision@5": 0.49523809523809514, + "eval_dim_256_cosine_recall@1": 0.0813949938949939, + "eval_dim_256_cosine_recall@10": 0.5202991452991453, + "eval_dim_256_cosine_recall@3": 0.22037545787545787, + "eval_dim_256_cosine_recall@5": 0.33951465201465203, + "eval_dim_512_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_512_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_512_cosine_map@100": 0.6265911712939339, + "eval_dim_512_cosine_mrr@10": 0.5020408163265305, + "eval_dim_512_cosine_ndcg@10": 0.5518338753600308, + "eval_dim_512_cosine_precision@1": 0.47619047619047616, + "eval_dim_512_cosine_precision@10": 0.4238095238095239, + "eval_dim_512_cosine_precision@3": 0.4603174603174603, + "eval_dim_512_cosine_precision@5": 0.45714285714285713, + "eval_dim_512_cosine_recall@1": 0.07345848595848595, + "eval_dim_512_cosine_recall@10": 0.5202991452991453, + "eval_dim_512_cosine_recall@3": 0.19656593406593406, + "eval_dim_512_cosine_recall@5": 0.3077686202686203, + "eval_dim_64_cosine_accuracy@1": 0.42857142857142855, + "eval_dim_64_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_64_cosine_accuracy@3": 
0.42857142857142855, + "eval_dim_64_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_64_cosine_map@100": 0.5888462989137369, + "eval_dim_64_cosine_mrr@10": 0.45963718820861665, + "eval_dim_64_cosine_ndcg@10": 0.51131642091388, + "eval_dim_64_cosine_precision@1": 0.42857142857142855, + "eval_dim_64_cosine_precision@10": 0.3999999999999999, + "eval_dim_64_cosine_precision@3": 0.42857142857142855, + "eval_dim_64_cosine_precision@5": 0.42857142857142855, + "eval_dim_64_cosine_recall@1": 0.053617216117216114, + "eval_dim_64_cosine_recall@10": 0.5004578754578755, + "eval_dim_64_cosine_recall@3": 0.16085164835164836, + "eval_dim_64_cosine_recall@5": 0.27205433455433453, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_768_cosine_map@100": 0.67423207909377, + "eval_dim_768_cosine_mrr@10": 0.5480725623582765, + "eval_dim_768_cosine_ndcg@10": 0.5921167294151266, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4476190476190477, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.47619047619047616, + "eval_dim_768_cosine_recall@1": 0.08933150183150182, + "eval_dim_768_cosine_recall@10": 0.5401404151404151, + "eval_dim_768_cosine_recall@3": 0.24418498168498168, + "eval_dim_768_cosine_recall@5": 0.33951465201465203, + "eval_runtime": 6.9723, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.51131642091388, + "eval_steps_per_second": 0.0, + "step": 14 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 156.31887817382812, + "learning_rate": 1.8090169943749477e-05, + "loss": 8.5374, + "step": 15 + }, + { + "epoch": 2.3636363636363638, + "grad_norm": 205.52923583984375, + "learning_rate": 1.766044443118978e-05, + "loss": 9.6999, + "step": 16 + }, + { + "epoch": 2.5454545454545454, + 
"grad_norm": 254.7772216796875, + "learning_rate": 1.7193398003386514e-05, + "loss": 9.0121, + "step": 17 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 476.75164794921875, + "learning_rate": 1.6691306063588583e-05, + "loss": 13.5705, + "step": 18 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 376.09954833984375, + "learning_rate": 1.6156614753256583e-05, + "loss": 13.0195, + "step": 19 + }, + { + "epoch": 3.0, + "grad_norm": 387.24322509765625, + "learning_rate": 1.5591929034707468e-05, + "loss": 7.9821, + "step": 20 + }, + { + "epoch": 3.090909090909091, + "grad_norm": 210.42813110351562, + "learning_rate": 1.5000000000000002e-05, + "loss": 3.2842, + "step": 21 + }, + { + "epoch": 3.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_1024_cosine_map@100": 0.616370861587526, + "eval_dim_1024_cosine_mrr@10": 0.49092970521541945, + "eval_dim_1024_cosine_ndcg@10": 0.5159384546892658, + "eval_dim_1024_cosine_precision@1": 0.47619047619047616, + "eval_dim_1024_cosine_precision@10": 0.4, + "eval_dim_1024_cosine_precision@3": 0.4603174603174603, + "eval_dim_1024_cosine_precision@5": 0.419047619047619, + "eval_dim_1024_cosine_recall@1": 0.07822039072039072, + "eval_dim_1024_cosine_recall@10": 0.4449023199023199, + "eval_dim_1024_cosine_recall@3": 0.21085164835164832, + "eval_dim_1024_cosine_recall@5": 0.27602258852258854, + "eval_dim_128_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_128_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_128_cosine_map@100": 0.6278310491545506, + "eval_dim_128_cosine_mrr@10": 0.5306122448979591, + "eval_dim_128_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_128_cosine_precision@1": 
0.5238095238095238, + "eval_dim_128_cosine_precision@10": 0.4238095238095239, + "eval_dim_128_cosine_precision@3": 0.5079365079365079, + "eval_dim_128_cosine_precision@5": 0.4666666666666666, + "eval_dim_128_cosine_recall@1": 0.0811965811965812, + "eval_dim_128_cosine_recall@10": 0.4548229548229548, + "eval_dim_128_cosine_recall@3": 0.21978021978021975, + "eval_dim_128_cosine_recall@5": 0.2909035409035409, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6295752260160459, + "eval_dim_256_cosine_mrr@10": 0.5306122448979591, + "eval_dim_256_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + "eval_dim_256_cosine_precision@5": 0.4666666666666666, + "eval_dim_256_cosine_recall@1": 0.0811965811965812, + "eval_dim_256_cosine_recall@10": 0.4548229548229548, + "eval_dim_256_cosine_recall@3": 0.21978021978021975, + "eval_dim_256_cosine_recall@5": 0.2909035409035409, + "eval_dim_512_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_512_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_512_cosine_map@100": 0.6298362122328386, + "eval_dim_512_cosine_mrr@10": 0.5306122448979591, + "eval_dim_512_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_512_cosine_precision@1": 0.5238095238095238, + "eval_dim_512_cosine_precision@10": 0.4238095238095239, + "eval_dim_512_cosine_precision@3": 0.5079365079365079, + "eval_dim_512_cosine_precision@5": 0.4666666666666666, + "eval_dim_512_cosine_recall@1": 0.0811965811965812, + "eval_dim_512_cosine_recall@10": 0.4548229548229548, + "eval_dim_512_cosine_recall@3": 
0.21978021978021975, + "eval_dim_512_cosine_recall@5": 0.2909035409035409, + "eval_dim_64_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_64_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_64_cosine_map@100": 0.6058793096030948, + "eval_dim_64_cosine_mrr@10": 0.49251700680272104, + "eval_dim_64_cosine_ndcg@10": 0.5232746482532176, + "eval_dim_64_cosine_precision@1": 0.47619047619047616, + "eval_dim_64_cosine_precision@10": 0.4238095238095238, + "eval_dim_64_cosine_precision@3": 0.4603174603174603, + "eval_dim_64_cosine_precision@5": 0.4476190476190475, + "eval_dim_64_cosine_recall@1": 0.06631562881562882, + "eval_dim_64_cosine_recall@10": 0.46474358974358976, + "eval_dim_64_cosine_recall@3": 0.17513736263736263, + "eval_dim_64_cosine_recall@5": 0.2641178266178266, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_768_cosine_map@100": 0.6486454426450273, + "eval_dim_768_cosine_mrr@10": 0.538548752834467, + "eval_dim_768_cosine_ndcg@10": 0.5635575023083134, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4476190476190477, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.0811965811965812, + "eval_dim_768_cosine_recall@10": 0.47466422466422464, + "eval_dim_768_cosine_recall@3": 0.21978021978021975, + "eval_dim_768_cosine_recall@5": 0.2909035409035409, + "eval_runtime": 7.0685, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.5232746482532176, + "eval_steps_per_second": 0.0, + "step": 21 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 214.07061767578125, + "learning_rate": 1.4383711467890776e-05, + 
"loss": 4.4446, + "step": 22 + }, + { + "epoch": 3.3636363636363638, + "grad_norm": 234.80874633789062, + "learning_rate": 1.3746065934159123e-05, + "loss": 5.7244, + "step": 23 + }, + { + "epoch": 3.5454545454545454, + "grad_norm": 337.2349853515625, + "learning_rate": 1.3090169943749475e-05, + "loss": 7.1394, + "step": 24 + }, + { + "epoch": 3.7272727272727275, + "grad_norm": 562.9892578125, + "learning_rate": 1.2419218955996677e-05, + "loss": 16.7583, + "step": 25 + }, + { + "epoch": 3.909090909090909, + "grad_norm": 1042.4781494140625, + "learning_rate": 1.1736481776669307e-05, + "loss": 11.3515, + "step": 26 + }, + { + "epoch": 4.0, + "grad_norm": 663.6896362304688, + "learning_rate": 1.1045284632676535e-05, + "loss": 8.813, + "step": 27 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 799.2377319335938, + "learning_rate": 1.0348994967025012e-05, + "loss": 6.9124, + "step": 28 + }, + { + "epoch": 4.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_1024_cosine_map@100": 0.6149109740313521, + "eval_dim_1024_cosine_mrr@10": 0.49092970521541945, + "eval_dim_1024_cosine_ndcg@10": 0.5159384546892658, + "eval_dim_1024_cosine_precision@1": 0.47619047619047616, + "eval_dim_1024_cosine_precision@10": 0.4, + "eval_dim_1024_cosine_precision@3": 0.4603174603174603, + "eval_dim_1024_cosine_precision@5": 0.419047619047619, + "eval_dim_1024_cosine_recall@1": 0.07822039072039072, + "eval_dim_1024_cosine_recall@10": 0.4449023199023199, + "eval_dim_1024_cosine_recall@3": 0.21085164835164832, + "eval_dim_1024_cosine_recall@5": 0.27602258852258854, + "eval_dim_128_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@10": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@5": 
0.47619047619047616, + "eval_dim_128_cosine_map@100": 0.6025310247157158, + "eval_dim_128_cosine_mrr@10": 0.48299319727891155, + "eval_dim_128_cosine_ndcg@10": 0.49922091065744895, + "eval_dim_128_cosine_precision@1": 0.47619047619047616, + "eval_dim_128_cosine_precision@10": 0.3761904761904762, + "eval_dim_128_cosine_precision@3": 0.4603174603174603, + "eval_dim_128_cosine_precision@5": 0.419047619047619, + "eval_dim_128_cosine_recall@1": 0.07822039072039072, + "eval_dim_128_cosine_recall@10": 0.42506105006105005, + "eval_dim_128_cosine_recall@3": 0.21085164835164832, + "eval_dim_128_cosine_recall@5": 0.27602258852258854, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6411393184007045, + "eval_dim_256_cosine_mrr@10": 0.5306122448979591, + "eval_dim_256_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + "eval_dim_256_cosine_precision@5": 0.4666666666666666, + "eval_dim_256_cosine_recall@1": 0.08005189255189255, + "eval_dim_256_cosine_recall@10": 0.4433760683760684, + "eval_dim_256_cosine_recall@3": 0.21634615384615385, + "eval_dim_256_cosine_recall@5": 0.28518009768009767, + "eval_dim_512_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@10": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_512_cosine_map@100": 0.5978106306698094, + "eval_dim_512_cosine_mrr@10": 0.48299319727891155, + "eval_dim_512_cosine_ndcg@10": 0.49922091065744895, + "eval_dim_512_cosine_precision@1": 0.47619047619047616, + "eval_dim_512_cosine_precision@10": 0.3761904761904762, + 
"eval_dim_512_cosine_precision@3": 0.4603174603174603, + "eval_dim_512_cosine_precision@5": 0.419047619047619, + "eval_dim_512_cosine_recall@1": 0.07822039072039072, + "eval_dim_512_cosine_recall@10": 0.42506105006105005, + "eval_dim_512_cosine_recall@3": 0.21085164835164832, + "eval_dim_512_cosine_recall@5": 0.27602258852258854, + "eval_dim_64_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@10": 0.5238095238095238, + "eval_dim_64_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_64_cosine_map@100": 0.5960251374266525, + "eval_dim_64_cosine_mrr@10": 0.48299319727891155, + "eval_dim_64_cosine_ndcg@10": 0.49922091065744895, + "eval_dim_64_cosine_precision@1": 0.47619047619047616, + "eval_dim_64_cosine_precision@10": 0.3761904761904762, + "eval_dim_64_cosine_precision@3": 0.4603174603174603, + "eval_dim_64_cosine_precision@5": 0.419047619047619, + "eval_dim_64_cosine_recall@1": 0.07822039072039072, + "eval_dim_64_cosine_recall@10": 0.42506105006105005, + "eval_dim_64_cosine_recall@3": 0.21085164835164832, + "eval_dim_64_cosine_recall@5": 0.27602258852258854, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_768_cosine_map@100": 0.6351788392177582, + "eval_dim_768_cosine_mrr@10": 0.5306122448979591, + "eval_dim_768_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4238095238095238, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.08218864468864469, + "eval_dim_768_cosine_recall@10": 0.46474358974358976, + "eval_dim_768_cosine_recall@3": 0.22275641025641024, + "eval_dim_768_cosine_recall@5": 0.2958638583638584, + 
"eval_runtime": 7.0378, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.49922091065744895, + "eval_steps_per_second": 0.0, + "step": 28 + } + ], + "logging_steps": 1, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 2 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-28/training_args.bin b/checkpoint-28/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8da3332899cbb380f0ea340bef3e2069d6cf1db2 --- /dev/null +++ b/checkpoint-28/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14debc6c3f8c5edee5db8d97a3a78a007d313a13e4b96f43026da543b59bef8c +size 6097 diff --git a/checkpoint-35/1_Pooling/config.json b/checkpoint-35/1_Pooling/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3f060ee536308b48017dad1a834f306f115695a3 --- /dev/null +++ b/checkpoint-35/1_Pooling/config.json @@ -0,0 +1,10 @@ +{ + "word_embedding_dimension": 1024, + "pooling_mode_cls_token": false, + "pooling_mode_mean_tokens": true, + "pooling_mode_max_tokens": false, + "pooling_mode_mean_sqrt_len_tokens": false, + "pooling_mode_weightedmean_tokens": false, + "pooling_mode_lasttoken": false, + "include_prompt": true +} \ No newline at end of file diff --git a/checkpoint-35/README.md b/checkpoint-35/README.md new file mode 100644 index 0000000000000000000000000000000000000000..299f0e3410cfa4e3240dcaaace01700eb792279f --- /dev/null +++ b/checkpoint-35/README.md @@ -0,0 +1,1658 
@@ +--- +language: +- en +license: apache-2.0 +tags: +- sentence-transformers +- sentence-similarity +- feature-extraction +- dense +- generated_from_trainer +- dataset_size:82 +- loss:MatryoshkaLoss +- loss:MultipleNegativesRankingLoss +base_model: intfloat/multilingual-e5-large +widget: +- source_sentence: When did the victims give away credentials? + sentences: + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. 
+ + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'Voice phishing involves manipulating victims over the phone. Attackers pose as + bank officials or authorities and use intimidation to extract financial details. + + + Scenario: + + - Victims are coerced into giving away PINs, passwords, or other credentials under + false pretenses of legal or financial emergencies.' 
+ - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without requiring that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and performs an act, omission, or acquiescence; and + + + c) Damage to another’s property, according to civil law, which must be causally + connected to the perpetrator’s deceptive acts or omissions. It is not required + that the deceived person and the person who suffered the loss be the same. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts relating to the present or the past, in such a way as to create the + impression of future fulfillment, based on a false present situation fabricated + by the perpetrator—who has already made the decision not to fulfill their obligation—then + the crime of fraud is established. 
+ + + The term “property” denotes the totality of a person’s economic assets possessing + monetary value, while damage to property refers to its reduction—specifically, + the difference between the property’s monetary value before the disposition caused + by the fraudulent conduct and its value afterward. Property damage exists even + if the victim has an active claim for its restitution. + + + The time of commission of fraud is considered to be the moment when the perpetrator + acted and completed the deceptive conduct, that is, when they made the false representations + which deceived the victim or a third party. Any later time at which the victim’s + financial loss occurred—thus completing the fraud—or the time when the harmful + act or omission of the deceived person took place, is irrelevant. + + + The reference to multiple modes of commission of fraud (i.e., both the misrepresentation + of false facts and the concealment of true ones) may create ambiguity and contradiction, + unless it is made clear from the overall findings that the offense was committed + in one particular manner, and that the reference to the other merely serves to + define the intent (mens rea) of the perpetrator—specifically, that the representations + were false. + + + Furthermore, a conviction must contain the specific and well-reasoned justification + required by Articles 93 paragraph 3 of the Constitution and 139 of the Code of + Criminal Procedure. 
The absence of such reasoning constitutes grounds for cassation + (appeal) under Article 510 paragraph 1(d) of the Code of Criminal Procedure, when + the judgment does not set out, with clarity, completeness, and consistency, the + factual circumstances established by the evidence, upon which the court based + its findings regarding the objective and subjective elements of the offense, the + evidence supporting those findings, and the legal reasoning through which those + facts were subsumed under the applicable substantive criminal provision. + + + For the existence of such reasoning, the explanatory and operative parts of the + decision may complement each other, as they form a single, unified whole. + + + The existence of intent (dolus) does not generally need to be specially justified, + since it is inherent in the will to bring about the factual circumstances constituting + the objective elements of the offense, and it is presumed from their realization + in each particular case—unless the law requires additional elements for criminal + liability, such as the act being committed with knowledge of a specific circumstance + (direct intent) or with the pursuit of a further purpose, i.e., the achievement + of an additional result (offenses requiring a special subjective element). + + + Furthermore, under Article 510 paragraph 1(e) of the Code of Criminal Procedure, + a misapplication of substantive criminal law also constitutes grounds for cassation. + Such misapplication occurs when the trial court incorrectly applies the law to + the facts it has found to be true, or when the violation occurs indirectly, namely + when the reasoning of the judgment—comprising the combination of its factual and + operative parts and relating to the elements and identity of the offense—contains + ambiguities, contradictions, or logical gaps, rendering it impossible to verify, + on appeal, whether the law was applied correctly. In such cases, the judgment + lacks a lawful basis.' 
+- source_sentence: What must be the outcome of the deception in relation to property + damage? + sentences: + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without requiring that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and performs an act, omission, or acquiescence; and + + + c) Damage to another’s property, according to civil law, which must be causally + connected to the perpetrator’s deceptive acts or omissions. It is not required + that the deceived person and the person who suffered the loss be the same. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. 
However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts relating to the present or the past, in such a way as to create the + impression of future fulfillment, based on a false present situation fabricated + by the perpetrator—who has already made the decision not to fulfill their obligation—then + the crime of fraud is established. + + + The term “property” denotes the totality of a person’s economic assets possessing + monetary value, while damage to property refers to its reduction—specifically, + the difference between the property’s monetary value before the disposition caused + by the fraudulent conduct and its value afterward. Property damage exists even + if the victim has an active claim for its restitution. + + + The time of commission of fraud is considered to be the moment when the perpetrator + acted and completed the deceptive conduct, that is, when they made the false representations + which deceived the victim or a third party. Any later time at which the victim’s + financial loss occurred—thus completing the fraud—or the time when the harmful + act or omission of the deceived person took place, is irrelevant. + + + The reference to multiple modes of commission of fraud (i.e., both the misrepresentation + of false facts and the concealment of true ones) may create ambiguity and contradiction, + unless it is made clear from the overall findings that the offense was committed + in one particular manner, and that the reference to the other merely serves to + define the intent (mens rea) of the perpetrator—specifically, that the representations + were false. + + + Furthermore, a conviction must contain the specific and well-reasoned justification + required by Articles 93 paragraph 3 of the Constitution and 139 of the Code of + Criminal Procedure. 
The absence of such reasoning constitutes grounds for cassation + (appeal) under Article 510 paragraph 1(d) of the Code of Criminal Procedure, when + the judgment does not set out, with clarity, completeness, and consistency, the + factual circumstances established by the evidence, upon which the court based + its findings regarding the objective and subjective elements of the offense, the + evidence supporting those findings, and the legal reasoning through which those + facts were subsumed under the applicable substantive criminal provision. + + + For the existence of such reasoning, the explanatory and operative parts of the + decision may complement each other, as they form a single, unified whole. + + + The existence of intent (dolus) does not generally need to be specially justified, + since it is inherent in the will to bring about the factual circumstances constituting + the objective elements of the offense, and it is presumed from their realization + in each particular case—unless the law requires additional elements for criminal + liability, such as the act being committed with knowledge of a specific circumstance + (direct intent) or with the pursuit of a further purpose, i.e., the achievement + of an additional result (offenses requiring a special subjective element). + + + Furthermore, under Article 510 paragraph 1(e) of the Code of Criminal Procedure, + a misapplication of substantive criminal law also constitutes grounds for cassation. + Such misapplication occurs when the trial court incorrectly applies the law to + the facts it has found to be true, or when the violation occurs indirectly, namely + when the reasoning of the judgment—comprising the combination of its factual and + operative parts and relating to the elements and identity of the offense—contains + ambiguities, contradictions, or logical gaps, rendering it impossible to verify, + on appeal, whether the law was applied correctly. In such cases, the judgment + lacks a lawful basis.' 
+ - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. + + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. 
+ + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. + + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another person’s property by persuading someone to act, + omit, or tolerate something through the knowing misrepresentation of false facts + as true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision, it follows that for the crime of fraud to be established, + the following elements are required: + + + a) Intent of the perpetrator to obtain for themselves or another an unlawful pecuniary + benefit, regardless of whether this benefit was actually realized; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which, as a causal factor, someone + is deceived and acts in a way that is detrimental to themselves or another (by + an act, omission, or acquiescence); and + + + c) Damage to another’s property, in the sense recognized by civil law, which must + be causally linked to the fraudulent conduct (the deceptive act or omission of + the perpetrator) and to the resulting deception of the person who made the property + disposition. It is not required that the person deceived be the same person who + suffered the damage. + + + Property damage exists when there is a reduction or deterioration in the victim’s + assets, even if the victim has an active claim to restitution. However, as an + element of the objective aspect of the crime of fraud, the damage must be the + direct, necessary, and exclusive result of the property disposition—namely, the + act, omission, or acquiescence performed by the person deceived by the perpetrator’s + fraudulent conduct. + + + There must therefore be a causal connection between the perpetrator’s deceptive + behavior and the deception it caused, as well as between this deception and the + resulting property damage, which must be the direct, necessary, and exclusive + outcome of the deception and of the act, omission, or acquiescence of the deceived + person. + + + The term “facts” refers to real circumstances relating to the past or present, + and not to those expected to occur in the future, such as mere promises or contractual + obligations. 
However, when such promises or obligations are accompanied by false + assurances and representations of other false facts relating to the present or + the past, in such a way as to create the impression of future fulfillment, based + on the false present situation presented by a perpetrator who has already made + the decision not to fulfill their obligation, then the crime of fraud is established. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their deceptive conduct—that is, when they made the false + representations that deceived the victim or a third party. Any later time at which + the victim’s financial loss actually occurred—thus completing the fraud—or the + time when the deceived person performed the harmful act or omission, is irrelevant.' +- source_sentence: How are victims tricked in email phishing scams? + sentences: + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. + + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. 
+ + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. 
+ + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'Email phishing is a type of identity theft scam conducted via email or SMS. The + attacker uses social engineering tactics such as impersonating trusted entities + and inducing urgency. Victims are tricked into disclosing personal information + or downloading malware. + + + Scenarios: + + - Scenario 1: Emails impersonating high-ranking executives accuse victims of crimes + to coerce them into revealing information or opening malware-laden attachments. 
+ + - Scenario 2: Emails/SMS from fake banks or authorities alert victims of data + breaches, directing them to spoofed websites to input credentials. + + - Scenario 3: SMS messages deliver disguised malware apps that harvest sensitive + data. + + - Scenario 4: SMS links lead to pharming sites that mimic trusted brands and steal + login data through fake pop-ups.' +- source_sentence: What circumstances do the term 'facts' refer to within the meaning + of the provision? + sentences: + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another person’s property by persuading someone to act, + omit, or tolerate something through the knowing misrepresentation of false facts + as true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From this provision, it follows that for the crime of fraud to be established, + the following elements are required: + + + a) Intent of the perpetrator to obtain for themselves or another an unlawful pecuniary + benefit, regardless of whether this benefit was actually realized; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which, as a causal factor, someone + is deceived and acts in a way that is detrimental to themselves or another (by + an act, omission, or acquiescence); and + + + c) Damage to another’s property, in the sense recognized by civil law, which must + be causally linked to the fraudulent conduct (the deceptive act or omission of + the perpetrator) and to the resulting deception of the person who made the property + disposition. It is not required that the person deceived be the same person who + suffered the damage. 
+ + + Property damage exists when there is a reduction or deterioration in the victim’s + assets, even if the victim has an active claim to restitution. However, as an + element of the objective aspect of the crime of fraud, the damage must be the + direct, necessary, and exclusive result of the property disposition—namely, the + act, omission, or acquiescence performed by the person deceived by the perpetrator’s + fraudulent conduct. + + + There must therefore be a causal connection between the perpetrator’s deceptive + behavior and the deception it caused, as well as between this deception and the + resulting property damage, which must be the direct, necessary, and exclusive + outcome of the deception and of the act, omission, or acquiescence of the deceived + person. + + + The term “facts” refers to real circumstances relating to the past or present, + and not to those expected to occur in the future, such as mere promises or contractual + obligations. However, when such promises or obligations are accompanied by false + assurances and representations of other false facts relating to the present or + the past, in such a way as to create the impression of future fulfillment, based + on the false present situation presented by a perpetrator who has already made + the decision not to fulfill their obligation, then the crime of fraud is established. + + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their deceptive conduct—that is, when they made the false + representations that deceived the victim or a third party. Any later time at which + the victim’s financial loss actually occurred—thus completing the fraud—or the + time when the deceived person performed the harmful act or omission, is irrelevant.' + - '1. 
Anyone who, by knowingly presenting false facts as true or by unlawfully concealing + or withholding true facts, damages another person''s property by persuading someone + to act, omission, or tolerance with the aim of obtaining, for themselves or another, + an unlawful financial gain from the damage to that property shall be punished + with imprisonment, "and if the damage caused is particularly great, with imprisonment + of at least three (3) months and a fine." . + + If the damage caused exceeds a total of one hundred and twenty thousand (120,000) + euros, imprisonment of up to ten (10) years and a fine shall be imposed. + + 2. If the fraud is directed directly against the legal entity of the Greek State, + legal entities governed by public law, or local government organizations, and + the damage caused exceeds a total of one hundred and twenty thousand (120,000) + euros, a prison sentence of at least ten (10) years and a fine of up to one thousand + (1,000) daily units shall be imposed. This offense shall be time-barred after + twenty (20) years. + + ' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. 
+ + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' +- source_sentence: When is the time of commission of the fraud considered? + sentences: + - 'Spear phishing targets specific individuals or employees within an organization + using personalized, deceptive emails. Unlike mass phishing, these emails are crafted + to seem familiar and urgent. + + + Scenarios: + + - CEO Fraud: Attackers impersonate executives to extract financial or sensitive + data from employees. + + - Whaling: High-ranking executives are targeted using tailored fraud emails that + press for immediate action without verification.' + - 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." 
+ + + From this provision it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit, without it being necessary that the benefit actually materialize; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence that is detrimental + to themselves or another; and + + + c) Damage to another person’s property, as defined under civil law, which must + be causally linked to the deceptive acts or omissions of the perpetrator. It is + not required that the person deceived and the person who suffered the damage be + the same individual. + + + The term “facts”, within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those that will occur in the future, + such as mere promises or contractual obligations. However, when such promises + or obligations are accompanied by false assurances and representations of other + false facts referring to the present or the past, in such a manner as to create + the impression of future fulfillment based on a false present situation fabricated + by the perpetrator, who has already formed the decision not to fulfill their obligation, + the crime of fraud is established. + + + The term “property” refers to the totality of a person’s economic assets that + possess monetary value, while damage to property means its reduction—specifically, + the difference between the monetary value the property had before the disposition + caused by the fraudulent conduct and the value remaining after it. Property damage + exists even if the victim possesses an active claim for restitution. 
+ + + The time of commission of the fraud is considered to be the moment when the perpetrator + acted and completed their fraudulent conduct, namely when they made the false + representations that deceived the victim or a third party. Any subsequent moment + at which the victim’s damage actually occurred—thereby completing the fraud—or + the time when the victim carried out the harmful act or omission, is irrelevant.' + - 'According to Article 386 paragraph 1 of the Greek Penal Code, + + + "Whoever, with the intent to obtain for themselves or another an unlawful pecuniary + benefit, causes damage to another’s property by persuading someone to act, omit, + or tolerate something through the knowing misrepresentation of false facts as + true, or through the unlawful concealment or suppression of true facts, shall + be punished by imprisonment of at least three months, and if the damage caused + is particularly large, by imprisonment of at least two years." + + + From these provisions, it follows that, for the crime of fraud to be established, + the following elements are required: + + + a) The intent of the perpetrator to obtain for themselves or another an unlawful + pecuniary benefit; + + + b) The knowing misrepresentation of false facts as true, or the unlawful concealment + or suppression of true facts, as a result of which—serving as the causal factor—someone + is deceived and proceeds to an act, omission, or acquiescence detrimental to themselves + or another; and + + + c) Damage to another’s property, as defined under civil law, which must be causally + connected to the perpetrator’s deceptive acts. 
+ + + From the above provisions, it is deduced that the crime of fraud is established + both objectively and subjectively through the knowing misrepresentation of false + facts as true, or the unlawful concealment or suppression of true ones, by which + another person is deceived and, as a result, performs an act, omission, or acquiescence + involving a disposition of property that directly and necessarily causes financial + damage to the deceived person or another, with the intent that the perpetrator + or another gain an unlawful benefit. It is irrelevant whether this intended benefit + was ultimately achieved. + + + The term “facts,” within the meaning of the above provision, refers to real circumstances + relating to the past or present, and not to those expected to occur in the future, + such as mere promises or contractual obligations. The false fact must have existed + in the past or must be a present circumstance at the time it is asserted, and + cannot relate to the future. + + + However, when future circumstances—that is, promises or contractual obligations—are + accompanied by false assurances and representations of other false facts referring + to the present or past, in such a way as to create the impression of future fulfillment, + based on a false present situation or supposed ability of the perpetrator, who + had already made the decision not to fulfill their obligation, then the crime + of fraud is established.' 
+pipeline_tag: sentence-similarity +library_name: sentence-transformers +metrics: +- cosine_accuracy@1 +- cosine_accuracy@3 +- cosine_accuracy@5 +- cosine_accuracy@10 +- cosine_precision@1 +- cosine_precision@3 +- cosine_precision@5 +- cosine_precision@10 +- cosine_recall@1 +- cosine_recall@3 +- cosine_recall@5 +- cosine_recall@10 +- cosine_ndcg@10 +- cosine_mrr@10 +- cosine_map@100 +model-index: +- name: multilingual_e5_large Finetuned on Data + results: + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 1024 + type: dim_1024 + metrics: + - type: cosine_accuracy@1 + value: 0.5238095238095238 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.5238095238095238 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.5238095238095238 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.6190476190476191 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.5238095238095238 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.5079365079365079 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.4666666666666666 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4428571428571429 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.08218864468864469 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.22275641025641024 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.2958638583638584 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.4766483516483517 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5598242514045669 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.5374149659863945 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6534286699882501 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 768 + type: dim_768 + metrics: + - type: cosine_accuracy@1 + value: 0.5238095238095238 + name: Cosine 
Accuracy@1 + - type: cosine_accuracy@3 + value: 0.5238095238095238 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.5238095238095238 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.6190476190476191 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.5238095238095238 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.5079365079365079 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.4666666666666666 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4428571428571429 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.08218864468864469 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.22275641025641024 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.2958638583638584 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.4766483516483517 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5598242514045669 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.5374149659863945 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.653075337994289 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 512 + type: dim_512 + metrics: + - type: cosine_accuracy@1 + value: 0.5238095238095238 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.5238095238095238 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.5238095238095238 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.6190476190476191 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.5238095238095238 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.5079365079365079 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.4666666666666666 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4428571428571429 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.08218864468864469 + name: Cosine 
Recall@1 + - type: cosine_recall@3 + value: 0.22275641025641024 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.2958638583638584 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.4766483516483517 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5598242514045669 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.5374149659863945 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6492208787775379 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 256 + type: dim_256 + metrics: + - type: cosine_accuracy@1 + value: 0.6190476190476191 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.6190476190476191 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.6190476190476191 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.6666666666666666 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.6190476190476191 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.6031746031746031 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.5619047619047619 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.5190476190476192 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.08600427350427349 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.2342032967032967 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.31494200244200243 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.5028998778998779 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.6420780535145918 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.6258503401360545 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6975707466438095 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 128 + type: dim_128 + metrics: + - type: cosine_accuracy@1 + value: 0.5238095238095238 + name: 
Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.5238095238095238 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.5238095238095238 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.6190476190476191 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.5238095238095238 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.5079365079365079 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.4666666666666666 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.4428571428571429 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.0811965811965812 + name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.21978021978021975 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.2909035409035409 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.46672771672771673 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.5598242514045669 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.5374149659863945 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.6478872365910466 + name: Cosine Map@100 + - task: + type: information-retrieval + name: Information Retrieval + dataset: + name: dim 64 + type: dim_64 + metrics: + - type: cosine_accuracy@1 + value: 0.42857142857142855 + name: Cosine Accuracy@1 + - type: cosine_accuracy@3 + value: 0.47619047619047616 + name: Cosine Accuracy@3 + - type: cosine_accuracy@5 + value: 0.47619047619047616 + name: Cosine Accuracy@5 + - type: cosine_accuracy@10 + value: 0.5714285714285714 + name: Cosine Accuracy@10 + - type: cosine_precision@1 + value: 0.42857142857142855 + name: Cosine Precision@1 + - type: cosine_precision@3 + value: 0.4444444444444445 + name: Cosine Precision@3 + - type: cosine_precision@5 + value: 0.419047619047619 + name: Cosine Precision@5 + - type: cosine_precision@10 + value: 0.3952380952380953 + name: Cosine Precision@10 + - type: cosine_recall@1 + value: 0.054410866910866905 + 
name: Cosine Recall@1 + - type: cosine_recall@3 + value: 0.18704212454212454 + name: Cosine Recall@3 + - type: cosine_recall@5 + value: 0.27602258852258854 + name: Cosine Recall@5 + - type: cosine_recall@10 + value: 0.43696581196581197 + name: Cosine Recall@10 + - type: cosine_ndcg@10 + value: 0.4917595713548203 + name: Cosine Ndcg@10 + - type: cosine_mrr@10 + value: 0.45804988662131524 + name: Cosine Mrr@10 + - type: cosine_map@100 + value: 0.5872011588310861 + name: Cosine Map@100 +--- + +# multilingual_e5_large Finetuned on Data + +This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more. + +## Model Details + +### Model Description +- **Model Type:** Sentence Transformer +- **Base model:** [intfloat/multilingual-e5-large](https://huggingface.co/intfloat/multilingual-e5-large) +- **Maximum Sequence Length:** 512 tokens +- **Output Dimensionality:** 1024 dimensions +- **Similarity Function:** Cosine Similarity + +- **Language:** en +- **License:** apache-2.0 + +### Model Sources + +- **Documentation:** [Sentence Transformers Documentation](https://sbert.net) +- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers) +- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers) + +### Full Model Architecture + +``` +SentenceTransformer( + (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'}) + (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 
'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) + (2): Normalize() +) +``` + +## Usage + +### Direct Usage (Sentence Transformers) + +First install the Sentence Transformers library: + +```bash +pip install -U sentence-transformers +``` + +Then you can load this model and run inference. +```python +from sentence_transformers import SentenceTransformer + +# Download from the 🤗 Hub +model = SentenceTransformer("sentence_transformers_model_id") +# Run inference +sentences = [ + 'When is the time of commission of the fraud considered?', + 'According to the provision of Article 386 paragraph 1 of the Greek Penal Code,\n\n"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."\n\nFrom this provision it follows that, for the crime of fraud to be established, the following elements are required:\n\na) The intent of the perpetrator to obtain for themselves or another an unlawful pecuniary benefit, without it being necessary that the benefit actually materialize;\n\nb) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence that is detrimental to themselves or another; and\n\nc) Damage to another person’s property, as defined under civil law, which must be causally linked to the deceptive acts or omissions of the perpetrator. 
It is not required that the person deceived and the person who suffered the damage be the same individual.\n\nThe term “facts”, within the meaning of the above provision, refers to real circumstances relating to the past or present, and not to those that will occur in the future, such as mere promises or contractual obligations. However, when such promises or obligations are accompanied by false assurances and representations of other false facts referring to the present or the past, in such a manner as to create the impression of future fulfillment based on a false present situation fabricated by the perpetrator, who has already formed the decision not to fulfill their obligation, the crime of fraud is established.\n\nThe term “property” refers to the totality of a person’s economic assets that possess monetary value, while damage to property means its reduction—specifically, the difference between the monetary value the property had before the disposition caused by the fraudulent conduct and the value remaining after it. Property damage exists even if the victim possesses an active claim for restitution.\n\nThe time of commission of the fraud is considered to be the moment when the perpetrator acted and completed their fraudulent conduct, namely when they made the false representations that deceived the victim or a third party. Any subsequent moment at which the victim’s damage actually occurred—thereby completing the fraud—or the time when the victim carried out the harmful act or omission, is irrelevant.', + 'Spear phishing targets specific individuals or employees within an organization using personalized, deceptive emails. 
Unlike mass phishing, these emails are crafted to seem familiar and urgent.\n\nScenarios:\n- CEO Fraud: Attackers impersonate executives to extract financial or sensitive data from employees.\n- Whaling: High-ranking executives are targeted using tailored fraud emails that press for immediate action without verification.', +] +embeddings = model.encode(sentences) +print(embeddings.shape) +# [3, 1024] + +# Get the similarity scores for the embeddings +similarities = model.similarity(embeddings, embeddings) +print(similarities) +# tensor([[1.0000, 0.5608, 0.2769], +# [0.5608, 1.0000, 0.3160], +# [0.2769, 0.3160, 1.0001]]) +``` + + + + + + + +## Evaluation + +### Metrics + +#### Information Retrieval + +* Dataset: `dim_1024` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 1024 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5238 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4667 | +| cosine_precision@10 | 0.4429 | +| cosine_recall@1 | 0.0822 | +| cosine_recall@3 | 0.2228 | +| cosine_recall@5 | 0.2959 | +| cosine_recall@10 | 0.4766 | +| **cosine_ndcg@10** | **0.5598** | +| cosine_mrr@10 | 0.5374 | +| cosine_map@100 | 0.6534 | + +#### Information Retrieval + +* Dataset: `dim_768` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 768 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5238 
| +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4667 | +| cosine_precision@10 | 0.4429 | +| cosine_recall@1 | 0.0822 | +| cosine_recall@3 | 0.2228 | +| cosine_recall@5 | 0.2959 | +| cosine_recall@10 | 0.4766 | +| **cosine_ndcg@10** | **0.5598** | +| cosine_mrr@10 | 0.5374 | +| cosine_map@100 | 0.6531 | + +#### Information Retrieval + +* Dataset: `dim_512` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 512 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5238 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4667 | +| cosine_precision@10 | 0.4429 | +| cosine_recall@1 | 0.0822 | +| cosine_recall@3 | 0.2228 | +| cosine_recall@5 | 0.2959 | +| cosine_recall@10 | 0.4766 | +| **cosine_ndcg@10** | **0.5598** | +| cosine_mrr@10 | 0.5374 | +| cosine_map@100 | 0.6492 | + +#### Information Retrieval + +* Dataset: `dim_256` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 256 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.619 | +| cosine_accuracy@3 | 0.619 | +| cosine_accuracy@5 | 0.619 | +| cosine_accuracy@10 | 0.6667 | +| cosine_precision@1 | 0.619 | +| cosine_precision@3 | 0.6032 | +| cosine_precision@5 | 0.5619 | +| cosine_precision@10 | 0.519 | +| cosine_recall@1 | 0.086 | +| cosine_recall@3 | 0.2342 | +| cosine_recall@5 | 0.3149 | +| cosine_recall@10 | 0.5029 | +| 
**cosine_ndcg@10** | **0.6421** | +| cosine_mrr@10 | 0.6259 | +| cosine_map@100 | 0.6976 | + +#### Information Retrieval + +* Dataset: `dim_128` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 128 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.5238 | +| cosine_accuracy@3 | 0.5238 | +| cosine_accuracy@5 | 0.5238 | +| cosine_accuracy@10 | 0.619 | +| cosine_precision@1 | 0.5238 | +| cosine_precision@3 | 0.5079 | +| cosine_precision@5 | 0.4667 | +| cosine_precision@10 | 0.4429 | +| cosine_recall@1 | 0.0812 | +| cosine_recall@3 | 0.2198 | +| cosine_recall@5 | 0.2909 | +| cosine_recall@10 | 0.4667 | +| **cosine_ndcg@10** | **0.5598** | +| cosine_mrr@10 | 0.5374 | +| cosine_map@100 | 0.6479 | + +#### Information Retrieval + +* Dataset: `dim_64` +* Evaluated with [InformationRetrievalEvaluator](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters: + ```json + { + "truncate_dim": 64 + } + ``` + +| Metric | Value | +|:--------------------|:-----------| +| cosine_accuracy@1 | 0.4286 | +| cosine_accuracy@3 | 0.4762 | +| cosine_accuracy@5 | 0.4762 | +| cosine_accuracy@10 | 0.5714 | +| cosine_precision@1 | 0.4286 | +| cosine_precision@3 | 0.4444 | +| cosine_precision@5 | 0.419 | +| cosine_precision@10 | 0.3952 | +| cosine_recall@1 | 0.0544 | +| cosine_recall@3 | 0.187 | +| cosine_recall@5 | 0.276 | +| cosine_recall@10 | 0.437 | +| **cosine_ndcg@10** | **0.4918** | +| cosine_mrr@10 | 0.458 | +| cosine_map@100 | 0.5872 | + + + + + +## Training Details + +### Training Dataset + +#### Unnamed Dataset + +* Size: 82 training samples +* Columns: anchor and positive +* Approximate statistics based on the first 82 samples: + | | anchor 
| positive | + |:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------| + | type | string | string | + | details |
  • min: 9 tokens
  • mean: 18.17 tokens
  • max: 34 tokens
|
  • min: 69 tokens
  • mean: 399.51 tokens
  • max: 512 tokens
| +* Samples: + | anchor | positive | + |:----------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| + | What determines whether the act in question shall be punished if the offender is in the service of the legal holder of the data? | Everyone who obtains access to data recorded in a computer or in the external memory of a computer or transmitted by telecommunication systems shall be punished with imprisonment for up to six months or by a fine from 29 to 15,000 Euro, under the condition that these acts have been committed without right, especially in violation of prohibitions or of security measures taken by the legal holder. If the act concerns the international relations or the security of the State, he shall be punished according to Article 148.
If the offender is in the service of the legal holder of the data, the act of the preceding paragraph shall be punished only if it has been explicitly prohibited by internal regulations or by a written decision of the holder or of a competent employee of his.
| + | What must be causally connected to the perpetrator's deceptive acts? | According to Article 386 paragraph 1 of the Greek Penal Code,

"Whoever, with the intent to obtain for themselves or another an unlawful pecuniary benefit, causes damage to another’s property by persuading someone to act, omit, or tolerate something through the knowing misrepresentation of false facts as true, or through the unlawful concealment or suppression of true facts, shall be punished by imprisonment of at least three months, and if the damage caused is particularly large, by imprisonment of at least two years."

From these provisions, it follows that, for the crime of fraud to be established, the following elements are required:

a) The intent of the perpetrator to obtain for themselves or another an unlawful pecuniary benefit;

b) The knowing misrepresentation of false facts as true, or the unlawful concealment or suppression of true facts, as a result of which—serving as the causal factor—someone is deceived and proceeds to an act, omission, or acquiescence detrimental to th...
| + | Who can be punished with imprisonment? | 1. Anyone who, by knowingly presenting false facts as true or by unlawfully concealing or withholding true facts, damages another person's property by persuading someone to act, omission, or tolerance with the aim of obtaining, for themselves or another, an unlawful financial gain from the damage to that property shall be punished with imprisonment, "and if the damage caused is particularly great, with imprisonment of at least three (3) months and a fine." .
If the damage caused exceeds a total of one hundred and twenty thousand (120,000) euros, imprisonment of up to ten (10) years and a fine shall be imposed.
2. If the fraud is directed directly against the legal entity of the Greek State, legal entities governed by public law, or local government organizations, and the damage caused exceeds a total of one hundred and twenty thousand (120,000) euros, a prison sentence of at least ten (10) years and a fine of up to one thousand (1,000) daily units shall be imposed. This offense shall b...
| +* Loss: [MatryoshkaLoss](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters: + ```json + { + "loss": "MultipleNegativesRankingLoss", + "matryoshka_dims": [ + 1024, + 768, + 512, + 256, + 128, + 64 + ], + "matryoshka_weights": [ + 1, + 1, + 1, + 1, + 1, + 1 + ], + "n_dims_per_step": -1 + } + ``` + +### Training Hyperparameters +#### Non-Default Hyperparameters + +- `eval_strategy`: epoch +- `gradient_accumulation_steps`: 2 +- `learning_rate`: 2e-05 +- `num_train_epochs`: 10 +- `lr_scheduler_type`: cosine +- `warmup_ratio`: 0.1 +- `bf16`: True +- `tf32`: True +- `load_best_model_at_end`: True +- `optim`: adamw_torch_fused +- `batch_sampler`: no_duplicates + +#### All Hyperparameters +
Click to expand + +- `overwrite_output_dir`: False +- `do_predict`: False +- `eval_strategy`: epoch +- `prediction_loss_only`: True +- `per_device_train_batch_size`: 8 +- `per_device_eval_batch_size`: 8 +- `per_gpu_train_batch_size`: None +- `per_gpu_eval_batch_size`: None +- `gradient_accumulation_steps`: 2 +- `eval_accumulation_steps`: None +- `torch_empty_cache_steps`: None +- `learning_rate`: 2e-05 +- `weight_decay`: 0.0 +- `adam_beta1`: 0.9 +- `adam_beta2`: 0.999 +- `adam_epsilon`: 1e-08 +- `max_grad_norm`: 1.0 +- `num_train_epochs`: 10 +- `max_steps`: -1 +- `lr_scheduler_type`: cosine +- `lr_scheduler_kwargs`: {} +- `warmup_ratio`: 0.1 +- `warmup_steps`: 0 +- `log_level`: passive +- `log_level_replica`: warning +- `log_on_each_node`: True +- `logging_nan_inf_filter`: True +- `save_safetensors`: True +- `save_on_each_node`: False +- `save_only_model`: False +- `restore_callback_states_from_checkpoint`: False +- `no_cuda`: False +- `use_cpu`: False +- `use_mps_device`: False +- `seed`: 42 +- `data_seed`: None +- `jit_mode_eval`: False +- `use_ipex`: False +- `bf16`: True +- `fp16`: False +- `fp16_opt_level`: O1 +- `half_precision_backend`: auto +- `bf16_full_eval`: False +- `fp16_full_eval`: False +- `tf32`: True +- `local_rank`: 0 +- `ddp_backend`: None +- `tpu_num_cores`: None +- `tpu_metrics_debug`: False +- `debug`: [] +- `dataloader_drop_last`: False +- `dataloader_num_workers`: 0 +- `dataloader_prefetch_factor`: None +- `past_index`: -1 +- `disable_tqdm`: False +- `remove_unused_columns`: True +- `label_names`: None +- `load_best_model_at_end`: True +- `ignore_data_skip`: False +- `fsdp`: [] +- `fsdp_min_num_params`: 0 +- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False} +- `tp_size`: 0 +- `fsdp_transformer_layer_cls_to_wrap`: None +- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 
'gradient_accumulation_kwargs': None} +- `deepspeed`: None +- `label_smoothing_factor`: 0.0 +- `optim`: adamw_torch_fused +- `optim_args`: None +- `adafactor`: False +- `group_by_length`: False +- `length_column_name`: length +- `ddp_find_unused_parameters`: None +- `ddp_bucket_cap_mb`: None +- `ddp_broadcast_buffers`: False +- `dataloader_pin_memory`: True +- `dataloader_persistent_workers`: False +- `skip_memory_metrics`: True +- `use_legacy_prediction_loop`: False +- `push_to_hub`: False +- `resume_from_checkpoint`: None +- `hub_model_id`: None +- `hub_strategy`: every_save +- `hub_private_repo`: None +- `hub_always_push`: False +- `gradient_checkpointing`: False +- `gradient_checkpointing_kwargs`: None +- `include_inputs_for_metrics`: False +- `include_for_metrics`: [] +- `eval_do_concat_batches`: True +- `fp16_backend`: auto +- `push_to_hub_model_id`: None +- `push_to_hub_organization`: None +- `mp_parameters`: +- `auto_find_batch_size`: False +- `full_determinism`: False +- `torchdynamo`: None +- `ray_scope`: last +- `ddp_timeout`: 1800 +- `torch_compile`: False +- `torch_compile_backend`: None +- `torch_compile_mode`: None +- `include_tokens_per_second`: False +- `include_num_input_tokens_seen`: False +- `neftune_noise_alpha`: None +- `optim_target_modules`: None +- `batch_eval_metrics`: False +- `eval_on_start`: False +- `use_liger_kernel`: False +- `eval_use_gather_object`: False +- `average_tokens_across_devices`: False +- `prompts`: None +- `batch_sampler`: no_duplicates +- `multi_dataset_batch_sampler`: proportional +- `router_mapping`: {} +- `learning_rate_mapping`: {} + +
+ +### Training Logs +| Epoch | Step | Training Loss | dim_1024_cosine_ndcg@10 | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 | +|:------:|:----:|:-------------:|:-----------------------:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:| +| 0.1818 | 1 | 18.029 | - | - | - | - | - | - | +| 0.3636 | 2 | 19.4106 | - | - | - | - | - | - | +| 0.5455 | 3 | 16.6201 | - | - | - | - | - | - | +| 0.7273 | 4 | 15.3048 | - | - | - | - | - | - | +| 0.9091 | 5 | 14.0182 | - | - | - | - | - | - | +| 1.0 | 6 | 6.4771 | - | - | - | - | - | - | +| 1.0909 | 7 | 6.7664 | 0.6167 | 0.5821 | 0.5524 | 0.5177 | 0.5278 | 0.4124 | +| 1.1818 | 8 | 11.8583 | - | - | - | - | - | - | +| 1.3636 | 9 | 11.9216 | - | - | - | - | - | - | +| 1.5455 | 10 | 13.3764 | - | - | - | - | - | - | +| 1.7273 | 11 | 12.9063 | - | - | - | - | - | - | +| 1.9091 | 12 | 13.5984 | - | - | - | - | - | - | +| 2.0 | 13 | 7.8523 | - | - | - | - | - | - | +| 2.0909 | 14 | 4.4487 | 0.5921 | 0.5921 | 0.5518 | 0.5709 | 0.5685 | 0.5113 | +| 2.1818 | 15 | 8.5374 | - | - | - | - | - | - | +| 2.3636 | 16 | 9.6999 | - | - | - | - | - | - | +| 2.5455 | 17 | 9.0121 | - | - | - | - | - | - | +| 2.7273 | 18 | 13.5705 | - | - | - | - | - | - | +| 2.9091 | 19 | 13.0195 | - | - | - | - | - | - | +| 3.0 | 20 | 7.9821 | - | - | - | - | - | - | +| 3.0909 | 21 | 3.2842 | 0.5159 | 0.5636 | 0.5468 | 0.5468 | 0.5468 | 0.5233 | +| 3.1818 | 22 | 4.4446 | - | - | - | - | - | - | +| 3.3636 | 23 | 5.7244 | - | - | - | - | - | - | +| 3.5455 | 24 | 7.1394 | - | - | - | - | - | - | +| 3.7273 | 25 | 16.7583 | - | - | - | - | - | - | +| 3.9091 | 26 | 11.3515 | - | - | - | - | - | - | +| 4.0 | 27 | 8.813 | - | - | - | - | - | - | +| 4.0909 | 28 | 6.9124 | 0.5159 | 0.5468 | 0.4992 | 0.5468 | 0.4992 | 0.4992 | +| 4.1818 | 29 | 6.1814 | - | - | - | - | - | - | +| 4.3636 | 30 | 7.1606 | - | - | - | - | - | - 
| +| 4.5455 | 31 | 5.0888 | - | - | - | - | - | - | +| 4.7273 | 32 | 5.0684 | - | - | - | - | - | - | +| 4.9091 | 33 | 6.7382 | - | - | - | - | - | - | +| 5.0 | 34 | 7.0497 | - | - | - | - | - | - | +| 5.0909 | 35 | 6.582 | 0.5598 | 0.5598 | 0.5598 | 0.6421 | 0.5598 | 0.4918 | + + +### Framework Versions +- Python: 3.12.12 +- Sentence Transformers: 5.1.1 +- Transformers: 4.51.3 +- PyTorch: 2.8.0+cu126 +- Accelerate: 1.11.0 +- Datasets: 4.0.0 +- Tokenizers: 0.21.4 + +## Citation + +### BibTeX + +#### Sentence Transformers +```bibtex +@inproceedings{reimers-2019-sentence-bert, + title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks", + author = "Reimers, Nils and Gurevych, Iryna", + booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing", + month = "11", + year = "2019", + publisher = "Association for Computational Linguistics", + url = "https://arxiv.org/abs/1908.10084", +} +``` + +#### MatryoshkaLoss +```bibtex +@misc{kusupati2024matryoshka, + title={Matryoshka Representation Learning}, + author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi}, + year={2024}, + eprint={2205.13147}, + archivePrefix={arXiv}, + primaryClass={cs.LG} +} +``` + +#### MultipleNegativesRankingLoss +```bibtex +@misc{henderson2017efficient, + title={Efficient Natural Language Response Suggestion for Smart Reply}, + author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, + year={2017}, + eprint={1705.00652}, + archivePrefix={arXiv}, + primaryClass={cs.CL} +} +``` + + + + + + \ No newline at end of file diff --git a/checkpoint-35/config.json b/checkpoint-35/config.json new file mode 100644 index 
0000000000000000000000000000000000000000..93f46b2fc5d0e832be8338cb0ce9f09ae81bc1a2 --- /dev/null +++ b/checkpoint-35/config.json @@ -0,0 +1,27 @@ +{ + "architectures": [ + "XLMRobertaModel" + ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, + "eos_token_id": 2, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "max_position_embeddings": 514, + "model_type": "xlm-roberta", + "num_attention_heads": 16, + "num_hidden_layers": 24, + "output_past": true, + "pad_token_id": 1, + "position_embedding_type": "absolute", + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "type_vocab_size": 1, + "use_cache": true, + "vocab_size": 250002 +} diff --git a/checkpoint-35/config_sentence_transformers.json b/checkpoint-35/config_sentence_transformers.json new file mode 100644 index 0000000000000000000000000000000000000000..a0390e7f92afe1fdc0e00e1712f50de56290fde6 --- /dev/null +++ b/checkpoint-35/config_sentence_transformers.json @@ -0,0 +1,14 @@ +{ + "model_type": "SentenceTransformer", + "__version__": { + "sentence_transformers": "5.1.1", + "transformers": "4.51.3", + "pytorch": "2.8.0+cu126" + }, + "prompts": { + "query": "", + "document": "" + }, + "default_prompt_name": null, + "similarity_fn_name": "cosine" +} \ No newline at end of file diff --git a/checkpoint-35/model.safetensors b/checkpoint-35/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad8a6b11403b6b88881ee6a87f7b28f9a940245d --- /dev/null +++ b/checkpoint-35/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f7897be8f9d594942e4a3af80d3943c8b741feec27f8307cc90b80b9524898c +size 2239607176 diff --git a/checkpoint-35/modules.json b/checkpoint-35/modules.json new file mode 100644 index 0000000000000000000000000000000000000000..952a9b81c0bfd99800fabf352f69c7ccd46c5e43 --- 
/dev/null +++ b/checkpoint-35/modules.json @@ -0,0 +1,20 @@ +[ + { + "idx": 0, + "name": "0", + "path": "", + "type": "sentence_transformers.models.Transformer" + }, + { + "idx": 1, + "name": "1", + "path": "1_Pooling", + "type": "sentence_transformers.models.Pooling" + }, + { + "idx": 2, + "name": "2", + "path": "2_Normalize", + "type": "sentence_transformers.models.Normalize" + } +] \ No newline at end of file diff --git a/checkpoint-35/optimizer.pt b/checkpoint-35/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f592445dbe38f1011a21b4ee2afe0956d28e0f5e --- /dev/null +++ b/checkpoint-35/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c026b136a1e7d5fc0d68e1f5c9885274ee96bd6b327534f96178dde6c62f3d9 +size 4471067142 diff --git a/checkpoint-35/rng_state.pth b/checkpoint-35/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a7742c3bf98c08468355b3e55987386d102cdd2d --- /dev/null +++ b/checkpoint-35/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2e5f9980b1d32c44b03ed896bf9b5d0a406317fc3796843a35decde00b68a3b +size 14645 diff --git a/checkpoint-35/scheduler.pt b/checkpoint-35/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..69584372b88e835125666509ab4a8139fcd5c938 --- /dev/null +++ b/checkpoint-35/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a649381ddd14598478361c263c6e25680ae024d7e4ac70d033eba4fa322d038 +size 1465 diff --git a/checkpoint-35/sentence_bert_config.json b/checkpoint-35/sentence_bert_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4eca68d85ecd3034cf4174d8a4033a75344ea62d --- /dev/null +++ b/checkpoint-35/sentence_bert_config.json @@ -0,0 +1,4 @@ +{ + "max_seq_length": 512, + "do_lower_case": false +} \ No newline at end of file diff --git a/checkpoint-35/sentencepiece.bpe.model 
b/checkpoint-35/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-35/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-35/special_tokens_map.json b/checkpoint-35/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b1879d702821e753ffe4245048eee415d54a9385 --- /dev/null +++ b/checkpoint-35/special_tokens_map.json @@ -0,0 +1,51 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "cls_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "sep_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-35/tokenizer.json b/checkpoint-35/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2a51933f1ccb3cf68d53b877cbfa24734ada642f --- /dev/null +++ b/checkpoint-35/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883b037111086fd4dfebbbc9b7cee11e1517b5e0c0514879478661440f137085 +size 17082987 diff --git a/checkpoint-35/tokenizer_config.json b/checkpoint-35/tokenizer_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..8ff4390b993a1a04254ff2e21bc6bee199cd6e32 --- /dev/null +++ b/checkpoint-35/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "mask_token": "", + "model_max_length": 512, + "pad_token": "", + "sep_token": "", + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/checkpoint-35/trainer_state.json b/checkpoint-35/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1cc91d5ed60e3753b8b3edc9862b135b896c5eee --- /dev/null +++ b/checkpoint-35/trainer_state.json @@ -0,0 +1,778 @@ +{ + "best_global_step": 14, + "best_metric": 0.5685354415901852, + "best_model_checkpoint": "multilingual-e5-large/checkpoint-14", + "epoch": 5.090909090909091, + "eval_steps": 500, + "global_step": 35, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.18181818181818182, + "grad_norm": 135.6220703125, + "learning_rate": 0.0, + "loss": 18.029, + "step": 1 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 140.5317840576172, + "learning_rate": 4.000000000000001e-06, + "loss": 19.4106, + "step": 2 + 
}, + { + "epoch": 0.5454545454545454, + "grad_norm": 142.2399444580078, + "learning_rate": 8.000000000000001e-06, + "loss": 16.6201, + "step": 3 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 110.63945770263672, + "learning_rate": 1.2e-05, + "loss": 15.3048, + "step": 4 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 129.43089294433594, + "learning_rate": 1.6000000000000003e-05, + "loss": 14.0182, + "step": 5 + }, + { + "epoch": 1.0, + "grad_norm": 108.25520324707031, + "learning_rate": 2e-05, + "loss": 6.4771, + "step": 6 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 95.05782318115234, + "learning_rate": 1.9975640502598243e-05, + "loss": 6.7664, + "step": 7 + }, + { + "epoch": 1.0909090909090908, + "eval_dim_1024_cosine_accuracy@1": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_1024_cosine_accuracy@3": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@5": 0.6190476190476191, + "eval_dim_1024_cosine_map@100": 0.6946498352795571, + "eval_dim_1024_cosine_mrr@10": 0.5888888888888888, + "eval_dim_1024_cosine_ndcg@10": 0.6167053425672016, + "eval_dim_1024_cosine_precision@1": 0.5714285714285714, + "eval_dim_1024_cosine_precision@10": 0.4476190476190477, + "eval_dim_1024_cosine_precision@3": 0.5555555555555556, + "eval_dim_1024_cosine_precision@5": 0.5142857142857142, + "eval_dim_1024_cosine_recall@1": 0.10123626373626372, + "eval_dim_1024_cosine_recall@10": 0.5401404151404151, + "eval_dim_1024_cosine_recall@3": 0.2798992673992674, + "eval_dim_1024_cosine_recall@5": 0.3871336996336997, + "eval_dim_128_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_128_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_128_cosine_map@100": 0.5954946117864774, + "eval_dim_128_cosine_mrr@10": 0.49251700680272104, + "eval_dim_128_cosine_ndcg@10": 0.5277801377642622, + "eval_dim_128_cosine_precision@1": 
0.47619047619047616, + "eval_dim_128_cosine_precision@10": 0.3761904761904762, + "eval_dim_128_cosine_precision@3": 0.4603174603174603, + "eval_dim_128_cosine_precision@5": 0.42857142857142855, + "eval_dim_128_cosine_recall@1": 0.08536324786324785, + "eval_dim_128_cosine_recall@10": 0.4806166056166057, + "eval_dim_128_cosine_recall@3": 0.23228021978021982, + "eval_dim_128_cosine_recall@5": 0.31967338217338215, + "eval_dim_256_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6218563539505393, + "eval_dim_256_cosine_mrr@10": 0.4925170068027211, + "eval_dim_256_cosine_ndcg@10": 0.5177339964253599, + "eval_dim_256_cosine_precision@1": 0.47619047619047616, + "eval_dim_256_cosine_precision@10": 0.3476190476190476, + "eval_dim_256_cosine_precision@3": 0.4603174603174603, + "eval_dim_256_cosine_precision@5": 0.419047619047619, + "eval_dim_256_cosine_recall@1": 0.09543650793650793, + "eval_dim_256_cosine_recall@10": 0.47817460317460325, + "eval_dim_256_cosine_recall@3": 0.2625, + "eval_dim_256_cosine_recall@5": 0.35813492063492064, + "eval_dim_512_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_512_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_512_cosine_map@100": 0.6608463504287858, + "eval_dim_512_cosine_mrr@10": 0.5333333333333333, + "eval_dim_512_cosine_ndcg@10": 0.5523687509163372, + "eval_dim_512_cosine_precision@1": 0.5238095238095238, + "eval_dim_512_cosine_precision@10": 0.3761904761904762, + "eval_dim_512_cosine_precision@3": 0.5079365079365079, + "eval_dim_512_cosine_precision@5": 0.4666666666666666, + "eval_dim_512_cosine_recall@1": 0.0994047619047619, + "eval_dim_512_cosine_recall@10": 0.5019841269841271, + "eval_dim_512_cosine_recall@3": 
0.2744047619047619, + "eval_dim_512_cosine_recall@5": 0.37797619047619047, + "eval_dim_64_cosine_accuracy@1": 0.3333333333333333, + "eval_dim_64_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_64_cosine_accuracy@3": 0.38095238095238093, + "eval_dim_64_cosine_accuracy@5": 0.38095238095238093, + "eval_dim_64_cosine_map@100": 0.5579595177809107, + "eval_dim_64_cosine_mrr@10": 0.3764172335600907, + "eval_dim_64_cosine_ndcg@10": 0.41244392103396355, + "eval_dim_64_cosine_precision@1": 0.3333333333333333, + "eval_dim_64_cosine_precision@10": 0.2619047619047619, + "eval_dim_64_cosine_precision@3": 0.3492063492063492, + "eval_dim_64_cosine_precision@5": 0.3142857142857143, + "eval_dim_64_cosine_recall@1": 0.07063492063492063, + "eval_dim_64_cosine_recall@10": 0.42002442002442003, + "eval_dim_64_cosine_recall@3": 0.2357142857142857, + "eval_dim_64_cosine_recall@5": 0.34523809523809523, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_768_cosine_map@100": 0.6727109022414055, + "eval_dim_768_cosine_mrr@10": 0.5480725623582765, + "eval_dim_768_cosine_ndcg@10": 0.5820705880762242, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.41904761904761906, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.09726800976800977, + "eval_dim_768_cosine_recall@10": 0.5163308913308913, + "eval_dim_768_cosine_recall@3": 0.2679945054945055, + "eval_dim_768_cosine_recall@5": 0.36729242979242976, + "eval_runtime": 7.0357, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.41244392103396355, + "eval_steps_per_second": 0.0, + "step": 7 + }, + { + "epoch": 1.1818181818181819, + "grad_norm": 159.73828125, + "learning_rate": 1.9902680687415704e-05, + 
"loss": 11.8583, + "step": 8 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 134.70603942871094, + "learning_rate": 1.9781476007338058e-05, + "loss": 11.9216, + "step": 9 + }, + { + "epoch": 1.5454545454545454, + "grad_norm": 139.73654174804688, + "learning_rate": 1.961261695938319e-05, + "loss": 13.3764, + "step": 10 + }, + { + "epoch": 1.7272727272727273, + "grad_norm": 173.23065185546875, + "learning_rate": 1.9396926207859085e-05, + "loss": 12.9063, + "step": 11 + }, + { + "epoch": 1.9090909090909092, + "grad_norm": 208.78482055664062, + "learning_rate": 1.913545457642601e-05, + "loss": 13.5984, + "step": 12 + }, + { + "epoch": 2.0, + "grad_norm": 298.82183837890625, + "learning_rate": 1.8829475928589272e-05, + "loss": 7.8523, + "step": 13 + }, + { + "epoch": 2.090909090909091, + "grad_norm": 156.5718994140625, + "learning_rate": 1.848048096156426e-05, + "loss": 4.4487, + "step": 14 + }, + { + "epoch": 2.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_1024_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_1024_cosine_map@100": 0.67423207909377, + "eval_dim_1024_cosine_mrr@10": 0.5480725623582765, + "eval_dim_1024_cosine_ndcg@10": 0.5921167294151266, + "eval_dim_1024_cosine_precision@1": 0.5238095238095238, + "eval_dim_1024_cosine_precision@10": 0.4476190476190477, + "eval_dim_1024_cosine_precision@3": 0.5079365079365079, + "eval_dim_1024_cosine_precision@5": 0.47619047619047616, + "eval_dim_1024_cosine_recall@1": 0.08933150183150182, + "eval_dim_1024_cosine_recall@10": 0.5401404151404151, + "eval_dim_1024_cosine_recall@3": 0.24418498168498168, + "eval_dim_1024_cosine_recall@5": 0.33951465201465203, + "eval_dim_128_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_128_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@5": 
0.5714285714285714, + "eval_dim_128_cosine_map@100": 0.6489604480560528, + "eval_dim_128_cosine_mrr@10": 0.5401360544217686, + "eval_dim_128_cosine_ndcg@10": 0.5685354415901852, + "eval_dim_128_cosine_precision@1": 0.5238095238095238, + "eval_dim_128_cosine_precision@10": 0.4238095238095239, + "eval_dim_128_cosine_precision@3": 0.5238095238095238, + "eval_dim_128_cosine_precision@5": 0.5047619047619047, + "eval_dim_128_cosine_recall@1": 0.07345848595848595, + "eval_dim_128_cosine_recall@10": 0.5202991452991453, + "eval_dim_128_cosine_recall@3": 0.2203754578754579, + "eval_dim_128_cosine_recall@5": 0.34745115995116, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_256_cosine_map@100": 0.651530364911684, + "eval_dim_256_cosine_mrr@10": 0.5401360544217686, + "eval_dim_256_cosine_ndcg@10": 0.5708936958722651, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + "eval_dim_256_cosine_precision@5": 0.49523809523809514, + "eval_dim_256_cosine_recall@1": 0.0813949938949939, + "eval_dim_256_cosine_recall@10": 0.5202991452991453, + "eval_dim_256_cosine_recall@3": 0.22037545787545787, + "eval_dim_256_cosine_recall@5": 0.33951465201465203, + "eval_dim_512_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_512_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_512_cosine_map@100": 0.6265911712939339, + "eval_dim_512_cosine_mrr@10": 0.5020408163265305, + "eval_dim_512_cosine_ndcg@10": 0.5518338753600308, + "eval_dim_512_cosine_precision@1": 0.47619047619047616, + "eval_dim_512_cosine_precision@10": 0.4238095238095239, + "eval_dim_512_cosine_precision@3": 
0.4603174603174603, + "eval_dim_512_cosine_precision@5": 0.45714285714285713, + "eval_dim_512_cosine_recall@1": 0.07345848595848595, + "eval_dim_512_cosine_recall@10": 0.5202991452991453, + "eval_dim_512_cosine_recall@3": 0.19656593406593406, + "eval_dim_512_cosine_recall@5": 0.3077686202686203, + "eval_dim_64_cosine_accuracy@1": 0.42857142857142855, + "eval_dim_64_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_64_cosine_accuracy@3": 0.42857142857142855, + "eval_dim_64_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_64_cosine_map@100": 0.5888462989137369, + "eval_dim_64_cosine_mrr@10": 0.45963718820861665, + "eval_dim_64_cosine_ndcg@10": 0.51131642091388, + "eval_dim_64_cosine_precision@1": 0.42857142857142855, + "eval_dim_64_cosine_precision@10": 0.3999999999999999, + "eval_dim_64_cosine_precision@3": 0.42857142857142855, + "eval_dim_64_cosine_precision@5": 0.42857142857142855, + "eval_dim_64_cosine_recall@1": 0.053617216117216114, + "eval_dim_64_cosine_recall@10": 0.5004578754578755, + "eval_dim_64_cosine_recall@3": 0.16085164835164836, + "eval_dim_64_cosine_recall@5": 0.27205433455433453, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5714285714285714, + "eval_dim_768_cosine_map@100": 0.67423207909377, + "eval_dim_768_cosine_mrr@10": 0.5480725623582765, + "eval_dim_768_cosine_ndcg@10": 0.5921167294151266, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4476190476190477, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.47619047619047616, + "eval_dim_768_cosine_recall@1": 0.08933150183150182, + "eval_dim_768_cosine_recall@10": 0.5401404151404151, + "eval_dim_768_cosine_recall@3": 0.24418498168498168, + "eval_dim_768_cosine_recall@5": 0.33951465201465203, + "eval_runtime": 6.9723, + 
"eval_samples_per_second": 0.0, + "eval_sequential_score": 0.51131642091388, + "eval_steps_per_second": 0.0, + "step": 14 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 156.31887817382812, + "learning_rate": 1.8090169943749477e-05, + "loss": 8.5374, + "step": 15 + }, + { + "epoch": 2.3636363636363638, + "grad_norm": 205.52923583984375, + "learning_rate": 1.766044443118978e-05, + "loss": 9.6999, + "step": 16 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 254.7772216796875, + "learning_rate": 1.7193398003386514e-05, + "loss": 9.0121, + "step": 17 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 476.75164794921875, + "learning_rate": 1.6691306063588583e-05, + "loss": 13.5705, + "step": 18 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 376.09954833984375, + "learning_rate": 1.6156614753256583e-05, + "loss": 13.0195, + "step": 19 + }, + { + "epoch": 3.0, + "grad_norm": 387.24322509765625, + "learning_rate": 1.5591929034707468e-05, + "loss": 7.9821, + "step": 20 + }, + { + "epoch": 3.090909090909091, + "grad_norm": 210.42813110351562, + "learning_rate": 1.5000000000000002e-05, + "loss": 3.2842, + "step": 21 + }, + { + "epoch": 3.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_1024_cosine_map@100": 0.616370861587526, + "eval_dim_1024_cosine_mrr@10": 0.49092970521541945, + "eval_dim_1024_cosine_ndcg@10": 0.5159384546892658, + "eval_dim_1024_cosine_precision@1": 0.47619047619047616, + "eval_dim_1024_cosine_precision@10": 0.4, + "eval_dim_1024_cosine_precision@3": 0.4603174603174603, + "eval_dim_1024_cosine_precision@5": 0.419047619047619, + "eval_dim_1024_cosine_recall@1": 0.07822039072039072, + "eval_dim_1024_cosine_recall@10": 0.4449023199023199, + "eval_dim_1024_cosine_recall@3": 0.21085164835164832, + 
"eval_dim_1024_cosine_recall@5": 0.27602258852258854, + "eval_dim_128_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_128_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_128_cosine_map@100": 0.6278310491545506, + "eval_dim_128_cosine_mrr@10": 0.5306122448979591, + "eval_dim_128_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_128_cosine_precision@1": 0.5238095238095238, + "eval_dim_128_cosine_precision@10": 0.4238095238095239, + "eval_dim_128_cosine_precision@3": 0.5079365079365079, + "eval_dim_128_cosine_precision@5": 0.4666666666666666, + "eval_dim_128_cosine_recall@1": 0.0811965811965812, + "eval_dim_128_cosine_recall@10": 0.4548229548229548, + "eval_dim_128_cosine_recall@3": 0.21978021978021975, + "eval_dim_128_cosine_recall@5": 0.2909035409035409, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6295752260160459, + "eval_dim_256_cosine_mrr@10": 0.5306122448979591, + "eval_dim_256_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + "eval_dim_256_cosine_precision@5": 0.4666666666666666, + "eval_dim_256_cosine_recall@1": 0.0811965811965812, + "eval_dim_256_cosine_recall@10": 0.4548229548229548, + "eval_dim_256_cosine_recall@3": 0.21978021978021975, + "eval_dim_256_cosine_recall@5": 0.2909035409035409, + "eval_dim_512_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_512_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_512_cosine_map@100": 0.6298362122328386, + 
"eval_dim_512_cosine_mrr@10": 0.5306122448979591, + "eval_dim_512_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_512_cosine_precision@1": 0.5238095238095238, + "eval_dim_512_cosine_precision@10": 0.4238095238095239, + "eval_dim_512_cosine_precision@3": 0.5079365079365079, + "eval_dim_512_cosine_precision@5": 0.4666666666666666, + "eval_dim_512_cosine_recall@1": 0.0811965811965812, + "eval_dim_512_cosine_recall@10": 0.4548229548229548, + "eval_dim_512_cosine_recall@3": 0.21978021978021975, + "eval_dim_512_cosine_recall@5": 0.2909035409035409, + "eval_dim_64_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_64_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_64_cosine_map@100": 0.6058793096030948, + "eval_dim_64_cosine_mrr@10": 0.49251700680272104, + "eval_dim_64_cosine_ndcg@10": 0.5232746482532176, + "eval_dim_64_cosine_precision@1": 0.47619047619047616, + "eval_dim_64_cosine_precision@10": 0.4238095238095238, + "eval_dim_64_cosine_precision@3": 0.4603174603174603, + "eval_dim_64_cosine_precision@5": 0.4476190476190475, + "eval_dim_64_cosine_recall@1": 0.06631562881562882, + "eval_dim_64_cosine_recall@10": 0.46474358974358976, + "eval_dim_64_cosine_recall@3": 0.17513736263736263, + "eval_dim_64_cosine_recall@5": 0.2641178266178266, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_768_cosine_map@100": 0.6486454426450273, + "eval_dim_768_cosine_mrr@10": 0.538548752834467, + "eval_dim_768_cosine_ndcg@10": 0.5635575023083134, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4476190476190477, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + 
"eval_dim_768_cosine_recall@1": 0.0811965811965812, + "eval_dim_768_cosine_recall@10": 0.47466422466422464, + "eval_dim_768_cosine_recall@3": 0.21978021978021975, + "eval_dim_768_cosine_recall@5": 0.2909035409035409, + "eval_runtime": 7.0685, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.5232746482532176, + "eval_steps_per_second": 0.0, + "step": 21 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 214.07061767578125, + "learning_rate": 1.4383711467890776e-05, + "loss": 4.4446, + "step": 22 + }, + { + "epoch": 3.3636363636363638, + "grad_norm": 234.80874633789062, + "learning_rate": 1.3746065934159123e-05, + "loss": 5.7244, + "step": 23 + }, + { + "epoch": 3.5454545454545454, + "grad_norm": 337.2349853515625, + "learning_rate": 1.3090169943749475e-05, + "loss": 7.1394, + "step": 24 + }, + { + "epoch": 3.7272727272727275, + "grad_norm": 562.9892578125, + "learning_rate": 1.2419218955996677e-05, + "loss": 16.7583, + "step": 25 + }, + { + "epoch": 3.909090909090909, + "grad_norm": 1042.4781494140625, + "learning_rate": 1.1736481776669307e-05, + "loss": 11.3515, + "step": 26 + }, + { + "epoch": 4.0, + "grad_norm": 663.6896362304688, + "learning_rate": 1.1045284632676535e-05, + "loss": 8.813, + "step": 27 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 799.2377319335938, + "learning_rate": 1.0348994967025012e-05, + "loss": 6.9124, + "step": 28 + }, + { + "epoch": 4.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_1024_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_1024_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_1024_cosine_map@100": 0.6149109740313521, + "eval_dim_1024_cosine_mrr@10": 0.49092970521541945, + "eval_dim_1024_cosine_ndcg@10": 0.5159384546892658, + "eval_dim_1024_cosine_precision@1": 0.47619047619047616, + "eval_dim_1024_cosine_precision@10": 0.4, + "eval_dim_1024_cosine_precision@3": 0.4603174603174603, + 
"eval_dim_1024_cosine_precision@5": 0.419047619047619, + "eval_dim_1024_cosine_recall@1": 0.07822039072039072, + "eval_dim_1024_cosine_recall@10": 0.4449023199023199, + "eval_dim_1024_cosine_recall@3": 0.21085164835164832, + "eval_dim_1024_cosine_recall@5": 0.27602258852258854, + "eval_dim_128_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@10": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_128_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_128_cosine_map@100": 0.6025310247157158, + "eval_dim_128_cosine_mrr@10": 0.48299319727891155, + "eval_dim_128_cosine_ndcg@10": 0.49922091065744895, + "eval_dim_128_cosine_precision@1": 0.47619047619047616, + "eval_dim_128_cosine_precision@10": 0.3761904761904762, + "eval_dim_128_cosine_precision@3": 0.4603174603174603, + "eval_dim_128_cosine_precision@5": 0.419047619047619, + "eval_dim_128_cosine_recall@1": 0.07822039072039072, + "eval_dim_128_cosine_recall@10": 0.42506105006105005, + "eval_dim_128_cosine_recall@3": 0.21085164835164832, + "eval_dim_128_cosine_recall@5": 0.27602258852258854, + "eval_dim_256_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_256_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_256_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_256_cosine_map@100": 0.6411393184007045, + "eval_dim_256_cosine_mrr@10": 0.5306122448979591, + "eval_dim_256_cosine_ndcg@10": 0.5468399582764966, + "eval_dim_256_cosine_precision@1": 0.5238095238095238, + "eval_dim_256_cosine_precision@10": 0.4238095238095239, + "eval_dim_256_cosine_precision@3": 0.5079365079365079, + "eval_dim_256_cosine_precision@5": 0.4666666666666666, + "eval_dim_256_cosine_recall@1": 0.08005189255189255, + "eval_dim_256_cosine_recall@10": 0.4433760683760684, + "eval_dim_256_cosine_recall@3": 0.21634615384615385, + "eval_dim_256_cosine_recall@5": 0.28518009768009767, + "eval_dim_512_cosine_accuracy@1": 
0.47619047619047616, + "eval_dim_512_cosine_accuracy@10": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_512_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_512_cosine_map@100": 0.5978106306698094, + "eval_dim_512_cosine_mrr@10": 0.48299319727891155, + "eval_dim_512_cosine_ndcg@10": 0.49922091065744895, + "eval_dim_512_cosine_precision@1": 0.47619047619047616, + "eval_dim_512_cosine_precision@10": 0.3761904761904762, + "eval_dim_512_cosine_precision@3": 0.4603174603174603, + "eval_dim_512_cosine_precision@5": 0.419047619047619, + "eval_dim_512_cosine_recall@1": 0.07822039072039072, + "eval_dim_512_cosine_recall@10": 0.42506105006105005, + "eval_dim_512_cosine_recall@3": 0.21085164835164832, + "eval_dim_512_cosine_recall@5": 0.27602258852258854, + "eval_dim_64_cosine_accuracy@1": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@10": 0.5238095238095238, + "eval_dim_64_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_64_cosine_map@100": 0.5960251374266525, + "eval_dim_64_cosine_mrr@10": 0.48299319727891155, + "eval_dim_64_cosine_ndcg@10": 0.49922091065744895, + "eval_dim_64_cosine_precision@1": 0.47619047619047616, + "eval_dim_64_cosine_precision@10": 0.3761904761904762, + "eval_dim_64_cosine_precision@3": 0.4603174603174603, + "eval_dim_64_cosine_precision@5": 0.419047619047619, + "eval_dim_64_cosine_recall@1": 0.07822039072039072, + "eval_dim_64_cosine_recall@10": 0.42506105006105005, + "eval_dim_64_cosine_recall@3": 0.21085164835164832, + "eval_dim_64_cosine_recall@5": 0.27602258852258854, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_768_cosine_map@100": 0.6351788392177582, + "eval_dim_768_cosine_mrr@10": 0.5306122448979591, + "eval_dim_768_cosine_ndcg@10": 
0.5468399582764966, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4238095238095238, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.08218864468864469, + "eval_dim_768_cosine_recall@10": 0.46474358974358976, + "eval_dim_768_cosine_recall@3": 0.22275641025641024, + "eval_dim_768_cosine_recall@5": 0.2958638583638584, + "eval_runtime": 7.0378, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.49922091065744895, + "eval_steps_per_second": 0.0, + "step": 28 + }, + { + "epoch": 4.181818181818182, + "grad_norm": 278.7558898925781, + "learning_rate": 9.651005032974994e-06, + "loss": 6.1814, + "step": 29 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 312.5499572753906, + "learning_rate": 8.954715367323468e-06, + "loss": 7.1606, + "step": 30 + }, + { + "epoch": 4.545454545454545, + "grad_norm": 164.37811279296875, + "learning_rate": 8.263518223330698e-06, + "loss": 5.0888, + "step": 31 + }, + { + "epoch": 4.7272727272727275, + "grad_norm": 288.98602294921875, + "learning_rate": 7.580781044003324e-06, + "loss": 5.0684, + "step": 32 + }, + { + "epoch": 4.909090909090909, + "grad_norm": 384.2759704589844, + "learning_rate": 6.909830056250527e-06, + "loss": 6.7382, + "step": 33 + }, + { + "epoch": 5.0, + "grad_norm": 906.340576171875, + "learning_rate": 6.25393406584088e-06, + "loss": 7.0497, + "step": 34 + }, + { + "epoch": 5.090909090909091, + "grad_norm": 403.593017578125, + "learning_rate": 5.616288532109225e-06, + "loss": 6.582, + "step": 35 + }, + { + "epoch": 5.090909090909091, + "eval_dim_1024_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_1024_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_1024_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_1024_cosine_map@100": 0.6534286699882501, + "eval_dim_1024_cosine_mrr@10": 
0.5374149659863945, + "eval_dim_1024_cosine_ndcg@10": 0.5598242514045669, + "eval_dim_1024_cosine_precision@1": 0.5238095238095238, + "eval_dim_1024_cosine_precision@10": 0.4428571428571429, + "eval_dim_1024_cosine_precision@3": 0.5079365079365079, + "eval_dim_1024_cosine_precision@5": 0.4666666666666666, + "eval_dim_1024_cosine_recall@1": 0.08218864468864469, + "eval_dim_1024_cosine_recall@10": 0.4766483516483517, + "eval_dim_1024_cosine_recall@3": 0.22275641025641024, + "eval_dim_1024_cosine_recall@5": 0.2958638583638584, + "eval_dim_128_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_128_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_128_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_128_cosine_map@100": 0.6478872365910466, + "eval_dim_128_cosine_mrr@10": 0.5374149659863945, + "eval_dim_128_cosine_ndcg@10": 0.5598242514045669, + "eval_dim_128_cosine_precision@1": 0.5238095238095238, + "eval_dim_128_cosine_precision@10": 0.4428571428571429, + "eval_dim_128_cosine_precision@3": 0.5079365079365079, + "eval_dim_128_cosine_precision@5": 0.4666666666666666, + "eval_dim_128_cosine_recall@1": 0.0811965811965812, + "eval_dim_128_cosine_recall@10": 0.46672771672771673, + "eval_dim_128_cosine_recall@3": 0.21978021978021975, + "eval_dim_128_cosine_recall@5": 0.2909035409035409, + "eval_dim_256_cosine_accuracy@1": 0.6190476190476191, + "eval_dim_256_cosine_accuracy@10": 0.6666666666666666, + "eval_dim_256_cosine_accuracy@3": 0.6190476190476191, + "eval_dim_256_cosine_accuracy@5": 0.6190476190476191, + "eval_dim_256_cosine_map@100": 0.6975707466438095, + "eval_dim_256_cosine_mrr@10": 0.6258503401360545, + "eval_dim_256_cosine_ndcg@10": 0.6420780535145918, + "eval_dim_256_cosine_precision@1": 0.6190476190476191, + "eval_dim_256_cosine_precision@10": 0.5190476190476192, + "eval_dim_256_cosine_precision@3": 0.6031746031746031, + "eval_dim_256_cosine_precision@5": 0.5619047619047619, + 
"eval_dim_256_cosine_recall@1": 0.08600427350427349, + "eval_dim_256_cosine_recall@10": 0.5028998778998779, + "eval_dim_256_cosine_recall@3": 0.2342032967032967, + "eval_dim_256_cosine_recall@5": 0.31494200244200243, + "eval_dim_512_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@10": 0.6190476190476191, + "eval_dim_512_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_512_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_512_cosine_map@100": 0.6492208787775379, + "eval_dim_512_cosine_mrr@10": 0.5374149659863945, + "eval_dim_512_cosine_ndcg@10": 0.5598242514045669, + "eval_dim_512_cosine_precision@1": 0.5238095238095238, + "eval_dim_512_cosine_precision@10": 0.4428571428571429, + "eval_dim_512_cosine_precision@3": 0.5079365079365079, + "eval_dim_512_cosine_precision@5": 0.4666666666666666, + "eval_dim_512_cosine_recall@1": 0.08218864468864469, + "eval_dim_512_cosine_recall@10": 0.4766483516483517, + "eval_dim_512_cosine_recall@3": 0.22275641025641024, + "eval_dim_512_cosine_recall@5": 0.2958638583638584, + "eval_dim_64_cosine_accuracy@1": 0.42857142857142855, + "eval_dim_64_cosine_accuracy@10": 0.5714285714285714, + "eval_dim_64_cosine_accuracy@3": 0.47619047619047616, + "eval_dim_64_cosine_accuracy@5": 0.47619047619047616, + "eval_dim_64_cosine_map@100": 0.5872011588310861, + "eval_dim_64_cosine_mrr@10": 0.45804988662131524, + "eval_dim_64_cosine_ndcg@10": 0.4917595713548203, + "eval_dim_64_cosine_precision@1": 0.42857142857142855, + "eval_dim_64_cosine_precision@10": 0.3952380952380953, + "eval_dim_64_cosine_precision@3": 0.4444444444444445, + "eval_dim_64_cosine_precision@5": 0.419047619047619, + "eval_dim_64_cosine_recall@1": 0.054410866910866905, + "eval_dim_64_cosine_recall@10": 0.43696581196581197, + "eval_dim_64_cosine_recall@3": 0.18704212454212454, + "eval_dim_64_cosine_recall@5": 0.27602258852258854, + "eval_dim_768_cosine_accuracy@1": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@10": 0.6190476190476191, + 
"eval_dim_768_cosine_accuracy@3": 0.5238095238095238, + "eval_dim_768_cosine_accuracy@5": 0.5238095238095238, + "eval_dim_768_cosine_map@100": 0.653075337994289, + "eval_dim_768_cosine_mrr@10": 0.5374149659863945, + "eval_dim_768_cosine_ndcg@10": 0.5598242514045669, + "eval_dim_768_cosine_precision@1": 0.5238095238095238, + "eval_dim_768_cosine_precision@10": 0.4428571428571429, + "eval_dim_768_cosine_precision@3": 0.5079365079365079, + "eval_dim_768_cosine_precision@5": 0.4666666666666666, + "eval_dim_768_cosine_recall@1": 0.08218864468864469, + "eval_dim_768_cosine_recall@10": 0.4766483516483517, + "eval_dim_768_cosine_recall@3": 0.22275641025641024, + "eval_dim_768_cosine_recall@5": 0.2958638583638584, + "eval_runtime": 7.0134, + "eval_samples_per_second": 0.0, + "eval_sequential_score": 0.4917595713548203, + "eval_steps_per_second": 0.0, + "step": 35 + } + ], + "logging_steps": 1, + "max_steps": 50, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 3 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-35/training_args.bin b/checkpoint-35/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..8da3332899cbb380f0ea340bef3e2069d6cf1db2 --- /dev/null +++ b/checkpoint-35/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14debc6c3f8c5edee5db8d97a3a78a007d313a13e4b96f43026da543b59bef8c +size 6097 diff --git a/config_sentence_transformers.json b/config_sentence_transformers.json index 
c218704c24acf1071c8da1f913b30934d1945cf7..a0390e7f92afe1fdc0e00e1712f50de56290fde6 100644 --- a/config_sentence_transformers.json +++ b/config_sentence_transformers.json @@ -1,7 +1,7 @@ { "model_type": "SentenceTransformer", "__version__": { - "sentence_transformers": "5.1.0", + "sentence_transformers": "5.1.1", "transformers": "4.51.3", "pytorch": "2.8.0+cu126" }, diff --git a/eval/Information-Retrieval_evaluation_dim_1024_results.csv b/eval/Information-Retrieval_evaluation_dim_1024_results.csv index 9b698a9a48038a9eaa521e9a1ef1eb4ead176218..46b85290e4b05f59342b6e53415772f7b0f319f9 100644 --- a/eval/Information-Retrieval_evaluation_dim_1024_results.csv +++ b/eval/Information-Retrieval_evaluation_dim_1024_results.csv @@ -1,6 +1,11 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100 -1.0,13,0.42509603072983354,0.43085787451984636,0.46030729833546735,0.49295774647887325,0.42509603072983354,0.04813624827530403,0.42402902262057185,0.1404092551990217,0.4133162612035851,0.21147403835662126,0.376056338028169,0.3150476790176975,0.4369535394183281,0.4493804868009024,0.508986346237021 -2.0,26,0.3854033290653009,0.3886043533930858,0.41101152368758004,0.45454545454545453,0.3854033290653009,0.0451686782205082,0.3839095177123346,0.13200270380544088,0.3713188220230474,0.19735476247586825,0.3357874519846351,0.2918225395020212,0.39624870434729537,0.4065329035406967,0.47259820099791205 -3.0,39,0.4180537772087068,0.42189500640204863,0.4500640204865557,0.49039692701664533,0.4180537772087068,0.04734422767263886,0.4163465642338881,0.138064233626932,0.4043533930857874,0.2069980430010945,0.36869398207426374,0.308531274625124,0.42985717334308843,0.44178501209869797,0.506794523350519 
-4.0,52,0.4046094750320102,0.4090909090909091,0.43918053777208704,0.47823303457106275,0.4046094750320102,0.04660606778759063,0.40311566367904395,0.13592596883673455,0.39180537772087065,0.20379253880710943,0.3565941101152369,0.3014663479400921,0.4168417068064951,0.42913551824501384,0.490599623630225 -5.0,65,0.3994878361075544,0.40973111395646605,0.44238156209987195,0.4814340588988476,0.3994878361075544,0.046427253250942845,0.3994878361075544,0.1357642561296911,0.3897567221510884,0.20380194680388578,0.3562099871959027,0.30156152834977973,0.41413607910086736,0.42824330702763147,0.4960353530991054 +1.0909090909090908,7,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6666666666666666,0.5238095238095238,0.09726800976800977,0.5079365079365079,0.2679945054945055,0.4666666666666666,0.36729242979242976,0.4238095238095239,0.5202991452991453,0.5492063492063491,0.5858038389799708,0.6850426678198013 +2.090909090909091,14,0.42857142857142855,0.42857142857142855,0.47619047619047616,0.6190476190476191,0.42857142857142855,0.07583943833943833,0.4126984126984127,0.2037087912087912,0.38095238095238093,0.27205433455433453,0.4,0.47268009768009767,0.46077097505668924,0.5135961782088483,0.6061506408563693 +3.090909090909091,21,0.5714285714285714,0.5714285714285714,0.5714285714285714,0.6666666666666666,0.5714285714285714,0.08417277167277167,0.5555555555555556,0.2287087912087912,0.5142857142857142,0.3057844932844933,0.49523809523809537,0.5044261294261294,0.5861678004535147,0.611176549927361,0.6886543052381489 +4.090909090909091,28,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.3952380952380953,0.43696581196581197,0.48979591836734687,0.5122052037855193,0.6175289391668508 
+5.090909090909091,35,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.3952380952380953,0.43696581196581197,0.48979591836734687,0.5122052037855193,0.6084856212741883 +1.0909090909090908,7,0.5714285714285714,0.5714285714285714,0.6190476190476191,0.6666666666666666,0.5714285714285714,0.10123626373626372,0.5555555555555556,0.2798992673992674,0.5142857142857142,0.3871336996336997,0.4476190476190477,0.5401404151404151,0.5888888888888888,0.6167053425672016,0.6946498352795571 +2.090909090909091,14,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6666666666666666,0.5238095238095238,0.08933150183150182,0.5079365079365079,0.24418498168498168,0.47619047619047616,0.33951465201465203,0.4476190476190477,0.5401404151404151,0.5480725623582765,0.5921167294151266,0.67423207909377 +3.090909090909091,21,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.4,0.4449023199023199,0.49092970521541945,0.5159384546892658,0.616370861587526 +4.090909090909091,28,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.4,0.4449023199023199,0.49092970521541945,0.5159384546892658,0.6149109740313521 +5.090909090909091,35,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.6190476190476191,0.5238095238095238,0.08218864468864469,0.5079365079365079,0.22275641025641024,0.4666666666666666,0.2958638583638584,0.4428571428571429,0.4766483516483517,0.5374149659863945,0.5598242514045669,0.6534286699882501 diff --git a/eval/Information-Retrieval_evaluation_dim_128_results.csv b/eval/Information-Retrieval_evaluation_dim_128_results.csv index 
4c9dced263e4f8279d7d4c9869a5f74cb32c3d24..6d00a1037c7b891170761ca621f3ff9d0679890d 100644 --- a/eval/Information-Retrieval_evaluation_dim_128_results.csv +++ b/eval/Information-Retrieval_evaluation_dim_128_results.csv @@ -1,6 +1,11 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100 -1.0,13,0.31370038412291934,0.31946222791293216,0.34571062740076824,0.38156209987195905,0.31370038412291934,0.037933385936292925,0.31284677763551,0.11100168654049715,0.30422535211267604,0.1670926413559068,0.2764404609475032,0.24798465115464788,0.32517427799117904,0.33618283481296385,0.3991669558616212 -2.0,26,0.31562099871959026,0.3207426376440461,0.352112676056338,0.39180537772087065,0.31562099871959026,0.03888088094516579,0.31455399061032857,0.11341113256173359,0.30640204865556975,0.17046484922169536,0.280729833546735,0.2550464126190524,0.32839105339105285,0.34177784905202935,0.40471008477532144 -3.0,39,0.34699103713188223,0.35147247119078107,0.37900128040973113,0.4212548015364917,0.34699103713188223,0.04217848526747235,0.34571062740076824,0.12288848071178002,0.3352112676056338,0.18344012521240954,0.3042893725992318,0.2706183968947745,0.35916305916305874,0.37139192897263235,0.4338211299784659 -4.0,52,0.34314980793854033,0.34763124199743917,0.37708066581306016,0.4148527528809219,0.34314980793854033,0.040652627677675406,0.3418693982074264,0.1184519992454523,0.3322663252240717,0.1774839552040709,0.302752880921895,0.26330336421247125,0.35513180090644836,0.36765573601526275,0.4276696379169563 
-5.0,65,0.34699103713188223,0.3495518565941101,0.37900128040973113,0.41613316261203587,0.34699103713188223,0.04023580723646711,0.34528382415706355,0.11713599313797275,0.33533930857874517,0.17553870970060761,0.30550576184379,0.2604238007608957,0.35818293599983697,0.36965214083548276,0.4291315309956839 +1.0909090909090908,7,0.42857142857142855,0.42857142857142855,0.47619047619047616,0.5714285714285714,0.42857142857142855,0.08353174603174603,0.4126984126984127,0.22678571428571428,0.38095238095238093,0.310515873015873,0.35238095238095235,0.48214285714285715,0.4528344671201814,0.4968786341770315,0.5805720802319178 +2.090909090909091,14,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6666666666666666,0.5238095238095238,0.07460317460317459,0.5238095238095238,0.22380952380952382,0.5047619047619047,0.3531746031746032,0.4476190476190477,0.540903540903541,0.5480725623582766,0.585252985622002,0.6586958248111269 +3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.08218864468864469,0.5079365079365079,0.22275641025641024,0.4666666666666666,0.2958638583638584,0.4238095238095238,0.46474358974358976,0.5306122448979591,0.5468399582764966,0.6472797651120922 +4.090909090909091,28,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4238095238095239,0.4548229548229548,0.5306122448979591,0.5468399582764966,0.6471946786146459 +5.090909090909091,35,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4238095238095239,0.4548229548229548,0.5306122448979591,0.5468399582764966,0.6401872980411513 
+1.0909090909090908,7,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.5714285714285714,0.47619047619047616,0.08536324786324785,0.4603174603174603,0.23228021978021982,0.42857142857142855,0.31967338217338215,0.3761904761904762,0.4806166056166057,0.49251700680272104,0.5277801377642622,0.5954946117864774 +2.090909090909091,14,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6190476190476191,0.5238095238095238,0.07345848595848595,0.5238095238095238,0.2203754578754579,0.5047619047619047,0.34745115995116,0.4238095238095239,0.5202991452991453,0.5401360544217686,0.5685354415901852,0.6489604480560528 +3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4238095238095239,0.4548229548229548,0.5306122448979591,0.5468399582764966,0.6278310491545506 +4.090909090909091,28,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.3761904761904762,0.42506105006105005,0.48299319727891155,0.49922091065744895,0.6025310247157158 +5.090909090909091,35,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.6190476190476191,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4428571428571429,0.46672771672771673,0.5374149659863945,0.5598242514045669,0.6478872365910466 diff --git a/eval/Information-Retrieval_evaluation_dim_256_results.csv b/eval/Information-Retrieval_evaluation_dim_256_results.csv index 69b69eb15118598f0cabc9260bdd026daf0f35cb..8593757c08f737b885815625be23888600fd0cad 100644 --- a/eval/Information-Retrieval_evaluation_dim_256_results.csv +++ b/eval/Information-Retrieval_evaluation_dim_256_results.csv @@ -1,6 +1,11 @@ 
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100 -1.0,13,0.36427656850192064,0.36939820742637647,0.4090909090909091,0.44814340588988477,0.36427656850192064,0.04277277943657814,0.36363636363636365,0.1253991943934714,0.3560819462227913,0.18947388504512186,0.3289372599231754,0.28617844199333936,0.37879397597707415,0.3947006636735062,0.45841270037343096 -2.0,26,0.3418693982074264,0.34763124199743917,0.38092189500640206,0.4238156209987196,0.3418693982074264,0.041410929155814194,0.34165599658557405,0.1212642412965687,0.3322663252240717,0.18173918651563936,0.30441741357234314,0.2705196189189818,0.35596482938032165,0.36923043791328775,0.43770217754051305 -3.0,39,0.38348271446862997,0.3886043533930858,0.4206145966709347,0.4551856594110115,0.38348271446862997,0.04466034705404223,0.382202304737516,0.1301795023253766,0.3718309859154929,0.19500340584044382,0.33886043533930854,0.2892016292717267,0.3957850639188662,0.4087462230477571,0.4709582085612654 -4.0,52,0.3738796414852753,0.37772087067861715,0.40717029449423814,0.44558258642765686,0.3738796414852753,0.043086085492826574,0.37238583013230897,0.125747799799009,0.3615877080665813,0.18837022600852496,0.3297695262483995,0.2792649180924836,0.38572571794402755,0.3980442434686935,0.45855798848430374 -5.0,65,0.3764404609475032,0.382202304737516,0.41037131882202305,0.4532650448143406,0.3764404609475032,0.04345597060609991,0.3758002560819462,0.12739095919345977,0.365044814340589,0.19108724610052447,0.3325864276568502,0.28270389457679657,0.3891762189297396,0.40161260018012396,0.46504361814337597 
+1.0909090909090908,7,0.42857142857142855,0.42857142857142855,0.47619047619047616,0.5714285714285714,0.42857142857142855,0.09146825396825396,0.4126984126984127,0.2505952380952381,0.3714285714285714,0.3382936507936508,0.32857142857142857,0.4623015873015873,0.453968253968254,0.4905657437418755,0.5897860301719683 +2.090909090909091,14,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6666666666666666,0.5238095238095238,0.08040293040293041,0.5079365079365079,0.21739926739926738,0.49523809523809514,0.33455433455433453,0.4476190476190477,0.5302197802197802,0.5480725623582765,0.5876112399040819,0.6534880221374347 +3.090909090909091,21,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.5714285714285714,0.47619047619047616,0.06532356532356533,0.4603174603174603,0.17216117216117216,0.4476190476190475,0.2591575091575092,0.4238095238095239,0.4548229548229548,0.49251700680272104,0.5232746482532176,0.6185831497988614 +4.090909090909091,28,0.6190476190476191,0.6190476190476191,0.6190476190476191,0.7142857142857143,0.6190476190476191,0.08485958485958485,0.6031746031746031,0.23076923076923075,0.5619047619047619,0.30921855921855923,0.5380952380952382,0.5033577533577533,0.6326530612244897,0.6550623466426622,0.7079303137501273 +5.090909090909091,35,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.6190476190476191,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4428571428571429,0.46672771672771673,0.5374149659863946,0.5598242514045669,0.6375309490216585 +1.0909090909090908,7,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.5714285714285714,0.47619047619047616,0.09543650793650793,0.4603174603174603,0.2625,0.419047619047619,0.35813492063492064,0.3476190476190476,0.47817460317460325,0.4925170068027211,0.5177339964253599,0.6218563539505393 
+2.090909090909091,14,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6190476190476191,0.5238095238095238,0.0813949938949939,0.5079365079365079,0.22037545787545787,0.49523809523809514,0.33951465201465203,0.4238095238095239,0.5202991452991453,0.5401360544217686,0.5708936958722651,0.651530364911684 +3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4238095238095239,0.4548229548229548,0.5306122448979591,0.5468399582764966,0.6295752260160459 +4.090909090909091,28,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.08005189255189255,0.5079365079365079,0.21634615384615385,0.4666666666666666,0.28518009768009767,0.4238095238095239,0.4433760683760684,0.5306122448979591,0.5468399582764966,0.6411393184007045 +5.090909090909091,35,0.6190476190476191,0.6190476190476191,0.6190476190476191,0.6666666666666666,0.6190476190476191,0.08600427350427349,0.6031746031746031,0.2342032967032967,0.5619047619047619,0.31494200244200243,0.5190476190476192,0.5028998778998779,0.6258503401360545,0.6420780535145918,0.6975707466438095 diff --git a/eval/Information-Retrieval_evaluation_dim_512_results.csv b/eval/Information-Retrieval_evaluation_dim_512_results.csv index a6460efbd002a0a16d6457899d057035b574d1d8..66114df32b9c3f79f74ad838c97ac8d1a65614e2 100644 --- a/eval/Information-Retrieval_evaluation_dim_512_results.csv +++ b/eval/Information-Retrieval_evaluation_dim_512_results.csv @@ -1,6 +1,11 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100 
-1.0,13,0.4084507042253521,0.41357234314980795,0.4519846350832266,0.4871959026888604,0.4084507042253521,0.04628035514721453,0.40738369611609043,0.13522898183596205,0.39846350832266325,0.20394662187726964,0.36606914212548014,0.30419701916496145,0.42213127248338495,0.43644305450989757,0.495554245202425 -2.0,26,0.37708066581306016,0.38092189500640206,0.4084507042253521,0.4532650448143406,0.37708066581306016,0.04457309853297402,0.37537345283824153,0.13007205872054822,0.36402048655569774,0.19449520589871025,0.33156209987195906,0.2883968018428671,0.3893670101416575,0.40127522003513455,0.4678017780580059 -3.0,39,0.4065300896286812,0.41229193341869397,0.44494238156209986,0.4814340588988476,0.4065300896286812,0.047372341735862133,0.4054630815194195,0.1383106496582531,0.3947503201024327,0.20757258542171747,0.3596030729833547,0.3071623578952262,0.41950236774180394,0.4329095460408428,0.49555759572196095 -4.0,52,0.39884763124199746,0.4052496798975672,0.44558258642765686,0.47823303457106275,0.39884763124199746,0.04570727180608771,0.3975672215108835,0.13329368345632836,0.3888604353393086,0.2006035854063678,0.35793854033290645,0.2993652309972795,0.41307745462675005,0.42824151108259556,0.4876319198894288 -5.0,65,0.39436619718309857,0.4033290653008963,0.4327784891165173,0.471190781049936,0.39436619718309857,0.046001271870766657,0.39436619718309857,0.13434589294029278,0.38373879641485276,0.2006372923557733,0.348719590268886,0.29407016086576515,0.4078435258012718,0.4209567413561994,0.48375387985213597 +1.0909090909090908,7,0.5714285714285714,0.5714285714285714,0.6190476190476191,0.6190476190476191,0.5714285714285714,0.10123626373626372,0.5555555555555556,0.2798992673992674,0.5142857142857142,0.3871336996336997,0.4238095238095239,0.5202991452991453,0.5809523809523809,0.5999877985353848,0.6968542208140212 
+2.090909090909091,14,0.42857142857142855,0.42857142857142855,0.5238095238095238,0.5714285714285714,0.42857142857142855,0.06393467643467643,0.4126984126984127,0.1679945054945055,0.4095238095238095,0.26014957264957267,0.4000000000000001,0.47268009768009767,0.4544217687074829,0.5042148277409831,0.5729749151136284 +3.090909090909091,21,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.5714285714285714,0.47619047619047616,0.06646825396825397,0.4603174603174603,0.17559523809523808,0.4476190476190475,0.2648809523809524,0.4238095238095239,0.46626984126984133,0.49251700680272104,0.5232746482532176,0.5996923579773082 +4.090909090909091,28,0.42857142857142855,0.42857142857142855,0.47619047619047616,0.5714285714285714,0.42857142857142855,0.06234737484737485,0.4126984126984127,0.16323260073260074,0.3999999999999999,0.2442765567765568,0.3952380952380953,0.43696581196581197,0.45170068027210875,0.48863989376224026,0.5842720394586338 +5.090909090909091,35,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.6190476190476191,0.47619047619047616,0.06532356532356533,0.4603174603174603,0.17216117216117216,0.4476190476190475,0.2591575091575092,0.4428571428571429,0.46672771672771673,0.4993197278911565,0.5362589413812879,0.6048904435893294 +1.0909090909090908,7,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5714285714285714,0.5238095238095238,0.0994047619047619,0.5079365079365079,0.2744047619047619,0.4666666666666666,0.37797619047619047,0.3761904761904762,0.5019841269841271,0.5333333333333333,0.5523687509163372,0.6608463504287858 +2.090909090909091,14,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.6190476190476191,0.47619047619047616,0.07345848595848595,0.4603174603174603,0.19656593406593406,0.45714285714285713,0.3077686202686203,0.4238095238095239,0.5202991452991453,0.5020408163265305,0.5518338753600308,0.6265911712939339 
+3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4238095238095239,0.4548229548229548,0.5306122448979591,0.5468399582764966,0.6298362122328386 +4.090909090909091,28,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.3761904761904762,0.42506105006105005,0.48299319727891155,0.49922091065744895,0.5978106306698094 +5.090909090909091,35,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.6190476190476191,0.5238095238095238,0.08218864468864469,0.5079365079365079,0.22275641025641024,0.4666666666666666,0.2958638583638584,0.4428571428571429,0.4766483516483517,0.5374149659863945,0.5598242514045669,0.6492208787775379 diff --git a/eval/Information-Retrieval_evaluation_dim_64_results.csv b/eval/Information-Retrieval_evaluation_dim_64_results.csv index 9dfd1c85cbc61ceff92423e2accc3ced0ab49fcf..6eab7aea196ae8813c8dc3bdbe3e22bea3d4723e 100644 --- a/eval/Information-Retrieval_evaluation_dim_64_results.csv +++ b/eval/Information-Retrieval_evaluation_dim_64_results.csv @@ -1,6 +1,11 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100 -1.0,13,0.24199743918053776,0.24775928297055058,0.26952624839948786,0.29641485275288093,0.24199743918053776,0.029110868794154622,0.24199743918053776,0.08524734139651896,0.23649167733674775,0.12921904680249943,0.21568501920614594,0.19521971714475267,0.25159771761884414,0.2620553065689597,0.31803601064541553 
-2.0,26,0.26696542893725994,0.27208706786171577,0.293213828425096,0.33098591549295775,0.26696542893725994,0.031692344067516816,0.26717883055911223,0.09327455064654774,0.2596670934699104,0.14131510912407144,0.237708066581306,0.21473974122936995,0.2776957197731842,0.2881905192612941,0.34719730722747555 -3.0,39,0.3207426376440461,0.322663252240717,0.3405889884763124,0.3725992317541613,0.3207426376440461,0.036318765090758674,0.3188220230473751,0.10562189493083342,0.3088348271446863,0.15867125525021739,0.2797055057618438,0.23840960718091392,0.32884732638253733,0.3371953828075091,0.3933467041096336 -4.0,52,0.3079385403329065,0.31241997439180536,0.33290653008962867,0.36555697823303457,0.3079385403329065,0.03504677697523653,0.3070849338454972,0.10211269541567973,0.29820742637644043,0.15297976910096123,0.27215108834827145,0.23026653514933906,0.31748394406845065,0.3275369456141822,0.38184314430829974 -5.0,65,0.293854033290653,0.30153649167733676,0.32714468629961585,0.3591549295774648,0.293854033290653,0.03212717718240556,0.29449423815621,0.0944529143897263,0.288348271446863,0.14311801944731944,0.2681177976952625,0.221706864386852,0.30537518037517997,0.3185372635267884,0.3744361264744976 +1.0909090909090908,7,0.42857142857142855,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.42857142857142855,0.07658730158730158,0.44444444444444436,0.25357142857142856,0.4095238095238095,0.375,0.30476190476190473,0.44742063492063494,0.4523809523809524,0.47246238779159394,0.5995256189078193 +2.090909090909091,14,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.6666666666666666,0.47619047619047616,0.05758547008547008,0.47619047619047616,0.17275641025641025,0.48571428571428565,0.3157051282051282,0.4476190476190477,0.5401404151404151,0.5099773242630384,0.561687675598723,0.6263427794867359 
+3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6190476190476191,0.5238095238095238,0.0682997557997558,0.5079365079365079,0.18108974358974358,0.49523809523809514,0.27403846153846156,0.47142857142857153,0.4845848595848596,0.5401360544217686,0.5708936958722651,0.6236370795878201 +4.090909090909091,28,0.5714285714285714,0.5714285714285714,0.5714285714285714,0.6190476190476191,0.5714285714285714,0.08417277167277167,0.5555555555555556,0.2287087912087912,0.5142857142857142,0.3057844932844933,0.4523809523809524,0.46871184371184377,0.5782312925170068,0.5814747127674738,0.6503374686019997 +5.090909090909091,35,0.5714285714285714,0.6190476190476191,0.6190476190476191,0.6666666666666666,0.5714285714285714,0.06433150183150182,0.5873015873015872,0.21680402930402928,0.5619047619047619,0.3256257631257632,0.5190476190476192,0.5242673992673993,0.5941043083900227,0.6216324210838928,0.6703100105301816 +1.0909090909090908,7,0.3333333333333333,0.38095238095238093,0.38095238095238093,0.5714285714285714,0.3333333333333333,0.07063492063492063,0.3492063492063492,0.2357142857142857,0.3142857142857143,0.34523809523809523,0.2619047619047619,0.42002442002442003,0.3764172335600907,0.41244392103396355,0.5579595177809107 +2.090909090909091,14,0.42857142857142855,0.42857142857142855,0.47619047619047616,0.6190476190476191,0.42857142857142855,0.053617216117216114,0.42857142857142855,0.16085164835164836,0.42857142857142855,0.27205433455433453,0.3999999999999999,0.5004578754578755,0.45963718820861665,0.51131642091388,0.5888462989137369 +3.090909090909091,21,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.5714285714285714,0.47619047619047616,0.06631562881562882,0.4603174603174603,0.17513736263736263,0.4476190476190475,0.2641178266178266,0.4238095238095238,0.46474358974358976,0.49251700680272104,0.5232746482532176,0.6058793096030948 
+4.090909090909091,28,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.3761904761904762,0.42506105006105005,0.48299319727891155,0.49922091065744895,0.5960251374266525 +5.090909090909091,35,0.42857142857142855,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.42857142857142855,0.054410866910866905,0.4444444444444445,0.18704212454212454,0.419047619047619,0.27602258852258854,0.3952380952380953,0.43696581196581197,0.45804988662131524,0.4917595713548203,0.5872011588310861 diff --git a/eval/Information-Retrieval_evaluation_dim_768_results.csv b/eval/Information-Retrieval_evaluation_dim_768_results.csv index a316e13194cdfefb11a8dada6872c37ed494e1b8..070df61493757ea8e57d11defd736f1c5cd4a1bf 100644 --- a/eval/Information-Retrieval_evaluation_dim_768_results.csv +++ b/eval/Information-Retrieval_evaluation_dim_768_results.csv @@ -1,6 +1,11 @@ epoch,steps,cosine-Accuracy@1,cosine-Accuracy@3,cosine-Accuracy@5,cosine-Accuracy@10,cosine-Precision@1,cosine-Recall@1,cosine-Precision@3,cosine-Recall@3,cosine-Precision@5,cosine-Recall@5,cosine-Precision@10,cosine-Recall@10,cosine-MRR@10,cosine-NDCG@10,cosine-MAP@100 -1.0,13,0.4193341869398207,0.4238156209987196,0.4551856594110115,0.4910371318822023,0.4193341869398207,0.04699592881851732,0.4180537772087068,0.13721565163348118,0.4076824583866837,0.20710427114944024,0.37272727272727274,0.3107289399017082,0.4315339003719285,0.44456410503401034,0.5037778776293926 -2.0,26,0.37451984635083224,0.37900128040973113,0.40973111395646605,0.4539052496798976,0.37451984635083224,0.04413921317349132,0.3734528382415706,0.12896663310051748,0.3632522407170295,0.19376203165061448,0.3318181818181818,0.28955317519546525,0.38746671950897227,0.4004027303245026,0.4672061011440217 
-3.0,39,0.41037131882202305,0.4142125480153649,0.44430217669654287,0.4833546734955186,0.41037131882202305,0.047186779325657804,0.4086641058472044,0.1375709294981283,0.3969270166453265,0.20603724780478358,0.36165172855313704,0.30540044861465576,0.4224889742495374,0.4349862241563486,0.500478306774528 -4.0,52,0.3994878361075544,0.4046094750320102,0.4359795134443022,0.4737516005121639,0.3994878361075544,0.046132270416131195,0.39820742637644047,0.13453018491697852,0.3868117797695263,0.20078864250336506,0.352752880921895,0.29681183299989666,0.41201019246793846,0.4244383191680289,0.48680507317454147 -5.0,65,0.3892445582586428,0.3994878361075544,0.43661971830985913,0.47823303457106275,0.3892445582586428,0.045140545184589424,0.38988476312419973,0.13218231618768836,0.38156209987195905,0.19882192649305613,0.35147247119078107,0.29727932103829957,0.4050812450460335,0.4211330537467249,0.48796722771226453 +1.0909090909090908,7,0.5714285714285714,0.5714285714285714,0.6190476190476191,0.6666666666666666,0.5714285714285714,0.10123626373626372,0.5555555555555556,0.2798992673992674,0.5142857142857142,0.3871336996336997,0.4476190476190477,0.5401404151404151,0.5888888888888888,0.6167053425672016,0.6974271873247901 +2.090909090909091,14,0.47619047619047616,0.47619047619047616,0.5238095238095238,0.6190476190476191,0.47619047619047616,0.0798076923076923,0.4603174603174603,0.21561355311355312,0.42857142857142855,0.2918956043956044,0.4238095238095239,0.4925213675213675,0.5004535147392289,0.544497681796079,0.625923032327447 +3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.08234126984126984,0.5079365079365079,0.22321428571428573,0.4666666666666666,0.29662698412698413,0.4238095238095239,0.46626984126984133,0.5306122448979591,0.5468399582764966,0.6356437726980133 
+4.090909090909091,28,0.42857142857142855,0.42857142857142855,0.47619047619047616,0.5714285714285714,0.42857142857142855,0.06234737484737485,0.4126984126984127,0.16323260073260074,0.3999999999999999,0.2442765567765568,0.3952380952380953,0.43696581196581197,0.45170068027210875,0.48863989376224026,0.585003189736897 +5.090909090909091,35,0.47619047619047616,0.47619047619047616,0.47619047619047616,0.5714285714285714,0.47619047619047616,0.07822039072039072,0.4603174603174603,0.21085164835164832,0.419047619047619,0.27602258852258854,0.3952380952380953,0.43696581196581197,0.48979591836734687,0.5122052037855193,0.6082028226573437 +1.0909090909090908,7,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6666666666666666,0.5238095238095238,0.09726800976800977,0.5079365079365079,0.2679945054945055,0.4666666666666666,0.36729242979242976,0.41904761904761906,0.5163308913308913,0.5480725623582765,0.5820705880762242,0.6727109022414055 +2.090909090909091,14,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.6666666666666666,0.5238095238095238,0.08933150183150182,0.5079365079365079,0.24418498168498168,0.47619047619047616,0.33951465201465203,0.4476190476190477,0.5401404151404151,0.5480725623582765,0.5921167294151266,0.67423207909377 +3.090909090909091,21,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.6190476190476191,0.5238095238095238,0.0811965811965812,0.5079365079365079,0.21978021978021975,0.4666666666666666,0.2909035409035409,0.4476190476190477,0.47466422466422464,0.538548752834467,0.5635575023083134,0.6486454426450273 +4.090909090909091,28,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.5714285714285714,0.5238095238095238,0.08218864468864469,0.5079365079365079,0.22275641025641024,0.4666666666666666,0.2958638583638584,0.4238095238095238,0.46474358974358976,0.5306122448979591,0.5468399582764966,0.6351788392177582 
+5.090909090909091,35,0.5238095238095238,0.5238095238095238,0.5238095238095238,0.6190476190476191,0.5238095238095238,0.08218864468864469,0.5079365079365079,0.22275641025641024,0.4666666666666666,0.2958638583638584,0.4428571428571429,0.4766483516483517,0.5374149659863945,0.5598242514045669,0.653075337994289 diff --git a/model.safetensors b/model.safetensors index b2a7b319d88ae5318dafd690b9c88b24058f0615..1722446b91f3a5104288b64b2b62960773485579 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3c726e0ea295d8e17a907aec896c93461dc49634c2088e18d04e5586a8586c39 +oid sha256:46e99ac0a0cb9fa54e6b9a6f77368aacd6fac9b3751511f22a5ca5c9fb7c5204 size 2239607176 diff --git a/tokenizer_config.json b/tokenizer_config.json index 547791bdbf6fb6366b21c8c8de9245052103244f..8ff4390b993a1a04254ff2e21bc6bee199cd6e32 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -47,16 +47,9 @@ "eos_token": "", "extra_special_tokens": {}, "mask_token": "", - "max_length": 512, "model_max_length": 512, - "pad_to_multiple_of": null, "pad_token": "", - "pad_token_type_id": 0, - "padding_side": "right", "sep_token": "", - "stride": 0, "tokenizer_class": "XLMRobertaTokenizer", - "truncation_side": "right", - "truncation_strategy": "longest_first", "unk_token": "" } diff --git a/training_args.bin b/training_args.bin index adf9240157358311170ddc099d9cfc69134afff3..8da3332899cbb380f0ea340bef3e2069d6cf1db2 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5cceeb921bff1b3eaa3a7113b28b14620d5699e95bb7b35174f20a7a1c3b15f4 +oid sha256:14debc6c3f8c5edee5db8d97a3a78a007d313a13e4b96f43026da543b59bef8c size 6097