cmboulanger committed on
Commit
ea4c292
·
1 Parent(s): 1f74af3

Revert split-author entries in gold and adapt global instructions

Browse files
docs/tei-element-descriptions.md CHANGED
@@ -34,15 +34,17 @@ act at all.
34
 
35
  ---
36
 
37
- ### 2. State multiplicity explicitly
38
 
39
- Without an explicit instruction, models default to merging all instances into
40
- one span.
41
 
42
  **Bad:** "Name(s) of the author(s) of the cited work."
43
 
44
- **Good:** "Emit a **separate** `author` span for each distinct author — never
45
- merge multiple authors into a single span."
 
 
46
 
47
  This applies to any element that can appear more than once: `author`, `editor`,
48
  `biblScope` (volume vs. issue), `date` (publication year vs. original year), etc.
 
34
 
35
  ---
36
 
37
+ ### 2. State multiplicity and grouping rules explicitly
38
 
39
+ Without explicit instructions, models may make wrong choices about how many
40
+ spans to emit.
41
 
42
  **Bad:** "Name(s) of the author(s) of the cited work."
43
 
44
+ **Good:** "All name parts (`surname`, `forename`, `orgName`) for one or more
45
+ contiguous authors may be placed inside a single `author` span. Emit a separate
46
+ `author` span only when authors are separated by non-name text (e.g. a title or
47
+ date between them)."
48
 
49
  This applies to any element that can appear more than once: `author`, `editor`,
50
  `biblScope` (volume vs. issue), `date` (publication year vs. original year), etc.
scripts/evaluate_llm.py CHANGED
@@ -145,8 +145,9 @@ def _build_schema():
145
  "'author' or 'editor' span.",
146
  "When an organisation acts as author or editor, emit BOTH an 'orgName' span AND an "
147
  "enclosing 'author' (or 'editor') span covering the same text.",
148
- "Emit a separate 'author' span for each distinct author — never merge multiple "
149
- "authors into a single span.",
 
150
  "In a bibliography, a dash or underscore may stand for a repeated author or editor "
151
  "name — tag it as 'author' or 'editor' accordingly.",
152
  ],
 
145
  "'author' or 'editor' span.",
146
  "When an organisation acts as author or editor, emit BOTH an 'orgName' span AND an "
147
  "enclosing 'author' (or 'editor') span covering the same text.",
148
+ "All name parts (surname, forename, orgName) for one or more contiguous authors "
149
+ "may be placed inside a single 'author' (or 'editor') span. Emit separate spans "
150
+ "only when authors are separated by non-name text (e.g. a title or date).",
151
  "In a bibliography, a dash or underscore may stand for a repeated author or editor "
152
  "name — tag it as 'author' or 'editor' accordingly.",
153
  ],
tests/fixtures/blbl-examples.tei.xml CHANGED
@@ -22,11 +22,11 @@
22
  <bibl><author><surname>Bugnon</surname> (<forename>A.-L.</forename>)</author>, <title level="a">Le mobilier céramique</title>, in <editor><surname>Méloche</surname></editor> <date>2012</date>, p. <biblScope unit="page">182-196</biblScope>.</bibl>
23
  <bibl><author><orgName>Commission Inter-IREM Collège</orgName></author> &amp; <author><orgName>Commission Inter-IREM Statistiques et Probabilités</orgName></author>, (<date>2012</date>). <title level="a">Probabilités au collège : ne pas laisser l’enseignement des probabilités au hasard…</title>. Dans <title level="j">Brochure APMEP</title> n°<biblScope unit="volume">198</biblScope>.</bibl>
24
  <bibl><author><surname>BARIL</surname>, <forename>Jean</forename></author> (<date>2013</date>). <title level="a">Droit d’accès à l’information environnementale</title>. <publisher>Éditions Yvon Blais</publisher>, <pubPlace>Cowansville, Québec</pubPlace>, <biblScope unit="page">561 p</biblScope>.</bibl>
25
- <bibl><author><surname>Russell</surname>, <forename>D.A.</forename></author> and <author><forename>Michael</forename> <surname>Winterbottom</surname></author> <date>1989</date> [<date>1972</date>]. <title level="m">Classical Literary Criticism. Oxford World Classics</title>. <pubPlace>Oxford</pubPlace>: <publisher>Oxford UP</publisher>.</bibl>
26
  <bibl><author><surname>Doyle</surname> <forename>JJ.</forename></author> <date>1998</date>. <title level="a">Phylogenetic perspectives on nodulation: evolving views of plants and symbiotic bacteria</title>. &lt;italic&gt;<title level="j">Trends in Plant Science</title>&lt;/italic&gt; <biblScope unit="volume">3</biblScope>: <biblScope unit="page">473-478</biblScope>.</bibl>
27
  <bibl><author><surname>Doyle</surname> <forename>JJ</forename></author>. <date>2011</date>. <title level="a">Phylogenetic perspectives on the origins of nodulation</title>. &lt;italic&gt;<title level="j">Molecular Plant-Microbe Interaction</title>&lt;/italic&gt; <biblScope unit="volume">24</biblScope>: <biblScope unit="page">1289-1295</biblScope>.</bibl>
28
- <bibl><label>17.</label><author><surname>Creed</surname> <forename>PA</forename></author>, <author><surname>Hicks</surname> <forename>RE</forename></author>, <author><surname>Machin</surname> <forename>MA</forename></author>. <title level="a">Behavioural plasticity and mental health outcomes for long-term unemployed attending occupational training programmes</title>. <title level="j">J Occup Org Psychol</title>. <date>1998</date>;<biblScope unit="volume">71</biblScope>: <biblScope unit="page">171-91</biblScope>.</bibl>
29
- <bibl><label>25.</label> <author><surname>Spickett-Jones</surname>, <forename>J. G.</forename></author> &amp; <author><forename>T.-Y.</forename> <surname>Eng</surname></author> (<date>2006</date>). “<title level="a">SMEs and the Strategic Context for Communication</title>”’, <title level="j">Journal of Marketing Communications</title>, Vol. <biblScope unit="volume">12</biblScope>(<biblScope unit="issue">3</biblScope>), <biblScope unit="page">225 - 243</biblScope>.</bibl>
30
  <bibl><author><surname>Lillié</surname>, <forename>F.</forename></author>, <title level="m">Analyse tectonique de Gisement Claude</title> (<pubPlace>Cluff Lake, Saskatchewan</pubPlace>). <note type="report">Amok Internal Report</note>. <date>1982</date>.</bibl>
31
  <bibl><author><surname>Netto</surname>, <forename>S.</forename></author> (<date>2010</date>). <title level="a">Représentation de l&apos;informatique à l&apos;école chez de futurs enseignants et des enseignants en poste</title>. <title level="j">Les dossiers des sciences de l&apos;Education</title>(<biblScope unit="volume">23</biblScope>), pp. <biblScope unit="page">61-76</biblScope>.</bibl>
32
  <bibl><author><surname>Jakobson</surname>, <forename>Roman</forename></author> <date>1960</date>. "<title level="a">Closing Statement: Linguistics and Poetics</title>.” In <editor><forename>Thomas A.</forename> <surname>Sebeok</surname></editor>, ed. <title level="m">Style in Language</title>. <pubPlace>Cambridge, Mass</pubPlace>: <publisher>The MIT Press</publisher>. <biblScope unit="page">350-77</biblScope></bibl>
 
22
  <bibl><author><surname>Bugnon</surname> (<forename>A.-L.</forename>)</author>, <title level="a">Le mobilier céramique</title>, in <editor><surname>Méloche</surname></editor> <date>2012</date>, p. <biblScope unit="page">182-196</biblScope>.</bibl>
23
  <bibl><author><orgName>Commission Inter-IREM Collège</orgName></author> &amp; <author><orgName>Commission Inter-IREM Statistiques et Probabilités</orgName></author>, (<date>2012</date>). <title level="a">Probabilités au collège : ne pas laisser l’enseignement des probabilités au hasard…</title>. Dans <title level="j">Brochure APMEP</title> n°<biblScope unit="volume">198</biblScope>.</bibl>
24
  <bibl><author><surname>BARIL</surname>, <forename>Jean</forename></author> (<date>2013</date>). <title level="a">Droit d’accès à l’information environnementale</title>. <publisher>Éditions Yvon Blais</publisher>, <pubPlace>Cowansville, Québec</pubPlace>, <biblScope unit="page">561 p</biblScope>.</bibl>
25
+ <bibl><author><surname>Russell</surname>, <forename>D.A.</forename> and <forename>Michael</forename> <surname>Winterbottom</surname></author> <date>1989</date> [<date>1972</date>]. <title level="m">Classical Literary Criticism. Oxford World Classics</title>. <pubPlace>Oxford</pubPlace>: <publisher>Oxford UP</publisher>.</bibl>
26
  <bibl><author><surname>Doyle</surname> <forename>JJ.</forename></author> <date>1998</date>. <title level="a">Phylogenetic perspectives on nodulation: evolving views of plants and symbiotic bacteria</title>. &lt;italic&gt;<title level="j">Trends in Plant Science</title>&lt;/italic&gt; <biblScope unit="volume">3</biblScope>: <biblScope unit="page">473-478</biblScope>.</bibl>
27
  <bibl><author><surname>Doyle</surname> <forename>JJ</forename></author>. <date>2011</date>. <title level="a">Phylogenetic perspectives on the origins of nodulation</title>. &lt;italic&gt;<title level="j">Molecular Plant-Microbe Interaction</title>&lt;/italic&gt; <biblScope unit="volume">24</biblScope>: <biblScope unit="page">1289-1295</biblScope>.</bibl>
28
+ <bibl><label>17.</label><author><surname>Creed</surname> <forename>PA</forename>, <surname>Hicks</surname> <forename>RE</forename>, <surname>Machin</surname> <forename>MA</forename></author>. <title level="a">Behavioural plasticity and mental health outcomes for long-term unemployed attending occupational training programmes</title>. <title level="j">J Occup Org Psychol</title>. <date>1998</date>;<biblScope unit="volume">71</biblScope>: <biblScope unit="page">171-91</biblScope>.</bibl>
29
+ <bibl><label>25.</label> <author><surname>Spickett-Jones</surname>, <forename>J. G.</forename> &amp; <forename>T.-Y.</forename> <surname>Eng</surname></author> (<date>2006</date>). “<title level="a">SMEs and the Strategic Context for Communication</title>”’, <title level="j">Journal of Marketing Communications</title>, Vol. <biblScope unit="volume">12</biblScope>(<biblScope unit="issue">3</biblScope>), <biblScope unit="page">225 - 243</biblScope>.</bibl>
30
  <bibl><author><surname>Lillié</surname>, <forename>F.</forename></author>, <title level="m">Analyse tectonique de Gisement Claude</title> (<pubPlace>Cluff Lake, Saskatchewan</pubPlace>). <note type="report">Amok Internal Report</note>. <date>1982</date>.</bibl>
31
  <bibl><author><surname>Netto</surname>, <forename>S.</forename></author> (<date>2010</date>). <title level="a">Représentation de l&apos;informatique à l&apos;école chez de futurs enseignants et des enseignants en poste</title>. <title level="j">Les dossiers des sciences de l&apos;Education</title>(<biblScope unit="volume">23</biblScope>), pp. <biblScope unit="page">61-76</biblScope>.</bibl>
32
  <bibl><author><surname>Jakobson</surname>, <forename>Roman</forename></author> <date>1960</date>. "<title level="a">Closing Statement: Linguistics and Poetics</title>.” In <editor><forename>Thomas A.</forename> <surname>Sebeok</surname></editor>, ed. <title level="m">Style in Language</title>. <pubPlace>Cambridge, Mass</pubPlace>: <publisher>The MIT Press</publisher>. <biblScope unit="page">350-77</biblScope></bibl>