Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/COPYING +35 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/allowables.scm +102 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmu2ft +21 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmu_lts_rules.scm +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict-0.4.diff +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict-0.4.out +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict-0.4.scm +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict_compile.scm +41 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/wsj.wp39.poslexR +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/etc/email_filter +47 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/mbrola.scm +103 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/mettree.scm +88 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/module_description.scm +117 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/mrpa_allophones.scm +111 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/mrpa_durs.scm +136 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/mrpa_phones.scm +114 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/multisyn.scm +195 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/multisyn_pauses.scm +102 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/radio_phones_multisyn.scm +136 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/send_xwaves.scm +318 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/target_cost.scm +410 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/ogimarkup-mode.scm +191 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/pauses.scm +242 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/phoneset.scm +134 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/phrase.scm +171 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/pos.scm +229 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/postlex.scm +587 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/radio_phones.scm +122 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/sable-latin.ent +171 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/sable-mode.scm +560 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/scfg.scm +62 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/scfg_wsj_wp20.gram +523 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/sec.B.hept.ngrambin +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/sec.ts20.quad.ngrambin +0 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/singing-mode.scm +673 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/siod.scm +638 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/siteinit.scm +65 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/soleml-mode.scm +336 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/speech.properties +2 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/synthesis.scm +443 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/tilt.scm +972 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/tobi.scm +338 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/tobi_rules.scm +1002 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/token.scm +815 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/tokenpos.scm +286 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/tts.scm +304 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/unilex_phones.scm +189 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/voices.scm +361 -0
- pretrained_models/CosyVoice-ttsfrd/resource/festival/web.scm +103 -0
- pretrained_models/CosyVoice-ttsfrd/resource/jprsc/COPYING +100 -0
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/COPYING
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
This directory contains an American English Lexicon and letter to
|
| 3 |
+
sound rules based on CMUDICT 0.4. This distribution falls under
|
| 4 |
+
the following copyright. CMUDICT falls under a similar free licence
|
| 5 |
+
that has no commercial restrictions.
|
| 6 |
+
|
| 7 |
+
Centre for Speech Technology Research
|
| 8 |
+
University of Edinburgh, UK
|
| 9 |
+
Copyright (c) 1996,1997
|
| 10 |
+
All Rights Reserved.
|
| 11 |
+
|
| 12 |
+
Permission is hereby granted, free of charge, to use and distribute
|
| 13 |
+
this software and its documentation without restriction, including
|
| 14 |
+
without limitation the rights to use, copy, modify, merge, publish,
|
| 15 |
+
distribute, sublicense, and/or sell copies of this work, and to
|
| 16 |
+
permit persons to whom this work is furnished to do so, subject to
|
| 17 |
+
the following conditions:
|
| 18 |
+
1. The code must retain the above copyright notice, this list of
|
| 19 |
+
conditions and the following disclaimer.
|
| 20 |
+
2. Any modifications must be clearly marked as such.
|
| 21 |
+
3. Original authors' names are not deleted.
|
| 22 |
+
4. The authors' names are not used to endorse or promote products
|
| 23 |
+
derived from this software without specific prior written
|
| 24 |
+
permission.
|
| 25 |
+
|
| 26 |
+
THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK
|
| 27 |
+
DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
|
| 28 |
+
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
|
| 29 |
+
SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
|
| 30 |
+
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
| 31 |
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
| 32 |
+
AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
| 33 |
+
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
| 34 |
+
THIS SOFTWARE.
|
| 35 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/allowables.scm
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
(require 'lts_build)
|
| 2 |
+
(set! allowables
|
| 3 |
+
'((a _epsilon_ aa aa1 aa0
|
| 4 |
+
ax ax1 ax0
|
| 5 |
+
eh eh1 eh0
|
| 6 |
+
ah ah1 ah0
|
| 7 |
+
ae ae1 ae0
|
| 8 |
+
ey ey1 ey0
|
| 9 |
+
ay ay1 ay0
|
| 10 |
+
er er1 er0
|
| 11 |
+
y-ax0 y-ah1 y-ah0
|
| 12 |
+
aw aw1 aw0
|
| 13 |
+
ao ao1 ao0
|
| 14 |
+
ih ih1 ih0
|
| 15 |
+
w-ax0 w-ah1 w-ah0
|
| 16 |
+
ow ow1 ow0
|
| 17 |
+
w-ey w-ey1 ey0
|
| 18 |
+
iy iy1 iy0)
|
| 19 |
+
(b _epsilon_ b p )
|
| 20 |
+
(c _epsilon_ k ch s sh t-s )
|
| 21 |
+
(d _epsilon_ d t jh)
|
| 22 |
+
(e _epsilon_ ih ih1 ih0
|
| 23 |
+
ax ax1 ax0
|
| 24 |
+
iy iy1 iy0
|
| 25 |
+
er er1 er0
|
| 26 |
+
ax ah1 ah0
|
| 27 |
+
eh eh1 eh0
|
| 28 |
+
ey ey1 ey0
|
| 29 |
+
uw uw1 uw0
|
| 30 |
+
ay ay1 ay0
|
| 31 |
+
ow ow1 ow0
|
| 32 |
+
y-uw y-uw1 y-uw0
|
| 33 |
+
oy oy1 oy0
|
| 34 |
+
aa aa1 aa0)
|
| 35 |
+
(f _epsilon_ f )
|
| 36 |
+
(g _epsilon_ g jh zh k f)
|
| 37 |
+
(h _epsilon_ hh )
|
| 38 |
+
(i _epsilon_ iy iy1 iy0
|
| 39 |
+
ax ax1 ax0
|
| 40 |
+
ih ih1 ih0
|
| 41 |
+
ah ah1 ah0
|
| 42 |
+
ax ah1 ah0
|
| 43 |
+
ay ay1 ay0
|
| 44 |
+
y
|
| 45 |
+
aa aa1 aa0
|
| 46 |
+
ae ae1 ae0
|
| 47 |
+
w-ax0 w-ah1 w-ah0
|
| 48 |
+
eh eh1 eh0
|
| 49 |
+
er er0 er1 )
|
| 50 |
+
(j _epsilon_ jh y hh zh)
|
| 51 |
+
(k _epsilon_ k )
|
| 52 |
+
(l _epsilon_ l ax-l y ax0-l)
|
| 53 |
+
(m _epsilon_ m ax-m m-ax0 ax0-m m-ax0
|
| 54 |
+
m-ae m-ae1 m-ae0
|
| 55 |
+
m-ih m-ih0 )
|
| 56 |
+
(n _epsilon_ n ng n-y)
|
| 57 |
+
(o _epsilon_ ax ax0 ah1 ah0
|
| 58 |
+
ao ao1 ao0
|
| 59 |
+
ow ow1 ow0
|
| 60 |
+
uw uw1 uw0
|
| 61 |
+
er er1 er0
|
| 62 |
+
aa aa1 aa0
|
| 63 |
+
aw aw1 aw0
|
| 64 |
+
oy oy1 oy0
|
| 65 |
+
uh uh1 uh0
|
| 66 |
+
w
|
| 67 |
+
w-ax0 w-ah1 w-ah0
|
| 68 |
+
aa aa1 aa0
|
| 69 |
+
ih ih1 ih0
|
| 70 |
+
ae ae1 ae0)
|
| 71 |
+
(p _epsilon_ p f)
|
| 72 |
+
(q _epsilon_ k )
|
| 73 |
+
(r _epsilon_ r er1 er er0 )
|
| 74 |
+
(s _epsilon_ s sh z zh ch)
|
| 75 |
+
(t _epsilon_ t th sh ch dh d s zh)
|
| 76 |
+
(u _epsilon_
|
| 77 |
+
ax ax0
|
| 78 |
+
ah ah1 ah0
|
| 79 |
+
uw uw1 uw0
|
| 80 |
+
er er1 er0
|
| 81 |
+
uh uh1 uh0
|
| 82 |
+
y-uw y-uw1 y-uw0
|
| 83 |
+
ax-w ah1-w ah0-w
|
| 84 |
+
y-er y-er1 y-er0
|
| 85 |
+
y-ax y-ax0 y-ah1 y-ah0
|
| 86 |
+
w
|
| 87 |
+
ih ih1 ih0
|
| 88 |
+
ao ao1 ao0
|
| 89 |
+
eh eh1 eh0
|
| 90 |
+
y-uh y-uh1 y-uh0 )
|
| 91 |
+
(v _epsilon_ v f)
|
| 92 |
+
(w _epsilon_ w v f)
|
| 93 |
+
(x _epsilon_ k-s g-z ng-z k-sh z g-zh zh)
|
| 94 |
+
(y _epsilon_
|
| 95 |
+
iy iy1 iy0
|
| 96 |
+
ih ih1 ih0
|
| 97 |
+
ay ay1 ay0
|
| 98 |
+
y
|
| 99 |
+
ax ax0 ah1 ah0)
|
| 100 |
+
(z _epsilon_ z t-s zh s)
|
| 101 |
+
(# #)))
|
| 102 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmu2ft
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
|
| 2 |
+
#
|
| 3 |
+
# Converts cmulexicon to Festival format
|
| 4 |
+
#
|
| 5 |
+
# usage: cmu2ft cmudict.0.1 cmu_lex.scm
|
| 6 |
+
|
| 7 |
+
#sed 's/er0/er0 r/' | sed 's/er1/er1 r/' | sed 's/er2/er2 r/' |
|
| 8 |
+
|
| 9 |
+
echo >$2
|
| 10 |
+
echo ";; CMUDICT-0.4 Converted to Festival lexicon format" >>$2
|
| 11 |
+
cat $1 | tr "[A-Z]" "[a-z]" | sed 's/ah0/ax/g' |
|
| 12 |
+
tr 2 1 |
|
| 13 |
+
awk '{if ($1 == "##")
|
| 14 |
+
printf(";; %s\n",$0);
|
| 15 |
+
else if ($1 ~ /^[a-z][a-z]*$/)
|
| 16 |
+
{ printf("(")
|
| 17 |
+
printf("\"%s\" nil (%s",$1,$2)
|
| 18 |
+
for (i=3; i <= NF; i++)
|
| 19 |
+
printf " %s",$i
|
| 20 |
+
printf "))\n"
|
| 21 |
+
}} ' >> $2
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmu_lts_rules.scm
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict-0.4.diff
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict-0.4.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict-0.4.scm
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/cmu/cmudict_compile.scm
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 2008 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Compile the lexicon
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
(load "cmulex.scm")
|
| 38 |
+
(lex.compile "all.scm" "cmudict-0.4.out")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/dicts/wsj.wp39.poslexR
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/etc/email_filter
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
|
| 2 |
+
###########################################################################
|
| 3 |
+
## ##
|
| 4 |
+
## Centre for Speech Technology Research ##
|
| 5 |
+
## University of Edinburgh, UK ##
|
| 6 |
+
## Copyright (c) 1996,1997 ##
|
| 7 |
+
## All Rights Reserved. ##
|
| 8 |
+
## ##
|
| 9 |
+
## Permission is hereby granted, free of charge, to use and distribute ##
|
| 10 |
+
## this software and its documentation without restriction, including ##
|
| 11 |
+
## without limitation the rights to use, copy, modify, merge, publish, ##
|
| 12 |
+
## distribute, sublicense, and/or sell copies of this work, and to ##
|
| 13 |
+
## permit persons to whom this work is furnished to do so, subject to ##
|
| 14 |
+
## the following conditions: ##
|
| 15 |
+
## 1. The code must retain the above copyright notice, this list of ##
|
| 16 |
+
## conditions and the following disclaimer. ##
|
| 17 |
+
## 2. Any modifications must be clearly marked as such. ##
|
| 18 |
+
## 3. Original authors' names are not deleted. ##
|
| 19 |
+
## 4. The authors' names are not used to endorse or promote products ##
|
| 20 |
+
## derived from this software without specific prior written ##
|
| 21 |
+
## permission. ##
|
| 22 |
+
## ##
|
| 23 |
+
## THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ##
|
| 24 |
+
## DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ##
|
| 25 |
+
## ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ##
|
| 26 |
+
## SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ##
|
| 27 |
+
## FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ##
|
| 28 |
+
## WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ##
|
| 29 |
+
## AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ##
|
| 30 |
+
## ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ##
|
| 31 |
+
## THIS SOFTWARE. ##
|
| 32 |
+
## ##
|
| 33 |
+
###########################################################################
|
| 34 |
+
## ##
|
| 35 |
+
## Email filter for tts text mode ##
|
| 36 |
+
## usage: email_filter email_message >filtered_message ##
|
| 37 |
+
## ##
|
| 38 |
+
## Extracts the From and Subject lines from the head and the body of ##
|
| 39 |
+
## the message. I suppose it could also do signature extraction ##
|
| 40 |
+
## ##
|
| 41 |
+
###########################################################################
|
| 42 |
+
grep "^From: " $1
|
| 43 |
+
echo
|
| 44 |
+
grep "^Subject: " $1
|
| 45 |
+
echo
|
| 46 |
+
# delete up to first blank line (i.e. the header)
|
| 47 |
+
sed '1,/^$/ d' $1
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/mbrola.scm
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Support for MBROLA as an external module.
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
;;; You might want to set this in your sitevars.scm
|
| 38 |
+
(defvar mbrola_progname "/cstr/external/mbrola/mbrola"
|
| 39 |
+
"mbrola_progname
|
| 40 |
+
The program name for mbrola.")
|
| 41 |
+
(defvar mbrola_database "fr1"
|
| 42 |
+
"mbrola_database
|
| 43 |
+
The name of the MBROLA database to use during MBROLA Synthesis.")
|
| 44 |
+
|
| 45 |
+
(define (MBROLA_Synth utt)
|
| 46 |
+
"(MBROLA_Synth UTT)
|
| 47 |
+
Synthesize using MBROLA as external module. Basically dump the info
|
| 48 |
+
from this utterance. Call MBROLA and reload the waveform into utt.
|
| 49 |
+
[see MBROLA]"
|
| 50 |
+
(let ((filename (make_tmp_filename))
|
| 51 |
+
)
|
| 52 |
+
(save_segments_mbrola utt filename)
|
| 53 |
+
(system (string-append mbrola_progname " "
|
| 54 |
+
mbrola_database " "
|
| 55 |
+
filename " "
|
| 56 |
+
filename ".au"))
|
| 57 |
+
(utt.import.wave utt (string-append filename ".au"))
|
| 58 |
+
(apply_hooks after_synth_hooks utt)
|
| 59 |
+
(delete-file filename)
|
| 60 |
+
(delete-file (string-append filename ".au"))
|
| 61 |
+
utt))
|
| 62 |
+
|
| 63 |
+
(define (save_segments_mbrola utt filename)
|
| 64 |
+
"(save_segments_mbrola UTT FILENAME)
|
| 65 |
+
Save segment information in MBROLA format in filename. The format is
|
| 66 |
+
phone duration (ms) [% position F0 target]*. [see MBROLA]"
|
| 67 |
+
(let ((fd (fopen filename "w")))
|
| 68 |
+
(mapcar
|
| 69 |
+
(lambda (segment)
|
| 70 |
+
(save_seg_mbrola_entry
|
| 71 |
+
(item.feat segment 'name)
|
| 72 |
+
(item.feat segment 'segment_start)
|
| 73 |
+
(item.feat segment 'segment_duration)
|
| 74 |
+
(mapcar
|
| 75 |
+
(lambda (targ_item)
|
| 76 |
+
(list
|
| 77 |
+
(item.feat targ_item "pos")
|
| 78 |
+
(item.feat targ_item "f0")))
|
| 79 |
+
(item.relation.daughters segment 'Target)) ;; list of targets
|
| 80 |
+
fd))
|
| 81 |
+
(utt.relation.items utt 'Segment))
|
| 82 |
+
(fclose fd)))
|
| 83 |
+
|
| 84 |
+
(define (save_seg_mbrola_entry name start dur targs fd)
|
| 85 |
+
"(save_seg_mbrola_entry ENTRY NAME START DUR TARGS FD)
|
| 86 |
+
Entry contains, (name duration num_targs start 1st_targ_pos 1st_targ_val)."
|
| 87 |
+
(format fd "%s %d " name (nint (* dur 1000)))
|
| 88 |
+
(if targs ;; if there are any targets
|
| 89 |
+
(mapcar
|
| 90 |
+
(lambda (targ) ;; targ_pos and targ_val
|
| 91 |
+
(let ((targ_pos (car targ))
|
| 92 |
+
(targ_val (car (cdr targ))))
|
| 93 |
+
|
| 94 |
+
(format fd "%d %d "
|
| 95 |
+
(nint (* 100 (/ (- targ_pos start) dur))) ;; % pos of target
|
| 96 |
+
(nint (parse-number targ_val))) ;; target value
|
| 97 |
+
))
|
| 98 |
+
targs))
|
| 99 |
+
(terpri fd)
|
| 100 |
+
(terpri fd)
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
(provide 'mbrola)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/mettree.scm
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1998 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Some (experimental) data for investigating metrical trees
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
;;; Set up generation of metrical tree, this includes getting
|
| 38 |
+
;;; a syntactic parse
|
| 39 |
+
;;;
|
| 40 |
+
;;; Use as
|
| 41 |
+
;;; (set! utt1 (metsynth (Utterance Text "For afternoon tea")))
|
| 42 |
+
;;; (utt.relation_tree utt1 'MetricalTree)
|
| 43 |
+
|
| 44 |
+
(require 'scfg)
|
| 45 |
+
(set! scfg_grammar (load (path-append libdir "scfg_wsj_wp20.gram") t))
|
| 46 |
+
|
| 47 |
+
(define (mettext utt)
|
| 48 |
+
(Initialize utt)
|
| 49 |
+
(Text utt)
|
| 50 |
+
(Token_POS utt)
|
| 51 |
+
(Token utt)
|
| 52 |
+
(POS utt)
|
| 53 |
+
(print "here1")
|
| 54 |
+
(Phrasify utt)
|
| 55 |
+
(print "here2")
|
| 56 |
+
(ProbParse utt)
|
| 57 |
+
(print "here3")
|
| 58 |
+
(auto_metrical_tree utt)
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
(define (metsynth utt)
|
| 62 |
+
(mettext utt)
|
| 63 |
+
(Wave_Synth utt)
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
;;; Assumed everything is using Roger diphones
|
| 67 |
+
|
| 68 |
+
;;(lex.create "cmu_mettree")
|
| 69 |
+
;;;(lex.set.phoneset "radio_phones")
|
| 70 |
+
;;(lex.set.phoneset "radio_phones")
|
| 71 |
+
|
| 72 |
+
(define (setup_cmu_mettree_lex)
|
| 73 |
+
"(setup_cmu_mettreelex)
|
| 74 |
+
Lexicon derived from the CMU lexicon (cmudict-0.1), around 100,000 entries,
|
| 75 |
+
in the radio phoneset (sort of darpa-like)."
|
| 76 |
+
(if (not (member_string "cmu_mettree" (lex.list)))
|
| 77 |
+
(begin
|
| 78 |
+
(print "making cmu lexicon")
|
| 79 |
+
(lex.create "cmu_mettree")
|
| 80 |
+
(lex.set.compile.file (path-append lexdir "cmu_mettree_lex.out"))
|
| 81 |
+
(lex.set.phoneset "radio")
|
| 82 |
+
(require 'lts__us) ;; US English letter to sound rules
|
| 83 |
+
(lex.set.lts.method 'lts_rules)
|
| 84 |
+
(lex.set.lts.ruleset 'nrl_us))))
|
| 85 |
+
|
| 86 |
+
(provide 'mettree)
|
| 87 |
+
|
| 88 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/module_description.scm
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Handle module descriptions.
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
(defvar *module-descriptions* nil
|
| 38 |
+
"*module-descriptions*
|
| 39 |
+
An association list recording the description objects for proclaimed
|
| 40 |
+
modules.")
|
| 41 |
+
|
| 42 |
+
(define (set_module_description mod desc)
|
| 43 |
+
"(set_module_description MOD DESC)
|
| 44 |
+
Set the description for the module named MOD."
|
| 45 |
+
(let ((entry (assoc mod *module-descriptions*)))
|
| 46 |
+
(if entry
|
| 47 |
+
(set-cdr! entry (cons desc nil))
|
| 48 |
+
(set! *module-descriptions* (cons (cons mod (cons desc nil))
|
| 49 |
+
*module-descriptions*))
|
| 50 |
+
)
|
| 51 |
+
)
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
(define (module_description mod)
|
| 55 |
+
"(module_description MOD)
|
| 56 |
+
Returns the description record of the module named by symbol MOD"
|
| 57 |
+
(let ((entry (assoc mod *module-descriptions*)))
|
| 58 |
+
(if entry
|
| 59 |
+
(car (cdr entry))
|
| 60 |
+
nil
|
| 61 |
+
)
|
| 62 |
+
)
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
(defmac (proclaim form)
|
| 66 |
+
"(proclaim NAME &opt DESCRIPTION...)
|
| 67 |
+
Announce the availability of a module NAME. DESCRIPTION
|
| 68 |
+
is a description in a fixed format."
|
| 69 |
+
(let ((name (car (cdr form)))
|
| 70 |
+
(description (cdr form))
|
| 71 |
+
)
|
| 72 |
+
(list 'proclaim-real (list 'quote name) (list 'quote description))
|
| 73 |
+
)
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
(define (proclaim-real name description)
|
| 77 |
+
(set! *modules* (cons name *modules*))
|
| 78 |
+
; (if description
|
| 79 |
+
; (set_module_description name (create_module_description description))
|
| 80 |
+
; )
|
| 81 |
+
)
|
| 82 |
+
|
| 83 |
+
(define (describe_module mod)
|
| 84 |
+
"(describe_module MOD)
|
| 85 |
+
Describe the module named by the symbol MOD."
|
| 86 |
+
|
| 87 |
+
(let ((entry (module_description mod)))
|
| 88 |
+
(format t "---------------------\n")
|
| 89 |
+
(if entry
|
| 90 |
+
(print_module_description entry)
|
| 91 |
+
(format t "No description for %l\n" mod)
|
| 92 |
+
)
|
| 93 |
+
(format t "---------------------\n")
|
| 94 |
+
)
|
| 95 |
+
)
|
| 96 |
+
|
| 97 |
+
(define (describe_all_modules)
|
| 98 |
+
"(describe_all_modules)
|
| 99 |
+
Print descriptions of all proclaimed modules"
|
| 100 |
+
(format t "---------------------\n")
|
| 101 |
+
(let ((p *module-descriptions*))
|
| 102 |
+
(while p
|
| 103 |
+
(print_module_description (car (cdr (car p))))
|
| 104 |
+
(format t "---------------------\n")
|
| 105 |
+
(set! p (cdr p))
|
| 106 |
+
)
|
| 107 |
+
)
|
| 108 |
+
)
|
| 109 |
+
|
| 110 |
+
(proclaim
|
| 111 |
+
module_description 1.1
|
| 112 |
+
"CSTR" "Richard Caley <rjc@cstr.ed.ac.uk>"
|
| 113 |
+
( "Handle module descriptions from C++ and from Scheme."
|
| 114 |
+
)
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
(provide 'module_description)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/mrpa_allophones.scm
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;; ;;
|
| 3 |
+
;; Centre for Speech Technology Research ;;
|
| 4 |
+
;; University of Edinburgh, UK ;;
|
| 5 |
+
;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;; All Rights Reserved. ;;
|
| 7 |
+
;; ;;
|
| 8 |
+
;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;; the following conditions: ;;
|
| 14 |
+
;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;; conditions and the following disclaimer. ;;
|
| 16 |
+
;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;; derived from this software without specific prior written ;;
|
| 20 |
+
;; permission. ;;
|
| 21 |
+
;; ;;
|
| 22 |
+
;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;; THIS SOFTWARE. ;;
|
| 31 |
+
;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;
|
| 34 |
+
;; A definition of the extended mrpa phone set used for some diphone sets
|
| 35 |
+
;;
|
| 36 |
+
|
| 37 |
+
(defPhoneSet
|
| 38 |
+
mrpa_allophones
|
| 39 |
+
;;; Phone Features
|
| 40 |
+
(;; vowel or consonant
|
| 41 |
+
(vc + -)
|
| 42 |
+
;; vowel length: short long diphthong schwa
|
| 43 |
+
(vlng s l d a 0)
|
| 44 |
+
;; vowel height: high mid low
|
| 45 |
+
(vheight 1 2 3 -)
|
| 46 |
+
;; vowel frontness: front mid back
|
| 47 |
+
(vfront 1 2 3 -)
|
| 48 |
+
;; lip rounding
|
| 49 |
+
(vrnd + -)
|
| 50 |
+
;; consonant type: stop fricative affricate nasal liquid
|
| 51 |
+
(ctype s f a n l 0)
|
| 52 |
+
;; place of articulation: labial alveolar palatal labio-dental
|
| 53 |
+
;; dental velar
|
| 54 |
+
(cplace l a p b d v 0)
|
| 55 |
+
;; consonant voicing
|
| 56 |
+
(cvox + -)
|
| 57 |
+
)
|
| 58 |
+
;; Phone set members
|
| 59 |
+
(
|
| 60 |
+
(uh + s 2 3 - 0 0 +)
|
| 61 |
+
(e + s 2 1 - 0 0 +)
|
| 62 |
+
(a + s 3 1 - 0 0 +)
|
| 63 |
+
(o + s 3 3 - 0 0 +)
|
| 64 |
+
(i + s 1 1 - 0 0 +)
|
| 65 |
+
(u + s 1 3 + 0 0 +)
|
| 66 |
+
(ii + l 1 1 - 0 0 +)
|
| 67 |
+
(uu + l 2 3 + 0 0 +)
|
| 68 |
+
(oo + l 3 2 - 0 0 +)
|
| 69 |
+
(aa + l 3 1 - 0 0 +)
|
| 70 |
+
(@@ + l 2 2 - 0 0 +)
|
| 71 |
+
(ai + d 3 1 - 0 0 +)
|
| 72 |
+
(ei + d 2 1 - 0 0 +)
|
| 73 |
+
(oi + d 3 3 - 0 0 +)
|
| 74 |
+
(au + d 3 3 + 0 0 +)
|
| 75 |
+
(ou + d 3 3 + 0 0 +)
|
| 76 |
+
(e@ + d 2 1 - 0 0 +)
|
| 77 |
+
(i@ + d 1 1 - 0 0 +)
|
| 78 |
+
(u@ + d 3 1 - 0 0 +)
|
| 79 |
+
(@ + a - - - 0 0 +)
|
| 80 |
+
(p - 0 - - + s l -)
|
| 81 |
+
(t - 0 - - + s a -)
|
| 82 |
+
(k - 0 - - + s p -)
|
| 83 |
+
(b - 0 - - + s l +)
|
| 84 |
+
(d - 0 - - + s a +)
|
| 85 |
+
(g - 0 - - + s p +)
|
| 86 |
+
(s - 0 - - + f a -)
|
| 87 |
+
(z - 0 - - + f a +)
|
| 88 |
+
(sh - 0 - - + f p -)
|
| 89 |
+
(zh - 0 - - + f p +)
|
| 90 |
+
(f - 0 - - + f b -)
|
| 91 |
+
(v - 0 - - + f b +)
|
| 92 |
+
(th - 0 - - + f d -)
|
| 93 |
+
(dh - 0 - - + f d +)
|
| 94 |
+
(ch - 0 - - + a a -)
|
| 95 |
+
(jh - 0 - - + a a +)
|
| 96 |
+
(h - 0 - - + a v -)
|
| 97 |
+
(m - 0 - - + n l +)
|
| 98 |
+
(n - 0 - - + n d +)
|
| 99 |
+
(ng - 0 - - + n v +)
|
| 100 |
+
(l - 0 - - + l d +)
|
| 101 |
+
(ll - 0 - - + l d +)
|
| 102 |
+
(y - 0 - - + l a +)
|
| 103 |
+
(r - 0 - - + l p +)
|
| 104 |
+
(w - 0 - - + l l +)
|
| 105 |
+
(# - 0 - - - 0 0 -)
|
| 106 |
+
)
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
(PhoneSet.silences '(#))
|
| 110 |
+
|
| 111 |
+
(provide 'mrpa_allophones)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/mrpa_durs.scm
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; mrpa average phoneme durations from gsw 450
|
| 35 |
+
;;;
|
| 36 |
+
(set! phoneme_durations
|
| 37 |
+
'(
|
| 38 |
+
(u 0.067)
|
| 39 |
+
(i@ 0.146)
|
| 40 |
+
(h 0.067)
|
| 41 |
+
(uu 0.105)
|
| 42 |
+
(uh 0.090)
|
| 43 |
+
(v 0.053)
|
| 44 |
+
(oo 0.145)
|
| 45 |
+
(i 0.060)
|
| 46 |
+
(jh 0.097)
|
| 47 |
+
(ii 0.095)
|
| 48 |
+
(w 0.066)
|
| 49 |
+
(k 0.088)
|
| 50 |
+
(+ 0.036)
|
| 51 |
+
(y 0.051)
|
| 52 |
+
(l 0.067)
|
| 53 |
+
(zh 0.080)
|
| 54 |
+
(ng 0.072)
|
| 55 |
+
(m 0.070)
|
| 56 |
+
(z 0.079)
|
| 57 |
+
(## 0.256)
|
| 58 |
+
(au 0.162)
|
| 59 |
+
(a 0.118)
|
| 60 |
+
(n 0.065)
|
| 61 |
+
(o 0.102)
|
| 62 |
+
(ai 0.156)
|
| 63 |
+
(b 0.071)
|
| 64 |
+
(ou 0.129)
|
| 65 |
+
(ch 0.119)
|
| 66 |
+
(p 0.094)
|
| 67 |
+
(oi 0.165)
|
| 68 |
+
(# 0.040)
|
| 69 |
+
(e@ 0.131)
|
| 70 |
+
(d 0.052)
|
| 71 |
+
(dh 0.032)
|
| 72 |
+
(e 0.091)
|
| 73 |
+
(r 0.062)
|
| 74 |
+
(sh 0.101)
|
| 75 |
+
(@@ 0.149)
|
| 76 |
+
(ei 0.131)
|
| 77 |
+
(f 0.091)
|
| 78 |
+
(s 0.093)
|
| 79 |
+
(g 0.066)
|
| 80 |
+
(u@ 0.120)
|
| 81 |
+
(aa 0.173)
|
| 82 |
+
(t 0.073)
|
| 83 |
+
(th 0.080)
|
| 84 |
+
(@ 0.054)
|
| 85 |
+
))
|
| 86 |
+
|
| 87 |
+
(set! gsw_durs
|
| 88 |
+
'(
|
| 89 |
+
(# 0.200 0.100)
|
| 90 |
+
(h 0.061 0.028)
|
| 91 |
+
(i@ 0.141 0.061)
|
| 92 |
+
(u 0.067 0.024)
|
| 93 |
+
(uu 0.107 0.044)
|
| 94 |
+
(uh 0.087 0.025)
|
| 95 |
+
(v 0.051 0.019)
|
| 96 |
+
(oo 0.138 0.046)
|
| 97 |
+
(i 0.058 0.023)
|
| 98 |
+
(ii 0.092 0.035)
|
| 99 |
+
(w 0.054 0.023)
|
| 100 |
+
(jh 0.094 0.024)
|
| 101 |
+
(k 0.089 0.034)
|
| 102 |
+
(y 0.048 0.025)
|
| 103 |
+
(l 0.056 0.026)
|
| 104 |
+
(zh 0.077 0.030)
|
| 105 |
+
(ng 0.064 0.024)
|
| 106 |
+
(m 0.063 0.021)
|
| 107 |
+
(z 0.072 0.029)
|
| 108 |
+
(a 0.120 0.036)
|
| 109 |
+
(au 0.171 0.046)
|
| 110 |
+
(n 0.059 0.025)
|
| 111 |
+
(ou 0.134 0.039)
|
| 112 |
+
(b 0.073 0.021)
|
| 113 |
+
(o 0.094 0.037)
|
| 114 |
+
(ai 0.137 0.047)
|
| 115 |
+
(ch 0.128 0.039)
|
| 116 |
+
(oi 0.183 0.050)
|
| 117 |
+
(p 0.101 0.032)
|
| 118 |
+
(e@ 0.144 0.061)
|
| 119 |
+
(d 0.048 0.021)
|
| 120 |
+
(dh 0.031 0.016)
|
| 121 |
+
(e 0.092 0.035)
|
| 122 |
+
(r 0.053 0.025)
|
| 123 |
+
(sh 0.108 0.031)
|
| 124 |
+
(f 0.095 0.033)
|
| 125 |
+
(@@ 0.147 0.035)
|
| 126 |
+
(ei 0.130 0.042)
|
| 127 |
+
(s 0.102 0.037)
|
| 128 |
+
(u@ 0.140 0.057)
|
| 129 |
+
(th 0.093 0.050)
|
| 130 |
+
(g 0.064 0.021)
|
| 131 |
+
(aa 0.155 0.045)
|
| 132 |
+
(t 0.070 0.034)
|
| 133 |
+
(@ 0.046 0.020)
|
| 134 |
+
))
|
| 135 |
+
|
| 136 |
+
(provide 'mrpa_durs)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/mrpa_phones.scm
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;; ;;
|
| 3 |
+
;; Centre for Speech Technology Research ;;
|
| 4 |
+
;; University of Edinburgh, UK ;;
|
| 5 |
+
;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;; All Rights Reserved. ;;
|
| 7 |
+
;; ;;
|
| 8 |
+
;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;; the following conditions: ;;
|
| 14 |
+
;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;; conditions and the following disclaimer. ;;
|
| 16 |
+
;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;; derived from this software without specific prior written ;;
|
| 20 |
+
;; permission. ;;
|
| 21 |
+
;; ;;
|
| 22 |
+
;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;; THIS SOFTWARE. ;;
|
| 31 |
+
;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;
|
| 34 |
+
;; A definition of the mrpa phone set
|
| 35 |
+
;;
|
| 36 |
+
|
| 37 |
+
(defPhoneSet
|
| 38 |
+
mrpa
|
| 39 |
+
;;; Phone Features
|
| 40 |
+
(;; vowel or consonant
|
| 41 |
+
(vc + -)
|
| 42 |
+
;; vowel length: short long diphthong schwa
|
| 43 |
+
(vlng s l d a 0)
|
| 44 |
+
;; vowel height: high mid low
|
| 45 |
+
(vheight 1 2 3 0)
|
| 46 |
+
;; vowel frontness: front mid back
|
| 47 |
+
(vfront 1 2 3 0)
|
| 48 |
+
;; lip rounding
|
| 49 |
+
(vrnd + - 0)
|
| 50 |
+
;; consonant type: stop fricative affricate nasal lateral approximant
|
| 51 |
+
(ctype s f a n l r 0)
|
| 52 |
+
;; place of articulation: labial alveolar palatal labio-dental
|
| 53 |
+
;; dental velar glottal
|
| 54 |
+
(cplace l a p b d v g 0)
|
| 55 |
+
;; consonant voicing
|
| 56 |
+
(cvox + - 0)
|
| 57 |
+
)
|
| 58 |
+
;; Phone set members
|
| 59 |
+
(
|
| 60 |
+
(uh + s 2 3 - 0 0 0)
|
| 61 |
+
(e + s 2 1 - 0 0 0)
|
| 62 |
+
(a + s 3 1 - 0 0 0)
|
| 63 |
+
(o + s 2 3 + 0 0 0)
|
| 64 |
+
(i + s 1 1 - 0 0 0)
|
| 65 |
+
(u + s 1 3 + 0 0 0)
|
| 66 |
+
(ii + l 1 1 - 0 0 0)
|
| 67 |
+
(uu + l 1 3 + 0 0 0)
|
| 68 |
+
(oo + l 3 3 + 0 0 0)
|
| 69 |
+
(aa + l 3 3 - 0 0 0)
|
| 70 |
+
(@@ + l 2 2 - 0 0 0)
|
| 71 |
+
(ai + d 3 2 - 0 0 0)
|
| 72 |
+
(ei + d 2 1 - 0 0 0)
|
| 73 |
+
(oi + d 3 3 + 0 0 0)
|
| 74 |
+
(au + d 3 2 + 0 0 0)
|
| 75 |
+
(ou + d 2 2 - 0 0 0)
|
| 76 |
+
(e@ + d 2 1 - 0 0 0)
|
| 77 |
+
(i@ + d 1 1 - 0 0 0)
|
| 78 |
+
(u@ + d 3 1 + 0 0 0)
|
| 79 |
+
(@ + a 2 2 - 0 0 0)
|
| 80 |
+
(p - 0 0 0 0 s l -)
|
| 81 |
+
(t - 0 0 0 0 s a -)
|
| 82 |
+
(k - 0 0 0 0 s v -)
|
| 83 |
+
(b - 0 0 0 0 s l +)
|
| 84 |
+
(d - 0 0 0 0 s a +)
|
| 85 |
+
(g - 0 0 0 0 s v +)
|
| 86 |
+
(s - 0 0 0 0 f a -)
|
| 87 |
+
(z - 0 0 0 0 f a +)
|
| 88 |
+
(sh - 0 0 0 0 f p -)
|
| 89 |
+
(zh - 0 0 0 0 f p +)
|
| 90 |
+
(f - 0 0 0 0 f b -)
|
| 91 |
+
(v - 0 0 0 0 f b +)
|
| 92 |
+
(th - 0 0 0 0 f d -)
|
| 93 |
+
(dh - 0 0 0 0 f d +)
|
| 94 |
+
(ch - 0 0 0 0 a p -)
|
| 95 |
+
(jh - 0 0 0 0 a p +)
|
| 96 |
+
(h - 0 0 0 0 f g -)
|
| 97 |
+
(m - 0 0 0 0 n l +)
|
| 98 |
+
(n - 0 0 0 0 n a +)
|
| 99 |
+
(ng - 0 0 0 0 n v +)
|
| 100 |
+
(l - 0 0 0 0 l a +)
|
| 101 |
+
(y - 0 0 0 0 r p +)
|
| 102 |
+
(r - 0 0 0 0 r a +)
|
| 103 |
+
(w - 0 0 0 0 r l +)
|
| 104 |
+
(# - 0 0 0 0 0 0 -)
|
| 105 |
+
)
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
(PhoneSet.silences '(#))
|
| 109 |
+
|
| 110 |
+
(provide 'mrpa_phones)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/multisyn.scm
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 2003, 2004 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Multisyn top level scheme code (Korin Richmond and Rob Clark)
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
; Requires
|
| 38 |
+
(require_module 'UniSyn)
|
| 39 |
+
(require_module 'MultiSyn)
|
| 40 |
+
(require 'multisyn_pauses)
|
| 41 |
+
(require 'target_cost)
|
| 42 |
+
|
| 43 |
+
;; use a global parameter to specify which UnitSelection voice
|
| 44 |
+
;; to use to synthesise a given utterance for now, because the
|
| 45 |
+
;; standard Festival synthesis mainline doesn't accept a voice
|
| 46 |
+
;; parameter. (This should be set to the current voice object)
|
| 47 |
+
(defvar currentMultiSynVoice nil)
|
| 48 |
+
(defvar relp t)
|
| 49 |
+
(defvar flattenVoice nil)
|
| 50 |
+
|
| 51 |
+
; extract utt list from a .data file
|
| 52 |
+
(define (load_utt_list filename)
  "(load_utt_list FILENAME)
Load a festvox .data file and return the list made of the first
element of each entry.  The list is built by consing, so the elements
come back in the reverse of their order in the file."
  (let ((pending (load filename t))
        (ids nil))
    (while pending
      (set! ids (cons (car (car pending)) ids))
      (set! pending (cdr pending)))
    ids))
|
| 63 |
+
|
| 64 |
+
;; SynthType definition, main entry point.
|
| 65 |
+
|
| 66 |
+
(defSynthType MultiSyn
  ;; Multisyn unit selection synthesis.  The body works on the
  ;; utterance `utt', which is not defined here (presumably supplied by
  ;; defSynthType -- confirm against its definition).
  (defvar MultiSyn_module_hooks nil)
  (Param.def "unisyn.window_name" "hanning")
  (Param.def "unisyn.window_factor" 1.0)
  ;; Unisyn requires these to be set.
  (set! us_abs_offset 0.0)
  (set! us_rel_offset 0.0)

  ;; Hooks for processing of diphone names.
  (apply_hooks MultiSyn_module_hooks utt)

  ;; Find an appropriate unit sequence and put the synthesis
  ;; parameters in the Unit relation of the utterance structure.
  (voice.getUnits currentMultiSynVoice utt)

  ;; Concatenate the selected units.
  (us_unit_concat utt)

  (utt.relation.create utt 'SourceSegments)

  ;; do_prosmod and pm_end are deliberately left as global variables,
  ;; exactly as in the original code.
  (set! do_prosmod (du_voice.prosodic_modification currentMultiSynVoice))

  (cond
   (do_prosmod
    ;; Build an f0 relation from the targets when none exists yet.
    (if (not (member 'f0 (utt.relationnames utt)))
        (targets_to_f0 utt))
    ;; Temporary fix: place the pitchmark end 0.02 after the end of the
    ;; last segment, or at 0.02 when the utterance has no segments.
    (let ((final_seg (utt.relation.last utt 'Segment)))
      (set! pm_end
            (if final_seg
                (+ (item.feat final_seg "end") 0.02)
                0.02)))
    (us_f0_to_pitchmarks utt 'f0 'TargetCoef pm_end)
    (us_mapping utt 'segment_single))
   (t
    ;; No prosodic modification: reuse the source coefficients as the
    ;; target and map linearly.
    (utt.copy_relation utt 'SourceCoef 'TargetCoef)
    (us_mapping utt "linear")))

  ;; Generate the waveform; specify something else if you don't want lpc.
  (us_generate_wave utt 'lpc))
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
; target cost scheme code
|
| 112 |
+
(define (targetcost it1 it2)
  ;; Scheme-level target cost between items IT1 and IT2.  This is a thin
  ;; wrapper that hands both items straight to Default_Target_Cost.
  (Default_Target_Cost it1
                       it2))
|
| 114 |
+
|
| 115 |
+
; Evil function which writes the functions to actually load and switch new voices.
|
| 116 |
+
(define (make_voice_definition name srate config_function backoff_rules data_dir config)
  "(make_voice_definition NAME SRATE CONFIG_FUNCTION BACKOFF_RULES DATA_DIR CONFIG)
Create the function definitions to load and unload a voice.

Three globals are defined by evaluating generated code:
  VOICE_<NAME>       variable holding the loaded voice (initially nil);
  voice_<NAME>       function that loads the voice on first use and
                     makes it the current multisyn voice;
  free_voice_<NAME>  function that resets the variable to nil, signalling
                     an error if the voice is not loaded or is current.
Always returns nil."
  (let ((voice_fn (intern (string-append "voice_" name)))
        (free_fn (intern (string-append "free_voice_" name)))
        (pre_config_fn (intern (string-append config_function "_pre")))
        (config_fn (intern config_function))
        (voice_var (intern (upcase (string-append "voice_" name)))))

    ;; (defvar VOICE_<NAME> nil)
    (eval (list 'defvar voice_var nil))

    ;; (define (voice_<NAME>) ...)
    (eval (list 'define (list voice_fn)
                ;; Optional pre-configuration hook: called with the voice
                ;; variable when <CONFIG_FUNCTION>_pre has a non-nil value.
                (list 'if pre_config_fn
                      (list pre_config_fn voice_var))
                ;; Load the voice modules only the first time round.
                (list 'if (list 'null voice_var)
                      (list 'set! voice_var
                            (list 'multisyn_load_voice_modules
                                  (list 'quote name)
                                  srate
                                  (list 'quote backoff_rules)
                                  data_dir
                                  (list 'quote config))))
                (list config_fn voice_var)
                (list 'set! 'current-voice (list 'quote name))
                (list 'define_current_voice_reset)
                (list 'set! 'currentMultiSynVoice voice_var)))

    ;; (define (free_voice_<NAME>) ...)
    (eval (list 'define
                (list free_fn)
                (list 'cond
                      (list (list 'null voice_var)
                            (list 'error "Voice not currently loaded!"))
                      (list (list 'eq? 'currentMultiSynVoice voice_var)
                            (list 'error "Can't free current voice!"))
                      ;; set! must be quoted like every other generated
                      ;; form, so that the emitted code contains the
                      ;; symbol rather than the evaluated value of set!.
                      (list 't (list 'set! voice_var 'nil))))))
  nil)
|
| 152 |
+
|
| 153 |
+
(define (multisyn_load_voice_modules name srate backoff_rules base_dir module_list)
  "(multisyn_load_voice_modules NAME SRATE BACKOFF_RULES BASE_DIR MODULE_LIST)
Build, initialise and return a multisyn voice called NAME.  Each entry of
MODULE_LIST supplies the module directories (its first element) and an
utterance list file relative to BASE_DIR (its second element)."
  (let ((voice nil)
        (remaining module_list))
    ;; The first entry creates the voice; later entries are added to it.
    (while remaining
      (let ((dirs (car (car remaining)))
            (utt_list (load_utt_list
                       (path-append base_dir (cadr (car remaining))))))
        (if voice
            (voice.addModule voice utt_list dirs srate)
            (set! voice (make_du_voice utt_list dirs srate))))
      (set! remaining (cdr remaining)))
    (voice.setName voice name)
    ;; The global flattenVoice selects the "flat" target cost.
    (du_voice.setTargetCost voice (if flattenVoice "flat" t))
    (du_voice.setJoinCost voice t)
    (format stderr "Please wait: Initialising multisyn voice.\n")
    (voice.init voice)
    (format stderr " Voice loaded successfully!\n")
    (du_voice.set_ob_pruning_beam voice 0.25)
    (du_voice.set_pruning_beam voice 0.25)
    (du_voice.setDiphoneBackoff voice backoff_rules)
    voice))
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
(define (define_current_voice_reset)
  "(define_current_voice_reset)
Re-define (current_voice_reset) so that calling it runs
(multisyn_reset_globals)."
  (eval '(define (current_voice_reset)
           (multisyn_reset_globals))))
|
| 188 |
+
|
| 189 |
+
(define (multisyn_reset_globals)
  "(multisyn_reset_globals)
Reset multisyn specific global variables: sets the parameter
unisyn.window_symmetric to 1."
  (Param.set 'unisyn.window_symmetric
             1))
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
(provide 'multisyn)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/multisyn_pauses.scm
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 2003, 2004 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Multisyn Pause module (Rob Clark and Korin Richmond)
|
| 35 |
+
;;;
|
| 36 |
+
;;;
|
| 37 |
+
|
| 38 |
+
(defvar BB_Pause "B_300")
|
| 39 |
+
(defvar B_Pause "B_150")
|
| 40 |
+
(defvar mB_Pause "B_150") ; shouldn't be used
|
| 41 |
+
|
| 42 |
+
(define (MultiSyn_Pauses utt)
  "(MultiSyn_Pauses UTT)
Predict pause insertion in a Multisyn unit selection utterance structure.
Inserts an initial pause, then a pause after every word whose pbreak
feature is BB, mB or B, and finally removes words whose pos is punc or
fpunc.  Returns UTT."
  (let ((words (utt.relation.items utt 'Word)))
    (if words
        (begin
          (insert_initial_pause utt)   ;; always have a start pause
          ;; First pass: insert the pause matching each word's break level.
          (mapcar
           (lambda (w)
             (let ((pbreak (item.feat w "pbreak")))
               (cond
                ((string-equal pbreak "BB")
                 (unitselection_pause_insert w BB_Pause))
                ((string-equal pbreak "mB")
                 (unitselection_pause_insert w mB_Pause))
                ((string-equal pbreak "B")
                 (unitselection_pause_insert w B_Pause)))))
           words)
          ;; The embarrassing bit.  Remove any words labelled as punc or
          ;; fpunc.
          (mapcar
           (lambda (w)
             (let ((pos (item.feat w "pos")))
               (if (or (string-equal "punc" pos)
                       (string-equal "fpunc" pos))
                   (let ((pbreak (item.feat w "pbreak"))
                         (wp (item.relation w 'Phrase)))
                     ;; Hand a B/BB break on to the previous word, if
                     ;; there is one, before this word disappears.
                     (if (and (string-matches pbreak "BB?")
                              (item.relation.prev w 'Word))
                         (item.set_feat
                          (item.relation.prev w 'Word) "pbreak" pbreak))
                     (item.relation.remove w 'Word)
                     ;; can't refer to w as we've just deleted it
                     (item.relation.remove wp 'Phrase)))))
           words)))
    ;(utt.relation.print utt 'Word)
    ;(utt.relation.print utt 'Segment)
    utt))
|
| 81 |
+
|
| 82 |
+
(define (unitselection_pause_insert word pause)
  "(unitselection_pause_insert WORD PAUSE)
Insert the segments needed for a pause after the last segment of WORD:
a silence, and, unless that silence ends the relation, a PAUSE segment
followed by another silence."
  (let ((silence (car (cadr (car (PhoneSet.description '(silences))))))
        (seg (item.relation (find_last_seg word) 'Segment)))
    ;; Make sure the segment after SEG is a silence.
    (if (not (and (item.next seg)
                  (string-equal (item.name (item.next seg)) silence)))
        (item.insert seg (list silence) 'after))
    ;; Insert the pause after that silence unless it is the end.
    (let ((sil_item (item.next seg)))
      (if (item.next sil_item)
          (let ((pause_item (item.insert sil_item (list pause) 'after)))
            ;; If what follows the pause is not a silence, add one.
            (if (not (string-equal (item.name (item.next pause_item))
                                   silence))
                (item.insert pause_item (list silence) 'after)))))))
|
| 101 |
+
|
| 102 |
+
(provide 'multisyn_pauses)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/radio_phones_multisyn.scm
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997,2003, 2004 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; A definition of the radio phone set used in the BU RADIO FM
|
| 35 |
+
;;; corpus, some people call this the darpa set. This one
|
| 36 |
+
;;; has the closures removed and pauses added for multisyn
|
| 37 |
+
;;;
|
| 38 |
+
|
| 39 |
+
(defPhoneSet
|
| 40 |
+
radio_multisyn
|
| 41 |
+
;;; Phone Features
|
| 42 |
+
(;; vowel or consonant
|
| 43 |
+
(vc + -)
|
| 44 |
+
;; vowel length: short long diphthong schwa
|
| 45 |
+
(vlng s l d a 0)
|
| 46 |
+
;; vowel height: high mid low
|
| 47 |
+
(vheight 1 2 3 0)
|
| 48 |
+
;; vowel frontness: front mid back
|
| 49 |
+
(vfront 1 2 3 0)
|
| 50 |
+
;; lip rounding
|
| 51 |
+
(vrnd + - 0)
|
| 52 |
+
;; consonant type: stop fricative affricate nasal lateral approximant
|
| 53 |
+
(ctype s f a n l r 0)
|
| 54 |
+
;; place of articulation: labial alveolar palatal labio-dental
|
| 55 |
+
;; dental velar glottal
|
| 56 |
+
(cplace l a p b d v g 0)
|
| 57 |
+
;; consonant voicing
|
| 58 |
+
(cvox + - 0)
|
| 59 |
+
)
|
| 60 |
+
;; Phone set members
|
| 61 |
+
(
|
| 62 |
+
;; multisyn extras
|
| 63 |
+
(# - 0 0 0 0 0 0 -) ;; silence ...
|
| 64 |
+
(B_10 - 0 0 0 0 0 0 -) ;; Pauses
|
| 65 |
+
(B_20 - 0 0 0 0 0 0 -) ;; Pauses
|
| 66 |
+
(B_30 - 0 0 0 0 0 0 -) ;; Pauses
|
| 67 |
+
(B_40 - 0 0 0 0 0 0 -) ;; Pauses
|
| 68 |
+
(B_50 - 0 0 0 0 0 0 -) ;; Pauses
|
| 69 |
+
(B_100 - 0 0 0 0 0 0 -) ;; Pauses
|
| 70 |
+
(B_150 - 0 0 0 0 0 0 -) ;; Pauses
|
| 71 |
+
(B_200 - 0 0 0 0 0 0 -) ;; Pauses
|
| 72 |
+
(B_250 - 0 0 0 0 0 0 -) ;; Pauses
|
| 73 |
+
(B_300 - 0 0 0 0 0 0 -) ;; Pauses
|
| 74 |
+
(B_400 - 0 0 0 0 0 0 -) ;; Pauses
|
| 75 |
+
|
| 76 |
+
;; Note these features were set by awb so they are wrong !!!
|
| 77 |
+
(aa + l 3 3 - 0 0 0) ;; father
|
| 78 |
+
(ae + s 3 1 - 0 0 0) ;; fat
|
| 79 |
+
(ah + s 2 2 - 0 0 0) ;; but
|
| 80 |
+
(ao + l 3 3 + 0 0 0) ;; lawn
|
| 81 |
+
(aw + d 3 2 - 0 0 0) ;; how
|
| 82 |
+
(ax + a 2 2 - 0 0 0) ;; about
|
| 83 |
+
(axr + a 2 2 - r a +)
|
| 84 |
+
(ay + d 3 2 - 0 0 0) ;; hide
|
| 85 |
+
(b - 0 0 0 0 s l +)
|
| 86 |
+
(ch - 0 0 0 0 a p -)
|
| 87 |
+
(d - 0 0 0 0 s a +)
|
| 88 |
+
(dh - 0 0 0 0 f d +)
|
| 89 |
+
(dx - a 0 0 0 s a +) ;; ??
|
| 90 |
+
(eh + s 2 1 - 0 0 0) ;; get
|
| 91 |
+
(el + s 0 0 0 l a +)
|
| 92 |
+
(em + s 0 0 0 n l +)
|
| 93 |
+
(en + s 0 0 0 n a +)
|
| 94 |
+
(er + a 2 2 - r 0 0) ;; always followed by r (er-r == axr)
|
| 95 |
+
(ey + d 2 1 - 0 0 0) ;; gate
|
| 96 |
+
(f - 0 0 0 0 f b -)
|
| 97 |
+
(g - 0 0 0 0 s v +)
|
| 98 |
+
(hh - 0 0 0 0 f g -)
|
| 99 |
+
(hv - 0 0 0 0 f g +)
|
| 100 |
+
(ih + s 1 1 - 0 0 0) ;; bit
|
| 101 |
+
(iy + l 1 1 - 0 0 0) ;; beet
|
| 102 |
+
(jh - 0 0 0 0 a p +)
|
| 103 |
+
(k - 0 0 0 0 s v -)
|
| 104 |
+
(l - 0 0 0 0 l a +)
|
| 105 |
+
(m - 0 0 0 0 n l +)
|
| 106 |
+
(n - 0 0 0 0 n a +)
|
| 107 |
+
(nx - 0 0 0 0 n d +) ;; ???
|
| 108 |
+
(ng - 0 0 0 0 n v +)
|
| 109 |
+
(ow + d 2 3 + 0 0 0) ;; lone
|
| 110 |
+
(oy + d 2 3 + 0 0 0) ;; toy
|
| 111 |
+
(p - 0 0 0 0 s l -)
|
| 112 |
+
(r - 0 0 0 0 r a +)
|
| 113 |
+
(s - 0 0 0 0 f a -)
|
| 114 |
+
(sh - 0 0 0 0 f p -)
|
| 115 |
+
(t - 0 0 0 0 s a -)
|
| 116 |
+
(th - 0 0 0 0 f d -)
|
| 117 |
+
(uh + s 1 3 + 0 0 0) ;; full
|
| 118 |
+
(uw + l 1 3 + 0 0 0) ;; fool
|
| 119 |
+
(v - 0 0 0 0 f b +)
|
| 120 |
+
(w - 0 0 0 0 r l +)
|
| 121 |
+
(y - 0 0 0 0 r p +)
|
| 122 |
+
(z - 0 0 0 0 f a +)
|
| 123 |
+
(zh - 0 0 0 0 f p +)
|
| 124 |
+
(pau - 0 0 0 0 0 0 -)
|
| 125 |
+
(h# - 0 0 0 0 0 0 -)
|
| 126 |
+
(brth - 0 0 0 0 0 0 -)
|
| 127 |
+
)
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
(PhoneSet.silences '(# pau h# brth))
|
| 131 |
+
|
| 132 |
+
(provide 'radio_phones_multisyn)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/send_xwaves.scm
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 2003, 2004 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; xwaves interface for festival for multisyn (Rob Clark)
|
| 35 |
+
;;;
|
| 36 |
+
;;; This is never loaded by default.
|
| 37 |
+
;;; You'd need to change the paths here for this to currently work outside of CSTR.
|
| 38 |
+
;;; If anyone else ends up using it let me know and I'll make it more robust.
|
| 39 |
+
;;;
|
| 40 |
+
|
| 41 |
+
;; Send commands to xwaves
|
| 42 |
+
|
| 43 |
+
(defvar send_xwaves_command "/cstr/linux/entropic/esps531.linux/bin/send_xwaves")
|
| 44 |
+
(defvar spectrogram_command "/cstr/linux/entropic/esps531.linux/bin/sgram")
|
| 45 |
+
(defvar data_path "/projects/cougar/data/cstr/nina")
|
| 46 |
+
|
| 47 |
+
(set! xw_object_count 0)
|
| 48 |
+
(set! xw_active_list nil)
|
| 49 |
+
|
| 50 |
+
;;
|
| 51 |
+
;; Display a synthesised utterance
|
| 52 |
+
;;
|
| 53 |
+
(define (xwaves_display_utterance utt)
  "(xwaves_display_utterance UTT)
Display join and target information for an utterance.  Saves the
waveform of UTT, a spectrogram generated from it, and the segment and
unit selection label files to temporary files, sends them to xwaves,
and registers every temporary file under a freshly generated object
name so that xwaves_close_windows can delete them later."
  (let ((object (xw_name_object))
        wavfile specfile segfile diphfile joinfile targfile sourcefile timefile)

    ;; One temporary file per display element; all share the object name.
    (set! wavfile (xw_make_tmp_filename object))
    (set! specfile (xw_make_tmp_filename object))
    (set! segfile (xw_make_tmp_filename object))
    (set! diphfile (xw_make_tmp_filename object))
    (set! joinfile (xw_make_tmp_filename object))
    (set! targfile (xw_make_tmp_filename object))
    (set! sourcefile (xw_make_tmp_filename object))
    (set! timefile (xw_make_tmp_filename object))

    ;; display resulting waveform
    (utt.save.wave utt wavfile 'riff)
    (xwaves_show_general object wavfile 1500 200 10 10)
    ;; display resulting spectrogram
    (xw_genspec wavfile specfile)
    (xwaves_show_general object specfile 1500 400 10 260)
    ;; segments
    (utt.save.unit_selection_segs utt segfile)
    (xwaves_show_labels object segfile specfile)
    ;; unit information
    (utt.save.unit_selection_info utt diphfile joinfile targfile sourcefile timefile)
    (xwaves_show_labels object timefile specfile)
    (xwaves_show_labels object sourcefile specfile)
    (xwaves_show_labels object targfile specfile)
    (xwaves_show_labels object joinfile specfile)
    (xwaves_show_labels object diphfile specfile)
    ;; mark files: targfile is included so that it is deleted together
    ;; with the other temporary files when the object is cleared.
    (xw_register_active
     object
     (list wavfile specfile segfile diphfile joinfile targfile sourcefile timefile))))
|
| 88 |
+
|
| 89 |
+
;;
|
| 90 |
+
;; Edit a diphone source
|
| 91 |
+
;;
|
| 92 |
+
|
| 93 |
+
(define (xwaves_edit_diphone utt id)
  "(xwaves_edit_diphone UTT ID)
Access the source diphone for label correction.  Looks up the item in
the Unit relation of UTT whose \"id\" feature is _ID, loads the label
file of the utterance it was taken from (located under data_path) and
finds the source segment whose end time equals the unit's source_end.
Signals an error if the unit or the segment cannot be found.  Returns
the start time computed for the unit."
  (let ((diphone nil)
        ;; Scratch utterance for the source labels.  It must not be
        ;; called utt, otherwise it hides the utterance being searched.
        (source_utt (Utterance Text nil))
        (seg nil)
        (start 0)
        uttname segfilename wavefilename segs end)

    ;; find unit.
    (mapcar
     (lambda (unit)
       (if (string-equal (format nil "_%s" id) (item.feat unit "id"))
           (set! diphone unit)))
     (utt.relation.items utt 'Unit))
    (if (null diphone)
        (error (format nil "Diphone with id _%s not found in utterance." id)))
    (set! uttname (item.feat diphone "source_utt"))
    (set! end (item.feat diphone "source_end"))

    (set! segfilename (format nil "%s/lab/%s.lab" data_path uttname))
    ;; NOTE(review): wavefilename is computed but not used yet; the
    ;; function looks unfinished upstream.
    (set! wavefilename (format nil "%s/wav/%s.wav" data_path uttname))
    (utt.relation.load source_utt 'Segment segfilename)

    ;; Walk the source segments until one ends where the unit ends.
    (set! segs (utt.relation.items source_utt 'Segment))
    (while (and segs
                (not (equal? (item.feat (car segs) "end") end)))
      (set! segs (cdr segs)))

    (if (null segs)
        (error (format nil "No segment ending at %s found in %s." end segfilename)))
    (set! seg (car segs))

    (if (item.prev diphone)
        (set! start (item.feat seg "start"))
        (set! start 0))
    start))
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
;;
|
| 139 |
+
;; Interface with xwaves.
|
| 140 |
+
;;
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
(define (xwaves_show_general object file width height xpos ypos)
  "(xwaves_show_general OBJECT FILE WIDTH HEIGHT XPOS YPOS)
Send xwaves a make command that displays FILE under the name OBJECT in
a window of WIDTH by HEIGHT located at XPOS, YPOS."
  (let ((command
         (format nil
                 "make name %s file %s width %d height %d loc_x %d loc_y %d"
                 object file width height xpos ypos)))
    (xw_send command)))
|
| 147 |
+
|
| 148 |
+
(define (xwaves_show_wave object file)
  "(xwaves_show_wave OBJECT FILE)
Display the waveform in FILE under the name OBJECT, using the fixed
window geometry below."
  (let ((width 1500)
        (height 200)
        (xpos 10)
        (ypos 10))
    (xwaves_show_general object file width height xpos ypos)))
|
| 152 |
+
|
| 153 |
+
(define (xwaves_show_labels object file attachto)
  "(xwaves_show_labels OBJECT FILE ATTACHTO)
Display the label file FILE for OBJECT against the signal ATTACHTO,
then send the command activating fields 1 to 5."
  (let ((make_command
         (format nil "send make signal %s name %s file %s color 125"
                 attachto object file))
        (activate_command "send activate fields 1 2 3 4 5"))
    (xw_send make_command)
    (xw_send activate_command)))
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
(define (xwaves_attach_xlabel)
  "(xwaves_attach_xlabel)
Send xwaves the command that attaches the xlabel function."
  (let ((command "attach function xlabel"))
    (xw_send command)))
|
| 164 |
+
|
| 165 |
+
(define (xwaves_set_markers object left right)
  "(xwaves_set_markers OBJECT LEFT RIGHT)
Set the left marker time of OBJECT to LEFT and then its right marker
time to RIGHT."
  (let ((left_command (format nil "%s set l_marker_time %f" object left))
        (right_command (format nil "%s set r_marker_time %f" object right)))
    (xw_send left_command)
    (xw_send right_command)))
|
| 170 |
+
|
| 171 |
+
(define (xwaves_bracket_markers object file)
  "(xwaves_bracket_markers OBJECT FILE)
Send OBJECT the bracket command for FILE."
  (let ((command (format nil "%s bracket file %s " object file)))
    (xw_send command)))
|
| 175 |
+
|
| 176 |
+
(define (xwaves_close_windows object)
  "(xwaves_close_windows OBJECT)
Close the xwaves windows belonging to OBJECT, or every window when
OBJECT is nil, and then clear the matching entries of the active list."
  ;; A bare kill closes everything; kill name closes a single object.
  (xw_send (if object
               (format nil "kill name %s" object)
               "kill"))
  (xw_clear_active_list object))
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
(define (xwaves_wait)
  "(xwaves_wait)
Send xwaves the pause command, to wait for its continue signal."
  (let ((command "pause"))
    (xw_send command)))
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
;;
|
| 194 |
+
;; Object naming
|
| 195 |
+
;;
|
| 196 |
+
(define (xw_name_object)
  "(xw_name_object)
Return a new object name made of \"obj\" followed by the current value
of xw_object_count, and increment that counter for the next call."
  (let ((name (string-append "obj" xw_object_count)))
    (set! xw_object_count (+ 1 xw_object_count))
    name))
|
| 203 |
+
|
| 204 |
+
;;
|
| 205 |
+
;; Temp file lists
|
| 206 |
+
;;
|
| 207 |
+
|
| 208 |
+
(define (xw_clear_active_list object)
  "(xw_clear_active_list OBJECT)
Delete the files registered for OBJECT and drop its entries from
xw_active_list.  When OBJECT is nil every entry is cleared.  Entries
that are kept end up in reverse order.  Always returns nil."
  (let ((kept nil)
        (pending xw_active_list)
        entry)
    (while pending
      ;; Each entry has the form (name (file1 file2 ...)).
      (set! entry (car pending))
      (if (or (null object)
              (string-equal object (car entry)))
          (mapcar
           (lambda (file)
             (delete-file file))
           (cadr entry))
          (set! kept (cons entry kept)))
      (set! pending (cdr pending)))
    (set! xw_active_list kept))
  nil)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
(define (xw_register_active object flist)
  "(xw_register_active OBJECT FLIST)
Push the entry (OBJECT FLIST) onto the front of xw_active_list, where
FLIST is the list of file names belonging to OBJECT.  Returns nil."
  (let ((entry (list object flist)))
    (set! xw_active_list (cons entry xw_active_list)))
  nil)
|
| 233 |
+
|
| 234 |
+
(define (xw_make_tmp_filename object)
  "(xw_make_tmp_filename OBJECT)
Return a temporary file name consisting of the result of
make_tmp_filename, an underscore and OBJECT."
  (let ((base (make_tmp_filename)))
    (format nil "%s_%s" base object)))
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
;;
|
| 241 |
+
;; Low level xwaves stuff.
|
| 242 |
+
;;
|
| 243 |
+
|
| 244 |
+
(define (xw_genspec wavfile specfile)
  "(xw_genspec WAVFILE SPECFILE)
Run spectrogram_command through the shell to generate the spectrogram
file SPECFILE from WAVFILE."
  (let ((command
         (format nil "%s -dHAMMING -o8 -E0.94 -S2 -w8 %s %s\n"
                 spectrogram_command wavfile specfile)))
    (system command)))
|
| 248 |
+
|
| 249 |
+
(define (xw_send command)
  "(xw_send COMMAND)
Pass COMMAND to xwaves by running send_xwaves_command through the
shell."
  (let ((shell_line (format nil "%s %s\n" send_xwaves_command command)))
    (system shell_line)))
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
;;
|
| 257 |
+
;; General Festival stuff.
|
| 258 |
+
;;
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
(define (utt.save.unit_selection_segs utt filename)
  "(utt.save.unit_selection_segs UTT FILENAME)
Write the segments of UTT to FILENAME in xlabel format: a # header
line followed by one line per Segment item giving its source_end and
name features.  Returns UTT."
  (let ((fd (fopen filename "w"))
        rows)
    (format fd "#\n")
    ;; Each row is (source_end name) for one segment.
    (set! rows (utt.features utt 'Segment '(source_end name)))
    (while rows
      (format fd "%2.4f 100 %s\n" (car (car rows)) (cadr (car rows)))
      (set! rows (cdr rows)))
    (fclose fd)
    utt))
|
| 272 |
+
|
| 273 |
+
(define (utt.save.unit_selection_info utt diphfile joinfile targfile sourcefile timefile)
  "(utt.save.unit_selection_info UTT DIPHFILE JOINFILE TARGFILE SOURCEFILE TIMEFILE)
Write five xlabel files describing the Unit relation of UTT.  Every
file starts with a # header line and then has one line per unit, keyed
on the unit's end feature:
  DIPHFILE    unit name, followed by * when the next unit comes from a
              different source utterance
  JOINFILE    join_cost of the next unit (0 for the last unit)
  TARGFILE    target_cost of the unit
  SOURCEFILE  source_utt of the unit
  TIMEFILE    source_end of the unit
Returns UTT."
  (let ((fdd (fopen diphfile "w"))
        (fdj (fopen joinfile "w"))
        (fdt (fopen targfile "w"))
        (fds (fopen sourcefile "w"))
        (fdx (fopen timefile "w")))
    ;; xlabel header line for each output file.
    (mapcar
     (lambda (fd) (format fd "#\n"))
     (list fdd fdj fdt fds fdx))
    (mapcar
     (lambda (unit)
       (let ((next_unit (item.next unit))
             (end_time (item.feat unit 'end))
             (source (item.feat unit 'source_utt)))
         (format fdd "%2.4f 100 %s %s\n"
                 end_time
                 (item.feat unit 'name)
                 ;; Flag joins between units from different utterances.
                 (if (and next_unit
                          (not (string-equal
                                source
                                (item.feat next_unit 'source_utt))))
                     "*"
                     ""))
         (format fdj "%2.4f 100 %s\n"
                 end_time
                 (if next_unit
                     (item.feat next_unit 'join_cost)
                     0))
         (format fdt "%2.4f 100 %s\n"
                 end_time
                 (item.feat unit 'target_cost))
         (format fds "%2.4f 100 %s\n"
                 end_time
                 source)
         (format fdx "%2.4f 100 %s\n"
                 end_time
                 (item.feat unit 'source_end))))
     (utt.relation.items utt 'Unit))
    (fclose fdd)
    (fclose fdj)
    (fclose fdt)
    (fclose fds)
    (fclose fdx)
    utt))
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/multisyn/target_cost.scm
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 2003, 2004 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Multisyn scheme target cost (Rob Clark and Korin Richmond)
|
| 35 |
+
;;;
|
| 36 |
+
;;;
|
| 37 |
+
|
| 38 |
+
(define (Default_Target_Cost targ cand)
  "(Default_Target_Cost TARG CAND)
A default target cost function: the sum of tc_eval_row over every row
of target_matrix for the pair TARG and CAND, divided by
target_matrix_weight."
  (let ((total 0)
        (rows target_matrix))
    (while rows
      (set! total (+ total (tc_eval_row (car rows) targ cand)))
      (set! rows (cdr rows)))
    (/ total target_matrix_weight)))
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
(define (tc_eval_row row targ cand)
  "(tc_eval_row ROW TARG CAND)
Evaluate one target matrix row.  ROW has the form (weight function);
the result is weight times the value of function applied to TARG and
CAND."
  (* (car row)
     (eval (list (cadr row) targ cand))))
|
| 58 |
+
|
| 59 |
+
;;
|
| 60 |
+
;; Target cost Matrix
|
| 61 |
+
;; '(weight function)
|
| 62 |
+
|
| 63 |
+
(define (get_matrix_weight m)
  "(get_matrix_weight M)
Return the sum of the weights (the first element of each row) of the
target cost matrix M; 0 for an empty matrix."
  (let ((total 0)
        (rows m))
    (while rows
      (set! total (+ total (car (car rows))))
      (set! rows (cdr rows)))
    total))
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
(set! test_matrix_max_weight 1)
|
| 73 |
+
(set! test_matrix
|
| 74 |
+
'(
|
| 75 |
+
(10 tc_stress )
|
| 76 |
+
(5 tc_syl_pos )
|
| 77 |
+
(5 tc_word_pos)
|
| 78 |
+
(6 tc_partofspeech)
|
| 79 |
+
(7 tc_phrase_pos)
|
| 80 |
+
(4 tc_left_context)
|
| 81 |
+
(3 tc_right_context)
|
| 82 |
+
(25 tc_bad_f0) ;; set to equal 1/3 of total cost (so high because interaction with join)
|
| 83 |
+
; (0 tc_segment_score) ;; was 4. turned off until utterances are built for this.
|
| 84 |
+
(10 tc_bad_duration) ;; was 6
|
| 85 |
+
))
|
| 86 |
+
|
| 87 |
+
(set! test_matrix_weight (* test_matrix_max_weight (get_matrix_weight test_matrix)))
|
| 88 |
+
|
| 89 |
+
(set! target_matrix test_matrix)
|
| 90 |
+
(set! target_matrix_weight test_matrix_weight)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
;;
|
| 95 |
+
;; tc_stress
|
| 96 |
+
;;
|
| 97 |
+
;; Compares stress on any vowel which form part of the diphone. stress
|
| 98 |
+
;; conditions must match for a zero target cost.
|
| 99 |
+
;;
|
| 100 |
+
|
| 101 |
+
(define (tc_stress targ cand)
  "(tc_stress TARG CAND)
Target cost for stress.  Returns 0 when the stress patterns match and
1 on a mismatch; only unstressed (0) versus stressed (> 0) is compared.
The check is made for TARG itself and for the item following it, in
each case only when the target item is a non-silence vowel."
  (let ((stress_differs
         ;; True when exactly one of candidate/target is unstressed (0)
         ;; for the stress feature named by FEAT.
         (lambda (feat)
           (let ((cand_stress (item.feat cand feat))
                 (targ_stress (item.feat targ feat)))
             (or (and (eq? cand_stress 0) (> targ_stress 0))
                 (and (eq? targ_stress 0) (> cand_stress 0))))))
        (c 0))
    ;; First segment of the diphone.
    (if (and (not (phone_is_silence (item.feat targ 'name)))
             (my_is_vowel targ)
             (stress_differs "R:SylStructure.parent.stress"))
        (set! c 1))
    ;; Second segment of the diphone.
    (if (and (not (phone_is_silence (item.feat targ 'n.name)))
             (my_is_vowel (item.next targ))
             (stress_differs "n.R:SylStructure.parent.stress"))
        (set! c 1))
    c))
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
;;
|
| 135 |
+
;; tc_syl_position
|
| 136 |
+
;;
|
| 137 |
+
;; Find and compare diphone position in syllabic structure.
|
| 138 |
+
;; Values are: inter - diphone crosses syllable boundary.
|
| 139 |
+
;; initial - diphone is syllable initial.
|
| 140 |
+
;; medial - diphone is syllable medial
|
| 141 |
+
;; final - diphone is syllable final
|
| 142 |
+
;; returns 0 for a match 1 for a mismatch.
|
| 143 |
+
;;
|
| 144 |
+
(define (tc_syl_pos targ cand)
  "(tc_syl_pos TARG CAND)
Score position in syllable.  Classifies TARG and CAND as inter,
initial, final or medial with respect to syllable structure and
returns 0 when the two classifications agree, 1 otherwise."
  (let ((syl_position
         (lambda (seg)
           (let ((this_syl (get_syl seg))
                 (next_syl (get_syl (item.next seg))))
             (cond
              ;; diphone crosses a syllable boundary
              ((not (equal? this_syl next_syl))
               "inter")
              ;; previous segment is in a different syllable
              ((not (equal? this_syl (get_syl (item.prev seg))))
               "initial")
              ;; segment after the diphone is in a different syllable
              ((not (equal? next_syl (get_syl (item.next (item.next seg)))))
               "final")
              (t
               "medial"))))))
    (if (equal? (syl_position targ) (syl_position cand))
        0
        1)))
|
| 171 |
+
|
| 172 |
+
;;
|
| 173 |
+
;; tc_word_position
|
| 174 |
+
;;
|
| 175 |
+
;; Find and compare diphone position in word structure
|
| 176 |
+
;; Values are: inter - diphone crosses word boundary.
|
| 177 |
+
;; initial - diphone is word initial.
|
| 178 |
+
;; medial - diphone is word medial
|
| 179 |
+
;; final - diphone is word final
|
| 180 |
+
;; returns 0 for a match 1 for a mismatch.
|
| 181 |
+
;;
|
| 182 |
+
(define (tc_word_pos targ cand)
  "(tc_word_pos TARG CAND)
Score position in word.  Classifies TARG and CAND as inter, initial,
final or medial with respect to word structure and returns 0 when the
two classifications agree, 1 otherwise."
  (let ((word_position
         (lambda (seg)
           (let ((this_word (get_word seg))
                 (next_word (get_word (item.next seg))))
             (cond
              ;; diphone crosses a word boundary
              ((not (equal? this_word next_word))
               "inter")
              ;; previous segment is in a different word
              ((not (equal? this_word (get_word (item.prev seg))))
               "initial")
              ;; segment after the diphone is in a different word
              ((not (equal? next_word (get_word (item.next (item.next seg)))))
               "final")
              (t
               "medial"))))))
    (if (equal? (word_position targ) (word_position cand))
        0
        1)))
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
;;
|
| 213 |
+
;; tc_phrase_position
|
| 214 |
+
;;
|
| 215 |
+
;; Position (of word) in phrase
|
| 216 |
+
;; initial/medial/final
|
| 217 |
+
;;
|
| 218 |
+
;; 0 - match, 1 - mismatch
|
| 219 |
+
;;
|
| 220 |
+
(define (tc_phrase_pos targ cand)
"(tc_phrase_pos targ cand)
Score position in phrase."
  (let ((t_word (get_word targ))
        (c_word (get_word cand)))
    (if (and t_word c_word)
        ;; both segments have a word: compare the words' pbreak features
        (if (string-equal (item.feat t_word 'pbreak)
                          (item.feat c_word 'pbreak))
            0
            1)
        ;; at least one word is missing: match only if both are missing
        (if (or t_word c_word)
            1
            0))))
|
| 236 |
+
|
| 237 |
+
;;
|
| 238 |
+
;; tc_partofspeech
|
| 239 |
+
;;
|
| 240 |
+
;;
|
| 241 |
+
;;
|
| 242 |
+
(define (tc_partofspeech targ cand)
"(tc_partofspeech targ cand)
Score part of speech."
  (let ((t_word (get_word targ))
        (c_word (get_word cand)))
    ;; A segment without a word gets nil as its simplified tag, so two
    ;; word-less segments compare equal.
    (let ((t_tag (if t_word (simple_pos (item.feat t_word 'pos)) nil))
          (c_tag (if c_word (simple_pos (item.feat c_word 'pos)) nil)))
      (if (equal? t_tag c_tag)
          0
          1))))
|
| 254 |
+
|
| 255 |
+
(define (score_contexts targ_context cand_context)
"(score_contexts targ_context cand_context)
If both context items are nil, then score is 0.
If both context items are not nil, and are the same, then
score is 0. Otherwise, score is 1."
  (cond
   ;; both contexts present: compare their names
   ((and targ_context cand_context)
    (if (equal? (item.feat targ_context "name")
                (item.feat cand_context "name"))
        0
        1))
   ;; exactly one context present
   ((or targ_context cand_context)
    1)
   ;; neither context present
   (t 0)))
|
| 269 |
+
|
| 270 |
+
|
| 271 |
+
(define (tc_left_context targ cand)
"(tc_left_context targ cand)
Score left phonetic context."
  ;; The left context is the segment immediately before each item.
  (score_contexts (item.prev targ)
                  (item.prev cand)))
|
| 277 |
+
|
| 278 |
+
;;
|
| 279 |
+
;; tc_right_context
|
| 280 |
+
;;
|
| 281 |
+
;;
|
| 282 |
+
;;
|
| 283 |
+
(define (tc_right_context targ cand)
"(tc_right_context targ cand)
Score right phonetic context."
  ;; The right context is two segments on from each item, i.e. the
  ;; segment that follows the item's successor.
  (score_contexts (item.next (item.next targ))
                  (item.next (item.next cand))))
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
;;
|
| 292 |
+
;; tc_segment_score
|
| 293 |
+
;;
|
| 294 |
+
;; This currently thresholds based on looking at the distributions of the scores.
|
| 295 |
+
;; A nice exp function may be better.
|
| 296 |
+
(define (tc_segment_score targ cand)
"(tc_segment_score targ cand)
A bad alignment score makes a bad segment."
  (let ((score 0))
    ;; Sum the 'score feature of the candidate and of the segment after
    ;; it, leaving out whichever of the two is a silence.
    (if (not (phone_is_silence (item.feat cand "name")))
        (set! score (+ score (item.feat cand 'score))))
    (if (not (phone_is_silence (item.feat (item.next cand) "name")))
        (set! score (+ score (item.feat (item.next cand) 'score))))
    ;; Threshold the summed score into a cost of 0, 0.5 or 1.
    (cond
     ((> score -4000) ;2000 (x2) is 7.5%
      0)
     ((> score -5000) ;2500 (x2) is 5.0%
      0.5)
     (t 1))))
|
| 310 |
+
|
| 311 |
+
;;
|
| 312 |
+
;; tc_bad_duration
|
| 313 |
+
;;
|
| 314 |
+
;; If the segment is marked as having a weird duration penalise it.
|
| 315 |
+
;; We allow bad_dur to be set on the target so resynthesis works
|
| 316 |
+
;; and so you could ask for really long/short segments.
|
| 317 |
+
;;
|
| 318 |
+
(define (tc_bad_duration targ cand)
  ;; 0 when target and candidate carry the same "bad_dur" value,
  ;; 1 when they differ.
  (let ((targ_flag (item.feat targ "bad_dur"))
        (cand_flag (item.feat cand "bad_dur")))
    (if (equal? targ_flag cand_flag)
        0
        1)))
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
;;
|
| 326 |
+
;; tc_bad_f0
|
| 327 |
+
;;
|
| 328 |
+
;; If the candidate is deemed to have an inappropriate f0, then penalise it.
|
| 329 |
+
;;
|
| 330 |
+
;; Specifically, if the targ/cand segment type is expected to be voiced, then
|
| 331 |
+
;; an f0 of zero is bad (results from poor pitch tracking). In such a case,
|
| 332 |
+
;; the join cost would then favour other units with f0 (since the euclidean
|
| 333 |
+
;; distance between two zeros is very small ;)
|
| 334 |
+
;; We want to avoid that.
|
| 335 |
+
;;
|
| 336 |
+
;; Presumably, we also want to penalise cases where supposedly voiceless
|
| 337 |
+
;; candidates have an f0 != 0 (either a consequence of bad pitch tracking
|
| 338 |
+
;; or bad labelling) but that's not done here yet...
|
| 339 |
+
;;
|
| 340 |
+
;; (the function itself has been implemented in C for convenience, and
|
| 341 |
+
;; this stub is left here just for this note ;)
|
| 342 |
+
|
| 343 |
+
(define (tc_bad_f0 targ cand)
  ;; Delegates the scoring to temp_tc_bad_f0 and reports every
  ;; non-zero score on standard output before returning it.
  (let ((f0_cost (temp_tc_bad_f0 targ cand))
        ;; label built from the names of the target and its successor
        (diphone (format nil "%s_%s"
                         (item.feat targ "name")
                         (item.feat (item.next targ) "name"))))
    (if (equal? f0_cost 0.0)
        f0_cost
        (begin
          (format t "f0 score for %s is %f\n" diphone f0_cost)
          f0_cost))))
|
| 351 |
+
|
| 352 |
+
;;
|
| 353 |
+
;; Is a segment a vowel? ( ph_is_a_vowel doesn't seem to work)
|
| 354 |
+
;;
|
| 355 |
+
(define (my_is_vowel seg)
  ;; t when SEG is non-nil and its ph_vc feature is "+", nil otherwise.
  (if (and seg
           (equal? (item.feat seg 'ph_vc) "+"))
      t
      nil))
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
;; get the syllable from sylstructure in normal utterance
|
| 364 |
+
;;
|
| 365 |
+
(define (get_syl seg)
  ;; Parent of SEG in the SylStructure relation, or nil when SEG is nil.
  (if seg
      (item.relation.parent seg 'SylStructure)
      nil))
|
| 370 |
+
|
| 371 |
+
;; get the word from sylstructure in normal utterance
|
| 372 |
+
;;
|
| 373 |
+
(define (get_word seg)
  ;; Parent of SEG's syllable, or nil when SEG has no syllable.
  (let ((syl (get_syl seg)))
    (if syl
        (item.parent syl)
        nil)))
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
;; simple pos
|
| 382 |
+
;;
|
| 383 |
+
(define (simple_pos pos)
  ;; Collapse a part-of-speech tag into one of "v", "n", "func" or
  ;; "other"; a tag found in none of the lists gives nil.
  (cond
   ((member_string pos '(vbd vb vbn vbz vbp vbg))
    "v")
   ((member_string pos '(nn nnp nns nnps fw sym ls))
    "n")
   ((member_string pos '(dt gin prp cc of to cd md pos wdt wp wrb ex uh pdt))
    "func")
   ((member_string pos '(jj jjr jjs 1 2 rb rp rbr rbs))
    "other")
   (t nil)))
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
;; debugging
|
| 398 |
+
|
| 399 |
+
(define (test_target_cost utt1 utt2)
  ;; Walk the Segment relations of the two utterances in parallel and
  ;; print the Default_Target_Cost of each pair; stops at the end of the
  ;; shorter relation.
  (let ((targets (utt.relation.items utt1 'Segment))
        (candidates (utt.relation.items utt2 'Segment))
        (cost 0))
    (while (and targets candidates)
      (let ((targ (car targets))
            (cand (car candidates)))
        (set! cost (Default_Target_Cost targ cand))
        (format t "targ: %l cand: %l cost: %l\n"
                (item.name targ) (item.name cand) cost))
      (set! targets (cdr targets))
      (set! candidates (cdr candidates)))))
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
(provide 'target_cost)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/ogimarkup-mode.scm
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; An example tts text mode for reading OGI's CSLU toolkit mark up
|
| 35 |
+
;;;
|
| 36 |
+
;;; Note not all tokens do something in festival but all are removed
|
| 37 |
+
;;; from the actual text
|
| 38 |
+
;;;
|
| 39 |
+
|
| 40 |
+
;; Decision tree installed as eou_tree while ogimarkup mode is active.
;; Leaves are ((1)) or ((0)); presumably 1 marks an end of utterance,
;; as in the standard eou_tree -- confirm against the Festival manual.
(defvar ogimarkup_eou_tree
  '((n.name matches "<.*")
    ((1))
    ((n.whitespace matches ".*\n.*\n\\(.\\|\n\\)*") ;; A significant break (2 nls)
     ((1))
     ((punc in ("?" ":" "!"))
      ((1))
      ((punc is ".")
       ;; This is to distinguish abbreviations vs periods
       ;; These are heuristics
       ((name matches "\\(.*\\..*\\|[A-Z][A-Za-z]?[A-Za-z]?\\|etc\\)") ;; an abbreviation
        ((n.whitespace is " ")
         ((0)) ;; if abbrev single space isn't enough for break
         ((n.name matches "[A-Z].*")
          ((1))
          ((0))))
        ((n.whitespace is " ") ;; if it doesn't look like an abbreviation
         ((n.name matches "[A-Z].*") ;; single space and non-cap is no break
          ((1))
          ((0)))
         ((1))))
       ((0)))))))
|
| 62 |
+
|
| 63 |
+
(define (ogimarkup_init_func)
"Called on starting ogimarkup text mode."
  ;; Remember the current token-to-words function and end-of-utterance
  ;; tree so that ogimarkup_exit_func can put them back.
  (set! ogimarkup_previous_t2w_func token_to_words)
  (set! ogimarkup_previous_eou_tree eou_tree)
  ;; Start outside any tag with no collected tag text.
  (set! ogimarkup_in_tag nil)
  (set! ogimarkup_tagtokens "")
  ;; Install the ogimarkup versions.
  (set! english_token_to_words ogimarkup_token_to_words)
  (set! token_to_words ogimarkup_token_to_words)
  (set! eou_tree ogimarkup_eou_tree))
|
| 72 |
+
|
| 73 |
+
(define (ogimarkup_exit_func)
"Called on exit ogimarkup text mode."
  ;; Undo any speed tag still in force.
  (Parameter.set 'Duration_Stretch 1.0)
  ;; Both token-to-words hooks are reset to the function that was saved
  ;; from token_to_words when the mode started.
  (set! english_token_to_words ogimarkup_previous_t2w_func)
  (set! token_to_words ogimarkup_previous_t2w_func)
  (set! eou_tree ogimarkup_previous_eou_tree))
|
| 79 |
+
|
| 80 |
+
(define (ogimarkup_token_to_words token name)
"(ogimarkup_token_to_words token name)
OGI markup specific token to word rules. Tags may have optional
argument e.g. <slow> or <slow 0.6> which means the tag may be over
a number of tokens."
  (let ((tag nil)
        (arg nil)
        (words nil))
    (cond
     ;; --- token opens a tag ---
     ((string-matches name "<.*")
      (set! ogimarkup_tagtokens "")
      (set! tag (string-after name "<"))
      (if (string-matches tag ".*>$")
          ;; the tag closes within this token: strip the ">"
          (set! tag (string-before tag ">"))
          ;; otherwise take the following token as the argument and
          ;; strip its ">" if it has one
          (begin
            (set! arg (item.feat token "n.name"))
            (if (string-matches arg ".*>$")
                (set! arg (string-before arg ">")))))
      (set! ogimarkup_in_tag tag)
      (cond
       ;; speaking rate
       ((string-equal tag "slow")
        (Parameter.set 'Duration_Stretch 1.3))
       ((string-equal tag "SLOW")
        (Parameter.set 'Duration_Stretch 2.0))
       ((string-equal tag "normal")
        (Parameter.set 'Duration_Stretch 1.0))
       ((string-matches tag "FAST")
        (Parameter.set 'Duration_Stretch 0.5))
       ((string-matches tag "fast")
        (Parameter.set 'Duration_Stretch 0.8))
       ;; spell the argument out
       ((string-matches tag "spell")
        ;; This ain't really right as we'll get an utterance break here
        (set! words (symbolexplode arg)))
       ;; read the argument as a phone number
       ((string-matches tag "phone")
        ;; This ain't really right as we'll get an utterance break here
        (item.set_feat token "token_pos" "digits") ;; canonical phone number
        (set! words (ogimarkup_previous_t2w_func token arg)))
       ;; voice selection: use the OGI voice when the OGIresLPC module
       ;; and the voice function are both available, else a fallback
       ((string-matches tag "male")
        (if (and (member 'OGIresLPC *modules*)
                 (symbol-bound? 'voice_aec_diphone))
            (voice_aec_diphone)
            (voice_kal_diphone)))
       ((string-matches tag "Male")
        (if (and (member 'OGIresLPC *modules*)
                 (symbol-bound? 'voice_mwm_diphone))
            (voice_mwm_diphone)
            (voice_cmu_us_rms_cg)))
       ((string-matches tag "MALE")
        (if (and (member 'OGIresLPC *modules*)
                 (symbol-bound? 'voice_jph_diphone))
            (voice_jph_diphone)
            (voice_rab_diphone)))
       ((string-matches tag "FT")
        t) ;; do nothing until the end of this tag
       ((string-matches (downcase tag) "female")
        ;; only one female voice so map female Female FEMALE to it
        (if (and (member 'OGIresLPC *modules*)
                 (symbol-bound? 'voice_tll_diphone))
            (voice_tll_diphone)
            (voice_cmu_us_slt_arctic_hts))))
      ;; a tag that opens and closes in the same token is finished
      (if (string-matches name ".*>$")
          (set! ogimarkup_in_tag nil))
      words ;; mostly nil
      )
     ;; --- token closes a tag opened by an earlier token ---
     ((string-matches name ".*>$")
      (set! ogimarkup_tagtokens
            (string-append
             ogimarkup_tagtokens
             (ogimarkup_get_token_string token t))) ;; delete final >
      (if (string-equal ogimarkup_in_tag "FT")
          (ogimarkup_festival_eval ogimarkup_tagtokens))
      (set! ogimarkup_in_tag nil) ;; end of tag
      nil)
     ;; --- token inside a tag: collect its text, say nothing ---
     (ogimarkup_in_tag
      (set! ogimarkup_tagtokens
            (string-append
             ogimarkup_tagtokens
             (ogimarkup_get_token_string token nil)))
      nil) ;; still in tag
     ;; --- ordinary text ---
     (t ;; for all other cases
      (ogimarkup_previous_t2w_func token name)))))
|
| 158 |
+
|
| 159 |
+
;; Add the ogimarkup mode, with its init and exit functions, to the
;; front of the list of text modes.
(set! tts_text_modes
      (cons
       (list 'ogimarkup ;; mode name
             (list ;; ogimarkup mode params
              (list 'init_func ogimarkup_init_func)
              (list 'exit_func ogimarkup_exit_func)))
       tts_text_modes))
|
| 167 |
+
|
| 168 |
+
(define (ogimarkup_get_token_string token delend)
"(ogimarkup_get_token_string TOKEN DELEND)
return string for token including whitespace and punctuation. If DELEND
is true remove > from the name."
  (let ((tok_name (item.feat token "name"))
        (tok_punc (item.feat token "punc")))
    (string-append
     (item.feat token "whitespace")
     (item.feat token "prepunctuation")
     ;; with DELEND keep only the part of the name before ">"
     (if delend
         (string-before tok_name ">")
         tok_name)
     ;; a punc value of "0" contributes nothing
     (if (string-equal "0" tok_punc)
         ""
         tok_punc))))
|
| 182 |
+
|
| 183 |
+
(define (ogimarkup_festival_eval tagtokens)
"(ogimarkup_festival_eval TAGTOKENS)
Take a string of the tokens within the tag and read an s-expression from
it and then evaluate it."
  ;; NOTE(review): this evaluates Scheme code taken straight from the
  ;; text being read (the body of an <FT ...> tag), so the text can run
  ;; arbitrary Festival commands.  Only use this mode on trusted input.
  (eval (read-from-string tagtokens)))
|
| 190 |
+
|
| 191 |
+
(provide 'ogimarkup-mode)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/pauses.scm
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Predicting pause insertion
|
| 35 |
+
|
| 36 |
+
(define (Pauses utt)
"(Pauses utt)
Insert pauses where required."
  ;; Try the Pause_Method first; when it returns nil fall back to
  ;; Classic_Pauses.
  (let ((handled (apply_method 'Pause_Method utt))) ;; new style
    (if (not handled)
        (Classic_Pauses utt)))
  ;; The value of this final call is what Pauses returns.
  (Pause_optional_deleting_B_X utt))
|
| 45 |
+
|
| 46 |
+
(define (Classic_Pauses utt)
"(Classic_Pauses UTT)
Predict pause insertion."
  ;; sil and seg are bound here so the clean-up loop at the end does not
  ;; create or overwrite global variables of the same names.
  (let ((words (utt.relation.items utt 'Word))
        lastword sil seg)
    (if words
        (begin
          (insert_initial_pause utt) ;; always have a start pause
          (set! lastword (car (last words)))
          ;; Insert a pause after every word whose pbreak is B or BB,
          ;; and after the last word.
          (mapcar
           (lambda (w)
             (let ((pbreak (item.feat w "pbreak")))
               (cond
                ((or (string-equal "B" pbreak)
                     (string-equal "BB" pbreak))
                 (insert_pause utt w))
                ((equal? w lastword)
                 (insert_pause utt w)))))
           words)
          ;; The embarrassing bit.  Remove any words labelled as punc or fpunc
          (mapcar
           (lambda (w)
             (let ((pos (item.feat w "pos")))
               (if (or (string-equal "punc" pos)
                       (string-equal "fpunc" pos))
                   (let ((pbreak (item.feat w "pbreak"))
                         (wp (item.relation w 'Phrase)))
                     ;; hand a B/BB break over to the previous word
                     (if (and (string-matches pbreak "BB?")
                              (item.relation.prev w 'Word))
                         (item.set_feat
                          (item.relation.prev w 'Word) "pbreak" pbreak))
                     (item.relation.remove w 'Word)
                     ;; can't refer to w as we've just deleted it
                     (item.relation.remove wp 'Phrase)))))
           words)
          ;; 12/01/2006 V.Strom: Even more embarrassing: Delete all silences
          ;; that are followed by a silence.  These silence sequences
          ;; emerge if 'punc of phrase-final words consists of more than one
          ;; character, e.g. period+quote.  That in turn causes problems in
          ;; build_utts: the 2nd silence ends up with no features but its name,
          ;; because there is no corresponding 2nd silence in the phone
          ;; segmentation to align with.
          ;; This should be fixed in the functions below, but it is easier for
          ;; me to clean up at the end:
          (set! sil (car (car (cdr (car (PhoneSet.description '(silences)))))))
          (set! seg (item.next (utt.relation.first utt 'Segment)))
          (while seg
            (if (and (equal? sil (item.name seg))
                     (equal? sil (item.name (item.prev seg))))
                (item.delete (item.prev seg)))
            (set! seg (item.next seg)))))
    utt))
|
| 100 |
+
|
| 101 |
+
(define (insert_pause utt word)
"(insert_pause UTT WORDITEM)
Insert a silence segment after the last segment in WORDITEM in UTT."
  (let ((anchor (find_last_seg word))
        ;; silence name taken from the current phone set description
        (sil_phone (car (car (cdr (car (PhoneSet.description '(silences))))))))
    ;; nothing is inserted when no segment could be found
    (if anchor
        (item.relation.insert anchor 'Segment (list sil_phone) 'after))))
|
| 109 |
+
|
| 110 |
+
(define (insert_initial_pause utt)
"(insert_initial_pause UTT)
Always have an initial silence if the utterance is non-empty.
Insert a silence segment before the first segment in UTT."
  (let ((firstseg (car (utt.relation.items utt 'Segment)))
        ;; silence name taken from the current phone set description
        (silence (car (car (cdr (car (PhoneSet.description '(silences))))))))
    ;; nothing is inserted when the Segment relation is empty
    (if firstseg
        (item.relation.insert
         firstseg 'Segment (list silence) 'before))))
|
| 119 |
+
|
| 120 |
+
(define (insert_final_pause utt)
"(insert_final_pause UTT)
Always have a final silence if the utterance is non-empty."
  (let ((tail (utt.relation.last utt 'Segment))
        ;; the silence symbol is formatted with %l to make it a string
        (sil_name (format nil "%l"
                          (car (car (cdr (car (PhoneSet.description
                                               '(silences)))))))))
    (cond
     ;; no segments: nothing to do
     ((null tail) nil)
     ;; already ends in silence
     ((equal? (item.name tail) sil_name) nil)
     (t
      (format t "iserted final pause %s\n" sil_name)
      (item.relation.insert tail 'Segment (list sil_name) 'after)))))
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
(define (find_last_seg word)
;;; Return the last segment of WORD: the last SylStructure daughter of
;;; its last syllable.  A word without syllables (punctuation, say) has
;;; no segments, so the search moves back through the Word relation
;;; until a word with a syllable is found.
  (if (null word)
      nil ;; there are no segs (don't think this can happen)
      (let ((last_syl (item.relation.daughtern word 'SylStructure)))
        (if last_syl
            (item.relation.daughtern last_syl 'SylStructure)
            (find_last_seg (item.relation.prev word 'Word))))))
|
| 147 |
+
|
| 148 |
+
(define (Unisyn_Pauses utt)
"(Unisyn_Pauses UTT)
Predict pause insertion in a Unisyn utterance structure."
  (let ((words (utt.relation.items utt 'Word))
        lastword)
    (if words
        (begin
          (us_insert_initial_pause utt) ;; always have a start pause
          (set! lastword (car (last words)))
          ;; Insert a pause after every word whose pbreak is B or BB,
          ;; and after the last word.
          (mapcar
           (lambda (w)
             (let ((pbreak (item.feat w "pbreak")))
               (cond
                ((or (string-equal "B" pbreak)
                     (string-equal "BB" pbreak))
                 (us_insert_pause utt w))
                ((equal? w lastword)
                 (us_insert_pause utt w)))))
           words)
          ;; The embarrassing bit.  Remove any words labelled as punc or fpunc
          (mapcar
           (lambda (w)
             (let ((pos (item.feat w "pos")))
               (if (or (string-equal "punc" pos)
                       (string-equal "fpunc" pos))
                   (let ((pbreak (item.feat w "pbreak"))
                         (wp (item.relation w 'Phrase)))
                     ;; hand a B/BB break over to the previous word
                     (if (and (string-matches pbreak "BB?")
                              (item.relation.prev w 'Word))
                         (item.set_feat
                          (item.relation.prev w 'Word) "pbreak" pbreak))
                     (item.relation.remove w 'Word)
                     ;; can't refer to w as we've just deleted it
                     (item.relation.remove wp 'Phrase)))))
           words)))
    utt))
|
| 186 |
+
|
| 187 |
+
(define (us_insert_pause utt word)
"(us_insert_pause UTT WORDITEM)
Insert a silence segment after the last segment in WORDITEM in UTT."
  (let ((anchor (us_find_last_seg word)))
    ;; the silence is always named "pau" here; nothing is inserted
    ;; when no segment could be found
    (if anchor
        (item.relation.insert anchor 'Segment (list "pau") 'after))))
|
| 195 |
+
|
| 196 |
+
(define (us_insert_initial_pause utt)
"(us_insert_initial_pause UTT)
Always have an initial silence if the utterance is non-empty.
Insert a silence segment before the first segment in UTT."
  (let ((firstseg (utt.relation.first utt 'Segment))
        (silence "pau"))
    ;; nothing is inserted when the Segment relation is empty
    (if firstseg
        (item.relation.insert
         firstseg 'Segment (list silence) 'before))))
|
| 205 |
+
|
| 206 |
+
(define (us_find_last_seg word)
;;; Return the last segment of WORD, reached through the word's
;;; WordStructure item, its last Syllable daughter and that syllable's
;;; SylStructure item.  A word without syllables (punctuation, say) has
;;; no segments, so the search moves back through the Word relation
;;; until a word with a syllable is found.
  (if (null word)
      nil ;; there are no segs (don't think this can happen)
      (let ((last_syl
             (item.daughtern_to (item.relation word 'WordStructure) 'Syllable)))
        (if last_syl
            (item.daughtern_to
             (item.relation last_syl 'SylStructure)
             'Segment)
            (us_find_last_seg (item.relation.prev word 'Word))))))
|
| 221 |
+
|
| 222 |
+
(define (Pause_optional_deleting_B_X utt)
"(Pause_optional_deleting_B_X utt)

Delete all phone symbols starting with 'B_' from the Segment relation
(a B_150 e.g. is a 150ms pause) if symbol 'Pause_delete_B_X is defined.
"
; The B_X never occur in the phone segmentation but are predicted by
; some pause methods, in particular the default I used to produce the
; .utt files for the 2009 test sentences for the Blizzard challenge.
; Some participants complained about them and I had to fix it quickly.
  (if (symbol-bound? 'Pause_delete_B_X)
      ;; next_seg is bound here so the loop does not create or overwrite
      ;; a global variable of that name.
      (let (seg next_seg)
        ;; the scan starts at the second segment of the relation
        (set! seg (item.next (utt.relation.first utt 'Segment)))
        (while seg
          ;; fetch the successor first: seg may be deleted below
          (set! next_seg (item.next seg))
          ;(format t "segment %l\n" (item.name seg))
          (if (string-matches (item.name seg) "B_[0-9]*")
              (item.delete seg))
          (set! seg next_seg)))))
|
| 241 |
+
|
| 242 |
+
(provide 'pauses)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/phoneset.scm
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1999 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;; Author: Alan W Black
|
| 34 |
+
;;; Date: April 1999
|
| 35 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 36 |
+
;;;
|
| 37 |
+
;;; Support code for phone set definitions
|
| 38 |
+
;;;
|
| 39 |
+
|
| 40 |
+
(defmac (defPhoneSet form) ;; expands into a defPhoneSet_real call with each argument quoted
|
| 41 |
+
(list 'defPhoneSet_real
|
| 42 |
+
(list 'quote (cadr form)) ;; second element of FORM -> NAME
|
| 43 |
+
(list 'quote (car (cddr form))) ;; third element of FORM -> FEATDEFS
|
| 44 |
+
(list 'quote (cadr (cddr form))))) ;; fourth element of FORM -> PHONES
|
| 45 |
+
|
| 46 |
+
(define (defPhoneSet_real name featdefs phones)
|
| 47 |
+
"(defPhoneSet NAME FEATTYPES PHONES)
|
| 48 |
+
Define a phone set with given name, feature types and
|
| 49 |
+
list of phones.  This also selects NAME as the current phoneset and returns (NAME INFO)."
|
| 50 |
+
(let (info) ;; info: one (PHONE FEATURE-VALUE-PAIRS) entry per phone, filled in below
|
| 51 |
+
(if (not (eq? 'Features (car featdefs))) ;; FEATDEFS is not headed by the symbol Features
|
| 52 |
+
(begin
|
| 53 |
+
;; Old format that has the same number of phone features for
|
| 54 |
+
;; all phones
|
| 55 |
+
(set! info
|
| 56 |
+
(mapcar
|
| 57 |
+
(lambda (ph)
|
| 58 |
+
(let ((fvs
|
| 59 |
+
(mapcar
|
| 60 |
+
list
|
| 61 |
+
(mapcar car featdefs) ;; the feature names
|
| 62 |
+
(cdr ph)))) ;; paired with this phone's values
|
| 63 |
+
(ps_check_fvals
|
| 64 |
+
(cons (car ph) (cons (list 'type t) fvs))
|
| 65 |
+
(cons t fvs)) ;; NOTE(review): built from fvs, not featdefs, so each value seems to be checked only against itself -- confirm whether (cons t featdefs) was intended
|
| 66 |
+
(list (car ph) fvs)))
|
| 67 |
+
phones)))
|
| 68 |
+
;; else
|
| 69 |
+
;; New format where types are specified so phones may have
|
| 70 |
+
;; different features
|
| 71 |
+
(set! info
|
| 72 |
+
(mapcar
|
| 73 |
+
(lambda (ph)
|
| 74 |
+
(let ((fvs
|
| 75 |
+
(cons
|
| 76 |
+
(list 'type (cadr ph)) ;; second element of the phone entry is its type
|
| 77 |
+
(mapcar
|
| 78 |
+
list
|
| 79 |
+
(mapcar car (cdr (assoc (cadr ph) (cdr featdefs)))) ;; feature names listed under that type
|
| 80 |
+
(cddr ph))))) ;; paired with the remaining elements of the phone entry
|
| 81 |
+
(ps_check_fvals
|
| 82 |
+
(cons (car ph) fvs)
|
| 83 |
+
(assoc (cadr ph) (cdr featdefs))) ;; the FEATDEFS entry for this phone's type
|
| 84 |
+
(list (car ph) fvs)))
|
| 85 |
+
(cdr phones)))) ;; the first element of PHONES is skipped in this format
|
| 86 |
+
(Param.set
|
| 87 |
+
(string-append "phonesets." name) ;; parameter name is "phonesets." followed by NAME
|
| 88 |
+
info)
|
| 89 |
+
(PhoneSet.select name)
|
| 90 |
+
(list name info)))
|
| 91 |
+
|
| 92 |
+
(define (ps_check_fvals fvs featdefs)
|
| 93 |
+
"(ps_check_fvals FVS FEATDEFS)
|
| 94 |
+
Signal an error unless every feature value given for the phone FVS is
|
| 95 |
+
among the values FEATDEFS lists for that feature."
|
| 96 |
+
(mapcar
|
| 97 |
+
(lambda (fv)
|
| 98 |
+
(let ((allowed (cdr (assoc (car fv) (cdr featdefs)))))
|
| 99 |
+
(cond
|
| 100 |
+
((not allowed)
|
| 101 |
+
(error "Phoneset definition: phone has no defined type" fvs))
|
| 102 |
+
((not (member_string (cadr fv) allowed))
|
| 103 |
+
(error
|
| 104 |
+
(format nil "Phoneset definition: phone feature %l is undefined" fv) fvs)))))
|
| 105 |
+
(cddr fvs)))
|
| 106 |
+
|
| 107 |
+
(define (PhoneSet.select name)
|
| 108 |
+
"(PhoneSet.select NAME)
|
| 109 |
+
Make the phoneset called NAME the current one; error if it is undefined."
|
| 110 |
+
(if (not (feats.present Param (string-append "phonesets." name)))
|
| 111 |
+
(error "no phoneset defined: " name)
|
| 112 |
+
(Param.set "phoneset" (Param.get (string-append "phonesets." name)))))
|
| 113 |
+
|
| 114 |
+
(define (PhoneSet.description name)
|
| 115 |
+
"(PhoneSet.description NAME)
|
| 116 |
+
Return (lisp) representation of current phoneset.  NAME is not used."
|
| 117 |
+
(feats.tolisp (Param.get "phoneset")))
|
| 118 |
+
|
| 119 |
+
(define (PhoneSet.list)
|
| 120 |
+
"(PhoneSet.list)
|
| 121 |
+
List of the names of the currently defined phonesets."
|
| 122 |
+
;; Not an efficient way to get the answer: the whole "phonesets" value is turned into a list and only the car of each entry is kept
|
| 123 |
+
(mapcar car (feats.tolisp (Param.get "phonesets"))))
|
| 124 |
+
|
| 125 |
+
(define (PhoneSet.silences sils)
|
| 126 |
+
"(PhoneSet.silences SILLIST)
|
| 127 |
+
Define the silence phones for the currently selected phoneset."
|
| 128 |
+
(Param.set "phoneset.silences" sils)) ;; SILS is stored as the parameter "phoneset.silences"
|
| 129 |
+
|
| 130 |
+
(provide 'phoneset)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/phrase.scm
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Phrase boundary prediction.
|
| 35 |
+
;;;
|
| 36 |
+
;;; Two methods supported, if POS is enabled we use ngrams for that
|
| 37 |
+
;;; otherwise we use a CART tree
|
| 38 |
+
;;;
|
| 39 |
+
;;; Models trained from the IBM/Lancaster Spoken English Corpus and
|
| 40 |
+
;;; Boston University's FM Radio Corpus.
|
| 41 |
+
|
| 42 |
+
;;;
|
| 43 |
+
;;; Here's a very simple CART tree for predicting phrase breaks
|
| 44 |
+
;;; based on punctuation only
|
| 45 |
+
;;;
|
| 46 |
+
(set! simple_phrase_cart_tree
|
| 47 |
+
'
|
| 48 |
+
((lisp_token_end_punc in ("?" "." ":"))
|
| 49 |
+
((BB))
|
| 50 |
+
((lisp_token_end_punc in ("'" "\"" "," ";"))
|
| 51 |
+
((B))
|
| 52 |
+
((n.name is 0) ;; end of utterance
|
| 53 |
+
((BB))
|
| 54 |
+
((NB))))))
|
| 55 |
+
|
| 56 |
+
(define (token_end_punc word)
|
| 57 |
+
"(token_end_punc WORD)
|
| 58 |
+
If punctuation at end of related Token and if WORD is last word
|
| 59 |
+
in Token return punc, otherwise 0."
|
| 60 |
+
(if (item.relation.next word "Token") ;; WORD has a successor in the Token relation, so it is not the last word
|
| 61 |
+
"0"
|
| 62 |
+
(item.feat word "R:Token.parent.punc")))
|
| 63 |
+
|
| 64 |
+
;;; This is a simple CART tree used after boundaries are predicted
|
| 65 |
+
;;; by the probabilistic method to get two levels of break
|
| 66 |
+
(set! english_phrase_type_tree
|
| 67 |
+
'((pbreak is NB)
|
| 68 |
+
((num_break is 1)
|
| 69 |
+
((mB))
|
| 70 |
+
((R:Token.parent.EMPH is 1)
|
| 71 |
+
((NB))
|
| 72 |
+
((n.R:Token.parent.EMPH is 1)
|
| 73 |
+
((NB))
|
| 74 |
+
((NB)))))
|
| 75 |
+
((pbreak is BB)
|
| 76 |
+
((BB))
|
| 77 |
+
((pbreak is mB)
|
| 78 |
+
((mB))
|
| 79 |
+
((name in ("." "!" "?"));; only (potentially) change Bs to BBs
|
| 80 |
+
((BB))
|
| 81 |
+
((B)))))))
|
| 82 |
+
|
| 83 |
+
(set! f2b_phrase_cart_tree
|
| 84 |
+
'
|
| 85 |
+
((gpos is punc)
|
| 86 |
+
(((1 0.00238095) (3 0) (4 0.997619) B))
|
| 87 |
+
(((4 0.00238095) (3 0) (1 0.997619) NB))))
|
| 88 |
+
|
| 89 |
+
;;; For more detailed prediction of phrase breaks we use POS and
|
| 90 |
+
;;; probability distribution of breaks
|
| 91 |
+
;;; These models were trained using data from the Lancaster/IBM
|
| 92 |
+
;;; Spoken English Corpus
|
| 93 |
+
|
| 94 |
+
(require 'pos) ;; for part of speech map
|
| 95 |
+
|
| 96 |
+
(defvar pbreak_ngram_dir libdir
|
| 97 |
+
"pbreak_ngram_dir
|
| 98 |
+
The directory containing the ngram models for predicting phrase
|
| 99 |
+
breaks. By default this is the standard library directory.")
|
| 100 |
+
|
| 101 |
+
(defvar english_phr_break_params
|
| 102 |
+
(list
|
| 103 |
+
;; The name and filename of the ngram with the a priori ngram model
|
| 104 |
+
;; for predicting phrase breaks in the Phrasify module. This model should
|
| 105 |
+
;; predict probability distributions for B and NB given some context of
|
| 106 |
+
;; part of speech tags.
|
| 107 |
+
(list 'pos_ngram_name 'english_break_pos_ngram)
|
| 108 |
+
(list 'pos_ngram_filename
|
| 109 |
+
(path-append pbreak_ngram_dir "sec.ts20.quad.ngrambin"))
|
| 110 |
+
;; The name and filename of the ngram containing the a posteriori ngram
|
| 111 |
+
;; for predicting phrase breaks in the Phrasify module. This model should
|
| 112 |
+
;; predict probability distributions for B and NB given previous B and
|
| 113 |
+
;; NBs.
|
| 114 |
+
(list 'break_ngram_name 'english_break_ngram)
|
| 115 |
+
(list 'break_ngram_filename
|
| 116 |
+
(path-append pbreak_ngram_dir "sec.B.hept.ngrambin"))
|
| 117 |
+
;; A weighting factor for breaks in the break/non-break ngram.
|
| 118 |
+
(list 'gram_scale_s 0.05)
|
| 119 |
+
;; When Phrase_Method is prob_models, this tree, if set is used to
|
| 120 |
+
;; potentially predict phrase type. At least some prob_models only
|
| 121 |
+
;; predict B or NB, this tree may be used to change some Bs into
|
| 122 |
+
;; BBs. If it is nil, the pbreak value predicted by prob_models
|
| 123 |
+
;; remains the same.
|
| 124 |
+
(list 'phrase_type_tree english_phrase_type_tree)
|
| 125 |
+
;; A list of tags used in identifying breaks. Typically B and NB (and
|
| 126 |
+
;; BB). This should be the alphabet of the ngram identified in
|
| 127 |
+
;; break_ngram_name
|
| 128 |
+
(list 'break_tags '(B NB))
|
| 129 |
+
(list 'pos_map english_pos_map_wp39_to_wp20)
|
| 130 |
+
)
|
| 131 |
+
"english_phr_break_params
|
| 132 |
+
Parameters for English phrase break statistical model.")
|
| 133 |
+
|
| 134 |
+
(defvar phr_break_params nil
|
| 135 |
+
"phr_break_params
|
| 136 |
+
Parameters for phrase break statistical model. This is typically set by
|
| 137 |
+
a voice selection function to the parameters for a particular model.")
|
| 138 |
+
|
| 139 |
+
;;;
|
| 140 |
+
;;; Declaration of some features
|
| 141 |
+
;;;
|
| 142 |
+
|
| 143 |
+
(def_feature_docstring
|
| 144 |
+
'Word.pbreak
|
| 145 |
+
"Word.pbreak
|
| 146 |
+
Result from statistical phrasing module, may be B or NB denoting
|
| 147 |
+
phrase break or non-phrase break after the word.")
|
| 148 |
+
|
| 149 |
+
(def_feature_docstring
|
| 150 |
+
'Word.pbreak_score
|
| 151 |
+
"Word.pbreak_score
|
| 152 |
+
Log likelihood score from statistical phrasing module, for pbreak
|
| 153 |
+
value.")
|
| 154 |
+
|
| 155 |
+
(def_feature_docstring
|
| 156 |
+
'Word.blevel
|
| 157 |
+
"Word.blevel
|
| 158 |
+
A crude translation of phrase break into ToBI like phrase level.
|
| 159 |
+
Values may be 0,1,2,3,4.")
|
| 160 |
+
|
| 161 |
+
(define (Phrasify utt)
|
| 162 |
+
"(Phrasify utt)
|
| 163 |
+
Build phrasing over the words of UTT: apply the Phrasify_Method method
|
| 164 |
+
and, when it gives no value, fall back to Classic_Phrasify."
|
| 165 |
+
(let ((outcome (apply_method 'Phrasify_Method utt)))
|
| 166 |
+
(if outcome
|
| 167 |
+
outcome ;; new style
|
| 168 |
+
(Classic_Phrasify utt))))
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
(provide 'phrase)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/pos.scm
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; A part of speech tagger
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
(set! english_guess_pos
|
| 38 |
+
'((in of for in on that with by at from as if that against about
|
| 39 |
+
before because if under after over into while without
|
| 40 |
+
through new between among until per up down)
|
| 41 |
+
(to to)
|
| 42 |
+
(det the a an no some this that each another those every all any
|
| 43 |
+
these both neither no many)
|
| 44 |
+
(md will may would can could should must ought might)
|
| 45 |
+
(cc and but or plus yet nor)
|
| 46 |
+
(wp who what where how when)
|
| 47 |
+
(pps her his their its our their its mine)
|
| 48 |
+
(aux is am are was were has have had be)
|
| 49 |
+
(punc "." "," ":" ";" "\"" "'" "(" "?" ")" "!" "[" "]" "{" "}")
|
| 50 |
+
))
|
| 51 |
+
|
| 52 |
+
(defvar guess_pos english_guess_pos
|
| 53 |
+
"guess_pos
|
| 54 |
+
An assoc-list of simple part of speech tag to list of words in that
|
| 55 |
+
class. This basically only contains closed class words all other
|
| 56 |
+
words may be assumed to be content words. This was built from information
|
| 57 |
+
in the f2b database and is used by the ffeature gpos.")
|
| 58 |
+
|
| 59 |
+
;;; A more elaborate part of speech tagger using ngrams works but
|
| 60 |
+
;;; at present requires a large list of a priori probabilities
|
| 61 |
+
;;; to work. If that file exists on your system we'll use it otherwise
|
| 62 |
+
;;; POS is guessed by the lexicon
|
| 63 |
+
|
| 64 |
+
;;; These models were built from the Penn TreeBank, WSJ corpus
|
| 65 |
+
|
| 66 |
+
(defvar pos_model_dir lexdir
|
| 67 |
+
"pos_model_dir
|
| 68 |
+
The directory contains the various models for the POS module. By
|
| 69 |
+
default this is the same directory as lexdir. The directory should
|
| 70 |
+
contain two models: a part of speech lexicon with reverse log probabilities
|
| 71 |
+
and an ngram model for the same part of speech tag set.")
|
| 72 |
+
|
| 73 |
+
(defvar pos_p_start_tag "punc"
|
| 74 |
+
"pos_p_start_tag
|
| 75 |
+
This variable's value is the tag most likely to appear before
|
| 76 |
+
the start of a sentence. It is used when looking for pos context
|
| 77 |
+
before an utterance. Typically it should be some type of punctuation
|
| 78 |
+
tag.")
|
| 79 |
+
|
| 80 |
+
(defvar pos_pp_start_tag "n"
|
| 81 |
+
"pos_pp_start_tag
|
| 82 |
+
This variable's value is the tag most likely to appear before
|
| 83 |
+
pos_p_start_tag and any position preceding that. It is typically
|
| 84 |
+
some type of noun tag. This is used to provide pos context for
|
| 85 |
+
early words in an utterance.")
|
| 86 |
+
|
| 87 |
+
(defvar pos_supported nil
|
| 88 |
+
"pos_supported
|
| 89 |
+
If set to non-nil use part of speech prediction, if nil just get
|
| 90 |
+
pos information from the lexicon.")
|
| 91 |
+
|
| 92 |
+
(defvar pos_ngram_name nil
|
| 93 |
+
"pos_ngram_name
|
| 94 |
+
The name of a loaded ngram containing the a posteriori ngram model for
|
| 95 |
+
predicting part of speech. The a priori model is held as a
|
| 96 |
+
lexicon called poslex.")
|
| 97 |
+
|
| 98 |
+
(defvar pos_map nil
|
| 99 |
+
"pos_map
|
| 100 |
+
If set this should be a reverse assoc-list mapping on part of speech
|
| 101 |
+
tag set to another. It is used after using the defined POS models to
|
| 102 |
+
map the pos feature on each word to a new tagset.")
|
| 103 |
+
|
| 104 |
+
;;;
|
| 105 |
+
;;; All the names here don't really allow multiple versions
|
| 106 |
+
;;; they should be prefixed with english_
|
| 107 |
+
;;;
|
| 108 |
+
|
| 109 |
+
(if (probe_file (path-append pos_model_dir "wsj.wp39.poslexR"))
|
| 110 |
+
(begin
|
| 111 |
+
(lex.create "english_poslex")
|
| 112 |
+
(lex.set.compile.file
|
| 113 |
+
(path-append pos_model_dir "wsj.wp39.poslexR"))
|
| 114 |
+
(lex.set.phoneset "mrpa")
|
| 115 |
+
(lex.set.lts.method nil)
|
| 116 |
+
(set! pos_lex_name "english_poslex")
|
| 117 |
+
(set! pos_p_start_tag "punc")
|
| 118 |
+
(set! pos_pp_start_tag "nn")
|
| 119 |
+
;; wp39
|
| 120 |
+
(lex.add.entry '("_OOV_" ((nnp -2.9144) (jj -2.7357) (nn -3.5787)
|
| 121 |
+
(nns -3.4933) (vbn -3.2486) (vbg -2.9419)
|
| 122 |
+
(vb -3.5471) (vbd -3.7896) (vbz -3.7820)
|
| 123 |
+
(rb -4.1940) (vbp -3.2755) (nnps -2.1605))
|
| 124 |
+
()))
|
| 125 |
+
(lex.add.entry '("_number_"
|
| 126 |
+
((cd -0.35202) (jj -4.1083) (nns -6.4488) (nnp -7.3595))
|
| 127 |
+
() ))
|
| 128 |
+
(lex.add.entry '("," ((punc -0.88488)) () ))
|
| 129 |
+
(lex.add.entry '("." ((punc -1.1104)) () ))
|
| 130 |
+
(lex.add.entry '(":" ((punc -4.4236)) () ))
|
| 131 |
+
(lex.add.entry '("``" ((punc -2.7867)) () ))
|
| 132 |
+
(lex.add.entry '("`" ((punc -2.7867)) () ))
|
| 133 |
+
(lex.add.entry '("'" ((punc -2.7867)) () ))
|
| 134 |
+
(lex.add.entry '("\"" ((punc -2.7867)) () ))
|
| 135 |
+
(lex.add.entry '("[" ((punc -2.7867)) () ))
|
| 136 |
+
(lex.add.entry '("]" ((punc -2.7867)) () ))
|
| 137 |
+
(lex.add.entry '("{" ((punc -2.7867)) () ))
|
| 138 |
+
(lex.add.entry '("}" ((punc -2.7867)) () ))
|
| 139 |
+
;; wp17
|
| 140 |
+
;; (lex.add.entry '("_OOV_" ((n -3.4109) (j -2.7892) (v -3.7426)) ()))
|
| 141 |
+
; (lex.add.entry '("_OOV_" ((n -1.968) (j -2.351) (v -2.287)) ()))
|
| 142 |
+
; (lex.add.entry '("_number_" ((j -0.35202)) ()))
|
| 143 |
+
; (lex.add.entry '("," ((punc -0.88359)) () ))
|
| 144 |
+
; (lex.add.entry '("." ((punc -1.1101)) () ))
|
| 145 |
+
; (lex.add.entry '(":" ((punc -4.4236)) () ))
|
| 146 |
+
; (lex.add.entry '("``" ((punc -2.7867)) () ))
|
| 147 |
+
; (lex.add.entry '("`" ((punc -2.7867)) () ))
|
| 148 |
+
; (lex.add.entry '("'" ((punc -2.7867)) () ))
|
| 149 |
+
; (lex.add.entry '("\"" ((punc -2.7867)) () ))
|
| 150 |
+
;; wp22
|
| 151 |
+
; (lex.add.entry '("_OOV_" ((n -3.4109) (j -2.7892) (v -3.7426)) ()))
|
| 152 |
+
; (lex.add.entry '("_number_" ((cd -0.35202) (j -4.1908) (n -7.3890)) ()))
|
| 153 |
+
; (lex.add.entry '("," ((punc -0.88359)) () ))
|
| 154 |
+
; (lex.add.entry '("." ((punc -1.1101)) () ))
|
| 155 |
+
; (lex.add.entry '(":" ((punc -4.4236)) () ))
|
| 156 |
+
; (lex.add.entry '("``" ((punc -2.7867)) () ))
|
| 157 |
+
;; wp18
|
| 158 |
+
; (lex.add.entry '("_OOV_" ((n -3.4109) (j -2.7892) (v -3.7426)) ()))
|
| 159 |
+
; (lex.add.entry '("_number_" ((j -0.35202)) ()))
|
| 160 |
+
; (lex.add.entry '("`" ((punc -6.539) ) () ))
|
| 161 |
+
; (lex.add.entry '("``" ((punc -2.399) ) () ))
|
| 162 |
+
; (lex.add.entry '("," ((punc -0.480) ) () ))
|
| 163 |
+
; (lex.add.entry '("." ((fpunc -0.012) ) () ))
|
| 164 |
+
; (lex.add.entry '(":" ((punc -4.100) ) () ))
|
| 165 |
+
|
| 166 |
+
(ngram.load 'english_pos_ngram
|
| 167 |
+
(path-append pos_model_dir "wsj.wp39.tri.ngrambin"))
|
| 168 |
+
; (ngram.load 'english_pos_ngram
|
| 169 |
+
; (path-append pos_model_dir "wsj.wp45.tri.ngram"))
|
| 170 |
+
(set! pos_supported t)
|
| 171 |
+
)
|
| 172 |
+
(set! pos_supported nil))
|
| 173 |
+
|
| 174 |
+
(setq english_pos_map_wp39_to_wp20
|
| 175 |
+
'(
|
| 176 |
+
(( vbd vb vbn vbz vbp vbg ) v)
|
| 177 |
+
(( nn nnp nns nnps fw sym ls ) n)
|
| 178 |
+
(( dt ) dt)
|
| 179 |
+
(( punc fpunc ) punc)
|
| 180 |
+
(( in ) in)
|
| 181 |
+
(( jj jjr jjs 1 2 ) j)
|
| 182 |
+
(( prp ) prp)
|
| 183 |
+
(( rb rp rbr rbs ) r)
|
| 184 |
+
(( cc ) cc)
|
| 185 |
+
(( of ) of)
|
| 186 |
+
(( to ) to)
|
| 187 |
+
(( cd ) cd)
|
| 188 |
+
(( md ) md)
|
| 189 |
+
(( pos ) pos)
|
| 190 |
+
(( wdt ) wdt)
|
| 191 |
+
(( wp ) wp)
|
| 192 |
+
(( wrb ) wrb)
|
| 193 |
+
(( ex ) ex)
|
| 194 |
+
(( uh ) uh)
|
| 195 |
+
(( pdt ) pdt)
|
| 196 |
+
))
|
| 197 |
+
|
| 198 |
+
(defvar pos_map nil
|
| 199 |
+
"pos_map
|
| 200 |
+
A reverse assoc list of predicted pos tags to some other tag set. Note
|
| 201 |
+
using this changes the pos tag, losing the actual predicted value. Rather
|
| 202 |
+
than map here you may find it more appropriate to map tag sets locally
|
| 203 |
+
in the modules that use them (e.g. phrasing and lexicons).")
|
| 204 |
+
|
| 205 |
+
;;(setq pos_map_remap
|
| 206 |
+
;; '(
|
| 207 |
+
;; (( fpunc ) punc)
|
| 208 |
+
;; (( of ) in)))
|
| 209 |
+
|
| 210 |
+
(def_feature_docstring 'Word.pos
|
| 211 |
+
"Word.pos
|
| 212 |
+
Part of speech tag value returned by the POS tagger module.")
|
| 213 |
+
|
| 214 |
+
(def_feature_docstring 'Word.pos_score
|
| 215 |
+
"Word.pos_score
|
| 216 |
+
Part of speech tag log likelihood from Viterbi search.")
|
| 217 |
+
|
| 218 |
+
(define (POS utt)
|
| 219 |
+
"(POS utt)
|
| 220 |
+
Apply part of speech tagging (and possibly parsing too) to the Word
|
| 221 |
+
relation.  The POS_Method method is applied first; Classic_POS is
|
| 222 |
+
used when that gives no value."
|
| 223 |
+
(let ((tagged (apply_method 'POS_Method utt)))
|
| 224 |
+
(if tagged
|
| 225 |
+
tagged ;; new style
|
| 226 |
+
(Classic_POS utt))))
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
(provide 'pos)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/postlex.scm
ADDED
|
@@ -0,0 +1,587 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Postlexical rules
|
| 35 |
+
;;;
|
| 36 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 37 |
+
;; Modified for CSTR HTS Voice Library ;;
|
| 38 |
+
;; Author : Junichi Yamagishi (jyamagis@inf.ed.ac.uk) ;;
|
| 39 |
+
;; Date : Sept 2008 ;;
|
| 40 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
(define (PostLex utt)
  "(PostLex utt)
Apply post-lexical rules to the segment stream of UTT.  The rules may
be almost arbitrary and are supplied by the current voice, normally
through the postlex_rules_hooks variable.  A number of standard rule
sets are provided (vowel reduction, possessives etc.).  These rules
are also used to mark standard segments with the cluster information
used when creating diphone names.  Returns UTT."
  ;; Try the new-style 'PostLex_Method first; only when it yields nil
  ;; do we fall back to running the hook functions.
  (if (not (apply_method 'PostLex_Method utt))
      (apply_hooks postlex_rules_hooks utt))
  utt)
|
| 58 |
+
|
| 59 |
+
(define (Classic_PostLex utt)
  "(Classic_PostLex utt)
Run the builtin post-lexical rules on UTT and then the functions held
in postlex_rules_hooks.  Returns UTT."
  ;; The builtin pass is still needed: not every rule has been
  ;; translated into a hook function yet.
  (Builtin_PostLex utt)
  (apply_hooks postlex_rules_hooks utt)
  utt)
|
| 67 |
+
|
| 68 |
+
(defvar postlex_rules_hooks nil
|
| 69 |
+
"postlex_rules_hooks
|
| 70 |
+
A function or list of functions which encode post lexical rules.
|
| 71 |
+
This will be voice specific, though some rules will be shared across
|
| 72 |
+
languages.")
|
| 73 |
+
|
| 74 |
+
;;; Mapping of full vowels to reduced vowels, this should be part
|
| 75 |
+
;;; of the phoneset definitions
|
| 76 |
+
(defvar postlex_vowel_reduce_table
|
| 77 |
+
'((mrpa
|
| 78 |
+
((uh @) (i @) (a @) (e @) (u @) (o @) (oo @)))
|
| 79 |
+
(radio
|
| 80 |
+
((ah ax el en em)
|
| 81 |
+
(ih ax)
|
| 82 |
+
; (er axr ax)
|
| 83 |
+
; (iy ih)
|
| 84 |
+
; (ey ax)
|
| 85 |
+
(aa ax)
|
| 86 |
+
(ae ax)
|
| 87 |
+
(eh ax))))
|
| 88 |
+
"postlex_vowel_reduce_table
|
| 89 |
+
Mapping of vowels to their reduced form. This is an assoc list of
|
| 90 |
+
phoneset name to an assoc list of full vowel to reduced form.")
|
| 91 |
+
|
| 92 |
+
(defvar postlex_vowel_reduce_cart_tree nil
|
| 93 |
+
"postlex_vowel_reduce_cart_tree
|
| 94 |
+
CART tree for vowel reduction.")
|
| 95 |
+
|
| 96 |
+
(defvar postlex_vowel_reduce_cart_tree_hand
|
| 97 |
+
'((stress is 0)
|
| 98 |
+
((p.syl_break < 2)
|
| 99 |
+
((syl_break < 2)
|
| 100 |
+
((1))
|
| 101 |
+
((0)))
|
| 102 |
+
((0)))
|
| 103 |
+
((0)))
|
| 104 |
+
"postlex_vowel_reduce_cart_tree_hand
|
| 105 |
+
A CART tree for vowel reduction. This is hand-written.")
|
| 106 |
+
|
| 107 |
+
(defvar postlex_vowel_reduce_cart_data
|
| 108 |
+
'
|
| 109 |
+
((R:SylStructure.parent.gpos is cc)
|
| 110 |
+
(((0 0.993548) (1 0.00645161) 0))
|
| 111 |
+
((p.R:SylStructure.parent.gpos is md)
|
| 112 |
+
(((0 0.903226) (1 0.0967742) 0))
|
| 113 |
+
((p.R:SylStructure.parent.gpos is det)
|
| 114 |
+
((n.R:SylStructure.parent.gpos is content)
|
| 115 |
+
((last_accent < 2.5)
|
| 116 |
+
((next_accent < 2.5)
|
| 117 |
+
((next_accent < 1.2)
|
| 118 |
+
((n.syl_break is 4)
|
| 119 |
+
(((0 0.967213) (1 0.0327869) 0))
|
| 120 |
+
((syl_break is 4)
|
| 121 |
+
(((0 0.952381) (1 0.047619) 0))
|
| 122 |
+
((n.syl_break is 4)
|
| 123 |
+
(((0 0.953488) (1 0.0465116) 0))
|
| 124 |
+
((position_type is single)
|
| 125 |
+
(((0 0.947368) (1 0.0526316) 0))
|
| 126 |
+
((accented is 0)
|
| 127 |
+
((n.accented is 0)
|
| 128 |
+
(((0 0.857143) (1 0.142857) 0))
|
| 129 |
+
(((0 0.415385) (1 0.584615) 1)))
|
| 130 |
+
(((0 0.974359) (1 0.025641) 0)))))))
|
| 131 |
+
(((0 0.968254) (1 0.031746) 0)))
|
| 132 |
+
(((0 0.969697) (1 0.030303) 0)))
|
| 133 |
+
(((0 0.976744) (1 0.0232558) 0)))
|
| 134 |
+
(((0 0.990291) (1 0.00970874) 0)))
|
| 135 |
+
((next_accent < 108.5)
|
| 136 |
+
((p.R:SylStructure.parent.gpos is pps)
|
| 137 |
+
(((0 0.828947) (1 0.171053) 0))
|
| 138 |
+
((R:SylStructure.parent.gpos is det)
|
| 139 |
+
((accented is 0)
|
| 140 |
+
(((0 0.0599572) (1 0.940043) 1))
|
| 141 |
+
(((0 0.949367) (1 0.0506329) 0)))
|
| 142 |
+
((p.R:SylStructure.parent.gpos is cc)
|
| 143 |
+
(((0 0.880952) (1 0.119048) 0))
|
| 144 |
+
((p.R:SylStructure.parent.gpos is wp)
|
| 145 |
+
(((0 0.875) (1 0.125) 0))
|
| 146 |
+
((p.R:SylStructure.parent.gpos is in)
|
| 147 |
+
((n.syl_break is 4)
|
| 148 |
+
(((0 0.961538) (1 0.0384615) 0))
|
| 149 |
+
((next_accent < 2.5)
|
| 150 |
+
((syl_break is 4)
|
| 151 |
+
(((0 0.95122) (1 0.0487805) 0))
|
| 152 |
+
((next_accent < 1.2)
|
| 153 |
+
((accented is 0)
|
| 154 |
+
((n.stress is 0)
|
| 155 |
+
(((0 0.788462) (1 0.211538) 0))
|
| 156 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 157 |
+
(((0 0.863636) (1 0.136364) 0))
|
| 158 |
+
((position_type is single)
|
| 159 |
+
(((0 0.729167) (1 0.270833) 0))
|
| 160 |
+
(((0 0.4) (1 0.6) 1)))))
|
| 161 |
+
(((0 0.983871) (1 0.016129) 0)))
|
| 162 |
+
(((0 0.96) (1 0.04) 0))))
|
| 163 |
+
(((0 0.963636) (1 0.0363636) 0))))
|
| 164 |
+
((position_type is single)
|
| 165 |
+
((syl_break is 4)
|
| 166 |
+
(((0 0.993865) (1 0.00613497) 0))
|
| 167 |
+
((p.R:SylStructure.parent.gpos is to)
|
| 168 |
+
(((0 0.984375) (1 0.015625) 0))
|
| 169 |
+
((syl_break is 1)
|
| 170 |
+
((accented is 0)
|
| 171 |
+
((n.R:SylStructure.parent.gpos is in)
|
| 172 |
+
(((0 0.869565) (1 0.130435) 0))
|
| 173 |
+
((R:SylStructure.parent.gpos is content)
|
| 174 |
+
(((0 0.861789) (1 0.138211) 0))
|
| 175 |
+
((p.R:SylStructure.parent.gpos is content)
|
| 176 |
+
((p.syl_break is 4)
|
| 177 |
+
(((0 0.858065) (1 0.141935) 0))
|
| 178 |
+
((R:SylStructure.parent.gpos is in)
|
| 179 |
+
((p.syl_break is 1)
|
| 180 |
+
((n.R:SylStructure.parent.gpos is det)
|
| 181 |
+
(((0 0.659574) (1 0.340426) 0))
|
| 182 |
+
((p.stress is 0)
|
| 183 |
+
(((0 0.422222) (1 0.577778) 1))
|
| 184 |
+
(((0 0.582278) (1 0.417722) 0))))
|
| 185 |
+
((n.accented is 0)
|
| 186 |
+
((n.R:SylStructure.parent.gpos is content)
|
| 187 |
+
(((0 0.65) (1 0.35) 0))
|
| 188 |
+
((p.stress is 0)
|
| 189 |
+
(((0 0.464286) (1 0.535714) 1))
|
| 190 |
+
(((0 0.538462) (1 0.461538) 0))))
|
| 191 |
+
(((0 0.803279) (1 0.196721) 0))))
|
| 192 |
+
((n.R:SylStructure.parent.gpos is det)
|
| 193 |
+
(((0 0.952381) (1 0.047619) 0))
|
| 194 |
+
((n.syl_break is 4)
|
| 195 |
+
(((0 0.833333) (1 0.166667) 0))
|
| 196 |
+
((p.stress is 0)
|
| 197 |
+
((p.syl_break is 1)
|
| 198 |
+
((n.syl_break is 1)
|
| 199 |
+
(((0 0.740741) (1 0.259259) 0))
|
| 200 |
+
((R:SylStructure.parent.gpos is aux)
|
| 201 |
+
(((0 0.478261) (1 0.521739) 1))
|
| 202 |
+
(((0 0.769231) (1 0.230769) 0))))
|
| 203 |
+
(((0 0.755556) (1 0.244444) 0)))
|
| 204 |
+
(((0 0.797619) (1 0.202381) 0)))))))
|
| 205 |
+
(((0 0.870968) (1 0.129032) 0)))))
|
| 206 |
+
(((0 0.983806) (1 0.0161943) 0)))
|
| 207 |
+
(((0 0.977778) (1 0.0222222) 0)))))
|
| 208 |
+
((next_accent < 21.6)
|
| 209 |
+
((p.stress is 0)
|
| 210 |
+
((R:SylStructure.parent.R:Word.p.gpos is md)
|
| 211 |
+
(((0 0.961538) (1 0.0384615) 0))
|
| 212 |
+
((position_type is mid)
|
| 213 |
+
(((0 0.977612) (1 0.0223881) 0))
|
| 214 |
+
((n.R:SylStructure.parent.gpos is det)
|
| 215 |
+
(((0 0.916667) (1 0.0833333) 0))
|
| 216 |
+
((R:SylStructure.parent.R:Word.n.gpos is 0)
|
| 217 |
+
(((0 0.915493) (1 0.084507) 0))
|
| 218 |
+
((R:SylStructure.parent.R:Word.n.gpos is pps)
|
| 219 |
+
(((0 0.884615) (1 0.115385) 0))
|
| 220 |
+
((n.stress is 0)
|
| 221 |
+
((n.syl_break is 4)
|
| 222 |
+
(((0 0.986755) (1 0.013245) 0))
|
| 223 |
+
((p.syl_break is 4)
|
| 224 |
+
(((0 0.977011) (1 0.0229885) 0))
|
| 225 |
+
((n.syl_break is 4)
|
| 226 |
+
(((0 0.965517) (1 0.0344828) 0))
|
| 227 |
+
((last_accent < 1.2)
|
| 228 |
+
((last_accent < 0.1)
|
| 229 |
+
(((0 0.910448) (1 0.0895522) 0))
|
| 230 |
+
((next_accent < 1.2)
|
| 231 |
+
((R:SylStructure.parent.R:Word.n.gpos is in)
|
| 232 |
+
(((0 0.82) (1 0.18) 0))
|
| 233 |
+
((n.syl_break is 0)
|
| 234 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 235 |
+
(((0 0.819672) (1 0.180328) 0))
|
| 236 |
+
(((0 0.444444) (1 0.555556) 1)))
|
| 237 |
+
(((0 0.785714) (1 0.214286) 0))))
|
| 238 |
+
(((0 0.836364) (1 0.163636) 0))))
|
| 239 |
+
(((0 0.962025) (1 0.0379747) 0))))))
|
| 240 |
+
((stress is 0)
|
| 241 |
+
((n.syl_break is 4)
|
| 242 |
+
(((0 0.21875) (1 0.78125) 1))
|
| 243 |
+
((R:SylStructure.parent.R:Word.p.gpos is aux)
|
| 244 |
+
(((0 0.259259) (1 0.740741) 1))
|
| 245 |
+
((p.syl_break is 1)
|
| 246 |
+
(((0 0.243094) (1 0.756906) 1))
|
| 247 |
+
((R:SylStructure.parent.R:Word.p.gpos is det)
|
| 248 |
+
(((0 0.290323) (1 0.709677) 1))
|
| 249 |
+
((R:SylStructure.parent.R:Word.p.gpos is in)
|
| 250 |
+
(((0 0.3) (1 0.7) 1))
|
| 251 |
+
((syl_break is 1)
|
| 252 |
+
(((0 0.289157) (1 0.710843) 1))
|
| 253 |
+
((p.syl_break is 4)
|
| 254 |
+
(((0 0.352941) (1 0.647059) 1))
|
| 255 |
+
((n.syl_break is 0)
|
| 256 |
+
(((0 0.311475) (1 0.688525) 1))
|
| 257 |
+
((syl_break is 4)
|
| 258 |
+
(((0 0.4) (1 0.6) 1))
|
| 259 |
+
(((0 0.581395) (1 0.418605) 0)))))))))))
|
| 260 |
+
(((0 1) (1 0) 0)))))))))
|
| 261 |
+
((stress is 0)
|
| 262 |
+
((R:SylStructure.parent.R:Word.n.gpos is 0)
|
| 263 |
+
(((0 0.121212) (1 0.878788) 1))
|
| 264 |
+
((next_accent < 2.4)
|
| 265 |
+
((R:SylStructure.parent.gpos is content)
|
| 266 |
+
((position_type is mid)
|
| 267 |
+
(((0 0.176895) (1 0.823105) 1))
|
| 268 |
+
((p.syl_break is 1)
|
| 269 |
+
(((0 0.229167) (1 0.770833) 1))
|
| 270 |
+
((syl_break is 4)
|
| 271 |
+
(((0 0.242775) (1 0.757225) 1))
|
| 272 |
+
((p.syl_break is 0)
|
| 273 |
+
((n.R:SylStructure.parent.gpos is in)
|
| 274 |
+
(((0 0.253521) (1 0.746479) 1))
|
| 275 |
+
((R:SylStructure.parent.R:Word.p.gpos is in)
|
| 276 |
+
(((0 0.262774) (1 0.737226) 1))
|
| 277 |
+
((last_accent < 2.1)
|
| 278 |
+
((n.R:SylStructure.parent.gpos is aux)
|
| 279 |
+
(((0 0.304348) (1 0.695652) 1))
|
| 280 |
+
((next_accent < 1.2)
|
| 281 |
+
((n.R:SylStructure.parent.gpos is cc)
|
| 282 |
+
(((0 0.291667) (1 0.708333) 1))
|
| 283 |
+
((syl_break is 1)
|
| 284 |
+
((n.syl_break is 4)
|
| 285 |
+
(((0 0.344828) (1 0.655172) 1))
|
| 286 |
+
((R:SylStructure.parent.R:Word.p.gpos is det)
|
| 287 |
+
(((0 0.364706) (1 0.635294) 1))
|
| 288 |
+
((n.syl_break is 4)
|
| 289 |
+
(((0 0.384615) (1 0.615385) 1))
|
| 290 |
+
((last_accent < 1.2)
|
| 291 |
+
((p.accented is 0)
|
| 292 |
+
(((0 0.584906) (1 0.415094) 0))
|
| 293 |
+
((n.accented is 0)
|
| 294 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 295 |
+
(((0 0.41) (1 0.59) 1))
|
| 296 |
+
(((0 0.6) (1 0.4) 0)))
|
| 297 |
+
(((0 0.333333) (1 0.666667) 1))))
|
| 298 |
+
(((0 0.380952) (1 0.619048) 1))))))
|
| 299 |
+
((p.accented is 0)
|
| 300 |
+
(((0 0.183673) (1 0.816327) 1))
|
| 301 |
+
((n.R:SylStructure.parent.gpos is content)
|
| 302 |
+
((n.stress is 0)
|
| 303 |
+
(((0 0.295455) (1 0.704545) 1))
|
| 304 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 305 |
+
((n.syl_break is 1)
|
| 306 |
+
(((0 0.5) (1 0.5) 0))
|
| 307 |
+
(((0 0.40625) (1 0.59375) 1)))
|
| 308 |
+
(((0 0.333333) (1 0.666667) 1))))
|
| 309 |
+
(((0 0.2) (1 0.8) 1))))))
|
| 310 |
+
(((0 0.3) (1 0.7) 1))))
|
| 311 |
+
(((0 0.302326) (1 0.697674) 1)))))
|
| 312 |
+
(((0 0.25) (1 0.75) 1))))))
|
| 313 |
+
(((0 0.173913) (1 0.826087) 1)))
|
| 314 |
+
(((0 0.166667) (1 0.833333) 1))))
|
| 315 |
+
(((0 1) (1 0) 0))))
|
| 316 |
+
(((0 0.2) (1 0.8) 1)))))))))
|
| 317 |
+
(((0 0.15) (1 0.85) 1)))))))
|
| 318 |
+
|
| 319 |
+
(defvar postlex_mrpa_r_cart_tree
|
| 320 |
+
'((name is r)
|
| 321 |
+
((R:Segment.n.ph_vc is -)
|
| 322 |
+
((delete))
|
| 323 |
+
((nil)))
|
| 324 |
+
((nil)))
|
| 325 |
+
"postlex_mrpa_r_cart_tree
|
| 326 |
+
Used to remove a final R when it is not between vowels.")
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
;; Changed this to actually work... (Rob 09/12/04)
|
| 330 |
+
;; Changed this to delete the syllable when schwa is unnecessary (awb 19/07/04)
|
| 331 |
+
(define (postlex_apos_s_check utt)
  "(postlex_apos_s_check UTT)
Handle the possessive 's for English (American and British).  The
final phone of the 's syllable is renamed to s when the last phone of
the previous syllable is unvoiced.  Unless that previous phone is an
alveolar or palatal fricative or affricate, the schwa of 's is deleted,
the remaining s/z is attached to the previous syllable and the emptied
syllable is removed.  Returns UTT."
  (mapcar
   (lambda (syl)
     ;; Only syllables whose parent word is 's are touched.
     (if (string-equal "'s" (item.feat syl "R:SylStructure.parent.name"))
         (begin
           ;; Previous phone unvoiced: turn the "z" into "s".
           (if (string-equal
                "-"
                (item.feat syl "p.R:SylStructure.daughtern.ph_cvox"))
               (item.set_name
                (item.relation.daughtern syl 'SylStructure)
                "s"))
           ;; After an alveolar/palatal fricative or affricate the schwa
           ;; stays; in every other context it is dropped.
           (if (not
                (and
                 (member_string
                  (item.feat syl "p.R:SylStructure.daughtern.ph_ctype")
                  '(f a))
                 (member_string
                  (item.feat syl "p.R:SylStructure.daughtern.ph_cplace")
                  '(a p))))
               (begin
                 ;; delete the schwa
                 (item.delete (item.relation.daughter1 syl 'SylStructure))
                 ;; hand the orphaned s/z over to the previous syllable
                 (item.relation.append_daughter
                  (item.prev syl)
                  'SylStructure
                  (item.relation.daughtern syl 'SylStructure))
                 ;; the syllable is empty now, remove it
                 (item.delete syl))))))
   ;; The first syllable is skipped: 's can never be utterance-initial.
   (cdr (utt.relation.items utt 'Syllable)))
  utt)
|
| 371 |
+
|
| 372 |
+
;; Changed this to work the other way round, too. Volker 10/08/06
|
| 373 |
+
(define (postlex_the_vs_thee utt)
  "(postlex_the_vs_thee utt)
Unreduce the schwa in \"the\" when a vowel follows.
Reduce the vowel in \"the\" when no vowel follows (this
requires a lexicon entry for \"the\" with feature \"reduced\",
otherwise there will be no reduction).  Returns UTT."
  ;; Both phone names are read from the current lexicon: the full vowel
  ;; from the entry for \"thee\", the reduced one from the \"reduced\"
  ;; entry of \"the\".
  (let ((fullform (cadr (car (caar (cdr (cdar (lex.lookup_all 'thee)))))))
        (reducedform (cadr (car (caar (cddr (lex.lookup 'the '(reduced))))))))
    (mapcar
     (lambda (word)
       (if (string-equal "the" (downcase (item.feat word "name")))
           ;; seg is the last segment of the word's last syllable,
           ;; viewed in the Segment relation.
           (let ((seg (item.relation
                       (item.daughtern
                        (item.relation.daughtern word 'SylStructure))
                       'Segment)))
             ;; The n. feature path is used instead of calling item.feat
             ;; on (item.next seg), which is nil when \"the\" is the very
             ;; last segment.  NOTE(review): a missing neighbour is
             ;; expected to give the default feature value, which selects
             ;; the reduced form - confirm against the Festival manual.
             (if (string-equal "+" (item.feat seg 'n.ph_vc))
                 (item.set_feat seg 'name fullform)
                 (item.set_feat seg 'name reducedform)))))
     (utt.relation.items utt 'Word)))
  utt)
|
| 393 |
+
|
| 394 |
+
(define (postlex_the_vs_thee_changeflag utt)
  "(postlex_the_vs_thee_changeflag utt)
For every word \"the\", set the feature \"reducable\" on its final
segment: 0 when the following segment is a vowel (the vowel must stay
full), 1 otherwise.  The phone name itself is not changed here.
Returns UTT."
  ;; No lexicon lookup is needed: only the flag is written, never a
  ;; phone name.
  (mapcar
   (lambda (word)
     (if (string-equal "the" (downcase (item.feat word "name")))
         ;; seg is the last segment of the word's last syllable,
         ;; viewed in the Segment relation.
         (let ((seg (item.relation
                     (item.daughtern
                      (item.relation.daughtern word 'SylStructure))
                     'Segment)))
           ;; The n. feature path avoids calling item.feat on a nil
           ;; (item.next seg) when \"the\" is the very last segment.
           ;; NOTE(review): a missing neighbour is expected to give the
           ;; default feature value, i.e. reducable 1 - confirm.
           (item.set_feat seg 'reducable
                          (if (string-equal "+" (item.feat seg 'n.ph_vc))
                              0
                              1)))))
   (utt.relation.items utt 'Word))
  utt)
|
| 414 |
+
|
| 415 |
+
|
| 416 |
+
;; For Multisyn voices only. Volker 14/08/06
|
| 417 |
+
(define (postlex_a utt)
  "(postlex_a utt)
For every word \"a\" whose POS is \"nn\": if the final segment has the
feature \"reducable\" set to 1, reset it to 0.
This is a bugfix, but it still requires the target cost function to add
a penalty if a candidate is reducable but the target is not.
expro_target_cost does that.  Returns UTT."
  (mapcar
   (lambda (word)
     (if (and (string-equal "a" (downcase (item.feat word "name")))
              (string-equal "nn" (item.feat word "pos")))
         ;; last segment of the word's last syllable, in the Segment
         ;; relation
         (let ((seg (item.relation
                     (item.daughtern
                      (item.relation.daughtern word 'SylStructure))
                     'Segment)))
           ;; this occurrence must not be reduced
           (if (eq 1 (parse-number (item.feat seg 'reducable)))
               (item.set_feat seg 'reducable 0)))))
   (utt.relation.items utt 'Word))
  utt)
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
(define (postlex_unilex_vowel_reduction utt)
  "(postlex_unilex_vowel_reduction utt)
Vowel reduction driven by the unilex marking of what may be reduced.
A segment is renamed to its \"reducedform\" feature when it is flagged
\"reducable\", its syllable carries no stress, and it is not a
word-final segment directly followed by a vowel.  Returns UTT."
  (mapcar
   (lambda (seg)
     ;; Tests are kept in their original order so that each later one
     ;; is only evaluated when the earlier ones hold.
     (if (and (eq? (parse-number (item.feat seg "reducable")) 1)
              (not (> (parse-number
                       (item.feat seg "R:SylStructure.parent.stress"))
                      0))
              (not (and (seg_word_final seg)
                        (string-equal (item.feat (item.next seg) 'ph_vc)
                                      "+"))))
         (item.set_feat seg "name" (item.feat seg "reducedform"))))
   (utt.relation.items utt 'Segment))
  utt)
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
(define (seg_word_final seg)
  "(seg_word_final seg)
Returns t when SEG is the last segment of its word, nil otherwise.
A segment named like the first silence phone of the current phone set
is never word final."
  (let ((own_word (item.parent (item.relation.parent seg 'SylStructure)))
        ;; first entry of the phone set's silence list
        (sil (car (cadr (car (PhoneSet.description '(silences))))))
        (following (item.next seg)))
    ;; The word of the next segment stays nil when there is no next
    ;; segment.
    (let ((following_word
           (and following
                (item.parent
                 (item.relation.parent following 'SylStructure)))))
      (not (or (equal? own_word following_word)
               (string-equal (item.feat seg "name") sil))))))
|
| 470 |
+
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
;; imported from postlex_intervoc_r.scm Volker 14/08/06
|
| 474 |
+
(define (postlex_intervoc_r utt)
  "(postlex_intervoc_r UTT)

Remove any word-final /r/ which is phrase-final or which will not
be inter-vocalic, i.e. the following word does not start with a
vowel.  An /r/ ending the last word is removed as well.

NOTE: in older versions of unilex-rpx.out for Festival, there
is no word-final /r/.

Returns UTT."
  (let ((word (utt.relation.first utt 'Word))
        next_word last_phone following_phone)
    (while word
      (set! next_word (item.next word))
      ;; last segment of the last syllable of this word
      (set! last_phone
            (item.daughtern
             (item.daughtern (item.relation word 'SylStructure))))
      (cond
       (next_word
        (begin
          ;; first segment of the first syllable of the next word
          (set! following_phone
                (item.daughter1
                 (item.daughter1
                  (item.relation next_word 'SylStructure))))
          ;; Both phones should always exist here, but tokens that are
          ;; not handled correctly (e.g. the pound sterling sign since
          ;; the switch from ISO to UTF8 input) can leave a word without
          ;; segments.  Probably (Token utt) should be fixed.
          (if (and following_phone last_phone)
              (begin
                (format t "%s\t%s %s %s %s\n"
                        (item.name word)
                        (item.name last_phone)
                        (item.name following_phone)
                        (item.feat following_phone 'ph_vc)
                        (item.feat word 'pbreak))
                ;; delete /r/ before a phrase break or before a
                ;; non-vowel
                (if (and (equal? "r" (item.name last_phone))
                         (or (not (equal? "NB" (item.feat word 'pbreak)))
                             (equal? "-"
                                     (item.feat following_phone 'ph_vc))))
                    (begin
                      (format t "\t\t\t/r/ in \"%s %s\" deleted\n"
                              (item.name word) (item.name next_word))
                      (item.delete last_phone)))))))
       ;; last word of the utterance
       ((and last_phone (equal? "r" (item.name last_phone)))
        (begin
          (format t "\t\t\tutterance-final /r/ deleted\n")
          (item.delete last_phone))))
      ;; only segments are deleted above, so the word captured at the
      ;; top of the loop is still the successor
      (set! word next_word)))
  utt)
|
| 525 |
+
|
| 526 |
+
|
| 527 |
+
(define (postlex_stop_deletion utt)
  "(postlex_stop_deletion utt)

Delete any stop or affricate (phone which has a closure)
immediately followed by another stop or affricate.

Also save the identity of the deleted phone for the
context cost functions.  Consider:

backtrack /b a k t r a k/ -> /b a t r a k/
(actually Jenny reduces : /b a k_cl k t_cl t r a k/ -> /b a k_cl t r a k/)
If we then look for a diphone /a t/ we want to favour
candidates coming from the same context i.e. which
are actually a reduced /a k t/.  In the data base,
the 1st /a/ gets the feature right_context=k and the
/t/ gets the feature left_context=k.

Returns UTT."
  ;; prev_prev_seg is declared here so that the set! below binds a
  ;; local instead of writing a global variable.
  (let (seg prev_seg prev_prev_seg)
    (set! seg (utt.relation.first utt 'Segment))
    (while seg
      (set! prev_seg (item.prev seg))
      (if (and prev_seg
               ;; this segment is a stop or an affricate ...
               (or (equal? "s" (item.feat seg 'ph_ctype))
                   (equal? "a" (item.feat seg 'ph_ctype)))
               ;; ... and so is the one before it
               (or (equal? "s" (item.feat seg 'p.ph_ctype))
                   (equal? "a" (item.feat seg 'p.ph_ctype)))
               ;; When there are 3 stops in a row and the 1st has already
               ;; been deleted, this keeps the 2nd from being deleted
               ;; as well.
               (equal? 0 (item.feat prev_seg 'left_context)))
          (begin
            (set! prev_prev_seg (item.prev prev_seg))
            (format t "postlex_stop_deletion: %s in %s\n"
                    (item.name prev_seg)
                    (item.name
                     (item.parent
                      (item.relation.parent prev_seg 'SylStructure))))
            ;; record the deleted phone on both of its neighbours
            (if prev_prev_seg
                (begin
                  (item.set_feat seg 'left_context (item.name prev_seg))
                  (item.set_feat prev_prev_seg 'right_context
                                 (item.name prev_seg))))
            ;; two identical phones in a row: the segment after the
            ;; survivor gets the left context too
            (if (and (item.next seg)
                     (equal? (item.name seg) (item.name prev_seg)))
                (item.set_feat (item.next seg) 'left_context
                               (item.name prev_seg)))
            (item.delete prev_seg)))
      (set! seg (item.next seg))))
  utt)
|
| 586 |
+
|
| 587 |
+
(provide 'postlex)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/radio_phones.scm
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; A definition of the radio phone set used in the BU RADIO FM
|
| 35 |
+
;;; corpus, some people call this the darpa set. This one
|
| 36 |
+
;;; has the closures removed
|
| 37 |
+
;;;
|
| 38 |
+
|
| 39 |
+
(defPhoneSet
|
| 40 |
+
radio
|
| 41 |
+
;;; Phone Features
|
| 42 |
+
(;; vowel or consonant
|
| 43 |
+
(vc + -)
|
| 44 |
+
;; vowel length: short long diphthong schwa
|
| 45 |
+
(vlng s l d a 0)
|
| 46 |
+
;; vowel height: high mid low
|
| 47 |
+
(vheight 1 2 3 0)
|
| 48 |
+
;; vowel frontness: front mid back
|
| 49 |
+
(vfront 1 2 3 0)
|
| 50 |
+
;; lip rounding
|
| 51 |
+
(vrnd + - 0)
|
| 52 |
+
;; consonant type: stop fricative affricate nasal lateral approximant
|
| 53 |
+
(ctype s f a n l r 0)
|
| 54 |
+
;; place of articulation: labial alveolar palatal labio-dental
|
| 55 |
+
;; dental velar glottal
|
| 56 |
+
(cplace l a p b d v g 0)
|
| 57 |
+
;; consonant voicing
|
| 58 |
+
(cvox + - 0)
|
| 59 |
+
)
|
| 60 |
+
;; Phone set members
|
| 61 |
+
(
|
| 62 |
+
;; Note these features were set by awb so they are wrong !!!
|
| 63 |
+
(aa + l 3 3 - 0 0 0) ;; father
|
| 64 |
+
(ae + s 3 1 - 0 0 0) ;; fat
|
| 65 |
+
(ah + s 2 2 - 0 0 0) ;; but
|
| 66 |
+
(ao + l 3 3 + 0 0 0) ;; lawn
|
| 67 |
+
(aw + d 3 2 - 0 0 0) ;; how
|
| 68 |
+
(ax + a 2 2 - 0 0 0) ;; about
|
| 69 |
+
(axr + a 2 2 - r a +)
|
| 70 |
+
(ay + d 3 2 - 0 0 0) ;; hide
|
| 71 |
+
(b - 0 0 0 0 s l +)
|
| 72 |
+
(ch - 0 0 0 0 a p -)
|
| 73 |
+
(d - 0 0 0 0 s a +)
|
| 74 |
+
(dh - 0 0 0 0 f d +)
|
| 75 |
+
(dx - a 0 0 0 s a +) ;; ??
|
| 76 |
+
(eh + s 2 1 - 0 0 0) ;; get
|
| 77 |
+
(el + s 0 0 0 l a +)
|
| 78 |
+
(em + s 0 0 0 n l +)
|
| 79 |
+
(en + s 0 0 0 n a +)
|
| 80 |
+
(er + a 2 2 - r 0 0) ;; always followed by r (er-r == axr)
|
| 81 |
+
(ey + d 2 1 - 0 0 0) ;; gate
|
| 82 |
+
(f - 0 0 0 0 f b -)
|
| 83 |
+
(g - 0 0 0 0 s v +)
|
| 84 |
+
(hh - 0 0 0 0 f g -)
|
| 85 |
+
(hv - 0 0 0 0 f g +)
|
| 86 |
+
(ih + s 1 1 - 0 0 0) ;; bit
|
| 87 |
+
(iy + l 1 1 - 0 0 0) ;; beet
|
| 88 |
+
(jh - 0 0 0 0 a p +)
|
| 89 |
+
(k - 0 0 0 0 s v -)
|
| 90 |
+
(l - 0 0 0 0 l a +)
|
| 91 |
+
(m - 0 0 0 0 n l +)
|
| 92 |
+
(n - 0 0 0 0 n a +)
|
| 93 |
+
(nx - 0 0 0 0 n d +) ;; ???
|
| 94 |
+
(ng - 0 0 0 0 n v +)
|
| 95 |
+
(ow + d 2 3 + 0 0 0) ;; lone
|
| 96 |
+
(oy + d 2 3 + 0 0 0) ;; toy
|
| 97 |
+
(p - 0 0 0 0 s l -)
|
| 98 |
+
(r - 0 0 0 0 r a +)
|
| 99 |
+
(s - 0 0 0 0 f a -)
|
| 100 |
+
(sh - 0 0 0 0 f p -)
|
| 101 |
+
(t - 0 0 0 0 s a -)
|
| 102 |
+
(th - 0 0 0 0 f d -)
|
| 103 |
+
(uh + s 1 3 + 0 0 0) ;; full
|
| 104 |
+
(uw + l 1 3 + 0 0 0) ;; fool
|
| 105 |
+
(v - 0 0 0 0 f b +)
|
| 106 |
+
(w - 0 0 0 0 r l +)
|
| 107 |
+
(y - 0 0 0 0 r p +)
|
| 108 |
+
(z - 0 0 0 0 f a +)
|
| 109 |
+
(zh - 0 0 0 0 f p +)
|
| 110 |
+
(pau - 0 0 0 0 0 0 -)
|
| 111 |
+
(h# - 0 0 0 0 0 0 -)
|
| 112 |
+
(brth - 0 0 0 0 0 0 -)
|
| 113 |
+
)
|
| 114 |
+
)
|
| 115 |
+
|
| 116 |
+
(PhoneSet.silences '(pau h# brth))
|
| 117 |
+
|
| 118 |
+
(provide 'radio_phones)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/sable-latin.ent
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!--
|
| 2 |
+
|
| 3 |
+
NOTE: THIS IS A WORKING DOCUMENT. NOTHING IN HERE SHOULD BE
|
| 4 |
+
TAKEN AS FINAL.
|
| 5 |
+
|
| 6 |
+
This is an initial proposal for a Latin 1 and assorted
|
| 7 |
+
other characters entity set for SABLE. It is based on the
|
| 8 |
+
equivalent set for HTML 3.
|
| 9 |
+
|
| 10 |
+
-->
|
| 11 |
+
<!-- Portions of this text are copyright ISO:
|
| 12 |
+
|
| 13 |
+
(C) International Organization for Standardization 1986
|
| 14 |
+
Permission to copy in any form is granted for use with
|
| 15 |
+
conforming SGML systems and applications as defined in
|
| 16 |
+
ISO 8879, provided this notice is included in all copies.
|
| 17 |
+
-->
|
| 18 |
+
<!-- Character entity set. Typical invocation:
|
| 19 |
+
<!ENTITY % HTMLlat1 PUBLIC
|
| 20 |
+
"-//W3O//ENTITIES W3 Latin 1 for HTML//EN">
|
| 21 |
+
%HTMLlat1;
|
| 22 |
+
-->
|
| 23 |
+
<!-- Modified for use in HTML
|
| 24 |
+
$Id: sable-latin.ent,v 1.2 2001/04/04 13:12:35 awb Exp $
|
| 25 |
+
|
| 26 |
+
-->
|
| 27 |
+
<!-- Modified to add characters in the range &161; to &191; in
|
| 28 |
+
the ISO Latin-1 character set, which could only be referred
|
| 29 |
+
|
| 30 |
+
to by numeric references. Entity names based on relevant entities in
|
| 31 |
+
"ISO 8879-1986//ENTITIES Numeric and Special Graphic//EN"
|
| 32 |
+
Also added the standard lt gt amp entities from HTML 2.0
|
| 33 |
+
Chris Lilley, 13 March 1995
|
| 34 |
+
=
|
| 35 |
+
|
| 36 |
+
This covers all of Latin 1, but we are still unable to display a
|
| 37 |
+
|
| 38 |
+
Trade Mark (TM)
|
| 39 |
+
-->
|
| 40 |
+
|
| 41 |
+
<!ENTITY AElig "Æ" > <!-- capital AE diphthong (ligature) -->
|
| 42 |
+
<!ENTITY Aacute "Á" > <!-- capital A, acute accent -->
|
| 43 |
+
<!ENTITY Acirc "Â" > <!-- capital A, circumflex accent -->
|
| 44 |
+
<!ENTITY Agrave "À" > <!-- capital A, grave accent -->
|
| 45 |
+
<!ENTITY Aring "Å" > <!-- capital A, ring -->
|
| 46 |
+
<!ENTITY Atilde "Ã" > <!-- capital A, tilde -->
|
| 47 |
+
<!ENTITY Auml "Ä" > <!-- capital A, dieresis or umlaut mark -->
|
| 48 |
+
<!ENTITY Ccedil "Ç" > <!-- capital C, cedilla -->
|
| 49 |
+
<!ENTITY ETH "Ð" > <!-- capital Eth, Icelandic -->
|
| 50 |
+
<!ENTITY Eacute "É" > <!-- capital E, acute accent -->
|
| 51 |
+
<!ENTITY Ecirc "Ê" > <!-- capital E, circumflex accent -->
|
| 52 |
+
<!ENTITY Egrave "È" > <!-- capital E, grave accent -->
|
| 53 |
+
<!ENTITY Euml "Ë" > <!-- capital E, dieresis or umlaut mark -->
|
| 54 |
+
<!ENTITY Iacute "Í" > <!-- capital I, acute accent -->
|
| 55 |
+
<!ENTITY Icirc "Î" > <!-- capital I, circumflex accent -->
|
| 56 |
+
<!ENTITY Igrave "Ì" > <!-- capital I, grave accent -->
|
| 57 |
+
<!ENTITY Iuml "Ï" > <!-- capital I, dieresis or umlaut mark -->
|
| 58 |
+
<!ENTITY Ntilde "Ñ" > <!-- capital N, tilde -->
|
| 59 |
+
<!ENTITY Oacute "Ó" > <!-- capital O, acute accent -->
|
| 60 |
+
<!ENTITY Ocirc "Ô" > <!-- capital O, circumflex accent -->
|
| 61 |
+
<!ENTITY Ograve "Ò" > <!-- capital O, grave accent -->
|
| 62 |
+
<!ENTITY Oslash "Ø" > <!-- capital O, slash -->
|
| 63 |
+
<!ENTITY Otilde "Õ" > <!-- capital O, tilde -->
|
| 64 |
+
<!ENTITY Ouml "Ö" > <!-- capital O, dieresis or umlaut mark -->
|
| 65 |
+
<!ENTITY THORN "Þ" > <!-- capital THORN, Icelandic -->
|
| 66 |
+
<!ENTITY Uacute "Ú" > <!-- capital U, acute accent -->
|
| 67 |
+
<!ENTITY Ucirc "Û" > <!-- capital U, circumflex accent -->
|
| 68 |
+
<!ENTITY Ugrave "Ù" > <!-- capital U, grave accent -->
|
| 69 |
+
<!ENTITY Uuml "Ü" > <!-- capital U, dieresis or umlaut mark -->
|
| 70 |
+
<!ENTITY Yacute "Ý" > <!-- capital Y, acute accent -->
|
| 71 |
+
<!ENTITY aacute "á" > <!-- small a, acute accent -->
|
| 72 |
+
<!ENTITY acirc "â" > <!-- small a, circumflex accent -->
|
| 73 |
+
<!ENTITY aelig "æ" > <!-- small ae diphthong (ligature) -->
|
| 74 |
+
<!ENTITY agrave "à" > <!-- small a, grave accent -->
|
| 75 |
+
<!ENTITY aring "å" > <!-- small a, ring -->
|
| 76 |
+
<!ENTITY atilde "ã" > <!-- small a, tilde -->
|
| 77 |
+
<!ENTITY auml "ä" > <!-- small a, dieresis or umlaut mark -->
|
| 78 |
+
<!ENTITY ccedil "ç" > <!-- small c, cedilla -->
|
| 79 |
+
<!ENTITY eacute "é" > <!-- small e, acute accent -->
|
| 80 |
+
<!ENTITY ecirc "ê" > <!-- small e, circumflex accent -->
|
| 81 |
+
<!ENTITY egrave "è" > <!-- small e, grave accent -->
|
| 82 |
+
<!ENTITY eth "ð" > <!-- small eth, Icelandic -->
|
| 83 |
+
<!ENTITY euml "ë" > <!-- small e, dieresis or umlaut mark -->
|
| 84 |
+
<!ENTITY iacute "í" > <!-- small i, acute accent -->
|
| 85 |
+
<!ENTITY icirc "î" > <!-- small i, circumflex accent -->
|
| 86 |
+
<!ENTITY igrave "ì" > <!-- small i, grave accent -->
|
| 87 |
+
<!ENTITY iuml "ï" > <!-- small i, dieresis or umlaut mark -->
|
| 88 |
+
<!ENTITY ntilde "ñ" > <!-- small n, tilde -->
|
| 89 |
+
<!ENTITY oacute "ó" > <!-- small o, acute accent -->
|
| 90 |
+
<!ENTITY ocirc "ô" > <!-- small o, circumflex accent -->
|
| 91 |
+
<!ENTITY ograve "ò" > <!-- small o, grave accent -->
|
| 92 |
+
<!ENTITY oslash "ø" > <!-- small o, slash -->
|
| 93 |
+
<!ENTITY otilde "õ" > <!-- small o, tilde -->
|
| 94 |
+
<!ENTITY ouml "ö" > <!-- small o, dieresis or umlaut mark -->
|
| 95 |
+
<!ENTITY szlig "ß" > <!-- small sharp s, German (sz ligature) -->
|
| 96 |
+
<!ENTITY thorn "þ" > <!-- small thorn, Icelandic -->
|
| 97 |
+
<!ENTITY uacute "ú" > <!-- small u, acute accent -->
|
| 98 |
+
<!ENTITY ucirc "û" > <!-- small u, circumflex accent -->
|
| 99 |
+
<!ENTITY ugrave "ù" > <!-- small u, grave accent -->
|
| 100 |
+
<!ENTITY uuml "ü" > <!-- small u, dieresis or umlaut mark -->
|
| 101 |
+
<!ENTITY yacute "ý" > <!-- small y, acute accent -->
|
| 102 |
+
<!ENTITY yuml "ÿ" > <!-- small y, dieresis or umlaut mark -->
|
| 103 |
+
<!--
|
| 104 |
+
|
| 105 |
+
Ones that aren't accented characters, and so not in ISO Added Latin
|
| 106 |
+
1.
|
| 107 |
+
|
| 108 |
+
umlaut, macron, acute, cedilla
|
| 109 |
+
were not in ISO Numeric and Special Graphic
|
| 110 |
+
either; I took their names from the numeric entity list in
|
| 111 |
+
http://www.hpl.hp.co.uk/people/dsr/html/latin1.html
|
| 112 |
+
|
| 113 |
+
Chris Lilley, 13 March 1995
|
| 114 |
+
|
| 115 |
+
-->
|
| 116 |
+
|
| 117 |
+
<!ENTITY iexcl "¡" > <!-- inverted exclamation mark &161; -->
|
| 118 |
+
<!ENTITY cent "¢" > <!-- cent sign &162; -->
|
| 119 |
+
<!ENTITY pound "£" > <!-- pound sterling sign &163; -->
|
| 120 |
+
<!ENTITY curren "¤" > <!-- general currency sign &164; -->
|
| 121 |
+
<!ENTITY yen "¥" > <!-- yen sign &165; -->
|
| 122 |
+
<!ENTITY brvbar "¦" > <!-- broken (vertical) bar &166; -->
|
| 123 |
+
<!ENTITY sect "§" > <!-- section sign &167; -->
|
| 124 |
+
<!ENTITY umlaut "¨" > <!-- umlaut (dieresis) &168; -->
|
| 125 |
+
<!ENTITY copy "©" > <!-- copyright sign &169; -->
|
| 126 |
+
<!ENTITY ordf "ª" > <!-- ordinal indicator, feminine &170; -->
|
| 127 |
+
<!ENTITY laquo "«" > <!-- angle quotation mark, left &171; -->
|
| 128 |
+
<!ENTITY not "¬" > <!-- not sign &172; -->
|
| 129 |
+
<!ENTITY shy "­" > <!-- soft hyphen &173;-->
|
| 130 |
+
<!ENTITY reg "®" > <!-- registered trademark &174; -->
|
| 131 |
+
<!ENTITY macron "¯" > <!-- macron &175; -->
|
| 132 |
+
<!ENTITY deg "°" > <!-- degree sign &176; -->
|
| 133 |
+
<!ENTITY plusmn "±" > <!-- plus-or-minus sign &177; -->
|
| 134 |
+
<!ENTITY sup2 "²" > <!-- superscript two &178; -->
|
| 135 |
+
<!ENTITY sup3 "³" > <!-- superscript three &179; -->
|
| 136 |
+
<!ENTITY acute "´" > <!-- acute accent &180; -->
|
| 137 |
+
<!ENTITY micro "µ" > <!-- micro sign &181; -->
|
| 138 |
+
<!ENTITY para "¶" > <!-- pilcrow (paragraph sign) &182; -->
|
| 139 |
+
<!ENTITY middot "·" > <!-- middle dot (centred decimal point) &183; -->
|
| 140 |
+
<!ENTITY cedilla "¸" > <!-- cedilla accent &184; -->
|
| 141 |
+
<!ENTITY sup1 "¹" > <!-- superscript one -->
|
| 142 |
+
<!ENTITY ordm "º" > <!-- ordinal indicator, masculine -->
|
| 143 |
+
<!ENTITY raquo "»" > <!-- angle quotation mark, right -->
|
| 144 |
+
<!ENTITY frac14 "¼" > <!-- fraction one-quarter -->
|
| 145 |
+
<!ENTITY frac12 "½" > <!-- fraction one-half -->
|
| 146 |
+
<!ENTITY frac34 "¾" > <!-- fraction three-quarters -->
|
| 147 |
+
<!ENTITY iquest "¿" > <!-- inverted question mark -->
|
| 148 |
+
<!-- the odd ones tucked in amongst the accented letters -->
|
| 149 |
+
<!ENTITY times "×" > <!-- multiply sign -->
|
| 150 |
+
<!ENTITY divide "÷" > <!-- divide sign -->
|
| 151 |
+
|
| 152 |
+
<!ENTITY amp "&#38;#38;" > <!-- ampersand -->
|
| 153 |
+
<!ENTITY gt ">" > <!-- greater than -->
|
| 154 |
+
<!ENTITY lt "&#38;#60;" > <!-- less than -->
|
| 155 |
+
<!ENTITY quot "&#34;" > <!-- double quote -->
|
| 156 |
+
|
| 157 |
+
<!--
|
| 158 |
+
Should the dollar sign and such like also be given entity names?
|
| 159 |
+
There are suitable ones around. For example, people using
|
| 160 |
+
|
| 161 |
+
internationalised keyboards in various countries might not be
|
| 162 |
+
|
| 163 |
+
readily able to access some of these characters.
|
| 164 |
+
=
|
| 165 |
+
|
| 166 |
+
On such grounds, I suggest entity names for these:
|
| 167 |
+
! " # $ % ' ( ) * + , - . / : ; = ? [ \ ] ^ _ ` { | } ~
|
| 168 |
+
Lastly, what about &tab; for those whose editors convert all
|
| 169 |
+
tabs to spaces?
|
| 170 |
+
|
| 171 |
+
-->
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/sable-mode.scm
ADDED
|
@@ -0,0 +1,560 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1998 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;; ;;
|
| 34 |
+
;;; Festival (1.3.X) support for SABLE 0.2 the SGML/XML based mark up ;;
|
| 35 |
+
;;; language. ;;
|
| 36 |
+
;;; ;;
|
| 37 |
+
;;; This is XML version requiring Edinburgh's LTG's rxp XML parser as ;;
|
| 38 |
+
;;; distributed with Festival ;;
|
| 39 |
+
;;; ;;
|
| 40 |
+
|
| 41 |
+
(require_module 'rxp)
|
| 42 |
+
|
| 43 |
+
;;(set! auto-text-mode-alist
|
| 44 |
+
;; (cons
|
| 45 |
+
;; (cons "\\.sable$" 'sable)
|
| 46 |
+
;; auto-text-mode-alist))
|
| 47 |
+
|
| 48 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 49 |
+
;; ;;
|
| 50 |
+
;; Remember where to find these two XML entities. ;;
|
| 51 |
+
;; ;;
|
| 52 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
(xml_register_id "-//SABLE//DTD SABLE speech mark up//EN"
|
| 56 |
+
(path-append libdir "Sable.v0_2.dtd")
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
(xml_register_id "-//SABLE//ENTITIES Added Latin 1 for SABLE//EN"
|
| 60 |
+
(path-append libdir "sable-latin.ent")
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
;; (print (xml_registered_ids))
|
| 64 |
+
|
| 65 |
+
(defvar SABLE_RXDOUBLE "-?\\(\\([0-9]+\\.[0-9]*\\)\\|\\([0-9]+\\)\\|\\(\\.[0-9]+\\)\\)\\([eE][---+]?[0-9]+\\)?")
|
| 66 |
+
|
| 67 |
+
(defvar sable_pitch_base_map
|
| 68 |
+
'((highest 1.2)
|
| 69 |
+
(high 1.1)
|
| 70 |
+
(medium 1.0)
|
| 71 |
+
(default 1.0)
|
| 72 |
+
(low 0.9)
|
| 73 |
+
(lowest 0.8)))
|
| 74 |
+
(defvar sable_pitch_med_map
|
| 75 |
+
'((highest 1.2)
|
| 76 |
+
(high 1.1)
|
| 77 |
+
(medium 1.0)
|
| 78 |
+
(default 1.0)
|
| 79 |
+
(low 0.9)
|
| 80 |
+
(lowest 0.8)))
|
| 81 |
+
(defvar sable_pitch_range_map
|
| 82 |
+
'((largest 1.2)
|
| 83 |
+
(large 1.1)
|
| 84 |
+
(medium 1.0)
|
| 85 |
+
(default 1.0)
|
| 86 |
+
(small 0.9)
|
| 87 |
+
(smallest 0.8)))
|
| 88 |
+
(defvar sable_rate_speed_map
|
| 89 |
+
'((fastest 1.5)
|
| 90 |
+
(fast 1.2)
|
| 91 |
+
(medium 1.0)
|
| 92 |
+
(default 1.0)
|
| 93 |
+
(slow 0.8)
|
| 94 |
+
(slowest 0.6)))
|
| 95 |
+
(defvar sable_volume_level_map
|
| 96 |
+
'((loudest 2.0)
|
| 97 |
+
(loud 1.5)
|
| 98 |
+
(default 1.0)
|
| 99 |
+
(medium 1.0)
|
| 100 |
+
(quiet 0.5)))
|
| 101 |
+
|
| 102 |
+
(define (sable_init_globals)
|
| 103 |
+
(set! utts nil)
|
| 104 |
+
(set! sable_omitted_mode nil)
|
| 105 |
+
(set! sable_word_features_stack nil)
|
| 106 |
+
(set! sable_pitch_context nil)
|
| 107 |
+
(set! sable_vol_context nil)
|
| 108 |
+
(set! sable_vol_type 'no_change)
|
| 109 |
+
(set! sable_vol_factor 1.0)
|
| 110 |
+
(set! sable_current_language 'britishenglish)
|
| 111 |
+
(set! sable_unsupported_language nil)
|
| 112 |
+
(set! sable_language_stack nil)
|
| 113 |
+
(set! sable_current_speaker 'voice_kal_diphone)
|
| 114 |
+
(set! sable_speaker_stack nil)
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
(define (sable_token_to_words token name)
|
| 118 |
+
"(sable_token_to_words token name)
|
| 119 |
+
SABLE mode token specific analysis."
|
| 120 |
+
(cond
|
| 121 |
+
((or sable_omitted_mode sable_unsupported_language)
|
| 122 |
+
;; don't say anything (whole utterance)
|
| 123 |
+
nil)
|
| 124 |
+
((string-equal "1" (item.feat token "done_sable_sub"))
|
| 125 |
+
;; to catch recursive calls to this function when splitting up sub expressions
|
| 126 |
+
(sable_previous_token_to_words token name))
|
| 127 |
+
((and (not (string-equal "0" (item.feat token "sable_sub")))
|
| 128 |
+
(string-equal "0" (item.feat token "p.sable_sub")))
|
| 129 |
+
(let (words (sub (item.feat token "sable_sub")))
|
| 130 |
+
(item.set_feat token "done_sable_sub" "1")
|
| 131 |
+
(set! words
|
| 132 |
+
(apply append
|
| 133 |
+
(mapcar
|
| 134 |
+
(lambda (w)
|
| 135 |
+
(set! www (sable_previous_token_to_words token w))
|
| 136 |
+
www)
|
| 137 |
+
(read-from-string sub))))
|
| 138 |
+
(item.set_feat token "done_sable_sub" "0")
|
| 139 |
+
words))
|
| 140 |
+
((string-equal "1" (item.feat token "sable_ignore"))
|
| 141 |
+
;; don't say anything (individual word)
|
| 142 |
+
nil)
|
| 143 |
+
((string-equal "1" (item.feat token "sable_ipa"))
|
| 144 |
+
;; Each token is an IPA phone
|
| 145 |
+
(item.set_feat token "phonemes" (sable-map-ipa name))
|
| 146 |
+
(list name))
|
| 147 |
+
((string-equal "1" (item.feat token "sable_literal"))
|
| 148 |
+
;; Only deal with spell here
|
| 149 |
+
(let ((subwords) (subword))
|
| 150 |
+
(item.set_feat token "pos" token.letter_pos)
|
| 151 |
+
(mapcar
|
| 152 |
+
(lambda (letter)
|
| 153 |
+
;; might be symbols or digits
|
| 154 |
+
(set! subword (sable_previous_token_to_words token letter))
|
| 155 |
+
(if subwords
|
| 156 |
+
(set! subwords (append subwords subword))
|
| 157 |
+
(set! subwords subword)))
|
| 158 |
+
(symbolexplode name))
|
| 159 |
+
subwords))
|
| 160 |
+
((not (string-equal "0" (item.feat token "token_pos")))
|
| 161 |
+
;; bypass the prediction stage, if English
|
| 162 |
+
(if (member_string (Parameter.get 'Language)
|
| 163 |
+
'(britishenglish americanenglish))
|
| 164 |
+
(builtin_english_token_to_words token name)
|
| 165 |
+
(sable_previous_token_to_words token name)))
|
| 166 |
+
;; could be others here later
|
| 167 |
+
(t
|
| 168 |
+
(sable_previous_token_to_words token name))))
|
| 169 |
+
|
| 170 |
+
(defvar sable_elements
|
| 171 |
+
'(
|
| 172 |
+
("(SABLE" (ATTLIST UTT)
|
| 173 |
+
(eval (list sable_current_speaker)) ;; so we know what state we start in
|
| 174 |
+
(sable_setup_voice_params)
|
| 175 |
+
nil
|
| 176 |
+
)
|
| 177 |
+
(")SABLE" (ATTLIST UTT)
|
| 178 |
+
(xxml_synth UTT) ;; Synthesize the remaining tokens
|
| 179 |
+
nil
|
| 180 |
+
)
|
| 181 |
+
;; Utterance break elements
|
| 182 |
+
("(LANGUAGE" (ATTLIST UTT)
|
| 183 |
+
;; Status: probably complete
|
| 184 |
+
(xxml_synth UTT)
|
| 185 |
+
(set! sable_language_stack
|
| 186 |
+
(cons
|
| 187 |
+
(list sable_current_language sable_unsupported_language)
|
| 188 |
+
sable_language_stack))
|
| 189 |
+
;; Select a new language
|
| 190 |
+
(let ((language (upcase (car (xxml_attval "ID" ATTLIST)))))
|
| 191 |
+
(cond
|
| 192 |
+
((or (string-equal language "SPANISH")
|
| 193 |
+
(string-equal language "ES"))
|
| 194 |
+
(set! sable_current_language 'spanish)
|
| 195 |
+
(set! sable_unsupported_language nil)
|
| 196 |
+
(select_language 'spanish))
|
| 197 |
+
((or (string-equal language "ENGLISH")
|
| 198 |
+
(string-equal language "EN"))
|
| 199 |
+
(set! sable_current_language 'britishenglish)
|
| 200 |
+
(set! sable_unsupported_language nil)
|
| 201 |
+
(select_language 'britishenglish))
|
| 202 |
+
(t ;; skip languages you don't know
|
| 203 |
+
;; BUG: if current language isn't English this won't work
|
| 204 |
+
(apply_hooks tts_hooks
|
| 205 |
+
(eval (list 'Utterance 'Text
|
| 206 |
+
(string-append "Some text in " language))))
|
| 207 |
+
(set! sable_unsupported_language t)))
|
| 208 |
+
nil))
|
| 209 |
+
(")LANGUAGE" (ATTLIST UTT)
|
| 210 |
+
(xxml_synth UTT)
|
| 211 |
+
(set! sable_unsupported_language (car (cdr (car sable_language_stack))))
|
| 212 |
+
(set! sable_current_language (car (car sable_language_stack)))
|
| 213 |
+
(set! sable_language_stack (cdr sable_language_stack))
|
| 214 |
+
(if (not sable_omitted_mode)
|
| 215 |
+
(begin
|
| 216 |
+
(select_language sable_current_language)
|
| 217 |
+
(sable_setup_voice_params)))
|
| 218 |
+
nil)
|
| 219 |
+
("(SPEAKER" (ATTLIST UTT)
|
| 220 |
+
;; Status: GENDER/AGE ignored, should be done by sable-def-speaker
|
| 221 |
+
;; function to define Festival voices to SABLE
|
| 222 |
+
(xxml_synth UTT)
|
| 223 |
+
(set! sable_speaker_stack (cons sable_current_speaker sable_speaker_stack))
|
| 224 |
+
(cond
|
| 225 |
+
((not equal? sable_current_language 'britishenglish) ;; NOTE(review): parens look wrong - `not' is applied to the procedure equal?, not to a comparison; presumably meant (not (equal? ...)), in which case this clause never fires as written - confirm before changing
|
| 226 |
+
(print "SABLE: choosen unknown voice, current voice unchanged"))
|
| 227 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male1)
|
| 228 |
+
(set! sable_current_speaker 'voice_kal_diphone)
|
| 229 |
+
(voice_kal_diphone))
|
| 230 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male2)
|
| 231 |
+
(set! sable_current_speaker 'voice_cmu_us_rms_cg)
|
| 232 |
+
(voice_cmu_us_rms_cg))
|
| 233 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male3)
|
| 234 |
+
(set! sable_current_speaker 'voice_ked_diphone)
|
| 235 |
+
(voice_ked_diphone))
|
| 236 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male4)
|
| 237 |
+
(set! sable_current_speaker 'voice_rab_diphone)
|
| 238 |
+
(voice_rab_diphone))
|
| 239 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male5)
|
| 240 |
+
(set! sable_current_speaker 'voice_cmu_us_awb_cg)
|
| 241 |
+
(voice_cmu_us_awb_cg))
|
| 242 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'female1)
|
| 243 |
+
(set! sable_current_speaker 'voice_cmu_us_slt_cg)
|
| 244 |
+
(voice_cmu_us_slt_cg)) ;; activate the same voice that was just recorded in sable_current_speaker
|
| 245 |
+
(t
|
| 246 |
+
(set! sable_current_speaker (intern (string-append "voice_" (car (xxml_attval "NAME" ATTLIST)))))
|
| 247 |
+
(eval (list sable_current_speaker))))
|
| 248 |
+
(sable_setup_voice_params)
|
| 249 |
+
nil)
|
| 250 |
+
(")SPEAKER" (ATTLIST UTT)
|
| 251 |
+
(xxml_synth UTT)
|
| 252 |
+
(set! sable_utt UTT)
|
| 253 |
+
(set! sable_current_speaker (car sable_speaker_stack))
|
| 254 |
+
(set! sable_speaker_stack (cdr sable_speaker_stack))
|
| 255 |
+
(eval (list sable_current_speaker))
|
| 256 |
+
(sable_setup_voice_params)
|
| 257 |
+
nil)
|
| 258 |
+
("BREAK" (ATTLIST UTT)
|
| 259 |
+
;; Status: probably complete
|
| 260 |
+
;; may cause an utterance break
|
| 261 |
+
(let ((level (upcase (car (xxml_attval "LEVEL" ATTLIST)))))
|
| 262 |
+
(cond
|
| 263 |
+
((null UTT) nil)
|
| 264 |
+
((string-equal "LARGE" level)
|
| 265 |
+
(xxml_synth UTT)
|
| 266 |
+
nil)
|
| 267 |
+
(t
|
| 268 |
+
(let ((last_token (utt.relation.last UTT'Token)))
|
| 269 |
+
(if last_token
|
| 270 |
+
(item.set_feat last_token "pbreak" "B"))
|
| 271 |
+
UTT)))))
|
| 272 |
+
("(DIV" (ATTLIST UTT) ;; attribute list is unused here; name matches the other handlers
|
| 273 |
+
;; Status: probably complete
|
| 274 |
+
(xxml_synth UTT)
|
| 275 |
+
nil)
|
| 276 |
+
("AUDIO" (ATTLIST UTT)
|
| 277 |
+
;; Status: MODE (background) ignored, only insertion supported
|
| 278 |
+
;; mime type of file also ignored, as is LEVEL
|
| 279 |
+
(let ((tmpfile (make_tmp_filename)))
|
| 280 |
+
;; ignoring mode-background (and will for sometime)
|
| 281 |
+
;; ignoring level option
|
| 282 |
+
(xxml_synth UTT) ;; synthesizing anything ready to be synthesized
|
| 283 |
+
(get_url (car (xxml_attval "SRC" ATTLIST)) tmpfile)
|
| 284 |
+
(apply_hooks tts_hooks
|
| 285 |
+
(eval (list 'Utterance 'Wave tmpfile)))
|
| 286 |
+
(delete-file tmpfile)
|
| 287 |
+
nil))
|
| 288 |
+
("(EMPH" (ATTLIST UTT)
|
| 289 |
+
;; Status: nesting makes no difference, levels ignored
|
| 290 |
+
;; Festival is particularly bad at adding specific emphasis
|
| 291 |
+
;; that's what happens when you use statistical methods that
|
| 292 |
+
;; don't include any notion of emphasis
|
| 293 |
+
;; This is *not* recursive and only one level of EMPH supported
|
| 294 |
+
(sable_push_word_features)
|
| 295 |
+
(set! xxml_word_features
|
| 296 |
+
(cons (list "dur_stretch" 1.6)
|
| 297 |
+
(cons
|
| 298 |
+
(list "EMPH" "1") xxml_word_features)))
|
| 299 |
+
UTT)
|
| 300 |
+
(")EMPH" (ATTLIST UTT)
|
| 301 |
+
(set! xxml_word_features (sable_pop_word_features))
|
| 302 |
+
UTT)
|
| 303 |
+
("(PITCH" (ATTLIST UTT)
|
| 304 |
+
;; Status: probably complete
|
| 305 |
+
;; At present festival requires an utterance break here
|
| 306 |
+
(xxml_synth UTT)
|
| 307 |
+
(set! sable_pitch_context (cons int_lr_params sable_pitch_context))
|
| 308 |
+
(let ((base (sable_interpret_param
|
| 309 |
+
(car (xxml_attval "BASE" ATTLIST))
|
| 310 |
+
sable_pitch_base_map
|
| 311 |
+
(cadr (assoc 'target_f0_mean int_lr_params))
|
| 312 |
+
sable_pitch_base_original))
|
| 313 |
+
(med (sable_interpret_param
|
| 314 |
+
(car (xxml_attval "MED" ATTLIST))
|
| 315 |
+
sable_pitch_med_map
|
| 316 |
+
(cadr (assoc 'target_f0_mean int_lr_params))
|
| 317 |
+
sable_pitch_med_original))
|
| 318 |
+
(range (sable_interpret_param
|
| 319 |
+
(car (xxml_attval "RANGE" ATTLIST))
|
| 320 |
+
sable_pitch_range_map
|
| 321 |
+
(cadr (assoc 'target_f0_std int_lr_params))
|
| 322 |
+
sable_pitch_range_original))
|
| 323 |
+
(oldmean (cadr (assoc 'target_f0_mean int_lr_params))))
|
| 324 |
+
;; Festival (if it supports anything) supports mean and std
|
| 325 |
+
;; so we treat base as med if med doesn't seem to do anything
|
| 326 |
+
(if (equal? med oldmean)
|
| 327 |
+
(set! med base))
|
| 328 |
+
(set! int_lr_params
|
| 329 |
+
(cons
|
| 330 |
+
(list 'target_f0_mean med)
|
| 331 |
+
(cons
|
| 332 |
+
(list 'target_f0_std range)
|
| 333 |
+
int_lr_params)))
|
| 334 |
+
nil))
|
| 335 |
+
(")PITCH" (ATTLIST UTT)
|
| 336 |
+
(xxml_synth UTT)
|
| 337 |
+
(set! int_lr_params (car sable_pitch_context))
|
| 338 |
+
(set! sable_pitch_context (cdr sable_pitch_context))
|
| 339 |
+
nil)
|
| 340 |
+
("(RATE" (ATTLIST UTT)
|
| 341 |
+
;; Status: can't deal with absolute word per minute SPEED.
|
| 342 |
+
(sable_push_word_features)
|
| 343 |
+
;; can't deal with words per minute value
|
| 344 |
+
(let ((rate (sable_interpret_param
|
| 345 |
+
(car (xxml_attval "SPEED" ATTLIST))
|
| 346 |
+
sable_rate_speed_map
|
| 347 |
+
(sable_find_fval "dur_stretch" xxml_word_features 1.0)
|
| 348 |
+
sable_rate_speed_original)))
|
| 349 |
+
(set! xxml_word_features
|
| 350 |
+
(cons (list "dur_stretch" (/ 1.0 rate)) xxml_word_features))
|
| 351 |
+
UTT))
|
| 352 |
+
(")RATE" (ATTLIST UTT)
|
| 353 |
+
(set! xxml_word_features (sable_pop_word_features))
|
| 354 |
+
UTT)
|
| 355 |
+
("(VOLUME" (ATTLIST UTT)
|
| 356 |
+
;; Status: probably complete
|
| 357 |
+
;; At present festival requires an utterance break here
|
| 358 |
+
(xxml_synth UTT)
|
| 359 |
+
(set! sable_vol_context (cons (list sable_vol_type sable_vol_factor)
|
| 360 |
+
sable_vol_context))
|
| 361 |
+
(let ((level (sable_interpret_param
|
| 362 |
+
(car (xxml_attval "LEVEL" ATTLIST))
|
| 363 |
+
sable_volume_level_map
|
| 364 |
+
sable_vol_factor
|
| 365 |
+
1.0)))
|
| 366 |
+
(cond
|
| 367 |
+
((string-matches (car (xxml_attval "LEVEL" ATTLIST)) ".*%")
|
| 368 |
+
(set! sable_vol_type 'relative))
|
| 369 |
+
((string-matches (car (xxml_attval "LEVEL" ATTLIST)) SABLE_RXDOUBLE)
|
| 370 |
+
(set! sable_vol_type 'absolute))
|
| 371 |
+
(t
|
| 372 |
+
(set! sable_vol_type 'relative)))
|
| 373 |
+
(set! sable_vol_factor level))
|
| 374 |
+
nil)
|
| 375 |
+
(")VOLUME" (ATTLIST UTT)
|
| 376 |
+
(xxml_synth UTT)
|
| 377 |
+
(set! sable_vol_type (car (car sable_vol_context)))
|
| 378 |
+
(set! sable_vol_factor (car (cdr (car sable_vol_context))))
|
| 379 |
+
(set! sable_vol_context (cdr sable_vol_context))
|
| 380 |
+
nil)
|
| 381 |
+
("(ENGINE" (ATTLIST UTT)
|
| 382 |
+
;; Status: probably complete
|
| 383 |
+
(xxml_synth UTT)
|
| 384 |
+
(if (string-matches (car (xxml_attval "ID" ATTLIST)) "festival.*")
|
| 385 |
+
(let ((datastr ""))
|
| 386 |
+
(mapcar
|
| 387 |
+
(lambda (c) (set! datastr (string-append datastr " " c)))
|
| 388 |
+
(xxml_attval "DATA" ATTLIST))
|
| 389 |
+
(apply_hooks tts_hooks (eval (list 'Utterance 'Text datastr)))
|
| 390 |
+
(set! sable_omitted_mode t)) ;; ignore contents
|
| 391 |
+
;; else
|
| 392 |
+
;; its not relevant to me
|
| 393 |
+
)
|
| 394 |
+
nil)
|
| 395 |
+
(")ENGINE" (ATTLIST UTT)
|
| 396 |
+
(xxml_synth UTT)
|
| 397 |
+
(set! sable_omitted_mode nil)
|
| 398 |
+
nil)
|
| 399 |
+
("MARKER" (ATTLIST UTT)
|
| 400 |
+
;; Status: does nothing
|
| 401 |
+
;; Can't support this without low-level control of audio spooler
|
| 402 |
+
(format t "SABLE: marker \"%s\"\n"
|
| 403 |
+
(car (xxml_attval "MARK" ATTLIST)))
|
| 404 |
+
UTT)
|
| 405 |
+
("(PRON" (ATTLIST UTT)
|
| 406 |
+
;; Status: IPA currently ignored
|
| 407 |
+
(sable_push_word_features)
|
| 408 |
+
;; IPA: enclosed words are marked to be ignored; SUB: substitution text is stored in sable_sub
|
| 409 |
+
(let ((ipa (xxml_attval "IPA" ATTLIST))
|
| 410 |
+
(sub (xxml_attval "SUB" ATTLIST)))
|
| 411 |
+
(cond
|
| 412 |
+
(ipa
|
| 413 |
+
(format t "SABLE: ipa ignored\n")
|
| 414 |
+
(set! xxml_word_features
|
| 415 |
+
(cons (list "sable_ignore" "1") xxml_word_features)))
|
| 416 |
+
(sub
|
| 417 |
+
(set! xxml_word_features
|
| 418 |
+
(cons (list "sable_sub" (format nil "%l" sub))
|
| 419 |
+
xxml_word_features))
|
| 420 |
+
(set! xxml_word_features
|
| 421 |
+
(cons (list "sable_ignore" "1") xxml_word_features))))
|
| 422 |
+
UTT))
|
| 423 |
+
(")PRON" (ATTLIST UTT)
|
| 424 |
+
(set! xxml_word_features (sable_pop_word_features))
|
| 425 |
+
UTT)
|
| 426 |
+
("(SAYAS" (ATTLIST UTT)
|
| 427 |
+
;; Status: only a few of the types are dealt with
|
| 428 |
+
(sable_push_word_features)
|
| 429 |
+
(set! sable_utt UTT)
|
| 430 |
+
;; can't deal with words per minute value
|
| 431 |
+
(let ((mode (downcase (car (xxml_attval "MODE" ATTLIST))))
|
| 432 |
+
(modetype (car (xxml_attval "MODETYPE" ATTLIST))))
|
| 433 |
+
(cond
|
| 434 |
+
((string-equal mode "literal")
|
| 435 |
+
(set! xxml_word_features
|
| 436 |
+
(cons (list "sable_literal" "1") xxml_word_features)))
|
| 437 |
+
((string-equal mode "phone")
|
| 438 |
+
(set! xxml_word_features
|
| 439 |
+
(cons (list "token_pos" "digits") xxml_word_features)))
|
| 440 |
+
((string-equal mode "ordinal")
|
| 441 |
+
(set! xxml_word_features
|
| 442 |
+
(cons (list "token_pos" "ordinal") xxml_word_features)))
|
| 443 |
+
((string-equal mode "cardinal")
|
| 444 |
+
(set! xxml_word_features
|
| 445 |
+
(cons (list "token_pos" "cardinal") xxml_word_features)))
|
| 446 |
+
(t
|
| 447 |
+
;; blindly trust festival to get it right
|
| 448 |
+
t))
|
| 449 |
+
UTT))
|
| 450 |
+
(")SAYAS" (ATTLIST UTT)
|
| 451 |
+
(set! xxml_word_features (sable_pop_word_features))
|
| 452 |
+
UTT)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
))
|
| 456 |
+
|
| 457 |
+
(define (sable_init_func)
  "(sable_init_func)
Set-up function run when SABLE text mode is entered.  Resets the SABLE
globals, calls voice_kal_diphone, and then swaps in the SABLE XML
element handlers and the SABLE token-to-words function, remembering
the values they replace so that sable_exit_func can put them back."
  (sable_init_globals)
  (voice_kal_diphone)
  ;; Remember what is currently installed.  This is done after the two
  ;; calls above, exactly as before, so whatever they leave in place is
  ;; what gets saved.
  (set! sable_previous_token_to_words english_token_to_words)
  (set! sable_previous_elements xxml_elements)
  ;; Install the SABLE-specific handlers.
  (set! xxml_elements sable_elements)
  (set! english_token_to_words sable_token_to_words)
  (set! token_to_words sable_token_to_words))
|
| 467 |
+
|
| 468 |
+
(define (sable_exit_func)
  "(sable_exit_func)
Tear-down function run when SABLE text mode is left.  Puts back the XML
element handlers and the token-to-words function that sable_init_func
saved.  Both token_to_words and english_token_to_words are restored
from the single saved value sable_previous_token_to_words."
  (set! xxml_elements sable_previous_elements)
  (let ((saved_tok2words sable_previous_token_to_words))
    (set! token_to_words saved_tok2words)
    (set! english_token_to_words saved_tok2words)))
|
| 474 |
+
|
| 475 |
+
(define (sable_push_word_features)
  "(sable_push_word_features)
Push the current value of xxml_word_features onto
sable_word_features_stack, so that a later sable_pop_word_features
can hand it back."
  (let ((snapshot xxml_word_features))
    (set! sable_word_features_stack
          (cons snapshot sable_word_features_stack))))
|
| 480 |
+
|
| 481 |
+
(define (sable_adjust_volume utt)
  "(sable_adjust_volume UTT)
Amplify or attenuate the waveform of UTT using sable_vol_factor.  The
global sable_vol_type selects how: no_change leaves UTT alone,
absolute calls utt.wave.rescale with the absolute flag, and relative
calls it without.  Any other type is reported on stderr and UTT is
left alone.  UTT is always the return value."
  ;; NOTE(review): every utterance is also pushed onto the global utts
  ;; list.  This looks like a debugging aid and keeps utterances
  ;; reachable -- confirm whether anything still reads utts.
  (set! utts (cons utt utts))
  (let ((vol_type sable_vol_type))
    (if (not (equal? vol_type 'no_change))
        (if (equal? vol_type 'absolute)
            (utt.wave.rescale utt sable_vol_factor 'absolute)
            (if (equal? vol_type 'relative)
                (utt.wave.rescale utt sable_vol_factor)
                (format stderr "SABLE: volume unknown type \"%s\"\n" vol_type)))))
  utt)
|
| 497 |
+
|
| 498 |
+
(define (sable_pop_word_features)
  "(sable_pop_word_features)
Remove the top entry of sable_word_features_stack and return it.  The
caller is expected to assign the result to xxml_word_features; this
function does not do so itself."
  (let ((top (car sable_word_features_stack))
        (remaining (cdr sable_word_features_stack)))
    (set! sable_word_features_stack remaining)
    top))
|
| 504 |
+
|
| 505 |
+
(define (sable_find_fval feat flist def)
  "(sable_find_fval FEAT FLIST DEF)
Look FEAT up in FLIST, a list of (name value) pairs, comparing names
with string-equal.  Returns the value of the first pair whose name
matches, or DEF when no pair matches."
  (let ((pending flist))
    ;; Walk forward until the list runs out or the head pair matches.
    (while (and pending
                (not (string-equal feat (car (car pending)))))
      (set! pending (cdr pending)))
    (if pending
        (car (cdr (car pending)))
        def)))
|
| 512 |
+
|
| 513 |
+
(define (sable_interpret_param ident map original current)
  "(sable_interpret_param IDENT MAP ORIGINAL CURRENT)
Turn the attribute value IDENT into a number.  The cases are tried in
this order:
  IDENT is a key in MAP        -> ORIGINAL times the mapped value
  IDENT matches SABLE_RXDOUBLE -> IDENT parsed as a number
  IDENT ends in %              -> CURRENT changed by that percentage
  IDENT is nil                 -> CURRENT
Anything else is reported on stderr and CURRENT is returned."
  (let ((entry (assoc ident map)))
    (if entry
        (* original (car (cdr entry)))
        (cond
         ((string-matches ident SABLE_RXDOUBLE)
          (parse-number ident))
         ((string-matches ident ".*%")
          ;; N% is a delta relative to CURRENT (+10% -> 1.1 * CURRENT),
          ;; not an absolute fraction of it.
          (let ((percent (parse-number (string-before ident "%"))))
            (+ current (* current (/ percent 100.0)))))
         ((not ident)
          current)
         (t
          (format stderr "SABLE: modifier \"%s\" not of float, tag or +/-N\n"
                  ident)
          current)))))
|
| 533 |
+
|
| 534 |
+
(define (sable_setup_voice_params)
  "(sable_setup_voice_params)
Record the baseline values of the voice parameters SABLE can modify,
and make sure sable_adjust_volume is registered on after_synth_hooks.
The pitch base and pitch middle both start from target_f0_mean in
int_lr_params, the pitch range from target_f0_std, and the speech rate
from 1.0.  The hook is added only if it is not already present, so
calling this function repeatedly does not make the volume adjustment
run more than once per utterance.  Returns the resulting
after_synth_hooks."
  (let ((f0_mean (cadr (assoc 'target_f0_mean int_lr_params)))
        (f0_std (cadr (assoc 'target_f0_std int_lr_params))))
    (set! sable_pitch_base_original f0_mean)
    (set! sable_pitch_med_original f0_mean)
    (set! sable_pitch_range_original f0_std))
  (set! sable_rate_speed_original 1.0)
  ;; after_synth_hooks may hold a single hook rather than a list of
  ;; hooks; normalise it to a list before looking at its members.
  (if (and after_synth_hooks (not (consp after_synth_hooks)))
      (set! after_synth_hooks (list after_synth_hooks)))
  ;; Register the volume hook at the end of the list, once only.
  (if (not (member sable_adjust_volume after_synth_hooks))
      (set! after_synth_hooks
            (append after_synth_hooks (list sable_adjust_volume))))
  after_synth_hooks)
|
| 547 |
+
|
| 548 |
+
;;; Register SABLE as a text mode: an entry (sable <description>) is
;;; put at the front of tts_text_modes.  The description names the
;;; functions to run on entering and leaving the mode and marks the
;;; mode as needing xml analysis.
(let ((sable_mode_description
       (list (list 'init_func sable_init_func)
             (list 'exit_func sable_exit_func)
             '(analysis_type xml))))
  (set! tts_text_modes
        (cons (list 'sable sable_mode_description)
              tts_text_modes)))
|
| 559 |
+
|
| 560 |
+
(provide 'sable-mode)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/scfg.scm
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Some functions for manipulating a SCFG parse tree
|
| 35 |
+
|
| 36 |
+
(require_module 'parser)
|
| 37 |
+
|
| 38 |
+
(define (scfg_simplify tree)
  "(scfg_simplify TREE)
Output only the bracketing and the bottom level pos and words.  Each
node of TREE is a list whose car is (NAME FEATURES) and whose cdr holds
the daughter nodes.  A node whose FEATURES contain a pos value is
treated as a terminal and becomes (POS NAME); any other node becomes
(NAME . simplified daughters).  Returns nil when TREE is nil."
  (if (not tree)
      nil
      (let ((name (car (car tree)))
            ;; Looked up once and reused for both the test and the
            ;; result; nil when the node has no pos feature.
            (pos (car (cdr (assoc 'pos (car (cdr (car tree))))))))
        (if pos
            ;; terminal node
            (list pos name)
            (cons name (mapcar scfg_simplify (cdr tree)))))))
|
| 52 |
+
|
| 53 |
+
(define (scfg_simplify_relation_tree trees)
  "(scfg_simplify_relation_tree TREES)
Apply scfg_simplify to every tree in the list TREES and return the
list of simplified trees."
  (mapcar
   (lambda (tree) (scfg_simplify tree))
   trees))
|
| 55 |
+
|
| 56 |
+
(defvar scfg_eos_tree eou_tree
  "scfg_eos_tree
The CART tree that MultiProbParse uses to decide where sentences end
within an utterance.  It is applied to the token relation.  The
default value is eou_tree.")
|
| 61 |
+
|
| 62 |
+
(provide 'scfg)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/scfg_wsj_wp20.gram
ADDED
|
@@ -0,0 +1,523 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;-*-mode:scheme-*-
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; A Stochastic context free grammar for the wp20 tag set with 19
|
| 35 |
+
;;; nonterminals
|
| 36 |
+
;;;
|
| 37 |
+
;;; This was trained from 10,000 sentences (00-04) of the UPenn WSJ tree
|
| 38 |
+
;;; bank using the inside-outside algorithm seeded with the bracketing from
|
| 39 |
+
;;; the treebank. The implementation is the scfg_ suite in the
|
| 40 |
+
;;; speech tools and is based on the paper "Inside-Outside
|
| 41 |
+
;;; Reestimation from partially bracketed corpora", F Pereira and
|
| 42 |
+
;;; Y. Schabes. pp 128-135, 30th ACL, Newark, Delaware 1992.
|
| 43 |
+
;;;
|
| 44 |
+
;;; This grammar with 19 nonterminals was trained for 174 passes
|
| 45 |
+
;;; using a fifth of training data each time. It was tested against
|
| 46 |
+
;;; independent data both bracketed and unbracketed. After training,
|
| 47 |
+
;;; all rules with a probability less than 1.0e-6 were pruned.
|
| 48 |
+
;;;
|
| 49 |
+
;;; On an unseen test set of 686 sentences (from wsj/05/) this gets
|
| 50 |
+
;;; 92.2397% bracketing accuracy and 29.5918% sentences fully correct
|
| 51 |
+
;;;
|
| 52 |
+
;;; previous best 15_20 grammar
|
| 53 |
+
;;; 90.2377% bracketing accuracy and 24.7813% sentences fully correct
|
| 54 |
+
;;;
|
| 55 |
+
;;; Training this grammar took a long time. This is the best grammar found
|
| 56 |
+
;;; by testing grammars varying the number of non-terminals from 11-25
|
| 57 |
+
;;; as the number of NTs increases the time for training also increases
|
| 58 |
+
;;; This 19_20 grammar took 20 days on a Sun Ultra 1 140, but I also
|
| 59 |
+
;;; had to search 11-18 to confirm this is best, which was done with a
|
| 60 |
+
;;; collection of Ultra 140s 170s and Pentium Pros (Linux and FreeBSD)
|
| 61 |
+
;;;
|
| 62 |
+
(0.00593452 NT00 NT00 NT00)
|
| 63 |
+
(0.0319023 NT00 NT00 NT13)
|
| 64 |
+
(0.00105452 NT00 NT00 NT18)
|
| 65 |
+
(0.00061816 NT00 NT02 NT10)
|
| 66 |
+
(0.000399698 NT00 NT02 NT12)
|
| 67 |
+
(0.0383818 NT00 NT05 NT00)
|
| 68 |
+
(0.00011458 NT00 NT06 NT03)
|
| 69 |
+
(0.00164298 NT00 NT06 NT17)
|
| 70 |
+
(0.00153884 NT00 NT07 NT07)
|
| 71 |
+
(0.00118244 NT00 NT07 NT12)
|
| 72 |
+
(0.00171642 NT00 NT07 NT13)
|
| 73 |
+
(0.00031308 NT00 NT07 NT17)
|
| 74 |
+
(0.0949408 NT00 NT09 NT18)
|
| 75 |
+
(0.000932166 NT00 NT10 NT03)
|
| 76 |
+
(0.000150288 NT00 NT10 NT17)
|
| 77 |
+
(0.0152371 NT00 NT12 NT18)
|
| 78 |
+
(0.73409 NT00 NT14 NT13)
|
| 79 |
+
(0.0403652 NT00 NT14 NT18)
|
| 80 |
+
(0.000195643 NT00 NT16 NT07)
|
| 81 |
+
(0.0134222 NT00 NT18 NT13)
|
| 82 |
+
(0.015624 NT00 NT18 NT18)
|
| 83 |
+
(0.00251118 NT01 NT01 NT07)
|
| 84 |
+
(0.00354571 NT01 NT01 NT11)
|
| 85 |
+
(0.22337 NT01 NT01 NT16)
|
| 86 |
+
(0.0467048 NT01 NT02 NT05)
|
| 87 |
+
(0.000518329 NT01 NT04 NT01)
|
| 88 |
+
(0.000100574 NT01 NT06 NT05)
|
| 89 |
+
(0.0480904 NT01 NT07 NT05)
|
| 90 |
+
(0.000358197 NT01 NT11 NT11)
|
| 91 |
+
(0.00278007 NT01 NT16 NT05)
|
| 92 |
+
(0.000179198 NT01 NT16 NT15)
|
| 93 |
+
(0.00140099 NT01 n)
|
| 94 |
+
(0.00228587 NT01 v)
|
| 95 |
+
(0.524988 NT01 dt)
|
| 96 |
+
(0.00128028 NT01 in)
|
| 97 |
+
(0.0660845 NT01 j)
|
| 98 |
+
(0.0131026 NT01 cd)
|
| 99 |
+
(0.00584238 NT01 r)
|
| 100 |
+
(0.0548382 NT01 prp)
|
| 101 |
+
(0.000445004 NT01 wdt)
|
| 102 |
+
(0.00135794 NT01 wp)
|
| 103 |
+
(0.000195991 NT01 wrb)
|
| 104 |
+
(0.000264526 NT02 NT01 NT01)
|
| 105 |
+
(0.00243627 NT02 NT01 NT02)
|
| 106 |
+
(0.613543 NT02 NT01 NT07)
|
| 107 |
+
(0.00180865 NT02 NT01 NT11)
|
| 108 |
+
(0.0042804 NT02 NT01 NT16)
|
| 109 |
+
(0.0392418 NT02 NT02 NT07)
|
| 110 |
+
(0.026104 NT02 NT02 NT12)
|
| 111 |
+
(0.000916683 NT02 NT02 NT16)
|
| 112 |
+
(0.00158862 NT02 NT04 NT01)
|
| 113 |
+
(0.000206161 NT02 NT04 NT02)
|
| 114 |
+
(0.00343189 NT02 NT04 NT16)
|
| 115 |
+
(0.000417113 NT02 NT07 NT05)
|
| 116 |
+
(0.0988457 NT02 NT07 NT07)
|
| 117 |
+
(0.000931386 NT02 NT07 NT11)
|
| 118 |
+
(0.00073236 NT02 NT07 NT12)
|
| 119 |
+
(0.000153421 NT02 NT10 NT13)
|
| 120 |
+
(0.00163484 NT02 NT11 NT02)
|
| 121 |
+
(0.0379562 NT02 NT11 NT07)
|
| 122 |
+
(0.0149 NT02 NT11 NT11)
|
| 123 |
+
(0.00105811 NT02 NT11 NT12)
|
| 124 |
+
(0.000175184 NT02 NT16 NT02)
|
| 125 |
+
(0.0403395 NT02 NT16 NT07)
|
| 126 |
+
(0.00297703 NT02 NT16 NT12)
|
| 127 |
+
(0.0875026 NT02 n)
|
| 128 |
+
(0.00496719 NT02 v)
|
| 129 |
+
(0.000409658 NT02 dt)
|
| 130 |
+
(0.00239978 NT02 j)
|
| 131 |
+
(0.010203 NT02 r)
|
| 132 |
+
(0.000194628 NT02 pdt)
|
| 133 |
+
(0.000377009 NT03 NT04 NT02)
|
| 134 |
+
(0.11551 NT03 NT08 NT13)
|
| 135 |
+
(0.347629 NT03 NT09 NT13)
|
| 136 |
+
(0.484911 NT03 NT10 NT13)
|
| 137 |
+
(0.00188291 NT03 NT11 NT12)
|
| 138 |
+
(0.0495461 NT03 NT17 NT13)
|
| 139 |
+
(0.00918797 NT04 NT03 NT05)
|
| 140 |
+
(0.000303954 NT04 NT04 NT02)
|
| 141 |
+
(0.00284848 NT04 NT04 NT04)
|
| 142 |
+
(0.00710115 NT04 NT04 NT12)
|
| 143 |
+
(0.000597744 NT04 NT04 NT15)
|
| 144 |
+
(0.000377075 NT04 NT04 NT16)
|
| 145 |
+
(0.00130088 NT04 NT09 NT05)
|
| 146 |
+
(0.00175428 NT04 NT10 NT13)
|
| 147 |
+
(0.000127716 NT04 NT15 NT04)
|
| 148 |
+
(0.00013648 NT04 NT15 NT06)
|
| 149 |
+
(0.00045093 NT04 NT15 NT07)
|
| 150 |
+
(0.000626479 NT04 NT15 NT16)
|
| 151 |
+
(0.000563588 NT04 NT16 NT15)
|
| 152 |
+
(0.0232089 NT04 NT17 NT05)
|
| 153 |
+
(0.000138094 NT04 NT17 NT15)
|
| 154 |
+
(0.00094009 NT04 n)
|
| 155 |
+
(0.671108 NT04 v)
|
| 156 |
+
(0.0150619 NT04 punc)
|
| 157 |
+
(0.00056566 NT04 dt)
|
| 158 |
+
(0.144629 NT04 r)
|
| 159 |
+
(0.00270621 NT04 prp)
|
| 160 |
+
(0.0449587 NT04 to)
|
| 161 |
+
(0.0543755 NT04 md)
|
| 162 |
+
(0.00839747 NT04 wdt)
|
| 163 |
+
(0.00813689 NT04 wp)
|
| 164 |
+
(0.000560496 NT05 NT07 NT05)
|
| 165 |
+
(0.000901219 NT05 NT15 NT07)
|
| 166 |
+
(0.180172 NT05 punc)
|
| 167 |
+
(0.533041 NT05 cc)
|
| 168 |
+
(0.285244 NT05 pos)
|
| 169 |
+
(0.00164003 NT06 NT00 NT13)
|
| 170 |
+
(0.00222915 NT06 NT01 NT06)
|
| 171 |
+
(0.275903 NT06 NT01 NT07)
|
| 172 |
+
(0.00191616 NT06 NT01 NT11)
|
| 173 |
+
(0.00316549 NT06 NT01 NT12)
|
| 174 |
+
(0.000730143 NT06 NT01 NT14)
|
| 175 |
+
(0.000559842 NT06 NT02 NT06)
|
| 176 |
+
(0.0236744 NT06 NT02 NT07)
|
| 177 |
+
(0.00284929 NT06 NT02 NT09)
|
| 178 |
+
(0.155052 NT06 NT02 NT12)
|
| 179 |
+
(0.00387995 NT06 NT02 NT14)
|
| 180 |
+
(0.0161403 NT06 NT02 NT18)
|
| 181 |
+
(0.000110944 NT06 NT04 NT01)
|
| 182 |
+
(0.00237845 NT06 NT04 NT02)
|
| 183 |
+
(0.00625142 NT06 NT04 NT06)
|
| 184 |
+
(0.00118802 NT06 NT04 NT08)
|
| 185 |
+
(0.000132901 NT06 NT04 NT10)
|
| 186 |
+
(0.000192545 NT06 NT04 NT11)
|
| 187 |
+
(0.000199118 NT06 NT06 NT01)
|
| 188 |
+
(0.0081704 NT06 NT06 NT12)
|
| 189 |
+
(0.00198439 NT06 NT06 NT14)
|
| 190 |
+
(0.000889455 NT06 NT06 NT18)
|
| 191 |
+
(0.00142038 NT06 NT07 NT05)
|
| 192 |
+
(0.0820095 NT06 NT07 NT07)
|
| 193 |
+
(0.000112894 NT06 NT07 NT09)
|
| 194 |
+
(0.0220243 NT06 NT07 NT12)
|
| 195 |
+
(0.000133911 NT06 NT07 NT14)
|
| 196 |
+
(0.00100807 NT06 NT07 NT17)
|
| 197 |
+
(0.000191764 NT06 NT08 NT13)
|
| 198 |
+
(0.000340112 NT06 NT10 NT08)
|
| 199 |
+
(0.000126776 NT06 NT10 NT09)
|
| 200 |
+
(0.0136266 NT06 NT10 NT12)
|
| 201 |
+
(0.00867414 NT06 NT10 NT13)
|
| 202 |
+
(0.00341334 NT06 NT10 NT18)
|
| 203 |
+
(0.00154851 NT06 NT11 NT12)
|
| 204 |
+
(0.00104947 NT06 NT12 NT12)
|
| 205 |
+
(0.000219189 NT06 NT14 NT05)
|
| 206 |
+
(0.00313879 NT06 NT14 NT13)
|
| 207 |
+
(0.000745073 NT06 NT15 NT02)
|
| 208 |
+
(0.000433144 NT06 NT15 NT06)
|
| 209 |
+
(0.000159867 NT06 NT15 NT16)
|
| 210 |
+
(0.00124313 NT06 NT16 NT02)
|
| 211 |
+
(0.00918606 NT06 NT16 NT07)
|
| 212 |
+
(0.00373496 NT06 NT16 NT12)
|
| 213 |
+
(0.014053 NT06 NT18 NT13)
|
| 214 |
+
(0.0155714 NT06 n)
|
| 215 |
+
(0.00123379 NT06 punc)
|
| 216 |
+
(0.0152764 NT06 dt)
|
| 217 |
+
(0.00123486 NT06 j)
|
| 218 |
+
(0.00359625 NT06 r)
|
| 219 |
+
(0.212966 NT06 prp)
|
| 220 |
+
(0.00199168 NT06 cc)
|
| 221 |
+
(0.0383471 NT06 wdt)
|
| 222 |
+
(0.0182587 NT06 wp)
|
| 223 |
+
(0.00204833 NT06 wrb)
|
| 224 |
+
(0.0109929 NT06 ex)
|
| 225 |
+
(0.0011995 NT07 NT05 NT16)
|
| 226 |
+
(0.119588 NT07 NT07 NT07)
|
| 227 |
+
(0.000353596 NT07 NT07 NT11)
|
| 228 |
+
(0.000177793 NT07 NT07 NT12)
|
| 229 |
+
(0.00101956 NT07 NT11 NT11)
|
| 230 |
+
(0.000357614 NT07 NT15 NT01)
|
| 231 |
+
(0.00084812 NT07 NT15 NT06)
|
| 232 |
+
(0.0182872 NT07 NT16 NT07)
|
| 233 |
+
(0.00018607 NT07 NT16 NT11)
|
| 234 |
+
(0.856315 NT07 n)
|
| 235 |
+
(0.000736333 NT07 v)
|
| 236 |
+
(0.000645479 NT08 NT00 NT09)
|
| 237 |
+
(0.000990156 NT08 NT01 NT02)
|
| 238 |
+
(0.0410251 NT08 NT01 NT07)
|
| 239 |
+
(0.0013863 NT08 NT01 NT09)
|
| 240 |
+
(0.000242552 NT08 NT01 NT12)
|
| 241 |
+
(0.00174478 NT08 NT01 NT14)
|
| 242 |
+
(0.000596656 NT08 NT01 NT16)
|
| 243 |
+
(0.00130945 NT08 NT02 NT07)
|
| 244 |
+
(0.166303 NT08 NT02 NT09)
|
| 245 |
+
(0.0143253 NT08 NT02 NT12)
|
| 246 |
+
(0.0113813 NT08 NT02 NT14)
|
| 247 |
+
(0.000597887 NT08 NT02 NT16)
|
| 248 |
+
(0.0133053 NT08 NT03 NT09)
|
| 249 |
+
(0.0109076 NT08 NT03 NT17)
|
| 250 |
+
(0.000211313 NT08 NT04 NT01)
|
| 251 |
+
(0.0105796 NT08 NT04 NT02)
|
| 252 |
+
(0.00440181 NT08 NT04 NT04)
|
| 253 |
+
(0.00203737 NT08 NT04 NT06)
|
| 254 |
+
(0.213275 NT08 NT04 NT08)
|
| 255 |
+
(0.0781169 NT08 NT04 NT09)
|
| 256 |
+
(0.0190657 NT08 NT04 NT10)
|
| 257 |
+
(0.00319326 NT08 NT04 NT12)
|
| 258 |
+
(0.000693766 NT08 NT04 NT15)
|
| 259 |
+
(0.00112226 NT08 NT04 NT16)
|
| 260 |
+
(0.00117025 NT08 NT06 NT02)
|
| 261 |
+
(0.00807496 NT08 NT06 NT08)
|
| 262 |
+
(0.0183971 NT08 NT06 NT09)
|
| 263 |
+
(0.00127343 NT08 NT06 NT14)
|
| 264 |
+
(0.0322725 NT08 NT06 NT17)
|
| 265 |
+
(0.00396897 NT08 NT07 NT07)
|
| 266 |
+
(0.0154729 NT08 NT07 NT09)
|
| 267 |
+
(0.000708139 NT08 NT07 NT10)
|
| 268 |
+
(0.00186499 NT08 NT07 NT11)
|
| 269 |
+
(0.000701346 NT08 NT07 NT14)
|
| 270 |
+
(0.0116278 NT08 NT08 NT09)
|
| 271 |
+
(0.0965117 NT08 NT10 NT09)
|
| 272 |
+
(0.000142086 NT08 NT10 NT12)
|
| 273 |
+
(0.000210725 NT08 NT10 NT14)
|
| 274 |
+
(0.00336223 NT08 NT11 NT07)
|
| 275 |
+
(0.00183799 NT08 NT11 NT09)
|
| 276 |
+
(0.00109249 NT08 NT11 NT11)
|
| 277 |
+
(0.000880671 NT08 NT11 NT12)
|
| 278 |
+
(0.0032493 NT08 NT12 NT08)
|
| 279 |
+
(0.0372072 NT08 NT12 NT09)
|
| 280 |
+
(0.00113127 NT08 NT12 NT12)
|
| 281 |
+
(0.00892231 NT08 NT15 NT02)
|
| 282 |
+
(0.00383754 NT08 NT15 NT06)
|
| 283 |
+
(0.000528365 NT08 NT15 NT07)
|
| 284 |
+
(0.0060705 NT08 NT15 NT08)
|
| 285 |
+
(0.00853698 NT08 NT15 NT10)
|
| 286 |
+
(0.0349777 NT08 NT15 NT14)
|
| 287 |
+
(0.000202857 NT08 NT16 NT06)
|
| 288 |
+
(0.00709689 NT08 NT16 NT07)
|
| 289 |
+
(0.000240097 NT08 NT16 NT08)
|
| 290 |
+
(0.0401819 NT08 NT16 NT09)
|
| 291 |
+
(0.00124754 NT08 NT16 NT14)
|
| 292 |
+
(0.00862498 NT08 n)
|
| 293 |
+
(0.0115193 NT08 v)
|
| 294 |
+
(0.000974267 NT08 in)
|
| 295 |
+
(0.0169837 NT08 j)
|
| 296 |
+
(0.00626434 NT08 r)
|
| 297 |
+
(0.00437851 NT08 prp)
|
| 298 |
+
(0.0062359 NT09 NT01 NT07)
|
| 299 |
+
(0.000165196 NT09 NT01 NT14)
|
| 300 |
+
(0.00151872 NT09 NT02 NT04)
|
| 301 |
+
(0.000660061 NT09 NT02 NT15)
|
| 302 |
+
(0.000434321 NT09 NT02 NT16)
|
| 303 |
+
(0.00805872 NT09 NT03 NT09)
|
| 304 |
+
(0.000180982 NT09 NT04 NT08)
|
| 305 |
+
(0.050609 NT09 NT04 NT09)
|
| 306 |
+
(0.000307442 NT09 NT04 NT15)
|
| 307 |
+
(0.00281491 NT09 NT04 NT17)
|
| 308 |
+
(0.000295911 NT09 NT06 NT15)
|
| 309 |
+
(0.00133828 NT09 NT07 NT11)
|
| 310 |
+
(0.0235741 NT09 NT12 NT09)
|
| 311 |
+
(0.00121997 NT09 NT12 NT12)
|
| 312 |
+
(0.00391762 NT09 NT15 NT01)
|
| 313 |
+
(0.173027 NT09 NT15 NT02)
|
| 314 |
+
(0.000462089 NT09 NT15 NT06)
|
| 315 |
+
(0.0276663 NT09 NT15 NT07)
|
| 316 |
+
(0.210483 NT09 NT15 NT08)
|
| 317 |
+
(0.000177004 NT09 NT15 NT09)
|
| 318 |
+
(0.243402 NT09 NT15 NT10)
|
| 319 |
+
(0.0174403 NT09 NT15 NT11)
|
| 320 |
+
(0.00646962 NT09 NT15 NT12)
|
| 321 |
+
(0.155174 NT09 NT15 NT14)
|
| 322 |
+
(0.00930502 NT09 NT15 NT17)
|
| 323 |
+
(0.000311399 NT09 NT16 NT02)
|
| 324 |
+
(0.0052031 NT09 NT16 NT07)
|
| 325 |
+
(0.00742336 NT09 NT16 NT09)
|
| 326 |
+
(0.000409254 NT09 in)
|
| 327 |
+
(0.0019424 NT09 j)
|
| 328 |
+
(0.0393282 NT09 r)
|
| 329 |
+
(0.00016039 NT09 prp)
|
| 330 |
+
(0.00268682 NT10 NT01 NT07)
|
| 331 |
+
(0.00173594 NT10 NT01 NT09)
|
| 332 |
+
(0.00550051 NT10 NT01 NT10)
|
| 333 |
+
(0.00269002 NT10 NT01 NT11)
|
| 334 |
+
(0.00881491 NT10 NT01 NT12)
|
| 335 |
+
(0.0158503 NT10 NT02 NT02)
|
| 336 |
+
(0.00229071 NT10 NT02 NT07)
|
| 337 |
+
(0.00765082 NT10 NT02 NT09)
|
| 338 |
+
(0.00102327 NT10 NT02 NT11)
|
| 339 |
+
(0.474288 NT10 NT02 NT12)
|
| 340 |
+
(0.0119086 NT10 NT02 NT14)
|
| 341 |
+
(0.000270767 NT10 NT02 NT15)
|
| 342 |
+
(0.00425023 NT10 NT02 NT16)
|
| 343 |
+
(0.0533347 NT10 NT04 NT02)
|
| 344 |
+
(0.00286524 NT10 NT04 NT06)
|
| 345 |
+
(0.0687658 NT10 NT04 NT10)
|
| 346 |
+
(0.0157381 NT10 NT04 NT12)
|
| 347 |
+
(0.000809508 NT10 NT05 NT12)
|
| 348 |
+
(0.00188343 NT10 NT06 NT04)
|
| 349 |
+
(0.000155481 NT10 NT06 NT09)
|
| 350 |
+
(0.00569591 NT10 NT06 NT14)
|
| 351 |
+
(0.00233367 NT10 NT06 NT17)
|
| 352 |
+
(0.000189475 NT10 NT07 NT05)
|
| 353 |
+
(0.018548 NT10 NT07 NT07)
|
| 354 |
+
(0.00472354 NT10 NT07 NT09)
|
| 355 |
+
(0.0121145 NT10 NT07 NT11)
|
| 356 |
+
(0.0698482 NT10 NT07 NT12)
|
| 357 |
+
(0.000402661 NT10 NT07 NT16)
|
| 358 |
+
(0.00183044 NT10 NT07 NT17)
|
| 359 |
+
(0.00166519 NT10 NT10 NT02)
|
| 360 |
+
(0.015445 NT10 NT10 NT09)
|
| 361 |
+
(0.019208 NT10 NT10 NT12)
|
| 362 |
+
(0.000942866 NT10 NT10 NT18)
|
| 363 |
+
(0.00149941 NT10 NT11 NT01)
|
| 364 |
+
(0.00624706 NT10 NT11 NT02)
|
| 365 |
+
(0.0381755 NT10 NT11 NT11)
|
| 366 |
+
(0.00754256 NT10 NT11 NT12)
|
| 367 |
+
(0.00139213 NT10 NT15 NT02)
|
| 368 |
+
(0.000523505 NT10 NT15 NT06)
|
| 369 |
+
(0.0015256 NT10 NT15 NT10)
|
| 370 |
+
(0.00119525 NT10 NT15 NT12)
|
| 371 |
+
(0.00683524 NT10 NT16 NT02)
|
| 372 |
+
(0.000398591 NT10 NT16 NT04)
|
| 373 |
+
(0.0701558 NT10 NT16 NT07)
|
| 374 |
+
(0.00198721 NT10 NT16 NT11)
|
| 375 |
+
(0.0075364 NT10 NT16 NT12)
|
| 376 |
+
(0.0186618 NT10 n)
|
| 377 |
+
(0.000591828 NT10 uh)
|
| 378 |
+
(0.157827 NT11 NT11 NT11)
|
| 379 |
+
(0.0422576 NT11 NT15 NT11)
|
| 380 |
+
(0.00247895 NT11 NT15 NT16)
|
| 381 |
+
(0.000257833 NT11 dt)
|
| 382 |
+
(0.754818 NT11 cd)
|
| 383 |
+
(0.0421123 NT11 r)
|
| 384 |
+
(0.00236916 NT12 NT01 NT07)
|
| 385 |
+
(0.000118511 NT12 NT02 NT16)
|
| 386 |
+
(0.00638739 NT12 NT04 NT02)
|
| 387 |
+
(0.0055731 NT12 NT04 NT04)
|
| 388 |
+
(0.0340903 NT12 NT04 NT12)
|
| 389 |
+
(0.00102031 NT12 NT04 NT15)
|
| 390 |
+
(0.00143793 NT12 NT04 NT16)
|
| 391 |
+
(0.000102621 NT12 NT04 NT17)
|
| 392 |
+
(0.0032774 NT12 NT06 NT04)
|
| 393 |
+
(0.000366976 NT12 NT07 NT07)
|
| 394 |
+
(0.00218153 NT12 NT07 NT11)
|
| 395 |
+
(0.0117989 NT12 NT11 NT07)
|
| 396 |
+
(0.00303601 NT12 NT12 NT12)
|
| 397 |
+
(0.0747798 NT12 NT13 NT03)
|
| 398 |
+
(0.000232806 NT12 NT15 NT01)
|
| 399 |
+
(0.341016 NT12 NT15 NT02)
|
| 400 |
+
(0.0190932 NT12 NT15 NT06)
|
| 401 |
+
(0.100931 NT12 NT15 NT07)
|
| 402 |
+
(0.193386 NT12 NT15 NT10)
|
| 403 |
+
(0.0142796 NT12 NT15 NT11)
|
| 404 |
+
(0.000915196 NT12 NT16 NT07)
|
| 405 |
+
(0.000299768 NT12 NT16 NT11)
|
| 406 |
+
(0.0135637 NT12 NT16 NT12)
|
| 407 |
+
(0.115493 NT12 n)
|
| 408 |
+
(0.00344871 NT12 v)
|
| 409 |
+
(0.0262404 NT12 punc)
|
| 410 |
+
(0.000493049 NT12 in)
|
| 411 |
+
(0.00235382 NT12 j)
|
| 412 |
+
(0.0192274 NT12 r)
|
| 413 |
+
(0.00199831 NT12 prp)
|
| 414 |
+
(0.000209376 NT13 NT11 NT15)
|
| 415 |
+
(0.00188858 NT13 NT13 NT03)
|
| 416 |
+
(0.540855 NT13 punc)
|
| 417 |
+
(0.00804226 NT13 cc)
|
| 418 |
+
(0.000413617 NT14 NT00 NT09)
|
| 419 |
+
(0.0218326 NT14 NT00 NT14)
|
| 420 |
+
(0.000451496 NT14 NT00 NT18)
|
| 421 |
+
(0.00149459 NT14 NT01 NT07)
|
| 422 |
+
(0.00384046 NT14 NT01 NT17)
|
| 423 |
+
(0.00138254 NT14 NT02 NT09)
|
| 424 |
+
(0.0525259 NT14 NT03 NT14)
|
| 425 |
+
(0.000893974 NT14 NT04 NT02)
|
| 426 |
+
(0.000175088 NT14 NT04 NT06)
|
| 427 |
+
(0.000478859 NT14 NT04 NT08)
|
| 428 |
+
(0.00086439 NT14 NT04 NT09)
|
| 429 |
+
(0.00529624 NT14 NT04 NT10)
|
| 430 |
+
(0.000476852 NT14 NT04 NT12)
|
| 431 |
+
(0.00549502 NT14 NT04 NT14)
|
| 432 |
+
(0.0281873 NT14 NT05 NT14)
|
| 433 |
+
(0.76715 NT14 NT06 NT17)
|
| 434 |
+
(0.00303311 NT14 NT07 NT07)
|
| 435 |
+
(0.00027137 NT14 NT07 NT09)
|
| 436 |
+
(0.000748841 NT14 NT07 NT12)
|
| 437 |
+
(0.0874896 NT14 NT07 NT17)
|
| 438 |
+
(0.00416962 NT14 NT09 NT14)
|
| 439 |
+
(0.00175999 NT14 NT10 NT09)
|
| 440 |
+
(0.000710869 NT14 NT11 NT17)
|
| 441 |
+
(0.000723932 NT14 NT12 NT07)
|
| 442 |
+
(0.00440147 NT14 NT12 NT14)
|
| 443 |
+
(0.000761726 NT14 NT14 NT09)
|
| 444 |
+
(0.00084762 NT14 NT14 NT17)
|
| 445 |
+
(0.000323644 NT14 NT15 NT02)
|
| 446 |
+
(0.00264492 NT14 NT15 NT14)
|
| 447 |
+
(0.000238841 NT14 NT16 NT07)
|
| 448 |
+
(0.000126025 NT14 NT16 NT09)
|
| 449 |
+
(0.000217731 NT14 r)
|
| 450 |
+
(0.00024161 NT14 wrb)
|
| 451 |
+
(0.000366989 NT15 NT04 NT04)
|
| 452 |
+
(0.00127143 NT15 NT04 NT15)
|
| 453 |
+
(0.00137902 NT15 NT11 NT07)
|
| 454 |
+
(0.000109067 NT15 NT15 NT04)
|
| 455 |
+
(0.00380199 NT15 NT15 NT06)
|
| 456 |
+
(0.000193842 NT15 NT15 NT15)
|
| 457 |
+
(0.000253898 NT15 NT15 NT16)
|
| 458 |
+
(0.00556123 NT15 v)
|
| 459 |
+
(0.0798535 NT15 punc)
|
| 460 |
+
(0.557206 NT15 in)
|
| 461 |
+
(0.0519477 NT15 cc)
|
| 462 |
+
(0.170466 NT15 of)
|
| 463 |
+
(0.113587 NT15 to)
|
| 464 |
+
(0.0125211 NT15 wrb)
|
| 465 |
+
(0.00146961 NT15 pdt)
|
| 466 |
+
(0.000682686 NT16 NT01 NT16)
|
| 467 |
+
(0.000353409 NT16 NT02 NT02)
|
| 468 |
+
(0.0034721 NT16 NT02 NT05)
|
| 469 |
+
(0.00392739 NT16 NT04 NT04)
|
| 470 |
+
(0.0225952 NT16 NT04 NT16)
|
| 471 |
+
(0.00368407 NT16 NT05 NT16)
|
| 472 |
+
(0.000275916 NT16 NT06 NT05)
|
| 473 |
+
(0.0263102 NT16 NT07 NT05)
|
| 474 |
+
(0.00344251 NT16 NT07 NT12)
|
| 475 |
+
(0.00271063 NT16 NT07 NT16)
|
| 476 |
+
(0.000950873 NT16 NT10 NT13)
|
| 477 |
+
(0.0229124 NT16 NT11 NT07)
|
| 478 |
+
(0.0173136 NT16 NT11 NT11)
|
| 479 |
+
(0.0094147 NT16 NT11 NT16)
|
| 480 |
+
(0.00210054 NT16 NT13 NT03)
|
| 481 |
+
(0.000417271 NT16 NT15 NT01)
|
| 482 |
+
(0.0100377 NT16 NT15 NT11)
|
| 483 |
+
(0.000679194 NT16 NT16 NT05)
|
| 484 |
+
(0.00203961 NT16 NT16 NT11)
|
| 485 |
+
(0.00352444 NT16 NT16 NT12)
|
| 486 |
+
(0.0133536 NT16 NT16 NT16)
|
| 487 |
+
(0.0041124 NT16 n)
|
| 488 |
+
(0.0518387 NT16 v)
|
| 489 |
+
(0.0133556 NT16 punc)
|
| 490 |
+
(0.746857 NT16 j)
|
| 491 |
+
(0.0325454 NT16 cd)
|
| 492 |
+
(0.000994964 NT16 r)
|
| 493 |
+
(0.000325555 NT17 NT03 NT09)
|
| 494 |
+
(0.000431668 NT17 NT03 NT17)
|
| 495 |
+
(0.000283523 NT17 NT04 NT01)
|
| 496 |
+
(0.00308221 NT17 NT04 NT02)
|
| 497 |
+
(0.000106449 NT17 NT04 NT07)
|
| 498 |
+
(0.584517 NT17 NT04 NT08)
|
| 499 |
+
(0.0389749 NT17 NT04 NT09)
|
| 500 |
+
(0.00927257 NT17 NT04 NT10)
|
| 501 |
+
(0.000698039 NT17 NT04 NT11)
|
| 502 |
+
(0.0594712 NT17 NT04 NT14)
|
| 503 |
+
(0.000381951 NT17 NT04 NT16)
|
| 504 |
+
(0.248255 NT17 NT04 NT17)
|
| 505 |
+
(0.000264379 NT17 NT05 NT08)
|
| 506 |
+
(0.00194384 NT17 NT05 NT10)
|
| 507 |
+
(0.000308808 NT17 NT05 NT14)
|
| 508 |
+
(0.000271388 NT17 NT07 NT08)
|
| 509 |
+
(0.000131093 NT17 NT07 NT10)
|
| 510 |
+
(0.00011195 NT17 NT07 NT17)
|
| 511 |
+
(0.000462643 NT17 NT08 NT09)
|
| 512 |
+
(0.00153331 NT17 NT11 NT07)
|
| 513 |
+
(0.00214335 NT17 NT11 NT11)
|
| 514 |
+
(0.000307068 NT17 NT11 NT12)
|
| 515 |
+
(0.000550528 NT17 NT15 NT10)
|
| 516 |
+
(0.000125644 NT17 NT16 NT02)
|
| 517 |
+
(0.000474489 NT17 NT17 NT09)
|
| 518 |
+
(0.00032483 NT17 NT17 NT18)
|
| 519 |
+
(0.045027 NT17 v)
|
| 520 |
+
(0.00425503 NT18 NT07 NT18)
|
| 521 |
+
(0.978831 NT18 NT13 NT00)
|
| 522 |
+
(0.00130119 NT18 NT13 NT03)
|
| 523 |
+
(0.0155958 NT18 NT17 NT13)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/sec.B.hept.ngrambin
ADDED
|
Binary file (545 Bytes). View file
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/sec.ts20.quad.ngrambin
ADDED
|
Binary file (34.4 kB). View file
|
|
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/singing-mode.scm
ADDED
|
@@ -0,0 +1,673 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;;
|
| 3 |
+
;;; Festival Singing Mode
|
| 4 |
+
;;;
|
| 5 |
+
;;; Written by Dominic Mazzoni
|
| 6 |
+
;;; Carnegie Mellon University
|
| 7 |
+
;;; 11-752 - "Speech: Phonetics, Prosody, Perception and Synthesis"
|
| 8 |
+
;;; Spring 2001
|
| 9 |
+
;;;
|
| 10 |
+
;;; Extended by Milan Zamazal <pdm@brailcom.org>, 2006:
|
| 11 |
+
;;; - Slur support.
|
| 12 |
+
;;; - Czech support.
|
| 13 |
+
;;; - Some cleanup.
|
| 14 |
+
;;; - Print debugging information only when singing-debug is true.
|
| 15 |
+
;;;
|
| 16 |
+
;;; This code is public domain; anyone may use it freely.
|
| 17 |
+
;;;
|
| 18 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 19 |
+
|
| 20 |
+
;; The singing mark-up is parsed with the rxp XML module.
(require_module 'rxp)

;; Register the public identifiers used by singing documents so they
;; resolve to files in xml_dtd_dir.
(let ((singing-dtd (path-append xml_dtd_dir "Singing.v0_1.dtd"))
      (latin-entities (path-append xml_dtd_dir "sable-latin.ent")))
  (xml_register_id "-//SINGING//DTD SINGING mark up//EN" singing-dtd)
  (xml_register_id "-//SINGING//ENTITIES Added Latin 1 for SINGING//EN"
                   latin-entities))

;; Set this to t to enable debugging messages:
(defvar singing-debug nil)
|
| 32 |
+
|
| 33 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 34 |
+
;;
|
| 35 |
+
;; XML parsing functions
|
| 36 |
+
;;
|
| 37 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 38 |
+
|
| 39 |
+
;;
|
| 40 |
+
;; singing_xml_elements
|
| 41 |
+
;;
|
| 42 |
+
;; This variable defines the actions that are to be taken when
|
| 43 |
+
;; parsing each of our XML tags: SINGING, PITCH, DURATION, and REST.
|
| 44 |
+
;;
|
| 45 |
+
;; When we get the pitch and duration of each token, we store them
|
| 46 |
+
;; in features of the token. Later our intonation and duration
|
| 47 |
+
;; functions access these features.
|
| 48 |
+
;;
|
| 49 |
+
|
| 50 |
+
;; Tag handlers that singing_init_func installs as xxml_elements.
;; Each entry has the form (TAG (ATTLIST UTT) BODY...).  The "(TAG" and
;; ")TAG" entries come in pairs: the "(" handler of PITCH and DURATION only
;; remembers the attribute list, and the ")" handler converts it and stores
;; the result on the last token via singing-append-feature!.
(defvar singing_xml_elements
  '(
    ("(SINGING" (ATTLIST UTT)
     ;; Reset the per-song state and read the tempo.
     (set! singing_pitch_att_list nil)
     (set! singing_dur_att_list nil)
     (set! singing_global_time 0.0)
     (set! singing_bpm (get-bpm ATTLIST))
     ;; Beats per second = beats per minute / 60.  get-durs divides BEATS
     ;; values by this number to obtain seconds.
     (set! singing_bps (/ singing_bpm 60.0))
     nil)

    (")SINGING" (ATTLIST UTT)
     (xxml_synth UTT)  ;; Synthesize the remaining tokens
     nil)

    ("(PITCH" (ATTLIST UTT)
     ;; Keep the attributes until the closing tag.
     (set! singing_pitch_att_list ATTLIST)
     UTT)

    (")PITCH" (ATTLIST UTT)
     (let ((freq (get-freqs singing_pitch_att_list)))
       (if singing-debug
           (begin
             (print "freqs")
             (print freq)))
       (singing-append-feature! UTT 'freq freq))
     UTT)

    ("(DURATION" (ATTLIST UTT)
     ;; Keep the attributes until the closing tag.
     (set! singing_dur_att_list ATTLIST)
     UTT)

    (")DURATION" (ATTLIST UTT)
     (let ((dur (get-durs singing_dur_att_list)))
       (if singing-debug
           (begin
             (print "durs")
             (print dur)))
       (singing-append-feature! UTT 'dur dur))
     UTT)

    ("(REST" (ATTLIST UTT)
     ;; A rest carries a single duration: the first value of the first group.
     (let ((dur (get-durs ATTLIST)))
       (if singing-debug
           (begin
             (print "rest durs")
             (print dur)))
       (singing-append-feature! UTT 'rest (caar dur)))
     UTT)
    ))
|
| 99 |
+
|
| 100 |
+
;;
|
| 101 |
+
;; get-bpm
|
| 102 |
+
;;
|
| 103 |
+
;; Given the attribute list of a SINGING tag, returns the beats
|
| 104 |
+
;; per minute of the song from the BPM parameter.
|
| 105 |
+
;;
|
| 106 |
+
|
| 107 |
+
;; Returns the value of the BPM attribute in ATTS as a number.
;; The attribute entry has the shape (BPM (VALUE)).
(define (get-bpm atts)
  (let ((bpm-entry (assoc 'BPM atts)))
    (parse-number (car (cadr bpm-entry)))))
|
| 109 |
+
|
| 110 |
+
;;
|
| 111 |
+
;; get-durs
|
| 112 |
+
;;
|
| 113 |
+
;; Given the attribute list of a DURATION tag, returns a list of
|
| 114 |
+
;; durations in seconds for the syllables of the word enclosed by
|
| 115 |
+
;; this tag.
|
| 116 |
+
;;
|
| 117 |
+
;; It first looks for a BEATS parameter, and converts these to
|
| 118 |
+
;; seconds using BPM, which was set in the SINGING tag. If this
|
| 119 |
+
;; is not present, it looks for the SECONDS parameter.
|
| 120 |
+
;;
|
| 121 |
+
|
| 122 |
+
;; Returns the durations described by ATTS as a list of lists of seconds
;; (string->list groups the values by "," and then by "+").
;; BEATS is used unless its value is the symbol X, in which case SECONDS
;; is used instead.  Beats are converted with the global singing_bps.
(define (get-durs atts)
  (let ((secs-attr (car (cadr (assoc 'SECONDS atts))))
        (beats-attr (car (cadr (assoc 'BEATS atts))))
        (beat->seconds (lambda (b) (/ (parse-number b) singing_bps))))
    (if (not (equal? beats-attr 'X))
        (mapcar (lambda (group) (mapcar beat->seconds group))
                (string->list beats-attr))
        (mapcar (lambda (group) (mapcar parse-number group))
                (string->list secs-attr)))))
|
| 131 |
+
|
| 132 |
+
;;
|
| 133 |
+
;; get-freqs
|
| 134 |
+
;;
|
| 135 |
+
;; Given the attribute list of a PITCH tag, returns a list of
|
| 136 |
+
;; frequencies in Hertz for the syllables of the word enclosed by
|
| 137 |
+
;; this tag.
|
| 138 |
+
;;
|
| 139 |
+
;; It first looks for a NOTE parameter, which can contain a MIDI
|
| 140 |
+
;; note of the form "C4", "D#3", or "Ab6", and if this is not
|
| 141 |
+
;; present it looks for the FREQ parameter.
|
| 142 |
+
;;
|
| 143 |
+
|
| 144 |
+
;; Returns the pitches described by ATTS as a list of lists of frequencies
;; (string->list groups the values by "," and then by "+").
;; NOTE is used unless its value is the symbol X, in which case the
;; numeric FREQ attribute is used instead.
(define (get-freqs atts)
  (let ((freq-attr (car (cadr (assoc 'FREQ atts))))
        (note-attr (car (cadr (assoc 'NOTE atts)))))
    (if (not (equal? note-attr 'X))
        (mapcar (lambda (group) (mapcar note->freq group))
                (string->list note-attr))
        (mapcar (lambda (group) (mapcar parse-number group))
                (string->list freq-attr)))))
|
| 152 |
+
|
| 153 |
+
;;
|
| 154 |
+
;; note->freq
|
| 155 |
+
;;
|
| 156 |
+
;; Converts a string representing a MIDI note such as "C4" and
|
| 157 |
+
;; turns it into a frequency. We use the convention that
|
| 158 |
+
;; A5=440 (some call this note A3).
|
| 159 |
+
;;
|
| 160 |
+
|
| 161 |
+
;; Converts NOTE (e.g. "C4", "D#3", "Ab6") to a frequency.
;; The last character is taken as the octave number and everything before
;; it as the note name; the MIDI number is 12 * octave + offset of the name.
(define (note->freq note)
  (if singing-debug
      (format t "note is %l\n" note))
  ;; Work on a plain string copy of whatever value was passed in.
  (let ((text (format nil "%s" note)))
    (if singing-debug
        (print_string text))
    (let ((len (string-length text)))
      (let ((octave-char (substring text (- len 1) 1))
            (letter (substring text 0 (- len 1))))
        (let ((freq (midinote->freq
                     (+ (* 12 (parse-number octave-char))
                        (notename->midioffset letter)))))
          (if singing-debug
              (format t "note %s freq %f\n" text freq))
          freq)))))
|
| 177 |
+
|
| 178 |
+
;;
|
| 179 |
+
;; midinote->freq
|
| 180 |
+
;;
|
| 181 |
+
;; Converts a MIDI note number (1 - 127) into a frequency. We use
|
| 182 |
+
;; the convention that 69 = "A5" =440 Hz.
|
| 183 |
+
;;
|
| 184 |
+
|
| 185 |
+
;; Equal-tempered frequency of MIDINOTE: note 69 is 440 Hz and every
;; 12 notes double the frequency.
(define (midinote->freq midinote)
  (let ((semitones-from-a (- midinote 69)))
    (* 440.0 (pow 2.0 (/ semitones-from-a 12)))))
|
| 187 |
+
|
| 188 |
+
;;
|
| 189 |
+
;; notename->midioffset
|
| 190 |
+
;;
|
| 191 |
+
;; Utility function that looks up the name of a note like "F#" and
|
| 192 |
+
;; returns its offset from C.
|
| 193 |
+
;;
|
| 194 |
+
|
| 195 |
+
;; Looks NOTENAME up in the note_names table and returns the number
;; stored with it.
(define (notename->midioffset notename)
  (let ((entry (assoc_string notename note_names)))
    (parse-number (cadr entry))))
|
| 197 |
+
|
| 198 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 199 |
+
;;
|
| 200 |
+
;; Pitch modification functions
|
| 201 |
+
;;
|
| 202 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 203 |
+
|
| 204 |
+
;;
|
| 205 |
+
;; singing_f0_targets
|
| 206 |
+
;;
|
| 207 |
+
;; This function replaces the normal intonation function used in
|
| 208 |
+
;; festival. For each syllable, it extracts the frequency that
|
| 209 |
+
;; was calculated from the XML tags and stored in the token this
|
| 210 |
+
;; syllable comes from, and sets this frequency as both the start
|
| 211 |
+
;; and end f0 target. Really straightforward!
|
| 212 |
+
;;
|
| 213 |
+
|
| 214 |
+
;; Frequency of the last note of the previously processed syllable
;; (nil until a syllable with frequencies has been seen).
(defvar singing-last-f0 nil)

(define (singing_f0_targets utt syl)
  "(singing_f0_targets utt syl)"
  ;; Returns a list of (TIME F0) targets for SYL, or nil when the syllable
  ;; has no frequencies.  The syllable's time span is divided among its
  ;; notes in proportion to their durations; each note gets two targets at
  ;; its own frequency, placed 10% of the note's span inside each end.
  (let ((syl-start (item.feat syl 'syllable_start))
        (syl-end (item.feat syl 'syllable_end))
        (note-freqs (mapcar parse-number (syl->freq syl)))
        (note-durs (syl->durations syl)))
    (let ((dur-sum (apply + note-durs))
          (syl-length (- syl-end syl-start))
          (cursor syl-start)
          ;; the segment just before this syllable's first segment
          (before (item.prev
                   (item.relation
                    (item.daughter1 (item.relation syl 'SylStructure))
                    'Segment)))
          (previous-f0 singing-last-f0))
      (if note-freqs
          (let ((note->targets
                 (lambda (d f)
                   (let ((span (* (/ d dur-sum) syl-length))
                         (span-start cursor))
                     ;; advance the running time to the end of this note
                     (set! cursor (+ cursor span))
                     (let ((margin (* 0.1 span)))
                       (list (list (+ span-start margin) f)
                             (list (- cursor margin) f)))))))
            (set! singing-last-f0 (car (last note-freqs)))
            (append
             ;; When the preceding segment is a non-initial silence, add two
             ;; targets inside it (at 80% and 90% of its length) that lead
             ;; from the previous note's frequency to this syllable's first.
             (if (and previous-f0
                      before
                      (item.prev before)
                      (string-equal
                       (item.feat before 'name)
                       (car (car (cdr (car (PhoneSet.description '(silences))))))))
                 (let ((sil-start (item.feat before "p.end"))
                       (sil-end (item.feat before "end")))
                   (list (list (+ sil-start (* (- sil-end sil-start) 0.8))
                               previous-f0)
                         (list (+ sil-start (* (- sil-end sil-start) 0.9))
                               (car note-freqs)))))
             (apply append
                    (mapcar note->targets note-durs note-freqs))))))))
|
| 247 |
+
|
| 248 |
+
;;
|
| 249 |
+
;; syl->freq
|
| 250 |
+
;;
|
| 251 |
+
;; Given a syllable, looks up the frequency in its token. The token
|
| 252 |
+
;; stores a list of all of the frequencies associated with its
|
| 253 |
+
;; syllables, so this syllable grabs the frequency out of the list
|
| 254 |
+
;; corresponding to its index within the word. (This assumes that
|
| 255 |
+
;; a frequency was given for each syllable, and that a token
|
| 256 |
+
;; corresponds directly to a word. Singing-mode is not guaranteed
|
| 257 |
+
;; to work at all if either of these things are not true.)
|
| 258 |
+
;;
|
| 259 |
+
|
| 260 |
+
;; Returns the entry for SYL in the frequency list stored on its token,
;; selected by the syllable's position in the word.
(define (syl->freq syl)
  (nth (item.feat syl "R:Syllable.pos_in_word")
       (singing-feat syl "R:SylStructure.parent.R:Token.parent.freq")))
|
| 264 |
+
|
| 265 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 266 |
+
;;
|
| 267 |
+
;; Duration modification functions
|
| 268 |
+
;;
|
| 269 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 270 |
+
|
| 271 |
+
;;
|
| 272 |
+
;; singing_duration_method
|
| 273 |
+
;;
|
| 274 |
+
;; Calculates the duration of each phone in the utterance, in three
|
| 275 |
+
;; passes. Consult the three functions it calls, below, to see what
|
| 276 |
+
;; each one does.
|
| 277 |
+
;;
|
| 278 |
+
|
| 279 |
+
;; Duration module for singing mode.  Runs, in order: the onset-offset
;; pass over all syllables, the initial-rest handling for the first token,
;; the per-syllable timing pass, and the final pass over all segments.
;; Returns UTT.
(define (singing_duration_method utt)
  (let ((syllables (lambda () (utt.relation.items utt 'Syllable))))
    (mapcar singing_adjcons_syllable (syllables))
    (singing_do_initial utt (car (utt.relation.items utt 'Token)))
    (mapcar singing_do_syllable (syllables))
    (mapcar singing_fix_segment (utt.relation.items utt 'Segment))
    utt))
|
| 285 |
+
|
| 286 |
+
;;
|
| 287 |
+
;; singing_adjcons_syllable
|
| 288 |
+
;;
|
| 289 |
+
;; First pass. Looks at the first phone of each syllable and
|
| 290 |
+
;; adjusts the starting time of this syllable such that the
|
| 291 |
+
;; perceived start time of the first phone is at the beginning
|
| 292 |
+
;; of the originally intended start time of the syllable.
|
| 293 |
+
;;
|
| 294 |
+
;; If this is not done, telling it to say the word "ta" at time
|
| 295 |
+
;; 2.0 actually doesn't "sound" like it says the "t" sound until
|
| 296 |
+
;; about 2.1 seconds.
|
| 297 |
+
;;
|
| 298 |
+
;; This function has a little bit of duplicated code from
|
| 299 |
+
;; singing_do_syllable, below - it could be modularized a little
|
| 300 |
+
;; better.
|
| 301 |
+
;;
|
| 302 |
+
|
| 303 |
+
;; First pass.  Computes the onset offset of SYL's first phone and, when
;; SYL has a predecessor, records it as 'subtractoffset on the previous
;; syllable and 'addoffset on SYL.  If the requested syllable duration is
;; shorter than the sum of the average phone durations, the offset is
;; scaled down by the same ratio.
(define (singing_adjcons_syllable syl)
  (let ((structure (item.relation syl 'SylStructure)))
    (let ((natural-len
           (apply + (mapcar (lambda (seg)
                              (get_avg_duration (item.feat seg "name")))
                            (item.leafs structure))))
          (wanted-len (apply + (syl->durations syl)))
          (onset (item.daughter1 structure))
          (previous (item.prev (item.relation syl 'Syllable))))
      (let ((raw-offset (get_duration_offset (item.feat onset "name"))))
        (if singing-debug
            (format t "offset: %f\n" raw-offset))
        (let ((shift (if (< wanted-len natural-len)
                         (* raw-offset (/ wanted-len natural-len))
                         raw-offset)))
          (if singing-debug
              (format t "Want to adjust syl by %f\n" shift))
          (if previous
              (begin
                (item.set_feat previous 'subtractoffset shift)
                (item.set_feat syl 'addoffset shift))))))))
|
| 323 |
+
|
| 324 |
+
;;
|
| 325 |
+
;; singing_do_syllable
|
| 326 |
+
;;
|
| 327 |
+
;; Second pass. For each syllable, adds up the amount of time
|
| 328 |
+
;; that would normally be spent in consonants and vowels, based
|
| 329 |
+
;; on the average durations of these phones. Then, if the
|
| 330 |
+
;; intended length of this syllable is longer than this total,
|
| 331 |
+
;; stretch only the vowels; otherwise shrink all phones
|
| 332 |
+
;; proportionally. This function actually sets the "end" time
|
| 333 |
+
;; of each phone using a global "singing_global_time" variable.
|
| 334 |
+
;;
|
| 335 |
+
;; We also handle rests at this point, which are tagged onto the
|
| 336 |
+
;; end of the previous token.
|
| 337 |
+
;;
|
| 338 |
+
|
| 339 |
+
;; Stretched vowels longer than this (in seconds) are replaced by their
;; long variant (name + ":") when the phone set has one; see
;; singing_do_syllable.
(defvar singing-max-short-vowel-length 0.11)

;; Handles a rest at the very beginning of the song.
;;
;; TOKEN is the first token of UTT (or nil when there is none).  When it is
;; the empty placeholder token (name "") and carries a positive 'rest
;; feature, the global clock singing_global_time is started at that rest
;; length and a silence segment ending at that time is inserted before the
;; first segment.
;;
;; Features set by singing-append-feature! are wrapped in a list, and
;; item.feat yields 0 for a feature that is not set, so the value is read
;; through singing-feat, which maps "not set" to nil.  This avoids taking
;; the car of 0 when the placeholder token has no rest, and the token check
;; avoids calling item.name on nil for an utterance without tokens.
(define (singing_do_initial utt token)
  (if (and token
           (equal? (item.name token) ""))
      (let ((restlen (singing-feat token 'rest)))
        (if singing-debug
            (format t "restlen %l\n" restlen))
        (if (and restlen
                 (> restlen 0))
            (let ((silence (car (car (cdr (assoc 'silences (PhoneSet.description)))))))
              (set! singing_global_time restlen)
              (item.relation.insert (utt.relation.first utt 'Segment) 'Segment
                                    (list silence (list (list "end" singing_global_time)))
                                    'before))))))
|
| 352 |
+
|
| 353 |
+
;; Second pass.  Assigns an "end" time to every segment of SYL, advancing
;; the global clock singing_global_time, and appends the rest (if any)
;; that follows the syllable.
(define (singing_do_syllable syl)
  (let ((cons-total 0.0)
        (vowel-total 0.0)
        (segs (item.leafs (item.relation syl 'SylStructure)))
        ;; true for real vowels and for the segment promoted below
        (vowel-like?
         (lambda (seg)
           (or (equal? "+" (item.feat seg "ph_vc"))
               (equal? "+" (item.feat seg "singing-vc")))))
        ;; advance the global clock by LEN and end SEG at the new time
        (close-segment
         (lambda (seg len)
           (begin
             (set! singing_global_time (+ len singing_global_time))
             (item.set_feat seg 'end singing_global_time)))))
    ;; If the syllable contains no vowel, mark its middle segment so that it
    ;; is treated as one; hopefully this works well for languages where
    ;; consonants may form syllables.
    (let ((pending segs)
          (has-vowel nil))
      (while (and pending (not has-vowel))
        (begin
          (set! has-vowel (equal? "+" (item.feat (car pending) "ph_vc")))
          (set! pending (cdr pending))))
      (if (not has-vowel)
          (item.set_feat (nth (nint (/ (- (length segs) 1) 2)) segs)
                         "singing-vc" "+")))
    ;; Sum the average durations of the vowels and of the consonants.
    (mapcar (lambda (seg)
              (let ((natural (get_avg_duration (item.feat seg "name"))))
                (if (vowel-like? seg)
                    (set! vowel-total (+ vowel-total natural))
                    (set! cons-total (+ cons-total natural)))))
            segs)
    (let ((natural-total (+ cons-total vowel-total))
          (target (apply + (syl->durations syl)))
          ;; net onset adjustment recorded by singing_adjcons_syllable
          (shift (- (item.feat syl 'subtractoffset)
                    (item.feat syl 'addoffset))))
      (if singing-debug
          (format t "Vowlen: %f conslen: %f totlen: %f\n"
                  vowel-total cons-total natural-total))
      ;; Apply the adjustment only if it is less than half of the syllable.
      (if (< shift (/ target 2.0))
          (begin
            (set! target (- target shift))
            (if singing-debug
                (format t "Offset: %f\n" shift))))
      (if singing-debug
          (format t "Syldur: %f\n" target))
      (if (> natural-total target)
          ;; The average durations add up to more than the requested
          ;; duration: scale every phone down by the same factor.
          (let ((scale (/ target natural-total)))
            (mapcar (lambda (seg)
                      (close-segment
                       seg
                       (* scale (get_avg_duration (item.feat seg "name")))))
                    segs))
          ;; Otherwise keep the consonants at their average duration and
          ;; let the vowels absorb the remaining time.
          (let ((vowel-scale (/ (- target cons-total) vowel-total))
                (phone-names
                 (mapcar car (car (cdar (PhoneSet.description '(phones)))))))
            (mapcar (lambda (seg)
                      (let ((len (get_avg_duration (item.feat seg "name"))))
                        (if (vowel-like? seg)
                            (begin
                              (set! len (* vowel-scale len))
                              ;; If the sound is long enough, better results
                              ;; may be achieved by using the long version
                              ;; of the vowel, when the phone set has one.
                              (if (> len singing-max-short-vowel-length)
                                  (let ((long-name
                                         (string-append (item.feat seg "name") ":")))
                                    (if (member_string long-name phone-names)
                                        (item.set_feat seg "name" long-name))))))
                        (close-segment seg len)))
                    segs)))))
  ;; Rest following this syllable (0.0 unless it is the last of its word).
  (let ((rest-len (car (syl->rest syl))))
    (if singing-debug
        (format t "restlen %l\n" rest-len))
    (if (> rest-len 0)
        (let ((final-seg (item.relation
                          (item.daughtern (item.relation syl 'SylStructure))
                          'Segment))
              (sil (car (car (cdr (assoc 'silences (PhoneSet.description))))))
              (time-before-rest singing_global_time)
              (tiny-pause 0.00001))
          (set! singing_global_time (+ rest-len singing_global_time))
          (item.insert final-seg
                       (list sil (list (list "end" singing_global_time)))
                       'after)
          ;; Insert a very short extra pause after the same segment to
          ;; avoid after-effects, especially after vowels.
          (if (and final-seg
                   (equal? (item.feat final-seg "ph_vc") "+")
                   (< tiny-pause rest-len))
              (item.insert final-seg
                           (list sil (list (list "end" (+ time-before-rest
                                                          tiny-pause))))
                           'after))))))
|
| 443 |
+
|
| 444 |
+
;;
|
| 445 |
+
;; singing_fix_segment
|
| 446 |
+
;;
|
| 447 |
+
;; Third pass. Finds any segments (phones) that we didn't catch earlier
|
| 448 |
+
;; (say if they didn't belong to a syllable, like silence) and sets them
|
| 449 |
+
;; to zero duration
|
| 450 |
+
;;
|
| 451 |
+
|
| 452 |
+
;; Third pass.  A segment whose 'end is still 0.0 is given the end time of
;; the segment before it (or 0.0 when it is the first one), which leaves it
;; with zero duration.  Segments that already have an end time are only
;; reported when debugging is on.
(define (singing_fix_segment seg)
  (cond
   ((not (equal? 0.0 (item.feat seg 'end)))
    (if singing-debug
        (format t "segment: %s end: %f\n" (item.name seg) (item.feat seg 'end))))
   ((equal? nil (item.prev seg))
    (item.set_feat seg 'end 0.0))
   (t
    (item.set_feat seg 'end (item.feat (item.prev seg) 'end)))))
|
| 459 |
+
|
| 460 |
+
;; returns the duration of a syllable (stored in its token)
|
| 461 |
+
;; Returns the list of numeric durations for SYL: the entry of the token's
;; 'dur list selected by the syllable's position in the word.
(define (syl->durations syl)
  (let ((word-durs (singing-feat syl "R:SylStructure.parent.R:Token.parent.dur")))
    (mapcar parse-number
            (nth (item.feat syl "R:Syllable.pos_in_word") word-durs))))
|
| 465 |
+
|
| 466 |
+
;; returns the duration of the rest following a syllable
|
| 467 |
+
;; Returns a one-element list with the rest length that follows SYL.
;; Only the syllable whose position equals the last index of the token's
;; 'dur list gets the token's 'rest value; every other syllable, and a
;; token without a rest, yields 0.0.
(define (syl->rest syl)
  (let ((pos (item.feat syl "R:Syllable.pos_in_word"))
        (word-durs (singing-feat syl "R:SylStructure.parent.R:Token.parent.dur"))
        (token-rest (singing-feat syl "R:SylStructure.parent.R:Token.parent.rest")))
    (list (if (and (equal? pos (- (length word-durs) 1))
                   token-rest)
              token-rest
              0.0))))
|
| 474 |
+
|
| 475 |
+
;; get the average duration of a phone
|
| 476 |
+
;; Returns the value stored for PHONE in phoneme_durations, or 0.08 when
;; the phone has no entry.
(define (get_avg_duration phone)
  (let ((entry (assoc_string phone phoneme_durations)))
    (if (not entry)
        0.08
        (cadr entry))))
|
| 481 |
+
|
| 482 |
+
;; get the duration offset of a phone (see the description above)
|
| 483 |
+
;; Returns the number stored for PHONE in phoneme_offsets*, the offset
;; table selected by singing_init_func.
(define (get_duration_offset phone)
  (let ((entry (assoc_string phone phoneme_offsets*)))
    (parse-number (cadr entry))))
|
| 485 |
+
|
| 486 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 487 |
+
;;
|
| 488 |
+
;; Other utility functions
|
| 489 |
+
;;
|
| 490 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 491 |
+
|
| 492 |
+
;; Returns STRING wrapped in "[" and "]" when it is one of the characters
;; * + ? [ ] . and STRING unchanged otherwise.  split-string uses this to
;; build its pattern from a separator.
(define (char-quote string)
  (cond
   ((not (member string '("*" "+" "?" "[" "]" ".")))
    string)
   (t
    (string-append "[" string "]"))))
|
| 496 |
+
|
| 497 |
+
;; Splits STRING at each SEPARATOR that has at least one character on both
;; sides, and returns the pieces as a list of strings in their original
;; order.
(define (split-string string separator)
  (let ((pattern (string-append ".+" (char-quote separator) ".+"))
        (remaining string)
        (pieces nil))
    (while (string-matches remaining pattern)
      (begin
        (set! pieces (cons (string-before remaining separator) pieces))
        (set! remaining (string-after remaining separator))))
    ;; We have to convert the weird XML attribute value type to string
    (reverse (cons (string-append remaining "") pieces))))
|
| 503 |
+
|
| 504 |
+
;; Parses an attribute value into a list of lists: STRING is split at ","
;; and each resulting part is split again at "+".
(define (string->list string)
  (let ((split-at-plus (lambda (part) (split-string part "+"))))
    (mapcar split-at-plus (split-string string ","))))
|
| 506 |
+
|
| 507 |
+
;; Sets FEATURE on the last token of UTT.  If UTT has no tokens yet, an
;; empty placeholder token is appended first and receives the feature.
(define (singing-append-feature! utt feature value)
  (if (not (utt.relation.items utt 'Token))
      (utt.relation.append utt 'Token '("" ((name "") (whitespace "")
                                           (prepunctuation "") (punc "")))))
  ;; we have to wrap value into a list to work around a Festival bug
  (item.set_feat (car (last (utt.relation.items utt 'Token)))
                 feature
                 (list value)))
|
| 516 |
+
|
| 517 |
+
;; Reads a feature written by singing-append-feature!.  Returns nil when
;; item.feat yields 0, and otherwise the first element of the stored list.
(define (singing-feat item feature)
  (let ((raw (item.feat item feature)))
    (cond
     ((equal? raw 0) nil)
     (t (car raw)))))
|
| 522 |
+
|
| 523 |
+
;; Returns the language of the current voice.
;;
;; The language is looked up in the second element of the value returned
;; by voice.description, the same access path as the commented-out lookup
;; in singing_init_func.  The previous version ran assoc over the whole
;; description pair, so it never reached the (language ...) entry.
(define (current-language)
  (cadr (assoc 'language
               (cadr (voice.description current-voice)))))
|
| 525 |
+
|
| 526 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 527 |
+
;;
|
| 528 |
+
;; Initializing and exiting singing mode
|
| 529 |
+
;;
|
| 530 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 531 |
+
|
| 532 |
+
;;
|
| 533 |
+
;; singing_init_func
|
| 534 |
+
;;
|
| 535 |
+
|
| 536 |
+
;; Value of eou_tree saved on entry to singing mode and restored by
;; singing_exit_func.
(defvar singing_previous_eou_tree nil)

(define (singing_init_func)
  "(singing_init_func) - Initialization for Singing mode"
  ;; get_avg_duration reads phoneme_durations; make sure it is bound
  (if (symbol-bound? 'phoneme_durations)
      nil
      (set! phoneme_durations '()))
  ;; use our intonation function
  (Parameter.set 'Int_Method 'General)
  (Parameter.set 'Int_Target_Method Int_Targets_General)
  (set! int_general_params (list (list 'targ_func singing_f0_targets)))
  (set! singing-last-f0 nil)
  ;; use our duration function
  (Parameter.set 'Duration_Method singing_duration_method)
  ;; set phoneme corrections; the language is fixed to english here
  ;; instead of being taken from the current voice via
  ;;   (cadr (assoc 'language (cadr (voice.description current-voice))))
  (set! phoneme_offsets* (cdr (assoc 'english phoneme_offsets)))
  ;; avoid splitting to multiple utterances with insertion of unwanted pauses
  (set! singing_previous_eou_tree eou_tree)
  (set! eou_tree nil)
  ;; use our xml parsing function
  (set! singing_previous_elements xxml_elements)
  (set! xxml_elements singing_xml_elements))
|
| 561 |
+
|
| 562 |
+
;;
|
| 563 |
+
;; singing_exit_func
|
| 564 |
+
;;
|
| 565 |
+
|
| 566 |
+
(define (singing_exit_func)
  "(singing_exit_func) - Exit function for Singing mode"
  ;; Put back the two globals that singing_init_func replaced.
  (set! xxml_elements singing_previous_elements)
  (set! eou_tree singing_previous_eou_tree))
|
| 570 |
+
|
| 571 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 572 |
+
;;
|
| 573 |
+
;; Data tables
|
| 574 |
+
;;
|
| 575 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 576 |
+
|
| 577 |
+
;; Semitone offset of each note name from the C of the same octave number.
;; note->freq adds this to 12 * octave.  The octave number changes at C, so
;; B# is the C of the next octave (12) and Cb is the B of the octave below
;; (-1).
(defvar note_names
  '((C 0)
    (C# 1)
    (Db 1)
    (D 2)
    (D# 3)
    (Eb 3)
    (E 4)
    (E# 5)
    (Fb 4)
    (F 5)
    (F# 6)
    (Gb 6)
    (G 7)
    (G# 8)
    (Ab 8)
    (A 9)
    (A# 10)
    (Bb 10)
    (B 11)
    (B# 12)
    (Cb -1)))
|
| 599 |
+
|
| 600 |
+
;;
|
| 601 |
+
;; The following list contains the offset into each phone that best
|
| 602 |
+
;; represents the perceptual onset of the phone. This is important
|
| 603 |
+
;; to know to get durations right in singing. For example, if the
|
| 604 |
+
;; offset for "t" is .060, and you want to start a "t" sound at
|
| 605 |
+
;; time 2.0 seconds, you should actually start the phone playing
|
| 606 |
+
;; at time 1.940 seconds in order for it to sound like the onset of
|
| 607 |
+
;; the "t" is really right at 2.0.
|
| 608 |
+
;;
|
| 609 |
+
;; These were derived empirically by looking at and listening to the
|
| 610 |
+
;; waveforms of each phone for mwm's voice.
|
| 611 |
+
;;
|
| 612 |
+
|
| 613 |
+
;; Onset offsets in seconds, as an alist keyed by language; each language
;; maps phone names to their offset.  Only english is defined.
(defvar phoneme_offsets
  '((english
     ;; phones with a non-zero offset
     (t 0.050) (T 0.050)
     (d 0.090) (D 0.090)
     (p 0.080) (b 0.080)
     (k 0.090) (g 0.100)
     (9r 0.050) ;; r
     (l 0.030)
     (f 0.050) (v 0.050)
     (s 0.040) (S 0.040)
     (z 0.040) (Z 0.040)
     (n 0.040) (N 0.040)
     (m 0.040)
     (j 0.090)
     ;; phones with a zero offset
     (E 0.0) (> 0.0) (>i 0.0) (aI 0.0)
     (& 0.0) (3r 0.0) (tS 0.0) (oU 0.0)
     (aU 0.0) (A 0.0) (ei 0.0) (iU 0.0)
     (U 0.0) (@ 0.0) (h 0.0) (u 0.0)
     (^ 0.0) (I 0.0) (dZ 0.0) (i: 0.0)
     (w 0.0)
     (pau 0.0) (brth 0.0) (h# 0.0)
     )))

;; Offset table for the active language; set by singing_init_func and read
;; by get_duration_offset.
(defvar phoneme_offsets* nil)
|
| 661 |
+
|
| 662 |
+
;;
|
| 663 |
+
;; Declare the new mode to Festival
|
| 664 |
+
;;
|
| 665 |
+
|
| 666 |
+
;; Put the singing mode at the front of tts_text_modes.
(set! tts_text_modes
      (cons (list 'singing  ;; mode name
                  (list (list 'init_func singing_init_func)
                        (list 'exit_func singing_exit_func)
                        '(analysis_type xml)))
            tts_text_modes))

(provide 'singing-mode)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/siod.scm
ADDED
|
@@ -0,0 +1,638 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 6 |
+
;;; DO NOT EDIT THIS FILE ON PAIN OF MORE PAIN.
|
| 7 |
+
;;;
|
| 8 |
+
;;; The master copy of this file is in ../../speech_tools/lib/siod/siod.scm
|
| 9 |
+
;;; and is copied here at build time.
|
| 10 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
;; SIOD: Scheme In One Defun -*-mode: view-*-
|
| 24 |
+
;;
|
| 25 |
+
;; * COPYRIGHT (c) 1989-1992 BY *
|
| 26 |
+
;; * PARADIGM ASSOCIATES INCORPORATED, CAMBRIDGE, MASSACHUSETTS. *
|
| 27 |
+
;; * See the source file SLIB.C for more information. *
|
| 28 |
+
;;
|
| 29 |
+
;; A fair amount of modifications and tidy up was done by AWB, particularly
|
| 30 |
+
;; * adding documentation strings to all functions/variable
|
| 31 |
+
;; * removing some example functions not relevant to Festival (or siod)
|
| 32 |
+
;; * addition of new functions (require provide etc)
|
| 33 |
+
|
| 34 |
+
;(puts ";; Optional Runtime Library for Release 2.8
|
| 35 |
+
;")
|
| 36 |
+
|
| 37 |
+
(define (list . items)
  "(list A0 A1 ...)
Return a list whose elements are the arguments A0 A1 ... in the order
they were given."
  items)
|
| 42 |
+
|
| 43 |
+
;; Two- and three-level car/cdr compositions, each spelled out as a
;; chain of the primitive car and cdr.
(define caar
  (lambda (x)
    "(caar X)
Return the car of the car of X."
    (car (car x))))

(define cadr
  (lambda (x)
    "(cadr X)
Return the car of the cdr of X."
    (car (cdr x))))

(define cdar
  (lambda (x)
    "(cdar X)
Return the cdr of the car of X."
    (cdr (car x))))

(define cddr
  (lambda (x)
    "(cddr X)
Return the cdr of the cdr of X."
    (cdr (cdr x))))

(define caddr
  (lambda (x)
    "(caddr X)
Return the car of the cdr of the cdr of X."
    (car (cdr (cdr x)))))

(define cdddr
  (lambda (x)
    "(cdddr X)
Return the cdr of the cdr of the cdr of X."
    (cdr (cdr (cdr x)))))
|
| 68 |
+
|
| 69 |
+
(define consp pair?)
|
| 70 |
+
|
| 71 |
+
(define (replace before after)
  "(replace BEFORE AFTER)
Overwrite, in place, the car and cdr of cons cell BEFORE with the car
and cdr of AFTER.  Returns AFTER."
  (let ((new-car (car after))
        (new-cdr (cdr after)))
    (set-car! before new-car)
    (set-cdr! before new-cdr)
    after))
|
| 78 |
+
|
| 79 |
+
(define (prognify forms)
  "(prognify FORMS)
Collapse the list FORMS into a single form.  A list holding exactly one
form yields that form; a longer list yields (begin FORM0 FORM1 ...)."
  (if (cdr forms)
      (cons 'begin forms)
      (car forms)))
|
| 83 |
+
|
| 84 |
+
(define (defmac-macro form)
  "(defmac-macro FORM)
Expander used by defmac.  FORM has the shape
(defmac (NAME ARG) BODY ...).  The result is a begin form that defines
the function NAME-macro, which replaces its argument form in place with
the value of BODY, and then defines NAME as the symbol NAME-macro."
  (let ((header (cadr form)))
    (let ((sname (car header))
          (argl (cdr header))
          (body (prognify (cddr form))))
      (let ((fname (symbolconc sname '-macro)))
        (list 'begin
              (list 'define (cons fname argl)
                    (list 'replace (car argl) body))
              (list 'define sname (list 'quote fname)))))))
|
| 96 |
+
|
| 97 |
+
(define defmac 'defmac-macro)
|
| 98 |
+
|
| 99 |
+
;; (push ITEM PLACE)
;; Macro.  Expands to (set! PLACE (cons ITEM PLACE)), i.e. ITEM is
;; consed onto the front of the list held in the variable PLACE.
(defmac (push form)
|
| 100 |
+
(list 'set! (caddr form)
|
| 101 |
+
(list 'cons (cadr form) (caddr form))))
|
| 102 |
+
|
| 103 |
+
;; (pop PLACE)
;; Macro.  Expands to
;;   (let ((tmp PLACE)) (set! PLACE (cdr tmp)) (car tmp))
;; so PLACE is set to its own cdr and the old car is the value.
;; NOTE(review): the expansion always binds the name tmp, so a PLACE
;; that is itself called tmp would refer to that inner binding.
(defmac (pop form)
|
| 104 |
+
(list 'let (list (list 'tmp (cadr form)))
|
| 105 |
+
(list 'set! (cadr form) '(cdr tmp))
|
| 106 |
+
'(car tmp)))
|
| 107 |
+
|
| 108 |
+
;;; Have to set var-docstrings to nil first as defvar requires it to be set
|
| 109 |
+
(set! var-docstrings nil)
|
| 110 |
+
(define (add-doc-var varname docstring)
  "(add-doc-var VARNAME DOCSTRING)
Record DOCSTRING as the documentation of VARNAME in var-docstrings.
A nil DOCSTRING changes nothing and returns t.  An existing entry for
VARNAME has its string replaced; otherwise a new (VARNAME . DOCSTRING)
pair is put at the front of var-docstrings."
  (if docstring
      (let ((entry (assq varname var-docstrings)))
        (if (null? entry)
            (set! var-docstrings
                  (cons (cons varname docstring) var-docstrings))
            (set-cdr! entry docstring)))
      t))
|
| 122 |
+
|
| 123 |
+
(set! boundp symbol-bound?)
|
| 124 |
+
|
| 125 |
+
;; (defvar NAME VALUE [DOCSTRING])
;; Macro.  When it is expanded, the fourth element of the form (the
;; docstring, possibly nil) is passed to add-doc-var for NAME.  The
;; expansion itself is
;;   (or (symbol-bound? 'NAME) (define NAME VALUE))
;; so VALUE is only evaluated and assigned if NAME is not bound yet.
(defmac (defvar form)
|
| 126 |
+
(begin ;; always add the documentation string
|
| 127 |
+
(add-doc-var (cadr form) (car (cdddr form)))
|
| 128 |
+
(list 'or
|
| 129 |
+
(list 'symbol-bound? (list 'quote (cadr form)))
|
| 130 |
+
(list 'define (cadr form) (caddr form)))))
|
| 131 |
+
|
| 132 |
+
(defvar var-docstrings nil
|
| 133 |
+
"var-docstrings
|
| 134 |
+
An assoc-list of variable names and their documentation strings.")
|
| 135 |
+
|
| 136 |
+
;; (defun NAME ARGS BODY ...)
;; Macro.  Expands to (define (NAME . ARGS) BODY ...): the name and the
;; argument list are consed together into define's header, and the
;; remaining elements of the form become the body.
(defmac (defun form)
|
| 137 |
+
(cons 'define
|
| 138 |
+
(cons (cons (cadr form) (caddr form))
|
| 139 |
+
(cdddr form))))
|
| 140 |
+
|
| 141 |
+
(defmac (setq form)
|
| 142 |
+
(let ((l (cdr form))
|
| 143 |
+
(result nil))
|
| 144 |
+
(define (loop)
|
| 145 |
+
(if l
|
| 146 |
+
(begin (push (list 'set! (car l) (cadr l)) result)
|
| 147 |
+
(set! l (cddr l))
|
| 148 |
+
(loop))))
|
| 149 |
+
(loop)
|
| 150 |
+
(prognify (reverse result))))
|
| 151 |
+
|
| 152 |
+
(define progn begin)
|
| 153 |
+
|
| 154 |
+
(define (atom x)
  "(atom X)
True if X is not a cons cell, nil otherwise."
  (if (consp x) nil t))
|
| 158 |
+
|
| 159 |
+
(define eq eq?)
|
| 160 |
+
|
| 161 |
+
(defmac (cond form)
|
| 162 |
+
(cond-convert (cdr form)))
|
| 163 |
+
|
| 164 |
+
(define null null?)
|
| 165 |
+
|
| 166 |
+
(defun cond-convert (l)
|
| 167 |
+
(if (null l)
|
| 168 |
+
()
|
| 169 |
+
(if (null (cdar l))
|
| 170 |
+
(if (null (cdr l))
|
| 171 |
+
(caar l)
|
| 172 |
+
(let ((rest (cond-convert (cdr l))))
|
| 173 |
+
(if (and (consp rest) (eq (car rest) 'or))
|
| 174 |
+
(cons 'or (cons (caar l) (cdr rest)))
|
| 175 |
+
(list 'or (caar l) rest))))
|
| 176 |
+
(if (or (eq (caar l) 't)
|
| 177 |
+
(and (consp (caar l)) (eq (car (caar l)) 'quote)))
|
| 178 |
+
(prognify (cdar l))
|
| 179 |
+
(list 'if
|
| 180 |
+
(caar l)
|
| 181 |
+
(prognify (cdar l))
|
| 182 |
+
(cond-convert (cdr l)))))))
|
| 183 |
+
|
| 184 |
+
(defmac (+internal-comma form)
|
| 185 |
+
(error 'comma-not-inside-backquote))
|
| 186 |
+
|
| 187 |
+
(define +internal-comma-atsign +internal-comma)
|
| 188 |
+
(define +internal-comma-dot +internal-comma)
|
| 189 |
+
|
| 190 |
+
(defmac (+internal-backquote form)
|
| 191 |
+
(backquotify (cdr form)))
|
| 192 |
+
|
| 193 |
+
(defun backquotify (x)
|
| 194 |
+
"(backquote FORM)
|
| 195 |
+
Backquote function for expanding forms in macros."
|
| 196 |
+
(let (a d aa ad dqp)
|
| 197 |
+
(cond ((atom x) (list 'quote x))
|
| 198 |
+
((eq (car x) '+internal-comma) (cdr x))
|
| 199 |
+
((or (atom (car x))
|
| 200 |
+
(not (or (eq (caar x) '+internal-comma-atsign)
|
| 201 |
+
(eq (caar x) '+internal-comma-dot))))
|
| 202 |
+
(setq a (backquotify (car x)) d (backquotify (cdr x))
|
| 203 |
+
ad (atom d) aa (atom a)
|
| 204 |
+
dqp (and (not ad) (eq (car d) 'quote)))
|
| 205 |
+
(cond ((and dqp (not (atom a)) (eq (car a) 'quote))
|
| 206 |
+
(list 'quote (cons (cadr a) (cadr d))))
|
| 207 |
+
((and dqp (null (cadr d)))
|
| 208 |
+
(list 'list a))
|
| 209 |
+
((and (not ad) (eq (car d) 'list))
|
| 210 |
+
(cons 'list (cons a (cdr d))))
|
| 211 |
+
(t (list 'cons a d))))
|
| 212 |
+
((eq (caar x) '+internal-comma-atsign)
|
| 213 |
+
(list 'append (cdar x) (backquotify (cdr x))))
|
| 214 |
+
((eq (caar x) '+internal-comma-dot)
|
| 215 |
+
(list 'nconc (cdar x)(backquotify (cdr x)))))))
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
(define (append . lists)
  "(append L0 L1 ...)
Return the elements of L0, L1, ... joined, in order, into one list.
With no arguments the result is nil; with one argument it is that
argument itself."
  (appendl lists))

(define (appendl l)
  "(appendl LISTS)
Join the lists held in the list LISTS, working from the right: every
list is put in front of the already joined remainder with append2."
  (if (null? l)
      nil
      (if (null? (cdr l))
          (car l)
          (append2 (car l) (appendl (cdr l))))))

(define (append2 a b)
  "(append2 A B)
Return a list made of the elements of A followed by B.  The cells for
A's elements are newly consed; B itself forms the tail of the result."
  (let ((pending (reverse a))
        (result b))
    ;; push A's elements, last one first, onto the front of B
    (while pending
      (set! result (cons (car pending) result))
      (set! pending (cdr pending)))
    result))
|
| 241 |
+
|
| 242 |
+
(define (rplacd a b)
  "(rplacd A B)
Destructively set the cdr of cons cell A to B.  Returns A."
  (begin
    (set-cdr! a b)
    a))
|
| 247 |
+
|
| 248 |
+
(define (nconc a b)
  "(nconc A B)
Destructively append B to A by making the cdr of A's last cons cell
point at B.  Returns the joined list: A itself, or B when A is nil."
  (if (null? a)
      b
      (let ((tail a))
        ;; walk to the final cons cell of A
        (while (cdr tail)
          (set! tail (cdr tail)))
        (set-cdr! tail b)
        ;; Return the head of the joined list.  Returning the result of
        ;; rplacd on the last cell would drop every element of A except
        ;; the last one.
        a)))
|
| 254 |
+
|
| 255 |
+
(define (last a)
  "(last A)
Return the last cons cell of list A (the cell, not the element it
holds).  A nil argument raises the error null-arg-to-last."
  (if (null? a)
      (error 'null-arg-to-last)
      (let ((cell a))
        (while (cdr cell)
          (set! cell (cdr cell)))
        cell)))
|
| 261 |
+
|
| 262 |
+
(define (remove i l)
  "(remove ITEM LIST)
Return LIST without the first element that is eq? to ITEM.  LIST is not
modified: the elements before the match are copied and the part after
it is shared with LIST.  Only the first match is removed."
  (if (null? l)
      nil
      (if (eq? i (car l))
          (cdr l)
          (cons (car l) (remove i (cdr l))))))
|
| 271 |
+
|
| 272 |
+
(define (remove-duplicates l)
  "(remove-duplicates LIST)
Return LIST with repeated items dropped.  Items are compared through
member_string; when an item occurs more than once only its last
occurrence is kept."
  (if (null? l)
      l
      (if (member_string (car l) (cdr l))
          (remove-duplicates (cdr l))
          (cons (car l) (remove-duplicates (cdr l))))))
|
| 281 |
+
|
| 282 |
+
(define (nth n l)
  "(nth N LIST)
Return element N of LIST, counting from 0.  Any N below 1 gives the
car of LIST."
  (let ((steps n)
        (cell l))
    (while (not (< steps 1))
      (set! steps (- steps 1))
      (set! cell (cdr cell)))
    (car cell)))
|
| 288 |
+
|
| 289 |
+
(define (position thing l)
  "(position THING LIST)
Return the zero-based index of the first element of LIST that is
equal? to THING, or nil if LIST has no such element."
  (let ((index 0)
        (rest l)
        (found nil))
    (while (and rest (not found))
      (if (equal? thing (car rest))
          (set! found t)
          (begin
            (set! index (+ 1 index))
            (set! rest (cdr rest)))))
    (if found index nil)))
|
| 297 |
+
|
| 298 |
+
(define (nth_cdr n l)
  "(nth_cdr N LIST)
Return LIST after taking its cdr N times.  Any N below 1 gives LIST
itself."
  (let ((steps n)
        (cell l))
    (while (not (< steps 1))
      (set! steps (- steps 1))
      (set! cell (cdr cell)))
    cell))
|
| 304 |
+
|
| 305 |
+
(define (<= a b)
  "(<= NUM1 NUM2)
Returns t if NUM1 is less than or equal to NUM2, nil otherwise.  An
error is given if either argument is not a number."
  (let ((below (< a b)))
    (if below
        below
        (equal? a b))))
|
| 311 |
+
|
| 312 |
+
(define (>= a b)
  "(>= NUM1 NUM2)
Returns t if NUM1 is greater than or equal to NUM2, nil otherwise.  An
error is given if either argument is not a number."
  (let ((above (> a b)))
    (if above
        above
        (equal? a b))))
|
| 318 |
+
|
| 319 |
+
(define (approx-equal? a b diff)
  "(approx-equal? A B DIFF)
True if the absolute difference between A and B is less than DIFF.
This allows equality tests between floats which may have been written
out and read back in and hence differ slightly in precision."
  (let ((gap (- a b)))
    (< (if (< gap 0) (- 0 gap) gap)
       diff)))
|
| 325 |
+
|
| 326 |
+
(define (assoc_string key alist)
  "(assoc_string KEY ALIST)
Return the first entry of ALIST whose car matches KEY under
string-equal, or nil if no entry matches.  This allows indexing by
string rather than just by symbol."
  (let ((rest alist)
        (found nil))
    (while (and rest (not found))
      (if (string-equal key (car (car rest)))
          (set! found t)
          (set! rest (cdr rest))))
    (if found (car rest) nil)))
|
| 335 |
+
|
| 336 |
+
(defvar *fasdump-hash* t)
|
| 337 |
+
|
| 338 |
+
(defun fasl-open (filename mode)
|
| 339 |
+
"(fasl-open FILENAME MODE)
|
| 340 |
+
Open fasl FILENAME as MODE. Returns a fasl-table."
|
| 341 |
+
(list (fopen filename mode)
|
| 342 |
+
(if (or (equal? mode "rb") *fasdump-hash*)
|
| 343 |
+
(cons-array 100))
|
| 344 |
+
;; If this is set NIL, then already hashed symbols will be
|
| 345 |
+
;; optimized, and additional ones will not.
|
| 346 |
+
0))
|
| 347 |
+
|
| 348 |
+
(defun fasl-close (table)
|
| 349 |
+
"(fasl-close TABLE)
|
| 350 |
+
Close fasl table."
|
| 351 |
+
(fclose (car table)))
|
| 352 |
+
|
| 353 |
+
(defun fasload args
|
| 354 |
+
"(fasload FILENAME ARGS)
|
| 355 |
+
Fast load FILENAME."
|
| 356 |
+
(let ((filename (car args))
|
| 357 |
+
(head (and (cadr args) (cons nil nil))))
|
| 358 |
+
(let ((table (fasl-open filename "rb"))
|
| 359 |
+
(exp)
|
| 360 |
+
(tail head))
|
| 361 |
+
(while (not (eq table (setq exp (fast-read table))))
|
| 362 |
+
(cond (head
|
| 363 |
+
(setq exp (cons exp nil))
|
| 364 |
+
(set-cdr! tail exp)
|
| 365 |
+
(setq tail exp))
|
| 366 |
+
('else
|
| 367 |
+
(eval exp))))
|
| 368 |
+
(fasl-close table)
|
| 369 |
+
(and head (cdr head)))))
|
| 370 |
+
|
| 371 |
+
(defun fasdump (filename forms)
|
| 372 |
+
"(fasdump FILENAME FORMS)
|
| 373 |
+
Fast dump FORMS into FILENAME."
|
| 374 |
+
(let ((table (fasl-open filename "wb"))
|
| 375 |
+
(l forms))
|
| 376 |
+
(while l
|
| 377 |
+
(fast-print (car l) table)
|
| 378 |
+
(set! l (cdr l)))
|
| 379 |
+
(fasl-close table)))
|
| 380 |
+
|
| 381 |
+
(defun compile-file (filename)
|
| 382 |
+
"(compile-file FILENAME)
|
| 383 |
+
Compile lisp forms in FILENAME.scm to FILENAME.bin."
|
| 384 |
+
(let ((forms (load (string-append filename ".scm") t)))
|
| 385 |
+
(puts "Saving forms
|
| 386 |
+
")
|
| 387 |
+
(fasdump (string-append filename ".bin")
|
| 388 |
+
forms)))
|
| 389 |
+
|
| 390 |
+
(defvar *properties* (cons-array 100)
|
| 391 |
+
"*properties*
|
| 392 |
+
Array for holding symbol property lists.")
|
| 393 |
+
|
| 394 |
+
(defun get (sym key)
|
| 395 |
+
"(get SYM KEY)
|
| 396 |
+
Get property named KEY for SYM."
|
| 397 |
+
(cdr (assq key (href *properties* sym))))
|
| 398 |
+
|
| 399 |
+
(defun putprop (sym val key)
|
| 400 |
+
"(putprop SYM VAL KEY)
|
| 401 |
+
Put property VAL named KEY for SYM."
|
| 402 |
+
(let ((alist (href *properties* sym)))
|
| 403 |
+
(let ((cell (assq key alist)))
|
| 404 |
+
(cond (cell
|
| 405 |
+
(set-cdr! cell val))
|
| 406 |
+
('else
|
| 407 |
+
(hset *properties* sym (cons (cons key val) alist))
|
| 408 |
+
val)))))
|
| 409 |
+
|
| 410 |
+
;;(define (mapcar1 f l1)
|
| 411 |
+
;; (and l1 (cons (f (car l1)) (mapcar1 f (cdr l1)))))
|
| 412 |
+
|
| 413 |
+
;; An iterative version of the above
|
| 414 |
+
(define (mapcar1 f l1)
  "(mapcar1 F LIST)
Call F on each element of LIST, front to back, and return the list of
results in the same order.  Written as a loop rather than recursively."
  (let ((rest l1)
        (collected nil))
    ;; results pile up in reverse order and are flipped at the end
    (while rest
      (set! collected (cons (f (car rest)) collected))
      (set! rest (cdr rest)))
    (reverse collected)))
|
| 420 |
+
|
| 421 |
+
;;(define (mapcar2 f l1 l2)
|
| 422 |
+
;; (and l1 l2 (cons (f (car l1) (car l2)) (mapcar2 f (cdr l1) (cdr l2)))))
|
| 423 |
+
|
| 424 |
+
;; An iterative version
|
| 425 |
+
(define (mapcar2 f l1 l2)
  "(mapcar2 F LIST1 LIST2)
Call F on corresponding elements of LIST1 and LIST2, front to back, and
return the list of results in the same order.  Mapping stops as soon as
either list runs out, matching the recursive definition kept in the
comment above."
  (let ((a1 l1)
        (a2 l2)
        (r nil))
    ;; Testing both lists keeps F from being called with an argument
    ;; taken from beyond the end of a shorter LIST2.
    (while (and a1 a2)
      (set! r (cons (f (car a1) (car a2)) r))
      (set! a1 (cdr a1))
      (set! a2 (cdr a2)))
    (reverse r)))
|
| 432 |
+
|
| 433 |
+
(define (mapcar . args)
  "(mapcar FUNCTION ARGS [ARGS2])
Apply FUNCTION to each member of ARGS (and [ARGS2]), returning list of
return values.  One list is handled by mapcar1, two lists by mapcar2.
Giving no list raises the error \"too few arguments\"; giving more than
two lists raises \"too many arguments\"."
  (if (or (null? args) (null? (cdr args)))
      (error "too few arguments")
      (let ((f (car args))
            (lists (cdr args)))
        (if (null? (cdr lists))
            (mapcar1 f (car lists))
            (if (null? (cdr (cdr lists)))
                (mapcar2 f (car lists) (car (cdr lists)))
                (error "too many arguments"))))))
|
| 447 |
+
|
| 448 |
+
;; will be set automatically on start-up
|
| 449 |
+
(defvar libdir '<automatically_set>
|
| 450 |
+
"libdir
|
| 451 |
+
The pathname of the run-time library directory. Note resetting is
|
| 452 |
+
almost definitely not what you want to do. This value is automatically
|
| 453 |
+
set at start up from the value specified at compile-time or the value
|
| 454 |
+
specified with --libdir on the command line. A number of other variables
|
| 455 |
+
depend on this value.")
|
| 456 |
+
|
| 457 |
+
(defvar load-path (list libdir)
|
| 458 |
+
"load-path
|
| 459 |
+
A list of directories containing .scm files. Used for various functions
|
| 460 |
+
such as load_library and require. Follows the same use as EMACS. By
|
| 461 |
+
default it is set up to the compile-time library directory but may be
|
| 462 |
+
changed by the user at run time, by adding a user's own library directory
|
| 463 |
+
or even replacing all of the standard library. [see Site initialization]")
|
| 464 |
+
|
| 465 |
+
;; will be set automatically on start-up
|
| 466 |
+
(defvar *ostype* 'unknown
|
| 467 |
+
"*ostype*
|
| 468 |
+
Contains the name of the operating system type that Festival is running
|
| 469 |
+
on, e.g. SunOS5, FreeBSD, linux etc. The value is taken from the Makefile
|
| 470 |
+
variable OSTYPE at compile time.")
|
| 471 |
+
|
| 472 |
+
(defvar etc-path (path-append libdir "etc/" *ostype*)
|
| 473 |
+
"etc-path
|
| 474 |
+
A list of directories where binaries specific to Festival may be located.
|
| 475 |
+
This variable is automatically set to LIBDIR/etc/OSTYPE/
|
| 476 |
+
and that path is added to the end of the UNIX PATH environment variable.")
|
| 477 |
+
|
| 478 |
+
(define (library_expand_filename fname)
  "(library_expand_filename FILENAME)
Look for FILENAME in each directory of load-path, in order, by
appending it to the directory with path-append and checking the result
with probe_file.  The first pathname found is returned; if none is
found FILENAME is returned unchanged."
  (let ((dirs load-path)
        (found nil))
    (while (and dirs (null? found))
      (let ((candidate (path-append (car dirs) fname)))
        (if (probe_file candidate)
            (set! found candidate)))
      (set! dirs (cdr dirs)))
    (if found found fname)))
|
| 493 |
+
|
| 494 |
+
(define (load_library fname)
|
| 495 |
+
"(load_library FILENAME)
|
| 496 |
+
Load file from library, appends FILENAME to each path in load-path
|
| 497 |
+
until a valid file is found. If none found loads name itself"
|
| 498 |
+
(load (library_expand_filename fname)))
|
| 499 |
+
|
| 500 |
+
(define (fasload_library fname)
|
| 501 |
+
"(fasload_library FILENAME)
|
| 502 |
+
Load binary file from library"
|
| 503 |
+
(fasload (library_expand_filename fname)))
|
| 504 |
+
|
| 505 |
+
(define (member item list)
  "(member ITEM LIST)
Return the tail of LIST that starts at the first element equal? to
ITEM, or nil if there is no such element."
  (let ((rest list))
    (while (and (consp rest)
                (not (equal? item (car rest))))
      (set! rest (cdr rest)))
    (if (consp rest) rest nil)))
|
| 513 |
+
|
| 514 |
+
(define (member_string item list)
  "(member_string STRING LIST)
Return the tail of LIST that starts at the first element matching
STRING under string-equal, or nil if there is no such element."
  (let ((rest list))
    (while (and (consp rest)
                (not (string-equal item (car rest))))
      (set! rest (cdr rest)))
    (if (consp rest) rest nil)))
|
| 522 |
+
|
| 523 |
+
(defvar provided nil
|
| 524 |
+
"provided
|
| 525 |
+
List of file names (omitting .scm) that have been provided. This list
|
| 526 |
+
is checked by the require function to find out if a file needs to be
|
| 527 |
+
loaded. If that file is already in this list it is not loaded. Typically
|
| 528 |
+
a file will have (provide 'MYNAME) at its end so that a call to
|
| 529 |
+
(require 'MYNAME) will only load MYNAME.scm once.")
|
| 530 |
+
|
| 531 |
+
(define (require fname)
  "(require FILENAME)
Load FILENAME, with \".scm\" appended, through load_library unless the
symbol interned from (basename FILENAME) is already a member of the
variable provided.  Returns t when the file was loaded and nil when it
had already been provided."
  (if (member (intern (basename fname)) provided)
      nil
      (begin
        ;; Compiled files aren't faster, so the .bin file is not tried:
        ;; (fasload_library (string-append fname ".bin"))
        (load_library (string-append fname ".scm"))
        't)))
|
| 544 |
+
|
| 545 |
+
(define (request fname)
|
| 546 |
+
"(request FILENAME)
|
| 547 |
+
Checks if FNAME is already provided (member of variable provided) if not
|
| 548 |
+
tries to loads it, appending \".scm\" to FILENAME. Uses load_library
|
| 549 |
+
to find the file. Unlike require, if FILENAME isn't found no error occurs."
|
| 550 |
+
(unwind-protect (require fname)))
|
| 551 |
+
|
| 552 |
+
(define (provide fname)
  "(provide FILENAME)
Add FILENAME to the front of the variable provided unless it is already
a member.  Later calls to (require FILENAME) will then not load
FILENAME again."
  (if (member fname provided)
      nil
      (set! provided (cons fname provided))))
|
| 559 |
+
|
| 560 |
+
(define (apply_hooks hooks obj)
  "(apply_hooks HOOK OBJ)
Apply HOOK(s) to OBJ.  HOOK is either one function of one argument or a
list of such functions.  With a list, the functions are called in order,
each one receiving the value returned by the one before it, and the
final value is returned.  A nil HOOK returns OBJ unchanged."
  (let ((pending hooks)
        (value obj))
    (while (consp pending)
      (set! value ((car pending) value))
      (set! pending (cdr pending)))
    ;; anything left over that is not nil is a single function
    (if (null? pending)
        value
        (pending value))))
|
| 569 |
+
|
| 570 |
+
(define (apply func args)
|
| 571 |
+
"(apply FUNC ARGS)
|
| 572 |
+
Call FUNC with ARGS as arguments."
|
| 573 |
+
(eval
|
| 574 |
+
(cons func
|
| 575 |
+
(mapcar (lambda (a) (list 'quote a)) args))))
|
| 576 |
+
|
| 577 |
+
(defmac (autoload form)
|
| 578 |
+
"(autoload FUNCTION FILENAME DOCSTRING)
|
| 579 |
+
Define FUNCTION that when called automatically loads FILENAME
|
| 580 |
+
and calls FUNCTION (assumed to be defined in FILENAME)."
|
| 581 |
+
(list 'define
|
| 582 |
+
(cadr form)
|
| 583 |
+
(list
|
| 584 |
+
'lambda
|
| 585 |
+
'n
|
| 586 |
+
(list 'let (list (list 'me (cadr form)))
|
| 587 |
+
(list 'require (car (cdr (cdr form))))
|
| 588 |
+
(list 'if (list 'eq 'me (cadr form))
|
| 589 |
+
(list 'error
|
| 590 |
+
(list 'string-append
|
| 591 |
+
"autoload: \""
|
| 592 |
+
(car (cdr (cdr form)))
|
| 593 |
+
".scm\" does not define "
|
| 594 |
+
(list 'quote (cadr form)))))
|
| 595 |
+
|
| 596 |
+
(list 'apply (cadr form) 'n)))))
|
| 597 |
+
|
| 598 |
+
(define (:backtrace frame)
|
| 599 |
+
"(:backtrace [FRAME])
|
| 600 |
+
This function called *immediately* after an error will display a backtrace
|
| 601 |
+
of the functions evaluated before the error. With no arguments it
|
| 602 |
+
lists all stack frames, with the (possibly shortened) forms that were
|
| 603 |
+
evaluated at that level. With a numeric argument it displays
|
| 604 |
+
the form at that level in full. This function only works at
|
| 605 |
+
top level in the read-eval-print loop (command interpreter). Note
|
| 606 |
+
that any valid command will leave the backtrace stack empty. Also
|
| 607 |
+
note that backtrace itself does not reset the backtrace, unless you
|
| 608 |
+
make an error in calling it."
|
| 609 |
+
|
| 610 |
+
"The function is interpreted specially by the read-eval-interpreter
|
| 611 |
+
and hence has no body, its actual body is defined in
|
| 612 |
+
src/arch/siod-3.0/slib.cc."
|
| 613 |
+
)
|
| 614 |
+
|
| 615 |
+
(defvar hush_startup nil
|
| 616 |
+
"hush_startup
|
| 617 |
+
If set to non-nil, the copyright banner is not displayed at start up.")
|
| 618 |
+
|
| 619 |
+
(defvar editline_histsize 256
|
| 620 |
+
"editline_histsize
|
| 621 |
+
The number of lines to be saved in the users history file when a
|
| 622 |
+
Festival session ends. The histfile is \".festival_history\" in the
|
| 623 |
+
users home directory. Note this value is only checked when the
|
| 624 |
+
command interpreter is started, hence this should be set in a user's
|
| 625 |
+
\".festivalrc\" or system init file. Reseting it at the command
|
| 626 |
+
interpreter will have no effect.")
|
| 627 |
+
|
| 628 |
+
(defvar editline_no_echo (getenv "EMACS")
|
| 629 |
+
"editline_no_echo
|
| 630 |
+
When running under Emacs as an inferior process, we don't want to
|
| 631 |
+
echo the content of the line, only the prompt.")
|
| 632 |
+
|
| 633 |
+
(defvar ! nil
|
| 634 |
+
"!
|
| 635 |
+
In interactive mode, this variable's value is the return value of the
|
| 636 |
+
previously evaluated expression.")
|
| 637 |
+
|
| 638 |
+
(provide 'siod)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/siteinit.scm
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Site specific initialisation file
|
| 35 |
+
;;;
|
| 36 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 37 |
+
|
| 38 |
+
;; If festival's internal audio playing support doesn't work on your
|
| 39 |
+
;; machine you can make Festival use your own program to play waveform
|
| 40 |
+
;; files. Uncomment the following and change "play" to the name of
|
| 41 |
+
;; your local program that can play files
|
| 42 |
+
|
| 43 |
+
;(Parameter.set 'Audio_Required_Format 'riff)
|
| 44 |
+
;(Parameter.set 'Audio_Method 'Audio_Command)
|
| 45 |
+
|
| 46 |
+
;; Apple OSX (if you can find afplay)
|
| 47 |
+
;(Parameter.set 'Audio_Command "afplay $FILE")
|
| 48 |
+
|
| 49 |
+
;; SOX (play) often a good alternative on cygwin and linux
|
| 50 |
+
;(Parameter.set 'Audio_Command "play -q $FILE")
|
| 51 |
+
|
| 52 |
+
;; Windows 7 (when sox's play doesn't work -- but this might not exit)
|
| 53 |
+
;(Parameter.set 'Audio_Command "c:/Windows/System32/WindowsPowerShell/v1.0/powershell -c '(New-Object Media.Soundplayer C:/cygwin'$FILE').PlaySync(); Exit;'")
|
| 54 |
+
|
| 55 |
+
;; If you want a voice different from the system installed default
|
| 56 |
+
;; uncomment the following line and change the name to the voice you
|
| 57 |
+
;; want
|
| 58 |
+
|
| 59 |
+
;(set! voice_default 'voice_cmu_us_awb_arctic_hts)
|
| 60 |
+
|
| 61 |
+
(provide 'siteinit)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/soleml-mode.scm
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1998 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Support for an SGML based mark-up language used in the SOLE
|
| 35 |
+
;;; project. This is all still experimental.
|
| 36 |
+
;;;
|
| 37 |
+
;;; This currently treats one file as one utterance (to make dealing with
|
| 38 |
+
;;; the SOLE museum database easy)
|
| 39 |
+
|
| 40 |
+
(set! soleml_word_features_stack nil)
|
| 41 |
+
(defvar sole_current_node nil)
|
| 42 |
+
|
| 43 |
+
(define (soleml_token_to_words utt token name)
|
| 44 |
+
"(soleml_token_to_words utt token name)
|
| 45 |
+
SOLEML mode token specific analysis."
|
| 46 |
+
(cond
|
| 47 |
+
|
| 48 |
+
(t
|
| 49 |
+
(soleml_previous_token_to_words utt token name))))
|
| 50 |
+
|
| 51 |
+
(define (voice_soleml)
|
| 52 |
+
"(voice_soleml)
|
| 53 |
+
Speaker specific initialisation for SOLE museum data."
|
| 54 |
+
(voice_rab_diphone)
|
| 55 |
+
;; Utterances only come at end of file
|
| 56 |
+
(set! eou_tree '((0)))
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
(defvar soleml_elements
|
| 60 |
+
'(
|
| 61 |
+
("(SOLEML" (ATTLIST UTT)
|
| 62 |
+
;; required to identify type
|
| 63 |
+
(voice_soleml) ;; so we know what state we start in
|
| 64 |
+
(set! soleml_utt (Utterance Tokens nil))
|
| 65 |
+
(utt.stream.create soleml_utt 'Token)
|
| 66 |
+
(utt.relation.create soleml_utt 'SOLEML)
|
| 67 |
+
(set! sole_current_node
|
| 68 |
+
(utt.relation_append soleml_utt 'SOLEML (cons "sole-ml" ATTLIST)))
|
| 69 |
+
soleml_utt
|
| 70 |
+
)
|
| 71 |
+
(")SOLEML" (ATTLIST UTT)
|
| 72 |
+
;; required to identify end token
|
| 73 |
+
;; Don't really want to synthesize this
|
| 74 |
+
;; (xxml_synth UTT) ;; Synthesis the remaining tokens
|
| 75 |
+
(set! soleml_utt UTT)
|
| 76 |
+
UTT
|
| 77 |
+
)
|
| 78 |
+
;; Utterance break elements
|
| 79 |
+
("(LANGUAGE" (ATTLIST UTT)
|
| 80 |
+
;; Select a new language
|
| 81 |
+
(select_language (car (xxml_attval "NAME" ATTLIST)))
|
| 82 |
+
UTT)
|
| 83 |
+
("(VOICE" (ATTLIST UTT)
|
| 84 |
+
;;(xxml_synth UTT)
|
| 85 |
+
;; Select a new voice
|
| 86 |
+
(cond
|
| 87 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male1)
|
| 88 |
+
(voice_soleml_diphone))
|
| 89 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male2)
|
| 90 |
+
(voice_soleml_diphone))
|
| 91 |
+
((equal? (car (xxml_attval "NAME" ATTLIST)) 'male3)
|
| 92 |
+
(voice_soleml_diphone))
|
| 93 |
+
(t
|
| 94 |
+
(print "SOLEML: selecting unknown voice")
|
| 95 |
+
(voice_soleml_diphone)))
|
| 96 |
+
UTT)
|
| 97 |
+
;; phrase-boundary // mark on token (??)
|
| 98 |
+
;; punct-elem // mark on token
|
| 99 |
+
;; sem-elem
|
| 100 |
+
;; text-elem // ignore
|
| 101 |
+
;; rhet-elem has nucleus and satellite
|
| 102 |
+
;; anaphora-elem
|
| 103 |
+
;; syn-elem
|
| 104 |
+
;; info-struct-elem
|
| 105 |
+
;; other-elem
|
| 106 |
+
("(PUNCT-ELEM" (ATTLIST UTT)
|
| 107 |
+
(soleml_push_word_features)
|
| 108 |
+
(set! xxml_word_features
|
| 109 |
+
(cons (list "punct-elem" "1")
|
| 110 |
+
(soleml_conv_attlist ATTLIST)))
|
| 111 |
+
UTT)
|
| 112 |
+
(")PUNCT-ELEM" (ATTLIST UTT)
|
| 113 |
+
(set! xxml_word_features (soleml_pop_word_features))
|
| 114 |
+
UTT)
|
| 115 |
+
("(PHRASE-BOUNDARY" (ATTLIST UTT)
|
| 116 |
+
(if (string-equal "4" (car (xxml_attval "STRENGTH" ATTLIST)))
|
| 117 |
+
(begin
|
| 118 |
+
;; (xxml_synth UTT)
|
| 119 |
+
UTT)
|
| 120 |
+
(let ((last_token (car (last (utt.stream UTT 'Token)))))
|
| 121 |
+
(if last_token
|
| 122 |
+
(item.set_feat last_token "pbreak" "B"))
|
| 123 |
+
UTT)))
|
| 124 |
+
;; For each recursive element simply build a new node
|
| 125 |
+
("(RHET-ELEM" (ATTLIST UTT)
|
| 126 |
+
(let ((sdesc (list 'rhet-elem (soleml_conv_attlist ATTLIST))))
|
| 127 |
+
(set! sole_current_node
|
| 128 |
+
(node.append_daughter sole_current_node sdesc))
|
| 129 |
+
UTT))
|
| 130 |
+
(")RHET-ELEM" (ATTLIST UTT)
|
| 131 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 132 |
+
UTT)
|
| 133 |
+
("(RHET-EMPH" (ATTLIST UTT)
|
| 134 |
+
(let ((sdesc (list 'rhet-emph (soleml_conv_attlist ATTLIST))))
|
| 135 |
+
(set! sole_current_node
|
| 136 |
+
(node.append_daughter sole_current_node sdesc))
|
| 137 |
+
UTT))
|
| 138 |
+
(")RHET-EMPH" (ATTLIST UTT)
|
| 139 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 140 |
+
UTT)
|
| 141 |
+
("(ANAPHORA-ELEM" (ATTLIST UTT)
|
| 142 |
+
(let ((sdesc (list 'anaphora-elem (soleml_conv_attlist ATTLIST))))
|
| 143 |
+
(set! sole_current_node
|
| 144 |
+
(node.append_daughter sole_current_node sdesc))
|
| 145 |
+
UTT))
|
| 146 |
+
(")ANAPHORA-ELEM" (ATTLIST UTT)
|
| 147 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 148 |
+
UTT)
|
| 149 |
+
("(SYN-ELEM" (ATTLIST UTT)
|
| 150 |
+
(let ((sdesc (list 'syn-elem (soleml_conv_attlist ATTLIST))))
|
| 151 |
+
(set! sole_current_node
|
| 152 |
+
(node.append_daughter sole_current_node sdesc))
|
| 153 |
+
UTT))
|
| 154 |
+
(")SYN-ELEM" (ATTLIST UTT)
|
| 155 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 156 |
+
UTT)
|
| 157 |
+
("(CONNECTIVE" (ATTLIST UTT)
|
| 158 |
+
(let ((sdesc (list 'connective (soleml_conv_attlist ATTLIST))))
|
| 159 |
+
(set! sole_current_node
|
| 160 |
+
(node.append_daughter sole_current_node sdesc))
|
| 161 |
+
UTT))
|
| 162 |
+
(")CONNECTIVE" (ATTLIST UTT)
|
| 163 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 164 |
+
UTT)
|
| 165 |
+
("(TEXT-ELEM" (ATTLIST UTT)
|
| 166 |
+
(let ((sdesc (list 'text-elem (soleml_conv_attlist ATTLIST))))
|
| 167 |
+
(set! sole_current_node
|
| 168 |
+
(node.append_daughter sole_current_node sdesc))
|
| 169 |
+
UTT))
|
| 170 |
+
(")TEXT-ELEM" (ATTLIST UTT)
|
| 171 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 172 |
+
UTT)
|
| 173 |
+
("(SEM-ELEM" (ATTLIST UTT)
|
| 174 |
+
(let ((sdesc (list 'sem-elem (soleml_conv_attlist ATTLIST))))
|
| 175 |
+
(set! sole_current_node
|
| 176 |
+
(node.append_daughter sole_current_node sdesc))
|
| 177 |
+
UTT))
|
| 178 |
+
(")SEM-ELEM" (ATTLIST UTT)
|
| 179 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 180 |
+
UTT)
|
| 181 |
+
("(INFO-STRUCT-ELEM" (ATTLIST UTT)
|
| 182 |
+
(let ((sdesc (list 'info-struct-elem (soleml_conv_attlist ATTLIST))))
|
| 183 |
+
(set! sole_current_node
|
| 184 |
+
(node.append_daughter sole_current_node sdesc))
|
| 185 |
+
UTT))
|
| 186 |
+
(")INFO-STRUCT-ELEM" (ATTLIST UTT)
|
| 187 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 188 |
+
UTT)
|
| 189 |
+
("(OTHER-ELEM" (ATTLIST UTT)
|
| 190 |
+
(let ((sdesc (list 'other-elem (soleml_conv_attlist ATTLIST))))
|
| 191 |
+
(set! sole_current_node
|
| 192 |
+
(node.append_daughter sole_current_node sdesc))
|
| 193 |
+
UTT))
|
| 194 |
+
(")OTHER-ELEM" (ATTLIST UTT)
|
| 195 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 196 |
+
UTT)
|
| 197 |
+
("(NUCLEUS" (ATTLIST UTT)
|
| 198 |
+
(let ((sdesc (list 'nucleus (soleml_conv_attlist ATTLIST))))
|
| 199 |
+
(set! sole_current_node
|
| 200 |
+
(node.append_daughter sole_current_node sdesc))
|
| 201 |
+
UTT))
|
| 202 |
+
(")NUCLEUS" (ATTLIST UTT)
|
| 203 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 204 |
+
UTT)
|
| 205 |
+
("(SATELLITE" (ATTLIST UTT)
|
| 206 |
+
(let ((sdesc (list 'satellite (soleml_conv_attlist ATTLIST))))
|
| 207 |
+
(set! sole_current_node
|
| 208 |
+
(node.append_daughter sole_current_node sdesc))
|
| 209 |
+
UTT))
|
| 210 |
+
(")SATELLITE" (ATTLIST UTT)
|
| 211 |
+
(set! sole_current_node (node.parent sole_current_node))
|
| 212 |
+
UTT)
|
| 213 |
+
;; Other control functions (probably not used in SOLE)
|
| 214 |
+
("(CALL" (ATTLIST UTT)
|
| 215 |
+
;; (xxml_synth UTT)
|
| 216 |
+
(if (string-matches (car (xxml_attval "ENGID" ATTLIST)) "festival.*")
|
| 217 |
+
(let ((comstr ""))
|
| 218 |
+
(mapcar
|
| 219 |
+
(lambda (c) (set! comstr (string-append comstr " " c)))
|
| 220 |
+
(xxml_attval "COMMAND" ATTLIST))
|
| 221 |
+
(eval (read-from-string comstr))))
|
| 222 |
+
UTT)
|
| 223 |
+
("(DEFINE" (ATTLIST UTT)
|
| 224 |
+
;; (xxml_synth UTT)
|
| 225 |
+
(if (not (string-equal "NATIVE" (car (xxml_attval "SCHEME" ATTLIST))))
|
| 226 |
+
(format t "DEFINE: unsupported SCHEME %s, definition ignored\n"
|
| 227 |
+
(car (xxml_attval "SCHEME" ATTLIST)))
|
| 228 |
+
(lex.add.entry
|
| 229 |
+
(list
|
| 230 |
+
(car (xxml_attval "WORDS" ATTLIST)) ;; head form
|
| 231 |
+
nil ;; pos
|
| 232 |
+
(lex.syllabify.phstress (xxml_attval "PRONS" ATTLIST)))))
|
| 233 |
+
UTT)
|
| 234 |
+
("(SOUND" (ATTLIST UTT)
|
| 235 |
+
;; (xxml_synth UTT)
|
| 236 |
+
(if (not soleml_omitted_mode)
|
| 237 |
+
(apply_hooks tts_hooks
|
| 238 |
+
(eval (list 'Utterance 'Wave
|
| 239 |
+
(car (xxml_attval "SRC" ATTLIST))))))
|
| 240 |
+
UTT)
|
| 241 |
+
("(EMPH" (ATTLIST UTT)
|
| 242 |
+
;; Festival is particularly bad at adding specific emphasis
|
| 243 |
+
;; that's what happens when you use statistical methods that
|
| 244 |
+
;; don't include any notion of emphasis
|
| 245 |
+
;; This is *not* recursive
|
| 246 |
+
(soleml_push_word_features)
|
| 247 |
+
(set! xxml_word_features
|
| 248 |
+
(cons (list "EMPH" "1") xxml_word_features))
|
| 249 |
+
UTT)
|
| 250 |
+
(")EMPH" (ATTLIST UTT)
|
| 251 |
+
(set! xxml_word_features (soleml_pop_word_features))
|
| 252 |
+
UTT)
|
| 253 |
+
("(WORD" (ATTLIST UTT)
|
| 254 |
+
;; a word in-line
|
| 255 |
+
(let ((name (xxml_attval "NAME" ATTLIST))
|
| 256 |
+
(pos (xxml_attval "POS" ATTLIST))
|
| 257 |
+
(accent (xxml_attval "ACCENT" ATTLIST))
|
| 258 |
+
(tone (xxml_attval "TONE" ATTLIST))
|
| 259 |
+
(phonemes (xxml_attval "PHONEMES" ATTLIST))
|
| 260 |
+
token)
|
| 261 |
+
(utt.item.insert UTT 'Token) ;; add new Token
|
| 262 |
+
(set! token (utt.stream.tail UTT 'Token))
|
| 263 |
+
(item.set_name token (car name))
|
| 264 |
+
(if pos (item.set_feat token "pos" (car pos)))
|
| 265 |
+
(if accent (item.set_feat token "accent" (car accent)))
|
| 266 |
+
(if tone (item.set_feat token "tone" (car tone)))
|
| 267 |
+
(if phonemes (item.set_feat token "phonemes"
|
| 268 |
+
(format nil "%l" phonemes)))
|
| 269 |
+
UTT))
|
| 270 |
+
))
|
| 271 |
+
|
| 272 |
+
(define (soleml_init_func)
|
| 273 |
+
"(soleml_init_func)
|
| 274 |
+
Initialisation for SOLEML mode: selects the SOLE voice, installs soleml_elements, soleml_token_function and soleml_token_to_words, saving the previous xxml_elements and english_token_to_words so soleml_exit_func can restore them."
|
| 275 |
+
(voice_soleml)
|
| 276 |
+
(set! soleml_previous_elements xxml_elements)
|
| 277 |
+
(set! xxml_elements soleml_elements)
|
| 278 |
+
(set! xxml_token_hooks soleml_token_function)
|
| 279 |
+
(set! soleml_previous_token_to_words english_token_to_words)
|
| 280 |
+
(set! english_token_to_words soleml_token_to_words)
|
| 281 |
+
(set! token_to_words soleml_token_to_words))
|
| 282 |
+
|
| 283 |
+
(define (soleml_exit_func)
|
| 284 |
+
"(soleml_exit_func)
|
| 285 |
+
Exit function for SOLEML mode: restores xxml_elements from soleml_previous_elements and sets both token_to_words and english_token_to_words back to soleml_previous_token_to_words."
|
| 286 |
+
(set! xxml_elements soleml_previous_elements)
|
| 287 |
+
(set! token_to_words soleml_previous_token_to_words)
|
| 288 |
+
(set! english_token_to_words soleml_previous_token_to_words))
|
| 289 |
+
|
| 290 |
+
(define (soleml_token_function si)
|
| 291 |
+
"(soleml_token_function si)
|
| 292 |
+
This is called for each token found; it appends SI as a daughter of sole_current_node."
|
| 293 |
+
(node.append_daughter sole_current_node si))
|
| 294 |
+
|
| 295 |
+
(define (soleml_push_word_features)
|
| 296 |
+
"(soleml_push_word_features)
|
| 297 |
+
Save the current xxml_word_features on soleml_word_features_stack."
|
| 298 |
+
(set! soleml_word_features_stack
|
| 299 |
+
(cons xxml_word_features soleml_word_features_stack)))
|
| 300 |
+
|
| 301 |
+
(define (soleml_pop_word_features)
|
| 302 |
+
"(soleml_pop_word_features)
|
| 303 |
+
Pop the most recently saved word features from soleml_word_features_stack and return them."
|
| 304 |
+
(let ((r (car soleml_word_features_stack)))
|
| 305 |
+
(set! soleml_word_features_stack (cdr soleml_word_features_stack))
|
| 306 |
+
r))
|
| 307 |
+
|
| 308 |
+
(define (soleml_conv_attlist alist)
|
| 309 |
+
"(soleml_conv_attlist alist)
|
| 310 |
+
Flatten alist arguments: entries whose value list is empty are dropped, single-valued entries become (name value), and multi-valued entries become (name string) with the value list printed using the %l format directive."
|
| 311 |
+
(cond
|
| 312 |
+
((null alist) nil)
|
| 313 |
+
((null (car (cdr (car alist))))
|
| 314 |
+
(soleml_conv_attlist (cdr alist)))
|
| 315 |
+
((equal? (length (car (cdr (car alist)))) 1)
|
| 316 |
+
(cons
|
| 317 |
+
(list (car (car alist)) (car (car (cdr (car alist)))))
|
| 318 |
+
(soleml_conv_attlist (cdr alist))))
|
| 319 |
+
(t
|
| 320 |
+
(cons
|
| 321 |
+
(list (car (car alist)) (format nil "%l" (car (cdr (car alist)))))
|
| 322 |
+
(soleml_conv_attlist (cdr alist))))))
|
| 323 |
+
|
| 324 |
+
(set! tts_text_modes
|
| 325 |
+
(cons
|
| 326 |
+
(list
|
| 327 |
+
'soleml ;; mode name
|
| 328 |
+
(list ;; soleml mode params
|
| 329 |
+
(list 'init_func soleml_init_func)
|
| 330 |
+
(list 'exit_func soleml_exit_func)
|
| 331 |
+
'(analysis_type xxml)
|
| 332 |
+
(list 'filter
|
| 333 |
+
(format nil "%s -D %s " sgml_parse_progname libdir))))
|
| 334 |
+
tts_text_modes))
|
| 335 |
+
|
| 336 |
+
(provide 'soleml-mode)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/speech.properties
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Register speech engines
|
| 2 |
+
cstr.festival.EngineCentral=cstr.festival.jsapi.EngineCentral
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/synthesis.scm
ADDED
|
@@ -0,0 +1,443 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;; ;;
|
| 3 |
+
;; Centre for Speech Technology Research ;;
|
| 4 |
+
;; University of Edinburgh, UK ;;
|
| 5 |
+
;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;; All Rights Reserved. ;;
|
| 7 |
+
;; ;;
|
| 8 |
+
;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;; the following conditions: ;;
|
| 14 |
+
;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;; conditions and the following disclaimer. ;;
|
| 16 |
+
;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;; derived from this software without specific prior written ;;
|
| 20 |
+
;; permission. ;;
|
| 21 |
+
;; ;;
|
| 22 |
+
;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;; THIS SOFTWARE. ;;
|
| 31 |
+
;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;; ;;
|
| 34 |
+
;; Author: Richard Caley (rjc@cstr.ed.ac.uk) ;;
|
| 35 |
+
;; Date: Fri Aug 15 1997 ;;
|
| 36 |
+
;; ------------------------------------------------------------------- ;;
|
| 37 |
+
;; New synthesis mainline. ;;
|
| 38 |
+
;; ;;
|
| 39 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 40 |
+
|
| 41 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 42 |
+
;; ;;
|
| 43 |
+
;; Hooks to add to the synthesis process. ;;
|
| 44 |
+
;; ;;
|
| 45 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 46 |
+
|
| 47 |
+
(defvar default_before_synth_hooks nil
|
| 48 |
+
"default_before_synth_hooks
|
| 49 |
+
The default list of functions to be run on all synthesized utterances
|
| 50 |
+
before synthesis starts.")
|
| 51 |
+
|
| 52 |
+
(defvar before_synth_hooks default_before_synth_hooks
|
| 53 |
+
"before_synth_hooks
|
| 54 |
+
List of functions to be run on synthesised utterances before synthesis
|
| 55 |
+
starts.")
|
| 56 |
+
|
| 57 |
+
(defvar default_after_analysis_hooks nil
|
| 58 |
+
"default_after_analysis_hooks
|
| 59 |
+
The default list of functions to be run on all synthesized utterances
|
| 60 |
+
after analysis but before synthesis.")
|
| 61 |
+
|
| 62 |
+
(defvar after_analysis_hooks default_after_analysis_hooks
|
| 63 |
+
"after_analysis_hooks
|
| 64 |
+
List of functions to be applied after analysis and before synthesis.")
|
| 65 |
+
|
| 66 |
+
(defvar default_after_synth_hooks nil
|
| 67 |
+
"default_after_synth_hooks
|
| 68 |
+
The default list of functions to be run on all synthesized utterances
|
| 69 |
+
after Wave_Synth. This will normally be nil but if for some reason you
|
| 70 |
+
need to change the gain or rescale *all* waveforms you could set the
|
| 71 |
+
function here, in your siteinit.scm.")
|
| 72 |
+
|
| 73 |
+
(defvar after_synth_hooks default_after_synth_hooks
|
| 74 |
+
"after_synth_hooks
|
| 75 |
+
List of functions to be applied after all synthesis modules have been
|
| 76 |
+
applied. This is primarily designed to allow waveform manipulation,
|
| 77 |
+
particularly resampling and volume changes.")
|
| 78 |
+
|
| 79 |
+
(defvar default_access_strategy 'ondemand
|
| 80 |
+
"default_access_strategy
|
| 81 |
+
How to access units from databases.")
|
| 82 |
+
|
| 83 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 84 |
+
;; ;;
|
| 85 |
+
;; Macro to define utterance types. ;;
|
| 86 |
+
;; ;;
|
| 87 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 88 |
+
|
| 89 |
+
(defmac (defUttType form)
|
| 90 |
+
(list 'defUttType_real
|
| 91 |
+
(list 'quote (cadr form))
|
| 92 |
+
(list 'quote (cddr form))))
|
| 93 |
+
|
| 94 |
+
(defvar UttTypes nil
|
| 95 |
+
"UttTypes
|
| 96 |
+
List of types and functions used by the utt.synth function to call
|
| 97 |
+
appropriate methods.")
|
| 98 |
+
|
| 99 |
+
(define (defUttType_real type form)
|
| 100 |
+
"(defUttType TYPE . BODY)
|
| 101 |
+
Define a new utterance type. TYPE is an atomic type that is specified
|
| 102 |
+
as the first argument to the function Utterance. BODY is evaluated
|
| 103 |
+
with argument utt, when utt.synth is called with an utterance of type
|
| 104 |
+
TYPE. You almost always require the function Initialize first.
|
| 105 |
+
[see Utterance types]"
|
| 106 |
+
;;; Yes I am cheating a bit with the macro/function name.
|
| 107 |
+
;;; should check about redefining and the syntax of the forms
|
| 108 |
+
(set! UttTypes
|
| 109 |
+
(cons
|
| 110 |
+
(cons type form)
|
| 111 |
+
UttTypes))
|
| 112 |
+
type)
|
| 113 |
+
|
| 114 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 115 |
+
;; ;;
|
| 116 |
+
;; Macro to define synthesis types. ;;
|
| 117 |
+
;; ;;
|
| 118 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 119 |
+
|
| 120 |
+
(defmac (defSynthType form)
|
| 121 |
+
(list 'defSynthType_real
|
| 122 |
+
(list 'quote (cadr form))
|
| 123 |
+
(list 'quote (cddr form))))
|
| 124 |
+
|
| 125 |
+
(defvar SynthTypes nil
|
| 126 |
+
"SynthTypes
|
| 127 |
+
List of synthesis types and functions used by the utt.synth function to
|
| 128 |
+
call appropriate methods for wave synthesis.")
|
| 129 |
+
|
| 130 |
+
(define (defSynthType_real type form)
|
| 131 |
+
"(defSynthType TYPE . BODY)
|
| 132 |
+
Define a new wave synthesis type. TYPE is an atomic type that
|
| 133 |
+
identifies the type of synthesis. BODY is evaluated with argument
|
| 134 |
+
utt, when utt.synth is called with an utterance of type TYPE.
|
| 135 |
+
[see Utterance types]"
|
| 136 |
+
|
| 137 |
+
(set! SynthTypes
|
| 138 |
+
(cons
|
| 139 |
+
(cons type form)
|
| 140 |
+
SynthTypes))
|
| 141 |
+
type)
|
| 142 |
+
|
| 143 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 144 |
+
;;;
|
| 145 |
+
;;; Some actual Utterance type definitions
|
| 146 |
+
;;;
|
| 147 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 148 |
+
|
| 149 |
+
(defUttType Words
|
| 150 |
+
(Initialize utt)
|
| 151 |
+
(POS utt)
|
| 152 |
+
(Phrasify utt)
|
| 153 |
+
(Word utt)
|
| 154 |
+
(Pauses utt)
|
| 155 |
+
(Intonation utt)
|
| 156 |
+
(PostLex utt)
|
| 157 |
+
(Duration utt)
|
| 158 |
+
(Int_Targets utt)
|
| 159 |
+
(Wave_Synth utt)
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
(defUttType Text
|
| 163 |
+
(Initialize utt)
|
| 164 |
+
(Text utt)
|
| 165 |
+
(Token_POS utt)
|
| 166 |
+
(Token utt)
|
| 167 |
+
(POS utt)
|
| 168 |
+
(Phrasify utt)
|
| 169 |
+
(Word utt)
|
| 170 |
+
(Pauses utt)
|
| 171 |
+
(Intonation utt)
|
| 172 |
+
(PostLex utt)
|
| 173 |
+
(Duration utt)
|
| 174 |
+
(Int_Targets utt)
|
| 175 |
+
(Wave_Synth utt)
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
(defUttType Tokens ;; This is used in tts_file, Tokens will be preloaded
|
| 179 |
+
(Token_POS utt) ;; when utt.synth is called
|
| 180 |
+
(Token utt)
|
| 181 |
+
(POS utt)
|
| 182 |
+
(Phrasify utt)
|
| 183 |
+
(Word utt)
|
| 184 |
+
(Pauses utt)
|
| 185 |
+
(Intonation utt)
|
| 186 |
+
(PostLex utt)
|
| 187 |
+
(Duration utt)
|
| 188 |
+
(Int_Targets utt)
|
| 189 |
+
(Wave_Synth utt)
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
(defUttType Concept ;; rather grandiose name for when information has
|
| 193 |
+
(POS utt) ;; been preloaded (probably XML) to give a word
|
| 194 |
+
(Phrasify utt) ;; relation (SOLE uses this)
|
| 195 |
+
(Word utt)
|
| 196 |
+
(Pauses utt)
|
| 197 |
+
(Intonation utt)
|
| 198 |
+
(PostLex utt)
|
| 199 |
+
(Duration utt)
|
| 200 |
+
(Int_Targets utt)
|
| 201 |
+
(Wave_Synth utt)
|
| 202 |
+
)
|
| 203 |
+
|
| 204 |
+
(defUttType Phrase
|
| 205 |
+
(Initialize utt)
|
| 206 |
+
(Token_POS utt)
|
| 207 |
+
(Token utt)
|
| 208 |
+
(POS utt)
|
| 209 |
+
(Phrasify utt)
|
| 210 |
+
(Word utt)
|
| 211 |
+
(Pauses utt)
|
| 212 |
+
(Intonation utt)
|
| 213 |
+
(PostLex utt)
|
| 214 |
+
(Duration utt)
|
| 215 |
+
(Int_Targets utt)
|
| 216 |
+
(Wave_Synth utt)
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
(defUttType Segments
|
| 220 |
+
(Initialize utt)
|
| 221 |
+
(Wave_Synth utt)
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
(defUttType Phones
|
| 225 |
+
(Initialize utt)
|
| 226 |
+
(Fixed_Prosody utt)
|
| 227 |
+
(Wave_Synth utt)
|
| 228 |
+
)
|
| 229 |
+
|
| 230 |
+
(defUttType SegF0
|
| 231 |
+
(Wave_Synth utt)
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
(defUttType Wave
|
| 235 |
+
(Initialize utt))
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 240 |
+
;; ;;
|
| 241 |
+
;; And some synthesis types. ;;
|
| 242 |
+
;; ;;
|
| 243 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 244 |
+
|
| 245 |
+
(defSynthType Taylor
|
| 246 |
+
(Taylor_Synthesize utt)
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
(defSynthType UniSyn
|
| 250 |
+
(defvar UniSyn_module_hooks nil)
|
| 251 |
+
(Param.def "unisyn.window_name" "hanning")
|
| 252 |
+
(Param.def "unisyn.window_factor" 1.0)
|
| 253 |
+
(Parameter.def 'us_sigpr 'lpc)
|
| 254 |
+
|
| 255 |
+
(apply_hooks UniSyn_module_hooks utt) ;; for processing of diphone names
|
| 256 |
+
(us_get_diphones utt)
|
| 257 |
+
(us_unit_concat utt)
|
| 258 |
+
|
| 259 |
+
(if (not (member 'f0 (utt.relationnames utt)))
|
| 260 |
+
(targets_to_f0 utt))
|
| 261 |
+
;; temporary fix
|
| 262 |
+
(if (utt.relation.last utt 'Segment)
|
| 263 |
+
(set! pm_end (+ (item.feat (utt.relation.last utt 'Segment) "end") 0.02))
|
| 264 |
+
(set! pm_end 0.02))
|
| 265 |
+
|
| 266 |
+
(us_f0_to_pitchmarks utt 'f0 'TargetCoef pm_end)
|
| 267 |
+
(us_mapping utt 'segment_single)
|
| 268 |
+
(cond
|
| 269 |
+
((string-equal "td_psola" (Parameter.get 'us_sigpr))
|
| 270 |
+
;; Not in standard distribution, so has to be separate function
|
| 271 |
+
(us_tdpsola_synthesis utt 'analysis_period))
|
| 272 |
+
(t
|
| 273 |
+
;; All the rest
|
| 274 |
+
(us_generate_wave utt (Parameter.get 'us_sigpr)
|
| 275 |
+
'analysis_period)))
|
| 276 |
+
)
|
| 277 |
+
|
| 278 |
+
(defSynthType None
|
| 279 |
+
;; do nothing
|
| 280 |
+
utt
|
| 281 |
+
)
|
| 282 |
+
|
| 283 |
+
(defSynthType Standard
  ;; Runs up to five optional stages in a fixed order.  Each stage is
  ;; named by a Parameter; a stage whose Parameter is unset is skipped.
  ;; Every stage function is applied to the utterance alone.
  (print "synth method: Standard")

  (let ((run_stage
         ;; Look up PARAM; when it is set, announce LABEL and apply the
         ;; stored function to utt.  Evaluates to the function's result,
         ;; or nil when the stage is skipped.
         (lambda (param label)
           (let ((method (Parameter.get param)))
             (if method
                 (progn
                   (print label)
                   (apply method (list utt))))))))
    (run_stage 'SelectionMethod "select")
    (run_stage 'JoiningMethod "join")
    (run_stage 'ImposeMethod "impose")
    (run_stage 'PowerSmoothMethod "power")
    ;; Last stage: its value is the value of the whole body.
    (run_stage 'WaveSynthesisMethod "synthesis")))
|
| 331 |
+
|
| 332 |
+
(defSynthType Minimal
  ;; Two optional stages only: selection, then waveform synthesis.
  ;; Unlike the other stages, the synthesis function here receives three
  ;; extra string arguments after the utterance: "Unit" "Join" "Wave".
  (print "synth method: Minimal")

  (let ((selector (Parameter.get 'SelectionMethod)))
    (cond
     (selector
      (print "select")
      (apply selector (list utt)))))

  (let ((synthesizer (Parameter.get 'WaveSynthesisMethod)))
    (cond
     (synthesizer
      (print "synthesis")
      (apply synthesizer (list utt "Unit" "Join" "Wave"))))))
|
| 353 |
+
|
| 354 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 355 |
+
;; ;;
|
| 356 |
+
;; Finally the actual driver function. ;;
|
| 357 |
+
;; ;;
|
| 358 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 359 |
+
|
| 360 |
+
(define (utt.synth utt)
  "(utt.synth UTT)
The main synthesis function.  Applies before_synth_hooks to UTT, looks
up UTT's type in UttTypes (as defined with defUttType) and runs the
forms registered for that type with utt bound to UTT, then applies
after_synth_hooks.  Signals an error if the type has no entry in
UttTypes.  Returns UTT.
[see Utterance types]"
  (apply_hooks before_synth_hooks utt)

  (let ((utype (utt.type utt)))
    (let ((entry (assoc utype UttTypes)))
      (cond
       ((null? entry)
        (error "Unknown utterance type" utype))
       (t
        ;; The registered forms are wrapped in (lambda (utt) ...) so
        ;; they can refer to the utterance by the name utt.
        (let ((runner (eval (cons 'lambda
                                  (cons '(utt) (cdr entry))))))
          (runner utt))))))

  (apply_hooks after_synth_hooks utt)
  utt)
|
| 381 |
+
|
| 382 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 383 |
+
;; ;;
|
| 384 |
+
;; And a couple of utility expressions. ;;
|
| 385 |
+
;; ;;
|
| 386 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 387 |
+
|
| 388 |
+
(define (SayText text)
  "(SayText TEXT)
Builds a Text utterance from the string TEXT, synthesizes it with
utt.synth and plays the result with utt.play."
  (let ((utt (eval (list 'Utterance 'Text text))))
    (utt.play (utt.synth utt))))
|
| 392 |
+
|
| 393 |
+
(define (SynthText text)
  "(SynthText TEXT)
Builds a Text utterance from the string TEXT and synthesizes it with
utt.synth.  The result of utt.synth is returned; nothing is played."
  (let ((utt (eval (list 'Utterance 'Text text))))
    (utt.synth utt)))
|
| 397 |
+
|
| 398 |
+
(define (SayPhones phones)
  "(SayPhones PHONES)
PHONES is a list of phonemes.  This uses the Phones type utterance
to synthesize and play the given phones.  Fixed duration specified in
FP_duration and fixed monotone F0 (FP_F0) are used to generate
prosody."
  (let ((utt (eval (list 'Utterance 'Phones phones))))
    (utt.play (utt.synth utt))))
|
| 405 |
+
|
| 406 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 407 |
+
;; ;;
|
| 408 |
+
;; This is the standard synthesis function. The Wave Synthesis may be ;;
|
| 409 |
+
;; more than a simple module ;;
|
| 410 |
+
;; ;;
|
| 411 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 412 |
+
|
| 413 |
+
|
| 414 |
+
(define (Wave_Synth utt)
  "(Wave_Synth UTT)
Generate waveform from information in UTT, at least a Segment stream
must exist.  after_analysis_hooks are applied first.  The form of
synthesis then depends on the Parameter Synth_Method: a symbol bound
to a value has that value applied to UTT; a function (subr or closure)
is applied to UTT directly; anything else is looked up by name in
SynthTypes (as defined by defSynthType) and its body is run with utt
bound to UTT.  An unset Synth_Method or an unknown SynthType is an
error.  Returns UTT.
[see Utterance types]"
  (apply_hooks after_analysis_hooks utt)
  (let ((synth (Parameter.get 'Synth_Method)))
    (cond
     ((null synth)
      (error "Undefined Synth_Method"))
     ;; A symbol that names a bound variable: apply that variable's value.
     ((and (symbol? synth) (symbol-bound? synth))
      (apply (symbol-value synth) (list utt)))
     ;; Already a function object.
     ((member (typeof synth) '(subr closure))
      (apply synth (list utt)))
     ;; Otherwise it names a defined synthesis type.
     (t
      (let ((entry (assoc_string synth SynthTypes)))
        (if entry
            (let ((runner (eval (cons 'lambda
                                      (cons '(utt) (cdr entry))))))
              (runner utt))
            (error (format nil "Undefined SynthType %s\n" synth)))))))
  utt)
|
| 439 |
+
|
| 440 |
+
(provide 'synthesis)
|
| 441 |
+
|
| 442 |
+
|
| 443 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/tilt.scm
ADDED
|
@@ -0,0 +1,972 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Author: Alan W Black, Kurt Dusterhoff, Janet Hitzeman
|
| 35 |
+
;;; Date: April 1999
|
| 36 |
+
;;;
|
| 37 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 38 |
+
;;;
|
| 39 |
+
;;; Tilt intonation modules, accent/boundary predictions and F0 generation
|
| 40 |
+
;;; The F0 generation is done using models as described in
|
| 41 |
+
;;; Dusterhoff, K. and Black, A. (1997). "Generating F0 contours for
|
| 42 |
+
;;; speech synthesis using the Tilt intonation theory"
|
| 43 |
+
;;; (http://www.cstr.ed.ac.uk/awb/papers/esca-int97.ps)
|
| 44 |
+
;;; Proceedings of ESCA Workshop of Intonation, pp 107-110, September,
|
| 45 |
+
;;; Athens, Greece.
|
| 46 |
+
;;;
|
| 47 |
+
;;; Intonation_Tilt assigns accents and boundaries by a CART tree
|
| 48 |
+
;;; the c and sil nodes are derived directly duration creation
|
| 49 |
+
;;;
|
| 50 |
+
;;; Int_Targets_Tilt generates the F0 using the CART trees as
|
| 51 |
+
;;; described in the paper referenced above.
|
| 52 |
+
;;;
|
| 53 |
+
;;; THIS CONTAINS *VERY* EXPERIMENTAL CODE
|
| 54 |
+
;;; it requires a thorough clean up and should probably be split into
|
| 55 |
+
;;; multiple files
|
| 56 |
+
|
| 57 |
+
(defvar int_tilt_params nil
|
| 58 |
+
"int_tilt_params
|
| 59 |
+
Parameters for tilt intonation model.")
|
| 60 |
+
|
| 61 |
+
(Parameter.def 'tilt_method 'cart)
|
| 62 |
+
|
| 63 |
+
(define (Intonation_Tilt utt)
  "(Intonation_Tilt utt)
Creates the Intonation and IntonationSyllable relations, then visits
every item in the Syllable relation.  For each syllable an accent and
a boundary label are predicted with the CART trees tilt_a_cart_tree
and tilt_b_cart_tree, and an IntEvent is added for every label other
than \"0\".  A phrase_start event is added when the segment before the
syllable is \"pau\", and a phrase_end event when the segment after it
is \"pau\".  Returns utt."
  (utt.relation.create utt 'Intonation)
  (utt.relation.create utt 'IntonationSyllable)
  (mapcar
   (lambda (syl)
     ;; Segment before the syllable's first segment is a pause.
     (if (string-equal
          "pau"
          (item.feat syl "R:SylStructure.daughter1_to.Segment.p.name"))
         (tilt_add_intevent utt syl 'phrase_start))

     ;; Both labels are predicted before any accent/boundary event is
     ;; attached to the syllable.
     (let ((acc (wagon_predict syl tilt_a_cart_tree)))
       (let ((bnd (wagon_predict syl tilt_b_cart_tree))
             (accented (not (string-equal acc "0"))))
         (if accented
             (tilt_add_intevent utt syl acc))
         (cond
          ((string-equal bnd "0")
           nil)
          ;; An accent plus an "afb" boundary is not allowed: the
          ;; boundary is downgraded to plain "fb".
          ((and (string-equal bnd "afb") accented)
           (tilt_add_intevent utt syl "fb"))
          (t
           (tilt_add_intevent utt syl bnd)))))

     ;; Segment after the syllable's last segment is a pause.
     (if (string-equal
          "pau"
          (item.feat syl "R:SylStructure.daughtern_to.Segment.n.name"))
         (tilt_add_intevent utt syl 'phrase_end)))
   (utt.relation.items utt 'Syllable))
  utt)
|
| 99 |
+
|
| 100 |
+
(define (tilt_add_intevent utt syl name)
  "(tilt_add_intevent utt syl name)
Appends a new item called NAME to the Intonation relation and makes it
a daughter of SYL in IntonationSyllable, adding SYL to that relation
first if it is not already in it.  Events whose name does not match
\"phrase_.*\" get the feature int_event set to 1.  Returns the new item."
  (let ((event (utt.relation.append utt 'Intonation (list name))))
    (if (not (item.relation syl 'IntonationSyllable))
        (utt.relation.append utt 'IntonationSyllable syl))
    (item.relation.append_daughter syl 'IntonationSyllable event)
    ;; phrase_start / phrase_end are markers, not real intonation events.
    (if (not (string-matches name "phrase_.*"))
        (item.set_feat event "int_event" 1))
    event))
|
| 111 |
+
|
| 112 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 113 |
+
;;;
|
| 114 |
+
;;; F0 generation through tilt parameters and F0 rendering
|
| 115 |
+
;;;
|
| 116 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 117 |
+
(define (Int_Targets_Tilt utt)
  "(Int_Targets_Tilt utt)
Marks the Intonation relation with intonation_style \"tilt\", assigns
Tilt parameters to each IntEvent (tilt_assign_parameters), renders
them with (tilt_to_f0 utt \"f0\") and finally runs tilt_validate.
Returns utt."
  (utt.relation.set_feat utt "Intonation" "intonation_style" "tilt")
  (tilt_assign_parameters utt)
  ;; tilt_F0_and_targets (has to be C++) and tilt_map_f0_range are
  ;; intentionally not called here.
  (tilt_to_f0 utt "f0")
  (tilt_validate utt)
  utt)
|
| 129 |
+
|
| 130 |
+
(define (tilt_validate utt)
  "(tilt_validate utt)
Intended to check that the predicted tilt parameters fall within
reasonable limits and to adjust them where possible.  At present this
is a placeholder: it walks the Intonation relation but modifies
nothing."
  (mapcar
   (lambda (event)
     (cond
      ((string-equal (item.name event) "phrase_end")
       ;; TODO: check that the previous event does not overflow the
       ;; segments.  No check is implemented yet.
       )
      (t
       t)))
   (utt.relation.items utt 'Intonation)))
|
| 145 |
+
|
| 146 |
+
(define (tilt_map_f0_range utt)
  "(tilt_map_f0_range utt)
In order for better trained models to be used for voices which don't
have the necessary data to train models from, the targets may be mapped
to a different pitch range.  Note this is not optimal as pitch ranges
don't map that easily, but the results can sometimes be better than
using a less sophisticated F0 generation model.  The method used
is to define the mean and standard deviation of the speaker the
model was trained on and the mean and standard deviation of the
desired speaker.  Mapping is by converting the actual F0 value
to zscores (distance from mean in number of stddev) and back into
the other domain.  The variable int_tilt_params is used to find
the values (target_f0_mean, target_f0_std, model_f0_mean and
model_f0_std).  Nothing is changed when target_f0_mean is not
specified.  Returns utt."
  (let ((target_f0_mean (car (cdr (assoc 'target_f0_mean int_tilt_params))))
        (target_f0_std (car (cdr (assoc 'target_f0_std int_tilt_params))))
        (model_f0_std (car (cdr (assoc 'model_f0_std int_tilt_params))))
        (model_f0_mean (car (cdr (assoc 'model_f0_mean int_tilt_params)))))
    (if target_f0_mean ;; only if one is specified
        ;; The mapping must actually be applied to every Target leaf.
        ;; Previously the lambda and the leaf list were the two arms
        ;; of the `if', so no target was ever rewritten.
        (mapcar
         (lambda (targ)
           (item.set_name
            targ
            (+ target_f0_mean
               (* target_f0_std
                  ;; z-score of the target in the model speaker's range
                  (/ (- (parse-number (item.name targ))
                        model_f0_mean)
                     model_f0_std)))))
         (utt.relation.leafs utt 'Target)))
    utt))
|
| 173 |
+
|
| 174 |
+
(define (tilt_assign_parameters utt)
  "(tilt_assign_parameters utt)
Assigns tilt parameters to IntEvents.  Depending on the value of the
Parameter tilt_method this uses wagon trees (cart) or linear
regression models (lr); any other value is an error."
  (let ((method (Parameter.get 'tilt_method)))
    (cond
     ((equal? method 'cart)
      (tilt_assign_parameters_wagon utt))
     ((equal? method 'lr)
      (tilt_assign_parameters_lr utt))
     (t
      ;; Report the offending value held in the local `method'; the
      ;; previous code referenced `tilt_method' here, which is a
      ;; Parameter name and not a variable.
      (error "Tilt: unknown tilt param prediction method: " method)))))
|
| 187 |
+
|
| 188 |
+
(define (tilt_assign_parameters_wagon utt)
  "(tilt_assign_parameters_wagon utt)
For each item in the Intonation relation: sets time_path to
IntonationSyllable, installs the feature function used for \"time\"
(event position when int_event is 1, phrase position otherwise) and
predicts the tilt parameters with the Wagon trees registered for the
item's name in tilt_param_trees.  Items with no registered trees are
reported on stderr and given fixed default parameter values."
  (mapcar
   (lambda (event)
     (let ((trees (cdr (assoc_string (item.name event)
                                     tilt_param_trees))))
       (item.set_feat event "time_path" "IntonationSyllable")
       (item.set_function
        event "time"
        (if (string-equal "1" (item.feat event "int_event"))
            "unisyn_tilt_event_position"
            "unisyn_tilt_phrase_position"))
       (if trees
           (tilt_assign_params_wagon event trees)
           (progn
             (format stderr "Tilt: unknown Intonation type %s, ignored\n"
                     (item.name event))
             ;; Defaults *must* be assigned so later stages find values.
             (item.set_feat event "ev.f0" 100)
             (item.set_feat event "tilt.amp" 20.0)
             (item.set_feat event "tilt.dur" 0.25)
             (item.set_feat event "tilt.tilt" -0.2)
             (item.set_feat event "rel_pos" 0.0)))))
   (utt.relation.items utt 'Intonation)))
|
| 214 |
+
|
| 215 |
+
(define (tilt_assign_params_wagon ie trees)
  "(tilt_assign_params_wagon ie trees)
TREES is a list of entries whose first element is a feature name and
whose second element is a Wagon tree.  Each tree is evaluated on IE
and the prediction is stored on IE under the feature name."
  (mapcar
   (lambda (entry)
     (item.set_feat ie
                    (car entry)
                    (wagon_predict ie (car (cdr entry)))))
   trees))
|
| 224 |
+
|
| 225 |
+
(define (tilt_assign_parameters_lr utt)
  "(tilt_assign_parameters_lr utt)
Assigns tilt parameters to each IntEvent in the Intonation relation
using the linear regression models registered for the event's name in
tilt_param_lrmodels.  Events with no registered models are reported
on stderr and given fixed default parameter values."
  (mapcar
   (lambda (ie)
     (let ((param_lrmodels (cdr (assoc_string (item.name ie)
                                              tilt_param_lrmodels))))
       (cond
        ((null param_lrmodels)
         (format stderr "Tilt: unknown IntEvent type %s, ignored\n"
                 (item.name ie))
         ;; *need* to assign default values
         (item.set_feat ie "ev.f0" 100)
         (item.set_feat ie "tilt.amp" 20.0)
         (item.set_feat ie "tilt.dur" 0.25)
         (item.set_feat ie "tilt.tilt" -0.2)
         (item.set_feat ie "rel_pos" 0.0))
        (t
         (tilt_assign_params_lr ie param_lrmodels)))))
   ;; Events are created in the Intonation relation by
   ;; tilt_add_intevent, and the wagon variant iterates that relation;
   ;; the previous 'IntEvent relation is never created in this file.
   ;; NOTE(review): unlike the wagon variant this does not set
   ;; time_path or the "time" feature function -- confirm whether the
   ;; lr path needs them too.
   (utt.relation.items utt 'Intonation)))
|
| 247 |
+
|
| 248 |
+
(define (tilt_assign_params_lr ie lrmodels)
  "(tilt_assign_params_lr ie lrmodels)
LRMODELS is a list of entries whose first element is a feature name
and whose remainder is a linear regression model.  Each model is
evaluated on IE with lr_predict and the prediction is stored on IE
under the feature name."
  (mapcar
   (lambda (entry)
     (item.set_feat ie
                    (car entry)
                    (lr_predict ie (cdr entry))))
   lrmodels))
|
| 257 |
+
|
| 258 |
+
(define (utt.save.tilt_events utt filename)
  "(utt.save.tilt_events UTT FILENAME)
Writes the items of UTT's IntEvent relation to FILENAME in a format
suitable for ev_synth: a \"#\" header line followed by one line per
event.  Events named \"sil\" or \"c\" are written with their
tilt_start_f0 only; all other events (accents and boundaries) are
written with ev.f0, tilt.amp, tilt.dur, tilt.tilt and rel_pos.
Returns UTT."
  ;; NOTE(review): this reads the 'IntEvent relation, whereas the
  ;; events built by tilt_add_intevent live in 'Intonation -- confirm
  ;; which relation is intended.
  (let ((out (fopen filename "w")))
    (format out "#\n")
    (mapcar
     (lambda (event)
       (let ((label (item.name event)))
         (if (or (string-equal label "sil")
                 (string-equal label "c"))
             ;; silence / connection
             (format out " %2.4f 100 %s; tilt: %2.6f\n"
                     (item.feat event 'end)
                     label
                     (item.feat event "tilt_start_f0"))
             ;; accent or boundary
             (format out " %2.4f 100 %s; tilt: %2.6f %2.6f %2.6f %2.6f %2.6f\n"
                     (item.feat event 'end)
                     label
                     (item.feat event "ev.f0")
                     (item.feat event "tilt.amp")
                     (item.feat event "tilt.dur")
                     (item.feat event "tilt.tilt")
                     (item.feat event "rel_pos")))))
     (utt.relation.items utt 'IntEvent))
    (fclose out)
    utt))
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
;;;;;
|
| 289 |
+
;;; Some features which should be pruned
|
| 290 |
+
;;;;;
|
| 291 |
+
|
| 292 |
+
(def_feature_docstring 'Syllable.lisp_time_to_next_vowel
|
| 293 |
+
"Syllable.lisp_time_to_next_vowel syl
|
| 294 |
+
The time from vowel_start to next vowel_start")
|
| 295 |
+
(define (time_to_next_vowel syl)
  "(time_to_next_vowel syl)
The time from this syllable's vowel_start to the next syllable's
vowel_start, or 0.00 when the n.vowel_start feature is \"0\"."
  (if (string-equal "0" (item.feat syl "n.vowel_start"))
      0.00
      (- (item.feat syl "n.vowel_start")
         (item.feat syl "vowel_start"))))
|
| 304 |
+
|
| 305 |
+
(def_feature_docstring 'Syllable.lisp_next_stress
|
| 306 |
+
"Syllable.lisp_next_stress
|
| 307 |
+
Number of syllables to next stressed syllable. 0 if this syllable is
|
| 308 |
+
stressed. It is effectively assumed the syllable after the last syllable
|
| 309 |
+
is stressed.")
|
| 310 |
+
(define (next_stress syl)
  ;; Counts syllables from SYL forward along the Syllable relation
  ;; until one whose stress_num is "1" is reached.  Running off the end
  ;; of the relation stops the count as if a stressed syllable had
  ;; been found there.
  (let ((cur syl)
        (dist 0))
    (while (and cur
                (not (string-equal (item.feat cur 'stress_num) "1")))
      (set! dist (+ 1 dist))
      (set! cur (item.relation.next cur 'Syllable)))
    dist))
|
| 317 |
+
|
| 318 |
+
(def_feature_docstring 'Syllable.lisp_last_stress
|
| 319 |
+
"Syllable.lisp_last_stress
|
| 320 |
+
Number of syllables from previous stressed syllable. 0 if this syllable
|
| 321 |
+
is stressed. It is effectively assumed that the syllable before the
|
| 322 |
+
first syllable is stressed.")
|
| 323 |
+
(define (last_stress syl)
  ;; Counts syllables from SYL backward along the Syllable relation
  ;; until one whose stress_num is "1" is reached.  Running off the
  ;; start of the relation stops the count as if a stressed syllable
  ;; had been found there.
  (let ((cur syl)
        (dist 0))
    (while (and cur
                (not (string-equal (item.feat cur 'stress_num) "1")))
      (set! dist (+ 1 dist))
      (set! cur (item.relation.prev cur 'Syllable)))
    dist))
|
| 330 |
+
|
| 331 |
+
|
| 332 |
+
(def_feature_docstring 'SylStructure.lisp_length_to_last_seg
|
| 333 |
+
"SylStructure.lisp_length_to_last_seg
|
| 334 |
+
Length from start of the vowel to start of last segment of syllable.")
|
| 335 |
+
(define (length_to_last_seg syl)
  ;; Start of the syllable's last segment minus the vowel start.
  (let ((last_seg_start (item.feat syl "daughtern_to.Segment.start"))
        (vowel_begin (item.feat syl "vowel_start")))
    (- last_seg_start vowel_begin)))
|
| 338 |
+
|
| 339 |
+
(def_feature_docstring 'SylStructure.lisp_get_rhyme_length
|
| 340 |
+
"Syllable.lisp_get_rhyme_length
|
| 341 |
+
Length from start of the vowel to end of syllable.")
|
| 342 |
+
(define (get_rhyme_length syl)
  ;; Length of the rhyme: the syllable's end minus its vowel_start.
  ;; item.feat takes an item and a feature name; the stray third
  ;; argument previously passed in the vowel_start lookup is removed.
  (- (item.feat syl 'end)
     (item.feat syl 'vowel_start)))
|
| 345 |
+
|
| 346 |
+
(def_feature_docstring 'SylStructure.lisp_get_onset_length
|
| 347 |
+
"Syllable.lisp_get_onset_length
|
| 348 |
+
Length from start of syllable to start of vowel.")
|
| 349 |
+
(define (get_onset_length syl)
  ;; Vowel start minus syllable start, clamped so that a negative
  ;; difference is reported as 0.000.
  (let ((onset (- (item.feat syl 'vowel_start)
                  (item.feat syl 'start))))
    (if (< onset 0.000)
        0.000 ;; just in case
        onset)))
|
| 358 |
+
|
| 359 |
+
(def_feature_docstring 'Syllable.lisp_tilt_accent
|
| 360 |
+
"Syllable.lisp_tilt_accent
|
| 361 |
+
Returns \"a\" if there is a tilt accent related to this syllable, 0
|
| 362 |
+
otherwise.")
|
| 363 |
+
(define (tilt_accent syl)
  ;; "a" when any IntonationSyllable daughter of SYL has a name found
  ;; in tilt_accent_list, otherwise "0".
  (let ((result "0"))
    (mapcar
     (lambda (event)
       (and (member_string (item.name event) tilt_accent_list)
            (set! result "a")))
     (item.relation.daughters syl 'IntonationSyllable))
    result))
|
| 372 |
+
|
| 373 |
+
(def_feature_docstring 'Syllable.lisp_tilt_boundary
|
| 374 |
+
"Syllable.lisp_tilt_boundary
|
| 375 |
+
Returns boundary label if there is a tilt boundary related to this
|
| 376 |
+
syllable, 0 otherwise.")
|
| 377 |
+
(define (tilt_boundary syl)
  ;; Boundary label of SYL, or "0" when none of its IntonationSyllable
  ;; daughters is named in tilt_boundary_list.  A name starting with
  ;; "a" or "m" is reported with that first letter stripped.  When
  ;; several daughters qualify, the last one wins.
  (let ((label "0"))
    (mapcar
     (lambda (event)
       (let ((evname (item.name event)))
         (if (member_string evname tilt_boundary_list)
             (set! label
                   (cond
                    ((string-matches evname "a.*")
                     (string-after evname "a"))
                    ((string-matches evname "m.*")
                     (string-after evname "m"))
                    (t
                     evname))))))
     (item.relation.daughters syl 'IntonationSyllable))
    label))
|
| 393 |
+
|
| 394 |
+
(def_feature_docstring 'Syllable.lisp_tilt_accented
|
| 395 |
+
"Syllable.lisp_tilt_accented
|
| 396 |
+
Returns 1 if there is a tilt accent related to this syllable, 0
|
| 397 |
+
otherwise.")
|
| 398 |
+
(define (tilt_accented syl)
  ;; "1" when any IntonationSyllable daughter of SYL has a name found
  ;; in tilt_accent_list, otherwise "0".
  (let ((result "0"))
    (mapcar
     (lambda (event)
       (and (member_string (item.name event) tilt_accent_list)
            (set! result "1")))
     (item.relation.daughters syl 'IntonationSyllable))
    result))
|
| 407 |
+
|
| 408 |
+
(def_feature_docstring 'Syllable.lisp_tilt_boundaried
|
| 409 |
+
"Syllable.lisp_tilt_boundaried
|
| 410 |
+
Returns 1 if there is a tilt boundary related to this syllable, 0
|
| 411 |
+
otherwise.")
|
| 412 |
+
(define (tilt_boundaried syl)
  ;; "1" when any IntonationSyllable daughter of SYL has a name found
  ;; in tilt_boundary_list, otherwise "0".
  (let ((result "0"))
    (mapcar
     (lambda (event)
       (and (member_string (item.name event) tilt_boundary_list)
            (set! result "1")))
     (item.relation.daughters syl 'IntonationSyllable))
    result))
|
| 421 |
+
|
| 422 |
+
(def_feature_docstring 'SylStructure.lisp_vowel_height
|
| 423 |
+
"SylStructure.lisp_vowel_height syl
|
| 424 |
+
Classifies vowels as high, low or mid")
|
| 425 |
+
(define (vowel_height syl)
  ;; Value of the df.height feature reached through the path
  ;; daughtern.daughter1.daughter1 from SYL.
  (item.feat syl "daughtern.daughter1.daughter1.df.height"))
|
| 429 |
+
|
| 430 |
+
(def_feature_docstring 'SylStructure.lisp_vowel_frontness
|
| 431 |
+
"SylStructure.vowel_frontness syl
|
| 432 |
+
Classifies vowels as front, back or mid")
|
| 433 |
+
(define (vowel_frontness syl)
  ;; Value of the df.front feature reached through the path
  ;; daughtern.daughter1.daughter1 from SYL.
  (item.feat syl "daughtern.daughter1.daughter1.df.front"))
|
| 437 |
+
|
| 438 |
+
(def_feature_docstring 'SylStructure.lisp_vowel_length
|
| 439 |
+
"SylStructure.vowel_length syl
|
| 440 |
+
Returns the df.length feature of a syllable's vowel")
|
| 441 |
+
(define (vowel_length syl)
  ;; Value of the df.length feature reached through the path
  ;; daughtern.daughter1.daughter1 from SYL.
  (item.feat syl "daughtern.daughter1.daughter1.df.length"))
|
| 445 |
+
|
| 446 |
+
(defvar sonority_vless_obst '("f" "h" "hh" "k" "p" "s" "sh" "t" "th" "ch")
|
| 447 |
+
"sonority_vless_obst
|
| 448 |
+
List of voiceless obstruents for use in sonority scaling (only good w/ radio_speech)"
|
| 449 |
+
)
|
| 450 |
+
(defvar sonority_v_obst '("v" "b" "g" "z" "zh" "d" "dh" "jh")
|
| 451 |
+
"sonority_v_obst
|
| 452 |
+
List of voiced obstruents for use in sonority scaling (only good w/ radio_speech)"
|
| 453 |
+
)
|
| 454 |
+
(defvar sonority_nas '("m" "n" "ng" "nx" "em" "en")
|
| 455 |
+
"sonority_nas
|
| 456 |
+
List of nasals (only good w/ radio_speech)"
|
| 457 |
+
)
|
| 458 |
+
(defvar sonority_liq '("r" "l" "er" "el" "axr")
|
| 459 |
+
"sonority_liq
|
| 460 |
+
List of liquids (only good w/ radio_speech)"
|
| 461 |
+
)
|
| 462 |
+
(defvar sonority_glides '("y" "w")
|
| 463 |
+
"sonority_glides
|
| 464 |
+
List of glides (only good w/ radio_speech)"
|
| 465 |
+
)
|
| 466 |
+
|
| 467 |
+
(def_feature_docstring 'SylStructure.lisp_sonority_scale_coda
|
| 468 |
+
"SylStructure.sonority_scale_coda syl
|
| 469 |
+
Returns value on sonority scale (1 -6, where 6 is most sonorous)
|
| 470 |
+
for the coda of a syllable, based on least sonorant portion.")
|
| 471 |
+
(define (sonority_scale_coda syl)
  ;; Lowest sonority rank among the segments under the last daughter of
  ;; SYL's last daughter.  Ranks: voiceless obstruent 1, voiced
  ;; obstruent 2, nasal 3, liquid 4, glide 5, anything else 6.  With no
  ;; segments the result is 6.
  (let ((lowest 6))
    (mapcar
     (lambda (seg)
       (let ((rank
              (cond
               ((member_string (item.name seg) sonority_vless_obst) 1)
               ((member_string (item.name seg) sonority_v_obst) 2)
               ((member_string (item.name seg) sonority_nas) 3)
               ((member_string (item.name seg) sonority_liq) 4)
               ((member_string (item.name seg) sonority_glides) 5)
               (t 6))))
         ;; Keep the minimum rank seen so far.
         (if (> lowest rank)
             (set! lowest rank))))
     (item.daughters (item.daughtern (item.daughtern syl))))
    lowest))
|
| 499 |
+
|
| 500 |
+
(def_feature_docstring 'SylStructure.lisp_sonority_scale_onset
|
| 501 |
+
"SylStructure.sonority_scale_onset syl
|
| 502 |
+
Returns value on sonority scale (1 -6, where 6 is most sonorous)
|
| 503 |
+
for the onset of a syllable, based on least sonorant portion.")
|
| 504 |
+
(define (sonority_scale_onset syl)
|
| 505 |
+
(if (string-equal "Onset" (item.feat (item.daughter1 syl) "sylval"))
|
| 506 |
+
(let ((segs (item.daughters (item.daughter1 syl)))
|
| 507 |
+
(scale 6))
|
| 508 |
+
(mapcar
|
| 509 |
+
(lambda (seg)
|
| 510 |
+
(cond
|
| 511 |
+
((member_string (item.name seg) sonority_vless_obst)
|
| 512 |
+
(if (> scale 1)
|
| 513 |
+
(set! scale 1)))
|
| 514 |
+
((member_string (item.name seg) sonority_v_obst)
|
| 515 |
+
(if (> scale 2)
|
| 516 |
+
(set! scale 2)))
|
| 517 |
+
((member_string (item.name seg) sonority_nas)
|
| 518 |
+
(if (> scale 3)
|
| 519 |
+
(set! scale 3)))
|
| 520 |
+
((member_string (item.name seg) sonority_liq)
|
| 521 |
+
(if (> scale 4)
|
| 522 |
+
(set! scale 4)))
|
| 523 |
+
((member_string (item.name seg) sonority_glides)
|
| 524 |
+
(if (> scale 5)
|
| 525 |
+
(set! scale 5)))
|
| 526 |
+
(t (set! scale 6))
|
| 527 |
+
)
|
| 528 |
+
)
|
| 529 |
+
segs)
|
| 530 |
+
scale)
|
| 531 |
+
0))
|
| 532 |
+
|
| 533 |
+
(def_feature_docstring 'SylStructure.lisp_num_postvocalic_c
  "SylStructure.lisp_num_postvocalic_c
  Finds the number of postvocalic consonants in a syllable.")
(define (num_postvocalic_c syl)
  "(num_postvocalic_c syl)
  Number of postvocalic consonants in SYL, taken as the number of
  daughters of the last daughter of the last daughter of SYL."
  (length (item.daughters (item.daughtern (item.daughtern syl)))))
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
(def_feature_docstring 'SylStructure.lisp_syl_numphones
  "SylStructure.lisp_syl_numphones syl
  Finds the number of segments in a syllable.")
(define (syl_numphones syl)
  ;; Every segment below SYL is collected first, then counted.
  (let ((all_segs (mt_segs_from_syl syl)))
    (length all_segs)))
|
| 555 |
+
|
| 556 |
+
(def_feature_docstring 'Segment.lisp_pos_in_syl
  "Segment.lisp_pos_in_syl seg
  Finds the position in a syllable of a segment - returns a number
  (1 for the first segment of the syllable).")
(define (pos_in_syl seg)
  "(pos_in_syl seg)
  Position of SEG among the segments of its syllable, counting from 1.
  If SEG is not found among them the result is one more than the number
  of segments."
  (let ((segments (mt_segs_from_syl
                   (item.relation (item.parent_to
                                   (item.relation seg 'SylStructure)
                                   'Syllable)
                                  'SylStructure)))
        (seg_count 1)
        (found nil))
    ;; The original walked an unbound variable (segs) and kept counting
    ;; past the target, so the result could not depend on SEG's position.
    ;; Items are compared with equal? like the other functions in this
    ;; file (mt_numsyls, mt_postype).
    (mapcar
     (lambda (s)
       (cond
        (found nil)
        ((equal? s seg) (set! found t))
        (t (set! seg_count (+ 1.0 seg_count)))))
     segments)
    seg_count))
|
| 573 |
+
|
| 574 |
+
(def_feature_docstring 'Intonation.lisp_peak_anchor_segment_type
  "Intonation.peak_anchor_segment_type ie
  Determines whether the segment anchor for a peak
  is the first consonant of a syl - C0 -, the
  vowel of a syl - V0 -, or segments after that
  - C1->X,V1->X.  If the segment is in a following syl,
  the return value will be preceded by a 1 - e.g. 1V1")
(define (peak_anchor_segment_type ie)
  (let ((peak_anchor_num (peak_segment_anchor ie))
        syl numsegs peak_anchor_type)
    ;; Anchor values of 9 and above are the "no anchor" sentinels used by
    ;; peak_segment_anchor and its helpers; the syllable is only looked up
    ;; for real anchors.
    (if (> 9 peak_anchor_num)
        (begin
          (set! syl (item.relation
                     (item.parent (item.relation ie "IntonationSyllable"))
                     "SylStructure"))
          (set! numsegs (item.feat syl "syl_numphones"))))
    (cond
     ;; Covers 9 itself as well, so numsegs (still nil here) is never
     ;; used in a numeric comparison.
     ((not (> 9 peak_anchor_num))
      (set! peak_anchor_type "none"))
     ;; Negative anchor: the peak lies in the previous syllable.
     ((> 0 peak_anchor_num)
      (set! peak_anchor_type
            (string-append
             "-1" (get_anchor_value (item.prev syl)
                                    (+ peak_anchor_num
                                       (item.feat syl "p.syl_numphones"))))))
     ;; NOTE(review): numsegs rather than peak_anchor_num is passed here,
     ;; as in the original - confirm this is intended.
     ((< peak_anchor_num numsegs)
      (set! peak_anchor_type (get_anchor_value syl numsegs)))
     ;; Anchor beyond this syllable: the peak lies in the next syllable.
     ((> peak_anchor_num numsegs)
      (set! peak_anchor_type
            (string-append
             "1" (get_anchor_value (item.next syl)
                                   (- peak_anchor_num numsegs)))))
     ;; The original default clause had no test, so the assignment was
     ;; never performed and nil was returned.
     (t
      (set! peak_anchor_type "none")))
    peak_anchor_type))
|
| 611 |
+
|
| 612 |
+
(define (get_anchor_value sylSyl seg_num)
  "(get_anchor_value sylSyl seg_num)
  Gets the c/v value of the segment within a syllable.  Returns \"none\"
  when no segment has a pos_in_syl equal to SEG_NUM - 1."
  ;; NOTE(review): segments whose df.type is "consonant" advance vcnt and
  ;; yield a "V" label, while "vowel" segments yield a "C" label; this
  ;; looks inverted with respect to the C0/V0 naming described for
  ;; peak_anchor_segment_type - confirm before relying on it.
  (let ((syl (item.relation sylSyl "SylStructure")) ; bound but not used below
        (label "none")
        (ccnt -1)
        (vcnt -1)
        (vpis 0))
    (mapcar
     (lambda (seg)
       (let ((type (item.feat seg "df.type"))
             (pos (item.feat seg "pos_in_syl")))
         ;; Running counters; vpis remembers the position of the last
         ;; segment that advanced vcnt.
         (if (string-equal "consonant" type)
             (begin
               (set! vcnt (+ 1 vcnt))
               (set! vpis pos))
             (set! ccnt (+ 1 ccnt)))
         ;; Label the segment sitting at the requested position.
         (if (eq (- seg_num 1.0) pos)
             (cond
              ((string-equal "consonant" type)
               (set! label (string-append "V" vcnt)))
              ((string-equal "vowel" type)
               (set! label (string-append "C" (- pos vpis) "V" vcnt)))
              (t nil)))))
     (mt_segs_from_syl sylSyl))
    label))
|
| 640 |
+
|
| 641 |
+
(define (peak_segment_anchor ie)
  "peak_segment_anchor ie
  Determines what segment acts as the anchor for a peak.
  Returns number of segments from start of accented syllable
  to peak, or 10 for phrase_start, phrase_end and pause events."
  (let ((pk_pos (item.feat ie "position"))
        (ev_name (item.name ie)))
    (if (or (string-equal "phrase_start" ev_name)
            (string-equal "phrase_end" ev_name)
            (string-equal "pause" ev_name))
        10
        (find_peak_seg_anchor ie pk_pos))))
|
| 658 |
+
|
| 659 |
+
(define (find_peak_seg_anchor ie pk_pos)
  "find_peak_seg_anchor ie pk_pos
  Part of the workings of peak_segment_anchor.  Looks for the peak in the
  event's own syllable, then the previous one, then the next one; returns
  11 (after printing a message on stderr) when none applies."
  (let ((syl (item.relation
              (item.parent (item.relation ie 'IntonationSyllable))
              'SylStructure)))
    (cond
     ;; segs_to_peak answers 9 when the peak is in none of the segments.
     ((not (eq 9.0 (segs_to_peak syl pk_pos)))
      (segs_to_peak syl pk_pos))
     ;; Peak in the previous syllable: negative offset.
     ((and (item.prev syl)
           (not (eq 9.0 (segs_to_peak (item.prev syl) pk_pos))))
      (* -1
         (- (+ 1 (item.feat syl "p.syl_numphones"))
            (segs_to_peak (item.prev syl) pk_pos))))
     ;; Peak after the start of the next syllable: offset past this one.
     ((and (item.next syl)
           (> pk_pos (item.feat syl "n.start")))
      (+ 1
         (item.feat syl "syl_numphones")
         (segs_to_peak (item.next syl) pk_pos)))
     (t
      (format stderr "No seg anchor could be found\n")
      11))))
|
| 687 |
+
|
| 688 |
+
(define (segs_to_peak sylSyl pk_pos)
  "(segs_to_peak sylSyl pk_pos)
  Determines the number of segments from the start of a syllable
  to an intonation peak: the pos_in_syl feature of the last segment that
  contains PK_POS, or 9 when no segment does."
  (let ((found 9))
    (mapcar
     (lambda (seg)
       (if (eq 1.0 (peak_wi_seg seg pk_pos))
           (set! found (item.feat seg "pos_in_syl"))))
     (mt_segs_from_syl (item.relation sylSyl "SylStructure")))
    found))
|
| 705 |
+
|
| 706 |
+
(define (peak_wi_seg segment pk_pos)
  "peak_wi_seg segment pk_pos
  Finds if a peak occurs w/i a segment: 1.0 when PK_POS lies strictly
  between the segment's start and end features, 0.0 otherwise."
  (let ((seg_from (item.feat segment "start"))
        (seg_to (item.feat segment "end")))
    (if (and (< seg_from pk_pos)
             (< pk_pos seg_to))
        1.0
        0.0)))
|
| 717 |
+
|
| 718 |
+
(defvar tilt_accent_list
  '("a" "arb" "afb" "m" "mfb" "mrb")
  "tilt_accent_list
  Names of the tilt model events that contain an accent.")

(defvar tilt_boundary_list
  '("rb" "arb" "afb" "fb" "mfb" "mrb")
  "tilt_boundary_list
  Names of the tilt model events that contain a boundary.")
|
| 724 |
+
|
| 725 |
+
(def_feature_docstring 'Intonation.lisp_last_tilt_accent
  "Intonation.lisp_last_tilt_accent
  Returns the most recent tilt accent, or \"0\" if there is none.")
(define (last_tilt_accent intev)
  ;; Walk backwards through the Intonation relation until an event named
  ;; in tilt_accent_list is found.
  (let ((prev_ev (item.relation.prev intev 'Intonation)))
    (if prev_ev
        (if (member_string (item.name prev_ev) tilt_accent_list)
            (item.name prev_ev)
            (last_tilt_accent prev_ev))
        "0")))
|
| 736 |
+
|
| 737 |
+
(def_feature_docstring 'Intonation.lisp_next_tilt_accent
  "Intonation.lisp_next_tilt_accent
  Returns the next tilt accent, or \"0\" if there is none.")
(define (next_tilt_accent intev)
  ;; Walk forwards through the Intonation relation until an event named
  ;; in tilt_accent_list is found.
  (let ((next_ev (item.relation.next intev 'Intonation)))
    (if next_ev
        (if (member_string (item.name next_ev) tilt_accent_list)
            (item.name next_ev)
            (next_tilt_accent next_ev))
        "0")))
|
| 747 |
+
|
| 748 |
+
(def_feature_docstring 'Intonation.lisp_last_tilt_boundary
  "Intonation.lisp_last_tilt_boundary
  Returns the most recent tilt boundary, or \"0\" if there is none.")
(define (last_tilt_boundary intev)
  ;; Walk backwards through the Intonation relation until an event named
  ;; in tilt_boundary_list is found.
  (let ((prev_ev (item.relation.prev intev 'Intonation)))
    (if prev_ev
        (if (member_string (item.name prev_ev) tilt_boundary_list)
            (item.name prev_ev)
            (last_tilt_boundary prev_ev))
        "0")))
|
| 758 |
+
|
| 759 |
+
(def_feature_docstring 'Intonation.lisp_next_tilt_boundary
  "Intonation.lisp_next_tilt_boundary
  Returns the next tilt boundary, or \"0\" if there is none.")
(define (next_tilt_boundary intev)
  ;; Walk forwards through the Intonation relation until an event named
  ;; in tilt_boundary_list is found.
  (let ((next_ev (item.relation.next intev 'Intonation)))
    (if next_ev
        (if (member_string (item.name next_ev) tilt_boundary_list)
            (item.name next_ev)
            (next_tilt_boundary next_ev))
        "0")))
|
| 769 |
+
|
| 770 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 771 |
+
;;; Some basic functions for the metrical tree structure
|
| 772 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 773 |
+
;;; Root of SEG in the SylStructure relation, or nil when SEG is nil.
(define (mt_syl_from_seg seg)
  (and seg
       (item.root (item.relation seg 'SylStructure))))
|
| 777 |
+
;;; Root of SYL in the WordStructure relation, or nil when SYL is nil.
(define (mt_word_from_syl syl)
  (and syl
       (item.root (item.relation syl 'WordStructure))))
|
| 781 |
+
;;; Word above SEG: first climb to its syllable, then to that syllable's word.
(define (mt_word_from_seg seg)
  (let ((syl (mt_syl_from_seg seg)))
    (mt_word_from_syl syl)))
|
| 783 |
+
|
| 784 |
+
;;; Flat list of the items below S that belong to the Segment relation.
;;; An item in the Segment relation is returned as a one-element list;
;;; otherwise the results for its SylStructure daughters are appended.
(define (mt_segs_from_syl s)
  (if (null s)
      nil
      (if (member_string 'Segment (item.relations s))
          (list s)
          (apply append
                 (mapcar mt_segs_from_syl
                         (item.relation.daughters s 'SylStructure))))))
|
| 793 |
+
|
| 794 |
+
;;; MetricalValue feature of the syllable above segment S, "0" if none.
(define (sylmtval s)
  (let ((parent_syl (mt_syl_from_seg s)))
    (cond
     (parent_syl (item.feat parent_syl "MetricalValue"))
     (t "0"))))
|
| 799 |
+
|
| 800 |
+
;;; MetricalValue of the MetricalTree parent of the syllable above
;;; segment S, "0" if S has no syllable.
(define (sylpmtval s)
  (let ((parent_syl (mt_syl_from_seg s)))
    (cond
     (parent_syl
      (item.feat parent_syl "R:MetricalTree.parent.MetricalValue"))
     (t "0"))))
|
| 805 |
+
|
| 806 |
+
;;; Number of syllables under word W: steps from the first syllable
;;; daughter to the last one, counting both ends.  Returns 0 when there
;;; is no first syllable or the last one is never reached.
(define (mt_numsyls w)
  (let ((cursor (item.daughter1_to (item.relation w 'WordStructure) 'Syllable))
        (last_syl (item.daughtern_to (item.relation w 'WordStructure) 'Syllable))
        (total 1))
    (while (and cursor (not (equal? cursor last_syl)))
      (set! total (+ 1 total))
      (set! cursor (item.next cursor)))
    (cond
     (cursor total)
     (t 0))))
|
| 816 |
+
|
| 817 |
+
;;; Number of syllables in the word that contains segment S, or 0 when
;;; S has no word.
(define (mt_seg_numsyls s)
  (let ((w (mt_word_from_seg s)))
    (if w
        ;; The original called mt_num_syls, a name with no definition in
        ;; this file; the syllable counter defined here is mt_numsyls.
        (mt_numsyls w)
        0)))
|
| 822 |
+
|
| 823 |
+
|
| 824 |
+
;;; These functions should be sorted out some time
|
| 825 |
+
|
| 826 |
+
;;; Difference between this syl and the next
|
| 827 |
+
;;; number of closing brackets, number of opening brackets
|
| 828 |
+
;;; difference between them
|
| 829 |
+
|
| 830 |
+
(define (mt_close n)
  "(mt_close n)
  The number of constituents this is the end of.  Effectively the
  number of closing brackets after this word."
  (cond
   ((not n) 0)
   ;; A following sibling means N does not end its parent.
   ((item.next n) 0)
   (t (+ 1 (mt_close (item.parent n))))))
|
| 837 |
+
|
| 838 |
+
(define (mt_open n)
  "(mt_open n)
  The number of constituents this is the start of.  Effectively the
  number of opening brackets before this word."
  (cond
   ((not n) 0)
   ;; A preceding sibling means N does not start its parent.
   ((item.prev n) 0)
   (t (+ 1 (mt_open (item.parent n))))))
|
| 845 |
+
|
| 846 |
+
(define (mt_postype syl)
  "(mt_postype syl)
  Returns single, initial, final or middle, depending on whether the
  previous and next syllables belong to the same word as SYL."
  (let ((this_word (mt_word_from_syl syl)))
    (let ((same_as_prev
           (equal? this_word
                   (mt_word_from_syl (item.relation.prev syl 'Syllable))))
          (same_as_next
           (equal? this_word
                   (mt_word_from_syl (item.relation.next syl 'Syllable)))))
      (if same_as_prev
          (if same_as_next 'middle 'final)
          (if same_as_next 'initial 'single)))))
|
| 863 |
+
|
| 864 |
+
(define (mt_accent syl)
  "(mt_accent syl)
  \"a\" if any IntonationSyllable daughter of SYL has a name matching
  ^a.*, 0 if none does."
  (let ((result 0))
    (mapcar
     (lambda (ev)
       (if (string-matches (item.name ev) "^a.*")
           (set! result "a")))
     (item.relation.daughters syl 'IntonationSyllable))
    result))
|
| 874 |
+
|
| 875 |
+
(define (mt_break syl)
  "(mt_break syl)
  Name of the last IntonationSyllable daughter of SYL whose name matches
  .*b$, or 0 if none does."
  (let ((result 0))
    (mapcar
     (lambda (ev)
       (if (string-matches (item.name ev) ".*b$")
           (set! result (item.name ev))))
     (item.relation.daughters syl 'IntonationSyllable))
    result))
|
| 885 |
+
|
| 886 |
+
;;; Counts syllables with MetricalValue "s" from S onwards along the
;;; Syllable relation, stopping at the first syllable whose word has a
;;; lisp_word_mt_break other than "0" (that syllable is not counted).
(define (mt_ssyl_out s)
  (if (null s)
      0
      (if (not (string-equal
                "0" (item.feat s "R:WordStructure.root.lisp_word_mt_break")))
          0
          (let ((later (mt_ssyl_out (item.relation.next s 'Syllable))))
            (if (string-equal "s" (item.feat s "MetricalValue"))
                (+ 1 later)
                later)))))
|
| 896 |
+
|
| 897 |
+
(define (mt_num_s s)
  "(mt_num_s s)
  The number of s MetricalValues from here to a w or top."
  (if (null s)
      0
      (if (string-equal "w" (item.feat s "MetricalValue"))
          0
          (+ 1 (mt_num_s (item.parent s))))))
|
| 906 |
+
|
| 907 |
+
(define (mt_num_w s)
  "(mt_num_w s)
  The number of w MetricalValues from here to a s or top."
  (if (null s)
      0
      (if (string-equal "s" (item.feat s "MetricalValue"))
          0
          (+ 1 (mt_num_w (item.parent s))))))
|
| 916 |
+
|
| 917 |
+
(define (mt_strong s)
  "(mt_strong s)
  \"1\" if no MetricalValue from here up to a word is w, \"0\" otherwise."
  (if (string-equal "w" (item.feat s "MetricalValue"))
      "0"
      (if (member_string 'Word (item.relations s))
          "1"
          (mt_strong (item.relation.parent s 'MetricalTree)))))
|
| 926 |
+
|
| 927 |
+
(define (mt_lssp s)
  "(mt_lssp s)
  1 if last stressed syllable in phrase, 0 otherwise."
  (cond
   ((and (string-equal "s" (item.feat s "MetricalValue"))
         (equal? 0 (mt_ssyl_out s)))
    "1")
   (t "0")))
|
| 934 |
+
|
| 935 |
+
(define (mt_fssw s)
  "(mt_fssw s)
  1 if first stressed syllable in word, 0 otherwise."
  (cond
   ((and (string-equal "s" (item.feat s "MetricalValue"))
         (mt_no_stress_before (item.relation.prev s 'Syllable)))
    "1")
   (t "0")))
|
| 942 |
+
|
| 943 |
+
(define (mt_nfssw s)
  "(mt_nfssw s)
  1 if second or later stressed syllable in word, 0 otherwise."
  (cond
   ((and (string-equal "s" (item.feat s "MetricalValue"))
         (null (mt_no_stress_before (item.relation.prev s 'Syllable))))
    "1")
   (t "0")))
|
| 950 |
+
|
| 951 |
+
;;; t when no syllable with MetricalValue "s" is met walking backwards
;;; from SS before the word changes; nil as soon as one is met.  The word
;;; change is detected by comparing the WordStructure root address of SS
;;; with that of the syllable following it.
(define (mt_no_stress_before ss)
  (if (null ss)
      t
      (let ((own_word (item.feat ss "R:WordStructure.root.addr"))
            (next_word (item.feat (item.next ss) "R:WordStructure.root.addr")))
        (cond
         ((not (string-equal own_word next_word)) t)
         ((string-equal "s" (item.feat ss "MetricalValue")) nil)
         (t (mt_no_stress_before (item.prev ss)))))))
|
| 962 |
+
|
| 963 |
+
;;; Break label for word W: "BB" when its sentence_end feature is "1",
;;; "B" when its phrase_end feature is "1", "0" otherwise.
(define (word_mt_break w)
  (if (string-equal "1" (item.feat w "sentence_end"))
      "BB"
      (if (string-equal "1" (item.feat w "phrase_end"))
          "B"
          "0")))
|
| 971 |
+
|
| 972 |
+
(provide 'tilt)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/tobi.scm
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; A CART tree for predicting ToBI accents (learned from f2b)
|
| 35 |
+
;;; punctuation and minimal pos
|
| 36 |
+
;;;
|
| 37 |
+
|
| 38 |
+
; NONE H* !H* L+H* L* L*+H
|
| 39 |
+
; NONE10265 583 66 40 0 0 10954 [10265/10954] 93.710
|
| 40 |
+
; H* 650 1805 61 57 0 0 2573 [1805/2573] 70.152
|
| 41 |
+
; !H* 317 241 125 42 0 0 725 [125/725] 17.241
|
| 42 |
+
; L+H* 457 486 76 80 0 0 1099 [80/1099] 7.279
|
| 43 |
+
; L* 45 113 14 4 0 0 176 [0/176] 0.000
|
| 44 |
+
; L*+H 6 6 0 1 0 0 13 [0/13] 0.000
|
| 45 |
+
; 11740 3234 342 224 0 0
|
| 46 |
+
;total 15540 correct 12275.000 78.990%
|
| 47 |
+
|
| 48 |
+
(set! f2b_int_accent_cart_tree
|
| 49 |
+
'
|
| 50 |
+
;; these first few lines are hand written to deal with emphasis (from ssml)
|
| 51 |
+
((R:SylStructure.parent.R:Token.parent.EMPH is 1)
|
| 52 |
+
(((NONE 0.0) (H* 1) (!H* 0.0) (L+H* 0) (L* 0) (L*+H 0) H*))
|
| 53 |
+
((n.R:SylStructure.parent.R:Token.parent.EMPH is 1)
|
| 54 |
+
(((NONE 1.0) (H* 0) (!H* 0.0) (L+H* 0) (L* 0) (L*+H 0) NONE))
|
| 55 |
+
((p.R:SylStructure.parent.R:Token.parent.EMPH is 1)
|
| 56 |
+
(((NONE 1.0) (H* 0) (!H* 0.0) (L+H* 0) (L* 0) (L*+H 0) NONE))
|
| 57 |
+
|
| 58 |
+
((ssyl_in is 10)
|
| 59 |
+
(((NONE 0.99726) (H* 0) (!H* 0.00273973) (L+H* 0) (L* 0) (L*+H 0) NONE))
|
| 60 |
+
((R:SylStructure.parent.gpos is to)
|
| 61 |
+
(((NONE 0.995984) (H* 0.00401606) (!H* 0) (L+H* 0) (L* 0) (L*+H 0) NONE))
|
| 62 |
+
((R:SylStructure.parent.gpos is cc)
|
| 63 |
+
(((NONE 0.987768) (H* 0.00611621) (!H* 0) (L+H* 0.00611621) (L* 0) (L*+H 0) NONE))
|
| 64 |
+
((ssyl_out is 10)
|
| 65 |
+
(((NONE 0.927273) (H* 0.0545455) (!H* 0) (L+H* 0.0181818) (L* 0) (L*+H 0) NONE))
|
| 66 |
+
((R:SylStructure.parent.gpos is in)
|
| 67 |
+
(((NONE 0.938322) (H* 0.0353618) (!H* 0.00493421) (L+H* 0.0197368) (L* 0.00164474) (L*+H 0) NONE))
|
| 68 |
+
((R:SylStructure.parent.gpos is wp)
|
| 69 |
+
(((NONE 0.895238) (H* 0.0857143) (!H* 0) (L+H* 0.0190476) (L* 0) (L*+H 0) NONE))
|
| 70 |
+
((R:SylStructure.parent.gpos is aux)
|
| 71 |
+
(((NONE 0.912281) (H* 0.0380117) (!H* 0.00584795) (L+H* 0.0350877) (L* 0.00584795) (L*+H 0.00292398) NONE))
|
| 72 |
+
((R:SylStructure.parent.gpos is det)
|
| 73 |
+
(((NONE 0.898004) (H* 0.0643016) (!H* 0.00332594) (L+H* 0.0332594) (L* 0) (L*+H 0.00110865) NONE))
|
| 74 |
+
((stress is 0)
|
| 75 |
+
(((NONE 0.978415) (H* 0.0144999) (!H* 0.00164772) (L+H* 0.00510793) (L* 0.000329544) (L*+H 0) NONE))
|
| 76 |
+
((R:SylStructure.parent.R:Word.p.gpos is 0)
|
| 77 |
+
(((NONE 0.209877) (H* 0.716049) (!H* 0) (L+H* 0.0617284) (L* 0.0123457) (L*+H 0) H*))
|
| 78 |
+
((R:SylStructure.parent.gpos is md)
|
| 79 |
+
(((NONE 0.693548) (H* 0.177419) (!H* 0.0322581) (L+H* 0.0967742) (L* 0) (L*+H 0) NONE))
|
| 80 |
+
((p.syl_break is 3)
|
| 81 |
+
((syl_break is 1)
|
| 82 |
+
(((NONE 0.4375) (H* 0.416667) (!H* 0) (L+H* 0.135417) (L* 0.0104167) (L*+H 0) NONE))
|
| 83 |
+
(((NONE 0.171171) (H* 0.666667) (!H* 0) (L+H* 0.144144) (L* 0.018018) (L*+H 0) H*)))
|
| 84 |
+
((pp.syl_break is 4)
|
| 85 |
+
((R:SylStructure.parent.R:Word.pp.gpos is in)
|
| 86 |
+
(((NONE 0.0980392) (H* 0.803922) (!H* 0) (L+H* 0.0784314) (L* 0.0196078) (L*+H 0) H*))
|
| 87 |
+
((syl_out is 0)
|
| 88 |
+
(((NONE 0.0185185) (H* 0.796296) (!H* 0.037037) (L+H* 0.0925926) (L* 0.0555556) (L*+H 0) H*))
|
| 89 |
+
((R:SylStructure.parent.R:Word.n.gpos is in)
|
| 90 |
+
(((NONE 0.132353) (H* 0.676471) (!H* 0) (L+H* 0.161765) (L* 0.0294118) (L*+H 0) H*))
|
| 91 |
+
((syl_break is 0)
|
| 92 |
+
(((NONE 0.125) (H* 0.633929) (!H* 0.0133929) (L+H* 0.183036) (L* 0.0401786) (L*+H 0.00446429) H*))
|
| 93 |
+
((n.stress is 0)
|
| 94 |
+
(((NONE 0.364865) (H* 0.567568) (!H* 0) (L+H* 0.0540541) (L* 0.0135135) (L*+H 0) H*))
|
| 95 |
+
((p.syl_break is 0)
|
| 96 |
+
(((NONE 0.612903) (H* 0.290323) (!H* 0) (L+H* 0.0967742) (L* 0) (L*+H 0) NONE))
|
| 97 |
+
(((NONE 0.32) (H* 0.44) (!H* 0.02) (L+H* 0.22) (L* 0) (L*+H 0) H*))))))))
|
| 98 |
+
((ssyl_in is 0)
|
| 99 |
+
(((NONE 0.167769) (H* 0.628926) (!H* 0.0214876) (L+H* 0.142975) (L* 0.0363636) (L*+H 0.00247934) H*))
|
| 100 |
+
((ssyl_out is 4)
|
| 101 |
+
(((NONE 0.490385) (H* 0.240385) (!H* 0.0961538) (L+H* 0.163462) (L* 0.00961538) (L*+H 0) NONE))
|
| 102 |
+
((pp.syl_break is 3)
|
| 103 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 104 |
+
(((NONE 0.346154) (H* 0.346154) (!H* 0.0769231) (L+H* 0.192308) (L* 0.0384615) (L*+H 0) NONE))
|
| 105 |
+
(((NONE 0.160714) (H* 0.571429) (!H* 0.0178571) (L+H* 0.178571) (L* 0.0714286) (L*+H 0) H*)))
|
| 106 |
+
((syl_in is 2)
|
| 107 |
+
((n.stress is 0)
|
| 108 |
+
((R:SylStructure.parent.R:Word.p.gpos is in)
|
| 109 |
+
(((NONE 0.218182) (H* 0.618182) (!H* 0.0363636) (L+H* 0.0909091) (L* 0.0181818) (L*+H 0.0181818) H*))
|
| 110 |
+
((syl_out is 2)
|
| 111 |
+
(((NONE 0.0961538) (H* 0.634615) (!H* 0.0961538) (L+H* 0.134615) (L* 0.0384615) (L*+H 0) H*))
|
| 112 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 113 |
+
((syl_out is 4)
|
| 114 |
+
(((NONE 0.56) (H* 0.12) (!H* 0.08) (L+H* 0.24) (L* 0) (L*+H 0) NONE))
|
| 115 |
+
(((NONE 0.262821) (H* 0.378205) (!H* 0.121795) (L+H* 0.192308) (L* 0.0448718) (L*+H 0) H*)))
|
| 116 |
+
(((NONE 0.161905) (H* 0.590476) (!H* 0.0285714) (L+H* 0.171429) (L* 0.047619) (L*+H 0) H*)))))
|
| 117 |
+
((n.syl_break is 0)
|
| 118 |
+
(((NONE 0.551724) (H* 0.293103) (!H* 0) (L+H* 0.155172) (L* 0) (L*+H 0) NONE))
|
| 119 |
+
(((NONE 0.408451) (H* 0.422535) (!H* 0.056338) (L+H* 0.112676) (L* 0) (L*+H 0) H*))))
|
| 120 |
+
((R:SylStructure.parent.R:Word.n.gpos is 0)
|
| 121 |
+
((syl_break is 0)
|
| 122 |
+
(((NONE 0.105263) (H* 0.315789) (!H* 0.157895) (L+H* 0.421053) (L* 0) (L*+H 0) L+H*))
|
| 123 |
+
(((NONE 0.641509) (H* 0.132075) (!H* 0.132075) (L+H* 0.0943396) (L* 0) (L*+H 0) NONE)))
|
| 124 |
+
((syl_break is 1)
|
| 125 |
+
((ssyl_in is 3)
|
| 126 |
+
(((NONE 0.638889) (H* 0.152778) (!H* 0.125) (L+H* 0.0833333) (L* 0) (L*+H 0) NONE))
|
| 127 |
+
((p.syl_break is 0)
|
| 128 |
+
(((NONE 0.551402) (H* 0.186916) (!H* 0.158879) (L+H* 0.0841122) (L* 0.0186916) (L*+H 0) NONE))
|
| 129 |
+
((n.stress is 0)
|
| 130 |
+
((pp.syl_break is 0)
|
| 131 |
+
(((NONE 0.413043) (H* 0.184783) (!H* 0.152174) (L+H* 0.23913) (L* 0.0108696) (L*+H 0) NONE))
|
| 132 |
+
(((NONE 0.2125) (H* 0.3375) (!H* 0.1875) (L+H* 0.2125) (L* 0.05) (L*+H 0) H*)))
|
| 133 |
+
(((NONE 0.449153) (H* 0.245763) (!H* 0.101695) (L+H* 0.20339) (L* 0) (L*+H 0) NONE)))))
|
| 134 |
+
((syl_out is 4)
|
| 135 |
+
((nn.syl_break is 0)
|
| 136 |
+
((pp.syl_break is 0)
|
| 137 |
+
(((NONE 0.45614) (H* 0.210526) (!H* 0.192982) (L+H* 0.140351) (L* 0) (L*+H 0) NONE))
|
| 138 |
+
(((NONE 0.288462) (H* 0.25) (!H* 0.0961538) (L+H* 0.346154) (L* 0) (L*+H 0.0192308) L+H*)))
|
| 139 |
+
(((NONE 0.163934) (H* 0.459016) (!H* 0.131148) (L+H* 0.245902) (L* 0) (L*+H 0) H*)))
|
| 140 |
+
((syl_out is 5)
|
| 141 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 142 |
+
(((NONE 0.372881) (H* 0.20339) (!H* 0.169492) (L+H* 0.220339) (L* 0.0338983) (L*+H 0) NONE))
|
| 143 |
+
(((NONE 0.0961538) (H* 0.673077) (!H* 0.115385) (L+H* 0.0961538) (L* 0.0192308) (L*+H 0) H*)))
|
| 144 |
+
((R:SylStructure.parent.R:Word.pp.gpos is in)
|
| 145 |
+
((syl_in is 4)
|
| 146 |
+
(((NONE 0.352113) (H* 0.422535) (!H* 0.15493) (L+H* 0.0704225) (L* 0) (L*+H 0) H*))
|
| 147 |
+
((syl_in is 3)
|
| 148 |
+
(((NONE 0.290323) (H* 0.467742) (!H* 0.0806452) (L+H* 0.145161) (L* 0.016129) (L*+H 0) H*))
|
| 149 |
+
((pp.syl_break is 0)
|
| 150 |
+
(((NONE 0.465517) (H* 0.293103) (!H* 0.172414) (L+H* 0.0689655) (L* 0) (L*+H 0) NONE))
|
| 151 |
+
((R:SylStructure.parent.R:Word.p.gpos is content)
|
| 152 |
+
(((NONE 0.18) (H* 0.36) (!H* 0.28) (L+H* 0.14) (L* 0.04) (L*+H 0) H*))
|
| 153 |
+
(((NONE 0.0877193) (H* 0.22807) (!H* 0.368421) (L+H* 0.298246) (L* 0.0175439) (L*+H 0) !H*))))))
|
| 154 |
+
((ssyl_out is 2)
|
| 155 |
+
((p.syl_break is 0)
|
| 156 |
+
(((NONE 0.634921) (H* 0.174603) (!H* 0.0793651) (L+H* 0.111111) (L* 0) (L*+H 0) NONE))
|
| 157 |
+
((pp.syl_break is 0)
|
| 158 |
+
(((NONE 0.388889) (H* 0.148148) (!H* 0.148148) (L+H* 0.259259) (L* 0.0185185) (L*+H 0.037037) NONE))
|
| 159 |
+
(((NONE 0.294118) (H* 0.137255) (!H* 0.215686) (L+H* 0.333333) (L* 0.0196078) (L*+H 0) L+H*))))
|
| 160 |
+
((R:SylStructure.parent.R:Word.pp.gpos is to)
|
| 161 |
+
(((NONE 0.0877193) (H* 0.350877) (!H* 0.210526) (L+H* 0.315789) (L* 0.0350877) (L*+H 0) H*))
|
| 162 |
+
((syl_break is 3)
|
| 163 |
+
((pp.syl_break is 0)
|
| 164 |
+
(((NONE 0.478261) (H* 0.141304) (!H* 0.195652) (L+H* 0.184783) (L* 0) (L*+H 0) NONE))
|
| 165 |
+
(((NONE 0.217822) (H* 0.366337) (!H* 0.257426) (L+H* 0.128713) (L* 0.029703) (L*+H 0) H*)))
|
| 166 |
+
((syl_in is 7)
|
| 167 |
+
((n.stress is 0)
|
| 168 |
+
((R:SylStructure.parent.R:Word.n.gpos is content)
|
| 169 |
+
(((NONE 0.117647) (H* 0.220588) (!H* 0.441176) (L+H* 0.176471) (L* 0.0441176) (L*+H 0) !H*))
|
| 170 |
+
(((NONE 0.415385) (H* 0.0461538) (!H* 0.2) (L+H* 0.246154) (L* 0.0923077) (L*+H 0) NONE)))
|
| 171 |
+
(((NONE 0.716981) (H* 0.113208) (!H* 0.0943396) (L+H* 0.0754717) (L* 0) (L*+H 0) NONE)))
|
| 172 |
+
((R:SylStructure.parent.R:Word.n.gpos is cc)
|
| 173 |
+
(((NONE 0.292308) (H* 0.184615) (!H* 0.276923) (L+H* 0.246154) (L* 0) (L*+H 0) NONE))
|
| 174 |
+
((nn.syl_break is 3)
|
| 175 |
+
(((NONE 0.2) (H* 0.333333) (!H* 0.283333) (L+H* 0.15) (L* 0.0333333) (L*+H 0) H*))
|
| 176 |
+
((ssyl_in is 4)
|
| 177 |
+
(((NONE 0.383838) (H* 0.151515) (!H* 0.212121) (L+H* 0.20202) (L* 0.050505) (L*+H 0) NONE))
|
| 178 |
+
((p.syl_break is 0)
|
| 179 |
+
((n.syl_break is 1)
|
| 180 |
+
(((NONE 0.526316) (H* 0.210526) (!H* 0.0921053) (L+H* 0.171053) (L* 0) (L*+H 0) NONE))
|
| 181 |
+
((ssyl_in is 3)
|
| 182 |
+
(((NONE 0.509804) (H* 0.0980392) (!H* 0.215686) (L+H* 0.156863) (L* 0.0196078) (L*+H 0) NONE))
|
| 183 |
+
((pp.syl_break is 0)
|
| 184 |
+
(((NONE 0.506667) (H* 0.173333) (!H* 0.106667) (L+H* 0.2) (L* 0.0133333) (L*+H 0) NONE))
|
| 185 |
+
((ssyl_in is 1)
|
| 186 |
+
(((NONE 0.1) (H* 0.4) (!H* 0.266667) (L+H* 0.188889) (L* 0.0444444) (L*+H 0) H*))
|
| 187 |
+
(((NONE 0.326316) (H* 0.210526) (!H* 0.221053) (L+H* 0.189474) (L* 0.0526316) (L*+H 0) NONE))))))
|
| 188 |
+
((R:SylStructure.parent.R:Word.p.gpos is in)
|
| 189 |
+
(((NONE 0.0625) (H* 0.296875) (!H* 0.265625) (L+H* 0.328125) (L* 0.046875) (L*+H 0) L+H*))
|
| 190 |
+
((syl_in is 6)
|
| 191 |
+
(((NONE 0.271739) (H* 0.152174) (!H* 0.358696) (L+H* 0.184783) (L* 0.0326087) (L*+H 0) !H*))
|
| 192 |
+
((syl_out is 2)
|
| 193 |
+
(((NONE 0.111111) (H* 0.361111) (!H* 0.319444) (L+H* 0.138889) (L* 0.0555556) (L*+H 0.0138889) H*))
|
| 194 |
+
((syl_in is 4)
|
| 195 |
+
(((NONE 0.224) (H* 0.152) (!H* 0.328) (L+H* 0.24) (L* 0.056) (L*+H 0) !H*))
|
| 196 |
+
((n.stress is 0)
|
| 197 |
+
((syl_in is 3)
|
| 198 |
+
(((NONE 0.0833333) (H* 0.333333) (!H* 0.233333) (L+H* 0.216667) (L* 0.133333) (L*+H 0) H*))
|
| 199 |
+
(((NONE 0.283465) (H* 0.188976) (!H* 0.23622) (L+H* 0.204724) (L* 0.0708661) (L*+H 0.015748) NONE)))
|
| 200 |
+
(((NONE 0.305263) (H* 0.284211) (!H* 0.210526) (L+H* 0.178947) (L* 0.0210526) (L*+H 0) NONE))))))))))))))))))))))))))))))))))))))))
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
; NON L-L L-H H-L
|
| 204 |
+
; NONE13017 0 0 0 0 0 13017 [13017/13017] 100.000
|
| 205 |
+
; H- 339 81 0 1 1 0 422 [81/422] 19.194
|
| 206 |
+
; L- 223 52 0 5 0 0 280 [0/280] 0.000
|
| 207 |
+
; L-L% 17 0 0 1057 96 0 1170 [1057/1170] 90.342
|
| 208 |
+
; L-H% 16 0 0 457 139 0 612 [139/612] 22.712
|
| 209 |
+
; H-L% 5 0 0 30 4 0 39 [0/39] 0.000
|
| 210 |
+
; 13617 133 0 1550 240 0
|
| 211 |
+
;total 15540 correct 14294.000 91.982%
|
| 212 |
+
(set! f2b_int_tone_cart_tree
|
| 213 |
+
'((lisp_syl_yn_question is 1)
|
| 214 |
+
(((H-H% 1.0) H-H%))
|
| 215 |
+
((R:SylStructure.parent.gpos is cc)
|
| 216 |
+
(((NONE 0.996942) (H- 0.0030581) (L- 0) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 217 |
+
((ssyl_in is 10)
|
| 218 |
+
(((NONE 0.989041) (H- 0.00273973) (L- 0) (L-L% 0.00273973) (L-H% 0.00547945) (H-L% 0) NONE))
|
| 219 |
+
((R:SylStructure.parent.gpos is md)
|
| 220 |
+
(((NONE 0.986014) (H- 0) (L- 0) (L-L% 0.00699301) (L-H% 0.00699301) (H-L% 0) NONE))
|
| 221 |
+
((p.old_syl_break is 4)
|
| 222 |
+
(((NONE 0.99462) (H- 0.00239091) (L- 0.00119546) (L-L% 0) (L-H% 0.00119546) (H-L% 0.000597729) NONE))
|
| 223 |
+
((R:SylStructure.parent.gpos is det)
|
| 224 |
+
(((NONE 0.984635) (H- 0.00512164) (L- 0.00384123) (L-L% 0.00384123) (L-H% 0.00256082) (H-L% 0) NONE))
|
| 225 |
+
((n.old_syl_break is 3)
|
| 226 |
+
(((NONE 0.981848) (H- 0.00495049) (L- 0.00330033) (L-L% 0.00660066) (L-H% 0.00330033) (H-L% 0) NONE))
|
| 227 |
+
((n.old_syl_break is 4)
|
| 228 |
+
(((NONE 0.986982) (H- 0.000591716) (L- 0.0100592) (L-L% 0.00118343) (L-H% 0.00118343) (H-L% 0) NONE))
|
| 229 |
+
((R:SylStructure.parent.gpos is in)
|
| 230 |
+
(((NONE 0.977865) (H- 0.00390625) (L- 0.00390625) (L-L% 0.0078125) (L-H% 0.00651042) (H-L% 0) NONE))
|
| 231 |
+
((old_syl_break is 4)
|
| 232 |
+
((R:SylStructure.parent.R:Word.n.gpos is 0)
|
| 233 |
+
(((NONE 0) (H- 0.00892857) (L- 0) (L-L% 0.982143) (L-H% 0.00892857) (H-L% 0) L-L%))
|
| 234 |
+
((R:SylStructure.parent.R:Word.p.gpos is aux)
|
| 235 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.761905) (L-H% 0.238095) (H-L% 0) L-L%))
|
| 236 |
+
((R:SylStructure.parent.R:Word.n.gpos is det)
|
| 237 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.652542) (L-H% 0.347458) (H-L% 0) L-L%))
|
| 238 |
+
((ssyl_in is 4)
|
| 239 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.682243) (L-H% 0.313084) (H-L% 0.0046729) L-L%))
|
| 240 |
+
((syl_in is 6)
|
| 241 |
+
(((NONE 0) (H- 0) (L- 0.00649351) (L-L% 0.688312) (L-H% 0.298701) (H-L% 0.00649351) L-L%))
|
| 242 |
+
((R:SylStructure.parent.R:Word.n.gpos is aux)
|
| 243 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.464286) (L-H% 0.535714) (H-L% 0) L-H%))
|
| 244 |
+
((syl_in is 5)
|
| 245 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.666667) (L-H% 0.322034) (H-L% 0.0112994) L-L%))
|
| 246 |
+
((sub_phrases is 2)
|
| 247 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.696429) (L-H% 0.267857) (H-L% 0.0357143) L-L%))
|
| 248 |
+
((R:SylStructure.parent.R:Word.p.gpos is det)
|
| 249 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.628866) (L-H% 0.350515) (H-L% 0.0206186) L-L%))
|
| 250 |
+
((sub_phrases is 0)
|
| 251 |
+
((R:SylStructure.parent.R:Word.n.gpos is in)
|
| 252 |
+
((n.old_syl_break is 0)
|
| 253 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.68254) (L-H% 0.31746) (H-L% 0) L-L%))
|
| 254 |
+
(((NONE 0) (H- 0.0147059) (L- 0) (L-L% 0.338235) (L-H% 0.632353) (H-L% 0.0147059) L-H%)))
|
| 255 |
+
((n.stress is 0)
|
| 256 |
+
(((NONE 0) (H- 0) (L- 0.0108303) (L-L% 0.599278) (L-H% 0.32491) (H-L% 0.064982) L-L%))
|
| 257 |
+
(((NONE 0) (H- 0) (L- 0) (L-L% 0.386364) (L-H% 0.579545) (H-L% 0.0340909) L-H%))))
|
| 258 |
+
(((NONE 0) (H- 0) (L- 0.00456621) (L-L% 0.652968) (L-H% 0.324201) (H-L% 0.0182648) L-L%))))))))))))
|
| 259 |
+
((R:SylStructure.parent.gpos is pps)
|
| 260 |
+
(((NONE 0.988764) (H- 0.011236) (L- 0) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 261 |
+
((syl_in is 0)
|
| 262 |
+
(((NONE 0.984848) (H- 0.0126263) (L- 0.00252525) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 263 |
+
((R:SylStructure.parent.gpos is content)
|
| 264 |
+
((R:SylStructure.parent.R:Word.nn.gpos is 0)
|
| 265 |
+
(((NONE 0.967914) (H- 0.0106952) (L- 0.0213904) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 266 |
+
((pp.old_syl_break is 4)
|
| 267 |
+
(((NONE 0.972315) (H- 0.0232558) (L- 0.00442968) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 268 |
+
((syl_in is 1)
|
| 269 |
+
(((NONE 0.951163) (H- 0.0372093) (L- 0.0116279) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 270 |
+
((nn.old_syl_break is 4)
|
| 271 |
+
(((NONE 0.956244) (H- 0.0127621) (L- 0.0291705) (L-L% 0) (L-H% 0) (H-L% 0.00182315) NONE))
|
| 272 |
+
((R:SylStructure.parent.R:Word.nn.gpos is in)
|
| 273 |
+
(((NONE 0.941919) (H- 0.0378788) (L- 0.020202) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 274 |
+
((R:SylStructure.parent.R:Word.p.gpos is cc)
|
| 275 |
+
(((NONE 0.919643) (H- 0.0714286) (L- 0.00892857) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 276 |
+
((nn.old_syl_break is 3)
|
| 277 |
+
(((NONE 0.927273) (H- 0.0472727) (L- 0.0254545) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 278 |
+
((R:SylStructure.parent.R:Word.nn.gpos is cc)
|
| 279 |
+
(((NONE 0.921569) (H- 0.0588235) (L- 0.0196078) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 280 |
+
((ssyl_in is 0)
|
| 281 |
+
(((NONE 0.911591) (H- 0.0825147) (L- 0.00589391) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 282 |
+
((R:SylStructure.parent.R:Word.nn.gpos is to)
|
| 283 |
+
(((NONE 0.912281) (H- 0.0350877) (L- 0.0526316) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 284 |
+
((R:SylStructure.parent.R:Word.pp.gpos is to)
|
| 285 |
+
(((NONE 0.894737) (H- 0.0526316) (L- 0.0526316) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 286 |
+
((R:SylStructure.parent.R:Word.p.gpos is in)
|
| 287 |
+
(((NONE 0.888554) (H- 0.0662651) (L- 0.0451807) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 288 |
+
((R:SylStructure.parent.R:Word.pp.gpos is in)
|
| 289 |
+
(((NONE 0.875817) (H- 0.0718954) (L- 0.0522876) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 290 |
+
((syl_in is 2)
|
| 291 |
+
(((NONE 0.869942) (H- 0.0867052) (L- 0.0433526) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 292 |
+
((R:SylStructure.parent.R:Word.nn.gpos is aux)
|
| 293 |
+
(((NONE 0.854839) (H- 0.0967742) (L- 0.0483871) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 294 |
+
((sub_phrases is 1)
|
| 295 |
+
(((NONE 0.836538) (H- 0.0721154) (L- 0.0913462) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 296 |
+
((R:SylStructure.parent.R:Word.pp.gpos is det)
|
| 297 |
+
(((NONE 0.832402) (H- 0.0949721) (L- 0.0726257) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 298 |
+
((ssyl_in is 4)
|
| 299 |
+
(((NONE 0.793103) (H- 0.103448) (L- 0.103448) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 300 |
+
((n.old_syl_break is 0)
|
| 301 |
+
(((NONE 0.850816) (H- 0.0839161) (L- 0.0652681) (L-L% 0) (L-H% 0) (H-L% 0) NONE))
|
| 302 |
+
((R:SylStructure.parent.R:Word.n.gpos is content)
|
| 303 |
+
(((NONE 0.889447) (H- 0.0753769) (L- 0.0251256) (L-L% 0) (L-H% 0) (H-L% 0.0100503) NONE))
|
| 304 |
+
((old_syl_break is 3)
|
| 305 |
+
(((NONE 0) (H- 0.609023) (L- 0.390977) (L-L% 0) (L-H% 0) (H-L% 0) H-))
|
| 306 |
+
(((NONE 1) (H- 0) (L- 0) (L-L% 0) (L-H% 0) (H-L% 0) NONE)))))))))))))))))))))))
|
| 307 |
+
(((NONE 0.978947) (H- 0.0131579) (L- 0.00789474) (L-L% 0) (L-H% 0) (H-L% 0) NONE)))))))))))))))
|
| 308 |
+
|
| 309 |
+
)
|
| 310 |
+
|
| 311 |
+
(defvar tobi_support_yn_questions t
  "tobi_support_yn_questions
  If non-nil, syl_yn_question may mark the final syllable of an utterance
  judged to be a yes/no question, namely one whose punctuation contains
  a ? and which does not start with a wh-word.  The tone tree then gives
  that syllable a crude final rise (H-H%).")
|
| 316 |
+
|
| 317 |
+
(define (first_word syl)
  "(first_word SYL)
  Takes the SylStructure parent of SYL (its word) and returns the result
  of item.relation.first on that item in the Word relation."
  (item.relation.first
   (item.relation.parent syl 'SylStructure)
   'Word))
|
| 320 |
+
|
| 321 |
+
(define (syl_yn_question syl)
  "(syl_yn_question SYL)
  Returns the string 1 if SYL is taken to be the last syllable of a
  yes/no question and the string 0 otherwise.  All of the following must
  hold: tobi_support_yn_questions is set, the syl_break of SYL is 4 or 3,
  the first word (see first_word) is not a wh-word, and the punctuation
  of the token owning SYL contains a question mark.  This is a crude
  heuristic; it depends on how much you want rising intonation."
  (cond
   ;; feature switched off
   ((not tobi_support_yn_questions) "0")
   ;; not at a break of level 4 or 3
   ((not (member_string (item.feat syl "syl_break") '("4" "3"))) "0")
   ;; starts with a wh-word, so not treated as a yes/no question
   ((member_string
     (downcase (item.name (first_word syl)))
     '("how" "why" "which" "who" "what" "where" "when"))
    "0")
   ;; punctuation on the owning token contains a question mark
   ((string-matches
     (item.feat syl "R:SylStructure.parent.R:Token.parent.punc")
     ".*\\?.*")
    "1")
   (t "0")))
|
| 337 |
+
|
| 338 |
+
(provide 'tobi)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/tobi_rules.scm
ADDED
|
@@ -0,0 +1,1002 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;; Authors: Robert A. J. Clark and Alan W Black
|
| 34 |
+
;;; Modifications and Checking:
|
| 35 |
+
;;; Gregor Moehler (moehler@ims.uni-stuttgart.de)
|
| 36 |
+
;;; Matthew Stone (mdstone@cs.rutgers.edu)
|
| 37 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 38 |
+
;;;
|
| 39 |
+
;;; Generate F0 points from tobi labels using rules given in:
|
| 40 |
+
;;; Jilka, Moehler & Dogil (forthcoming in Speech Communication)
|
| 41 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 42 |
+
;;;
|
| 43 |
+
;;; *** Converted to new Relation architecture -- but not checked yet -- awb
|
| 44 |
+
;;; -> crude (beta) checking: gm in Dec. 98
|
| 45 |
+
;;;
|
| 46 |
+
;;; -> fixed TAKEOVER bug that used time value
|
| 47 |
+
;;; as pitch target (!) - MDS 1/02
|
| 48 |
+
;;; -> hacked around bunches of target overlap problems - MDS 1/02
|
| 49 |
+
;;; -> added primitive pitch range controls
|
| 50 |
+
;;;
|
| 51 |
+
;;; Known problems and bugs:
|
| 52 |
+
;;; Can't currently use voicing intervals which cross syllable boundaries,
|
| 53 |
+
;;; so pre/post-nuclear tones are currently placed 0.2s before/after the
|
| 54 |
+
;;; nuclear tone even if no voicing occurs. Failing this they default to a
|
| 55 |
+
;;; percentage of the voicing for that syllable.
|
| 56 |
+
;;;
|
| 57 |
+
;;; Don't know about target points ahead of the current syllable.
|
| 58 |
+
;;; (As you need to know what comes before them to calculate them)
|
| 59 |
+
;;; So: post accent tones are placed 0.2 ahead if following syllable exists
|
| 60 |
+
;;; ends before 0.2 from starred target and is not accented
|
| 61 |
+
;;; The H-target of the H+!H* is 0.2 sec instead of 0.15 sec before
|
| 62 |
+
;;; starred tone.
|
| 63 |
+
;;;
|
| 64 |
+
;;; Multi-utterance input has not been tested.
|
| 65 |
+
;;;
|
| 66 |
+
;;; !H- does not generate any targets
|
| 67 |
+
;;;
|
| 68 |
+
;;; Unfortunately some other modules may decide to put pauses in the
|
| 69 |
+
;;; middle of a phrase
|
| 70 |
+
;;;
|
| 71 |
+
;;; valleys are not tested yet
|
| 72 |
+
;;;
|
| 73 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 74 |
+
;;;
|
| 75 |
+
;;; To use this in a voice
|
| 76 |
+
;;; (require 'tobi_rules)
|
| 77 |
+
;;; And in the voice call
|
| 78 |
+
;;; (setup_tobi_f0_method)
|
| 79 |
+
;;; Set the following for your speaker's F0 range
|
| 80 |
+
;;; (Parameter.set 'Default_Topline 146)
|
| 81 |
+
;;; (Parameter.set 'Default_Start_Baseline 61)
|
| 82 |
+
;;; (Parameter.set 'Valley_Dip 75)
|
| 83 |
+
|
| 84 |
+
;; level of debug printout: 0 is silent; >= 1 prints one summary line per
;; labelled syllable, >= 2 also prints the generated targets, >= 3 also
;; traces entry to the ttt_* functions and the per-phrase pitch range
|
| 85 |
+
(set! printdebug 0)
|
| 86 |
+
|
| 87 |
+
(define (setup_tobi_f0_method)
  "(setup_tobi_f0_method)
  Set up the parameters of the current voice so that F0 targets are
  produced by rule from ToBI labels.  Selects Intonation_Tree and
  Int_Targets_General, makes the accent and tone CART trees always
  answer NONE, installs tobi_f0_targets as the target function, and
  replaces the phrase break tree with tobi_label_phrase_cart_tree.
  Returns t."
  (Parameter.set 'Int_Method Intonation_Tree)
  (Parameter.set 'Int_Target_Method Int_Targets_General)

  ;; both prediction trees are replaced by the tree that always says NONE
  (set! int_accent_cart_tree no_int_cart_tree)
  (set! int_tone_cart_tree no_int_cart_tree)

  ;; a one-entry parameter list: the function that returns the f0 targets
  (set! int_general_params
        (cons (list 'targ_func tobi_f0_targets) nil))

  ;; phrase breaks are read off the boundary tone labels
  (Parameter.set 'Phrase_Method 'cart_tree)
  (set! phrase_cart_tree tobi_label_phrase_cart_tree)
  t)
|
| 102 |
+
|
| 103 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 104 |
+
;;;;;;
|
| 105 |
+
;;;;;; Define and set the new f0 rules
|
| 106 |
+
;;;;;;
|
| 107 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 108 |
+
|
| 109 |
+
;;; Set global parameters
|
| 110 |
+
;;; You may want to reset these for different speakers
|
| 111 |
+
|
| 112 |
+
(Parameter.set 'Default_Topline 146) ; speaker's default F0 topline; tobi_f0_targets scales it per phrase
|
| 113 |
+
(Parameter.set 'Default_Start_Baseline 61) ; speaker's default F0 baseline; tobi_f0_targets scales it per phrase
|
| 114 |
+
(Parameter.set 'Current_Topline (Parameter.get 'Default_Topline)) ; start from the default until a phrase-initial syllable resets it
|
| 115 |
+
(Parameter.set 'Current_Start_Baseline (Parameter.get 'Default_Start_Baseline)) ; likewise seeded from the default
|
| 116 |
+
(Parameter.set 'Current_End_Baseline (Parameter.get 'Current_Start_Baseline)) ; end baseline initially equals the start baseline
|
| 117 |
+
(Parameter.set 'Downstep_Factor 0.70) ; NOTE(review): not read in this part of the file; presumably scales downstepped tones -- confirm at use site
|
| 118 |
+
(Parameter.set 'Valley_Dip 75) ; handed to ttt_accent_pitch for the V1/V2/V3 valley targets
|
| 119 |
+
;;; Target function installed by setup_tobi_f0_method: produces the
;;; target points for one syllable, resetting the pitch range first when
;;; the syllable is phrase initial.

(define (tobi_f0_targets utt syl)
  "(tobi_f0_targets UTT SYL)
  Returns the list of F0 targets for syllable SYL: the targets of its
  start tone, followed by those of its accent, followed by those of its
  end tone.  No targets are produced when the feature
  R:SylStructure.parent.name of SYL is 0.  UTT is not used."
  ;; debug level 1: one summary line for each syllable carrying a label
  (if (and (>= printdebug 1)
           (not (equal? 0 (item.feat syl "R:Intonation.daughter1.name"))))
      (format t "### %l (%.2f %.2f) %l ptarg: %l ###\n"
              (item.name syl)
              (item.feat syl "syllable_start")
              (item.feat syl "syllable_end")
              (item.feat syl "R:Intonation.daughter1.name")
              (ttt_last_target_time syl)))

  ;; Carry on only when a Word is related to this syllable.  There always
  ;; should be one, but a bug elsewhere could leave it missing.
  (if (not (equal? 0 (item.feat syl "R:SylStructure.parent.name")))
      ;; This assumes at most one accent and one endtone on a syllable,
      ;; although there can be one of each.
      (let ((voicing (ttt_get_voice_times syl))                ; this syllable
            (pvoicing (ttt_get_voice_times                     ; previous one
                       (item.relation.prev syl 'Syllable)))
            (nvoicing (ttt_get_voice_times                     ; next one
                       (item.relation.next syl 'Syllable))))

        ;; On the first syllable of a phrase (syl_in is 0) record the
        ;; phrase start/end and recompute the pitch range: the topline and
        ;; baseline trees each give a percentage of the speaker default.
        ;; This happens on big and little breaks alike (following Moehler
        ;; and Mayer, SSW 2001).  GM: maybe something better than the
        ;; syl_in test is needed here?
        (if (eq 0 (item.feat syl 'syl_in))
            (begin
              (Parameter.set
               'Phrase_Start
               (item.feat syl 'R:SylStructure.parent.R:Phrase.last.word_start))
              (Parameter.set
               'Phrase_End
               (item.feat syl 'R:SylStructure.parent.R:Phrase.last.word_end))
              (Parameter.set
               'Current_Topline
               (/ (* (wagon syl ttt_topline_tree)
                     (Parameter.get 'Default_Topline))
                  100))
              (Parameter.set
               'Current_Start_Baseline
               (/ (* (wagon syl ttt_baseline_tree)
                     (Parameter.get 'Default_Start_Baseline))
                  100))
              (Parameter.set
               'Current_End_Baseline
               (Parameter.get 'Current_Start_Baseline))
              (if (>= printdebug 3)
                  (begin
                    (print (format nil "new range: %f %f %f"
                                   (Parameter.get 'Current_Topline)
                                   (Parameter.get 'Current_Start_Baseline)
                                   (Parameter.get 'Current_End_Baseline)))))))

        ;; Start tone, accent and end tone are computed strictly in that
        ;; order and their targets concatenated in that order.
        ;; (should this run only if there is an accent/boundary?)
        (let ((start_targets
               (ttt_to_targets syl (wagon syl ttt_starttone_tree)
                               voicing pvoicing nvoicing 'Starttones)))
          (let ((accent_targets
                 (ttt_to_targets syl (wagon syl ttt_accent_tree)
                                 voicing pvoicing nvoicing 'Accents)))
            (let ((end_targets
                   (ttt_to_targets syl (wagon syl ttt_endtone_tree)
                                   voicing pvoicing nvoicing 'Endtones)))
              (let ((new_targets
                     (append (append start_targets accent_targets)
                             end_targets)))
                ;; debug level 2: show what was generated, if anything
                (if (and new_targets
                         (>= printdebug 2))
                    (begin
                      (format t ">> Targets: %l\n" new_targets)
                      (format t ">> LastTarget: %l\n" (last new_targets))))
                new_targets)))))))
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
;;; A CART tree consisting of a single leaf, so it answers NONE for every
;;; item.  setup_tobi_f0_method installs it as both the accent tree and
;;; the tone tree.

(set! no_int_cart_tree (quote ((NONE))))
|
| 205 |
+
|
| 206 |
+
;;;
;;;  Phrase break tree driven by the boundary tone labels:
;;;    L-, H-, !H-                        -> B
;;;    H-H%, H-L%, !H-L%, L-L%, L-H%     -> BB
;;;    anything else                      -> NB
;;;  (downstepped tones added - MDS)

(set! tobi_label_phrase_cart_tree
      (quote
       ((tone in ("L-" "H-" "!H-"))
        ((B))
        ((tone in ("H-H%" "H-L%" "!H-L%" "L-L%" "L-H%"))
         ((BB))
         ((NB))))))
|
| 217 |
+
|
| 218 |
+
;;;
|
| 219 |
+
;;; The other functions
|
| 220 |
+
;;;
|
| 221 |
+
|
| 222 |
+
;;; Turn the relative target description chosen by a tone tree into
;;; actual target points.

(define (ttt_to_targets syl rlist voicing pvoicing nvoicing type)
  "(ttt_to_targets SYL RLIST VOICING PVOICING NVOICING TYPE)
  Takes RLIST, either a single target set or a list of target sets, and
  returns a list of targets for SYL.  TYPE is one of Accents, Starttones
  or Endtones and selects the function applied to a single set.  For a
  list of sets the targets of the remaining sets come before those of
  the first set."
  ;; trace at level 3 always, at level 2 only for a single set other than NONE
  (if (or (and (>= printdebug 2)
               rlist
               (atom (caar rlist))
               (not (equal? 'NONE (caar rlist)))
               (not (equal? '(NONE) (caar rlist))))
          (>= printdebug 3))
      (begin
        (print "Entering ttt_to_targets with:")
        (print (format nil "rlist: %l vc: %l pvc: %l nvc: %l type: %s" rlist voicing pvoicing nvoicing type))))
  (cond
   ;; nothing to do
   ((eq (length rlist) 0) ())
   ;; one target set: its first element starts with an atom
   ((atom (caar rlist))
    (cond
     ((eq type 'Accents)
      (ttt_accent_set_to_targets syl rlist voicing pvoicing nvoicing))
     ;; start and end tones share the boundary handler
     ((or (eq type 'Starttones)
          (eq type 'Endtones))
      (ttt_bound_set_to_targets syl rlist voicing pvoicing))
     (t (error "unknown target set encountered in ttt_to_targets"))))
   ;; several target sets: recurse on the rest, then on the first
   ((atom (car (caar rlist)))
    (append
     (ttt_to_targets syl (cdr rlist) voicing pvoicing nvoicing type)
     (ttt_to_targets syl (car rlist) voicing pvoicing nvoicing type)))
   ;; anything else is malformed
   (t (error "something strange has happened in ttt_to_targets"))))
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
;; Convert the target set of a start tone or end tone into target points.

(define (ttt_bound_set_to_targets syl tset voicing pvoicing)
  "(ttt_bound_set_to_targets SYL TSET VOICING PVOICING)
  Takes a start/endtone target set and returns a list of target points.
  A set headed by NONE or UNKNOWN gives nil.  A set of two target pairs
  gives two targets, and a set of one pair gives one target, each found
  with ttt_get_target on VOICING.  A lone TAKEOVER marker gives a single
  target at 0 percent of VOICING carrying the last target value.
  PVOICING only appears in the debug trace."
  (if (>= printdebug 3)
      (begin
        (print "Entering ttt_bound_set_to_targets with:")
        (pprintf (format nil "tset: %l vc: %l pvc: %l" tset voicing pvoicing))))
  (let ((head (car (car tset))))
    (cond
     ;; the usual case: no target wanted (unknown labels are ignored too)
     ((or (eq head 'NONE)
          (eq head 'UNKNOWN))
      nil)
     (t
      (let ((size (length tset)))
        (cond
         ;; two target pairs
         ((eq size 2)
          (list (ttt_get_target (car tset) voicing)
                (ttt_get_target (car (cdr tset)) voicing)))
         ;; one element: a real pair or a marker
         ((eq size 1)
          (cond
           ;; something follows the head, so it is a real target pair
           ((not (null (cdr (car tset))))
            (list (ttt_get_target (car tset) voicing)))
           ;; TAKEOVER: repeat the previous target value at the voicing onset
           ((eq head 'TAKEOVER)
            (list (list (ttt_interval_percent voicing 0)
                        (ttt_last_target_value syl))))
           (t (error "unknown target pair in ttt_bound_set_to_targets"))))
         (t (error "unknown target set type in ttt_bound_set_to_targets"))))))))
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
;; process an accent target set.
|
| 284 |
+
|
| 285 |
+
(define (ttt_accent_set_to_targets syl tset voicing pvoicing nvoicing)
|
| 286 |
+
"takes a accent target set and returns a list of target points."
|
| 287 |
+
(if (>= printdebug 3) (begin
|
| 288 |
+
(print "Entering ttt_accent_set_to_targets with:")
|
| 289 |
+
(pprintf (format nil "tset: %l vc: %l pvc: %l nvc: %l" tset voicing pvoicing nvoicing))))
|
| 290 |
+
(cond
|
| 291 |
+
;; single target in set
|
| 292 |
+
((null (cdr tset))
|
| 293 |
+
(cond
|
| 294 |
+
; target given is NONE.
|
| 295 |
+
((or (eq (car (car tset)) 'NONE)
|
| 296 |
+
(eq (car (car tset)) 'UNKNOWN)) nil)
|
| 297 |
+
; V1 marker
|
| 298 |
+
((eq (car (car tset)) 'V1)
|
| 299 |
+
(let ((target_time (+ (/ (- (next_accent_start syl)
|
| 300 |
+
(ttt_last_target_time syl))
|
| 301 |
+
2.0)
|
| 302 |
+
(ttt_last_target_time syl))))
|
| 303 |
+
(list (list target_time (ttt_accent_pitch (Parameter.get 'Valley_Dip) target_time)))))
|
| 304 |
+
; V2 marker
|
| 305 |
+
((eq (car (car tset)) 'V2)
|
| 306 |
+
(let ((target_time (+ (ttt_last_target_time syl) 0.25)))
|
| 307 |
+
(list (list target_time (ttt_accent_pitch (Parameter.get 'Valley_Dip) target_time)))))
|
| 308 |
+
; V3 marker
|
| 309 |
+
((eq (car (car tset)) 'V3)
|
| 310 |
+
(let ((target_time (- (next_accent_start syl) 0.25)))
|
| 311 |
+
(list (list target_time (ttt_accent_pitch (Parameter.get 'Valley_Dip) target_time)))))
|
| 312 |
+
; single target pair
|
| 313 |
+
(t (list (ttt_get_target (car tset) voicing)))))
|
| 314 |
+
;; a pair of targets
|
| 315 |
+
((length tset 2)
|
| 316 |
+
(cond
|
| 317 |
+
;; a *ed tone with PRE type tone (as in L+H*)
|
| 318 |
+
((eq (car (car tset)) 'PRE)
|
| 319 |
+
(let ((star_target (ttt_get_target (car (cdr tset)) voicing))
|
| 320 |
+
(last_target (parse-number(ttt_last_target_time syl))))
|
| 321 |
+
(cond
|
| 322 |
+
; normal 0.2s case (currently doesn't check for voicing)
|
| 323 |
+
((and (eqv? 0 (ip_initial syl))
|
| 324 |
+
(> (- (car star_target) 0.2) last_target))
|
| 325 |
+
(list (list (- (car star_target) 0.2)
|
| 326 |
+
(ttt_accent_pitch (car (cdr (car tset)))
|
| 327 |
+
(- (car star_target) 0.2))) ; the time
|
| 328 |
+
star_target))
|
| 329 |
+
|
| 330 |
+
; 90% prev voiced if not before last target - Added back in MDS,
|
| 331 |
+
; with parse-number added and new check for ip_initial
|
| 332 |
+
((and (eqv? 0 (ip_initial syl))
|
| 333 |
+
(> (parse-number (ttt_interval_percent pvoicing 90))
|
| 334 |
+
(parse-number (ttt_last_target_time syl))))
|
| 335 |
+
(list (list (ttt_interval_percent pvoicing 90)
|
| 336 |
+
(ttt_accent_pitch (car (cdr (car tset)))
|
| 337 |
+
(ttt_interval_percent pvoicing 90)))
|
| 338 |
+
star_target))
|
| 339 |
+
|
| 340 |
+
; otherwise (UNTESTED) [NOTE: Voicing for this syllable only]
|
| 341 |
+
(t
|
| 342 |
+
(list (list (ttt_interval_percent voicing 20)
|
| 343 |
+
(ttt_accent_pitch (car (cdr (car tset)))
|
| 344 |
+
(ttt_interval_percent voicing 20)))
|
| 345 |
+
star_target)))))
|
| 346 |
+
; a *ed tone with POST type tone (as L*+H)
|
| 347 |
+
((eq (car(car(cdr tset))) 'POST)
|
| 348 |
+
(let ((star_target (ttt_get_target (car tset) voicing))
|
| 349 |
+
(next_target nil ) ; interesting problem
|
| 350 |
+
(next_syl (item.next syl)))
|
| 351 |
+
|
| 352 |
+
(cond
|
| 353 |
+
; normal 0.2s case (UNTESTED)
|
| 354 |
+
((and (not (equal? next_syl nil))
|
| 355 |
+
(eq 0 (item.feat next_syl "accented")))
|
| 356 |
+
(cond
|
| 357 |
+
((< (+ (car star_target) 0.2) (item.feat next_syl "syllable_end"))
|
| 358 |
+
(list star_target
|
| 359 |
+
(list (+ (car star_target) 0.2)
|
| 360 |
+
(ttt_accent_pitch (car (cdr (car (cdr tset))))
|
| 361 |
+
(+ (car star_target) 0.2) ))))
|
| 362 |
+
(t
|
| 363 |
+
|
| 364 |
+
(list star_target
|
| 365 |
+
(list (ttt_interval_percent nvoicing 90)
|
| 366 |
+
(ttt_accent_pitch (car (cdr (car (cdr tset))))
|
| 367 |
+
(ttt_interval_percent nvoicing 90) ))))))
|
| 368 |
+
|
| 369 |
+
; 20% next voiced (BUG: Can't do this as the next target hasn't been
|
| 370 |
+
; calculated yet!)
|
| 371 |
+
(nil nil)
|
| 372 |
+
;otherwise (UNTESTED)
|
| 373 |
+
(t (list star_target
|
| 374 |
+
(list (ttt_interval_percent voicing 90)
|
| 375 |
+
(ttt_accent_pitch (car (cdr (car (cdr tset))))
|
| 376 |
+
(ttt_interval_percent voicing 90) )))))))
|
| 377 |
+
|
| 378 |
+
(t
|
| 379 |
+
;; This case didn't use to happen, but now must
|
| 380 |
+
;; to avoid +H's clobbering endtones - MDS's hack.
|
| 381 |
+
(list (ttt_get_target (car tset) voicing)
|
| 382 |
+
(ttt_get_target (cadr tset) voicing)))))
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
;; something else...
|
| 386 |
+
(t (error (format nil "unknown accent set in ttt_accent_set_to_targets: %l" tset)))))
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
(define (ttt_get_target pair voicing)
  "(ttt_get_target PAIR VOICING)
Turns a tree entry PAIR of the form (percent pitch) into an actual
(time f0) target.  The time lies percent% of the way through the
VOICING interval and the pitch entry is resolved at that same time."
  (if (>= printdebug 4)
      (begin
        (print "Entering ttt_get_target with:")
        (pprintf pair)
        (pprintf voicing)))
  (list
   ;; when the target falls
   (ttt_interval_percent voicing (car pair))
   ;; how high it is, resolved at that time
   (ttt_accent_pitch (cadr pair)
                     (ttt_interval_percent voicing (car pair)))))
|
| 398 |
+
|
| 399 |
+
(define (ttt_accent_pitch value time)
  "(ttt_accent_pitch VALUE TIME)
Resolves a pitch entry from the target trees to a pitch value at TIME.
A number is taken as a percentage of the span between the declined
baseline at TIME and the Current_Topline parameter.  The symbol DHIGH
first downsteps Current_Topline (a side effect on the parameter) and
then returns the new topline.  Any other entry signals an error."
  (if (>= printdebug 4)
      (begin
        (print "Entering ttt_accent_pitch with:")
        (pprintf value)))
  (if (number? value)
      ;; plain percentage of the current pitch range
      (ttt_interval_percent
       (list (ttt_get_current_baseline time)
             (Parameter.get 'Current_Topline))
       value)
      (if (eq value 'DHIGH)
          (begin
            ;; pull the topline towards the baseline by Downstep_Factor ...
            (Parameter.set
             'Current_Topline
             (+ (ttt_get_current_baseline time)
                (* (Parameter.get 'Downstep_Factor)
                   (- (Parameter.get 'Current_Topline)
                      (ttt_get_current_baseline time)))))
            ;; ... and place the target on the new topline (100%)
            (ttt_interval_percent
             (list (ttt_get_current_baseline time)
                   (Parameter.get 'Current_Topline))
             100))
          (error "Unknown accent pitch value encountered"))))
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
(define (ttt_get_current_baseline v)
  "(ttt_get_current_baseline V)
Returns the baseline at time V, interpolated linearly from
Current_Start_Baseline at Phrase_Start to Current_End_Baseline at
Phrase_End (all four read with Parameter.get)."
  (if (>= printdebug 4)
      (begin
        (print "Entering ttt_get_current_baseline with:")
        (pprintf v)))
  (let ((base_start (Parameter.get 'Current_Start_Baseline))
        (base_end (Parameter.get 'Current_End_Baseline))
        (phrase_end (Parameter.get 'Phrase_End))
        (phrase_start (Parameter.get 'Phrase_Start)))
    ;; drop per unit of time across the phrase
    (let ((slope (/ (- base_start base_end)
                    (- phrase_end phrase_start))))
      (- base_start (* slope (- v phrase_start))))))
|
| 435 |
+
|
| 436 |
+
;;; find the time n% through an interval
|
| 437 |
+
|
| 438 |
+
(define (ttt_interval_percent pair percent)
  "(ttt_interval_percent PAIR PERCENT)
PAIR is a two element list (start end).  Returns the point that lies
PERCENT percent of the way from start to end, or nil when PAIR is nil."
  (if (>= printdebug 4)
      (begin
        (print "Entering ttt_interval_percent with:")
        (pprintf (format nil "%l, %l" pair percent))))
  (if (null pair)
      ;; nothing to interpolate over
      nil
      (let ((from (car pair))
            (to (cadr pair)))
        (+ from (* (- to from) (/ percent 100))))))
|
| 450 |
+
|
| 451 |
+
|
| 452 |
+
;;; Getting start and end voicing times in a syllable
|
| 453 |
+
|
| 454 |
+
(define (ttt_get_voice_times syl_item)
  "(ttt_get_voice_times SYL_ITEM)
Returns the list (start end), where start is the segment_start of the
first voiced segment of the syllable and end is the end of its last
voiced segment.  Returns nil when SYL_ITEM is nil."
  (if (null syl_item)
      nil
      (let ((phones (item.relation.daughters syl_item "SylStructure")))
        ;; scanning the reversed list finds the last voiced segment
        (let ((first_v (ttt_first_voiced phones))
              (last_v (ttt_first_voiced (reverse phones))))
          (list (item.feat first_v "segment_start")
                (item.feat last_v "end"))))))
|
| 463 |
+
|
| 464 |
+
(define (ttt_first_voiced segs)
  "(ttt_first_voiced SEGS)
Returns the first segment in SEGS whose ph_vc or ph_cvox feature is +.
If none qualifies, the last segment of SEGS is returned."
  (if (or (null (cdr segs))             ; last candidate, take it regardless
          (equal? "+" (item.feat (car segs) "ph_vc"))
          (equal? "+" (item.feat (car segs) "ph_cvox")))
      (car segs)
      (ttt_first_voiced (cdr segs))))
|
| 476 |
+
|
| 477 |
+
;;; ttt_last_target has bifurcated into
|
| 478 |
+
;;; ttt_last_target_time and
|
| 479 |
+
;;; ttt_last_target_value
|
| 480 |
+
;;; to fix a place where f0 was set to last target time!
|
| 481 |
+
;;; - MDS
|
| 482 |
+
|
| 483 |
+
(define (ttt_last_target_time syl)
  "(ttt_last_target_time SYL)
Returns the position (pos feature of the first Target daughter) of the
most recent target before SYL, or nil if there is no such target."
  (if (>= printdebug 3)
      (begin
        (print "Entering ttt_last_target_time")
        (print syl)))
  (let ((prev_target (ttt_last_target syl)))
    (if prev_target
        (item.feat prev_target "R:Target.daughter1.pos")
        nil)))
|
| 495 |
+
|
| 496 |
+
(define (ttt_last_target_value syl)
  "(ttt_last_target_value SYL)
Returns the pitch (f0 feature of the first Target daughter) of the
most recent target before SYL, or nil if there is no such target."
  (if (>= printdebug 3)
      ;; Trace under this function's own name so that debug output can
      ;; be told apart from that of ttt_last_target_time.
      (begin (print "Entering ttt_last_target_value")
             (print syl)))
  (let ((target (ttt_last_target syl)))
    (if (null? target)
        nil
        (item.feat target "R:Target.daughter1.f0"))))
|
| 508 |
+
|
| 509 |
+
;; Changed to scan through segments in the segment relation,
|
| 510 |
+
;; to catch (notional) targets on pauses. - MDS
|
| 511 |
+
;;
|
| 512 |
+
;;; associated segments are:
|
| 513 |
+
;;; - the segments in the word
|
| 514 |
+
;;; - subsequent segments not in the syllable structure
|
| 515 |
+
;;; and on the first word, preceding segments
|
| 516 |
+
;;; not in the syllable structure
|
| 517 |
+
|
| 518 |
+
(define (ttt_collect_following seg accum)
  "(ttt_collect_following SEG ACCUM)
Walks forward from SEG with item.next, consing each segment onto ACCUM,
and stops at the end of the list or at the first segment that is in
the SylStructure relation.  Returns the accumulated list, so the
segments collected last come first."
  (cond
   ((null? seg) accum)
   ;; a segment inside the syllable structure ends the run
   ((item.relation seg 'SylStructure) accum)
   (t (ttt_collect_following (item.next seg)
                             (cons seg accum)))))
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
(define (ttt_last_target syl)
  "(ttt_last_target SYL)
Returns the segment carrying the most recent target before SYL, or nil
if there is none.  Each earlier syllable is searched in turn, last
segment first, together with any segments that follow it outside the
syllable structure."
  (if (>= printdebug 3)
      (begin
        (print "Entering ttt_last_target")
        (print syl)))
  (let ((before (item.relation.prev syl 'Syllable)))
    (if (null before)
        nil
        (or
         ;; segments of the previous syllable in reverse order, with
         ;; the following non-syllable segments (pauses) prepended
         (ttt_last_target_segs
          (ttt_collect_following
           (item.relation.next
            (item.relation.daughtern before "SylStructure")
            "Segment")
           (reverse (item.relation.daughters before "SylStructure"))))
         ;; nothing there: keep looking further back
         (ttt_last_target before)))))
|
| 548 |
+
|
| 549 |
+
(define (ttt_last_target_segs segs)
  "(ttt_last_target_segs SEGS)
Returns the first segment in SEGS that has a Target daughter with an
f0 above 0 and whose parent syllable has lh_condition, hl_condition
and valley_condition all equal to 0.  Returns nil if no segment
qualifies."
  (if (>= printdebug 4)
      (begin
        (print "Entering ttt_last_target_segs with:")
        (pprintf (format nil "%l" segs))))
  (if (null segs)
      nil
      (let ((seg (car segs)))
        (if (and (> (parse-number
                     (item.feat seg "R:Target.daughter1.f0"))
                    0)
                 (eq 0 (item.feat seg "R:SylStructure.parent.lisp_lh_condition"))
                 (eq 0 (item.feat seg "R:SylStructure.parent.lisp_hl_condition"))
                 (eq 0 (item.feat seg "R:SylStructure.parent.lisp_valley_condition")))
            seg
            (ttt_last_target_segs (cdr segs))))))
|
| 567 |
+
|
| 568 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 569 |
+
;;;;;;
|
| 570 |
+
;;;;;; CART TREES (ttt - tobi to target)
|
| 571 |
+
;;;;;;
|
| 572 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 573 |
+
|
| 574 |
+
;;;
|
| 575 |
+
;;; Return a list of target lists. A target list comprises of a list
|
| 576 |
+
;;; of related targets (ie for the L and H in L+H*), just to confuse
|
| 577 |
+
;;; matters each target is also a list! (pos pitch)
|
| 578 |
+
;;;
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
;; Decision tree for boundary (end) tone targets.  Nodes read as
;; (QUESTION YES-BRANCH NO-BRANCH); the questions test tobi_endtone and,
;; for some tones, lisp_last_accent, lisp_last_accent_type and tobi_accent.
;; A leaf holds a list of (percent pitch) entries, or the marker (NONE)
;; or (UNKNOWN).  A pitch is either a number or the symbol DHIGH, the two
;; forms ttt_accent_pitch accepts.
(set! ttt_endtone_tree ; BUG: does it check the current syl for last accent?
|
| 582 |
+
'
|
| 583 |
+
((tobi_endtone is NONE) ; usually none
|
| 584 |
+
((((NONE))))
|
| 585 |
+
((tobi_endtone is "H-H%") ; H-H%
|
| 586 |
+
((((100 120))))
|
| 587 |
+
((tobi_endtone is "L-L%") ; L-L%
|
| 588 |
+
((((100 -20))))
|
| 589 |
+
((tobi_endtone is "L-H%") ; L-H%
|
| 590 |
+
((lisp_last_accent > 2)
|
| 591 |
+
((lisp_last_accent_type is "L*")
|
| 592 |
+
((((0 25) (100 40)))) ; paper says 80 but AWB had 40
|
| 593 |
+
((((0 0) (100 40)))))
|
| 594 |
+
((lisp_last_accent_type is "L*")
|
| 595 |
+
((((100 40))))
|
| 596 |
+
((((50 0) (100 40))))))
|
| 597 |
+
((tobi_endtone is "H-L%") ; H-L%
|
| 598 |
+
((lisp_last_accent_type is "L*")
|
| 599 |
+
((tobi_accent is"L*")
|
| 600 |
+
((((50 100) (100 100))))
|
| 601 |
+
((((0 100) (100 100)))))
|
| 602 |
+
((((100 100)))))
|
| 603 |
+
((tobi_endtone is "!H-L%") ; !H-L%
|
| 604 |
+
((lisp_last_accent_type is "L*")
|
| 605 |
+
((tobi_accent is"L*")
|
| 606 |
+
((((50 DHIGH) (100 100))))
|
| 607 |
+
((((0 DHIGH) (100 100)))))
|
| 608 |
+
((((100 DHIGH)))))
|
| 609 |
+
((tobi_endtone is "H-")
|
| 610 |
+
((((100 100))))
|
| 611 |
+
((tobi_endtone is "!H-")
|
| 612 |
+
((((100 DHIGH))))
|
| 613 |
+
((tobi_endtone is "L-")
|
| 614 |
+
((((100 0))))
|
| 615 |
+
((((UNKNOWN))))))))))))))
|
| 616 |
+
|
| 617 |
+
;; Decision tree for phrase-initial (start) tone targets.  Only syllables
;; with lisp_ip_initial = 1 get anything; every other syllable reaches the
;; final (NONE) leaf.  Leaves hold (percent pitch) entries, or the marker
;; (TAKEOVER) when the previous syllable ended in H-, !H- or L-.
(set! ttt_starttone_tree
|
| 618 |
+
'
|
| 619 |
+
((lisp_ip_initial = 1)
|
| 620 |
+
((tobi_endtone is "%H")
|
| 621 |
+
((((0 100))))
|
| 622 |
+
((p.tobi_endtone in ("H-" "!H-" "L-"))
|
| 623 |
+
((((TAKEOVER)))) ; takeover case
|
| 624 |
+
((tobi_accent is NONE)
|
| 625 |
+
((lisp_next_accent > 2) ; default cases (dep. on whether next target is low)
|
| 626 |
+
((lisp_next_accent_type in ("L*" "L*+H" "L*+!H" "L+H*" "L+!H*" "L-" "L-H%" "L-L%"))
|
| 627 |
+
((((0 50)(100 25))))
|
| 628 |
+
((((0 50)(100 75)))))
|
| 629 |
+
((lisp_next_accent_type in ("L*" "L*+H" "L*+!H" "L+H*" "L+!H*" "L-" "L-H%" "L-L%"))
|
| 630 |
+
((((0 30))))
|
| 631 |
+
((((0 70))))))
|
| 632 |
+
((tobi_accent in ("L*" "L*+H" "L*+!H" "L+H*" "L+!H*" "L-" "L-H%" "L-L%"))
|
| 633 |
+
((((0 30))))
|
| 634 |
+
((((0 70))))))))
|
| 635 |
+
((((NONE)))))) ; otherwise (and usually) nothing.
|
| 636 |
+
|
| 637 |
+
;; Redone after Jilka, Moehler and Dogil
|
| 638 |
+
;; - But treating one-syllable-ip's like
|
| 639 |
+
;; last-syllable-of-ip's in cases of
|
| 640 |
+
;; two tone switches per syllable (e.g. H* L-H%).
|
| 641 |
+
;; - And (hack) a 70% target for the initial
|
| 642 |
+
;; H*'s of phrases when the next accent is L+H*
|
| 643 |
+
;; - MDS
|
| 644 |
+
|
| 645 |
+
;; Decision tree for pitch accent targets.  The first eight questions
;; test tobi_accent; each accent branch then splits on lisp_ip_final,
;; lisp_ip_one_syllable_case and lisp_ip_initial (H* also on
;; lisp_hstar_weak_target).  Unaccented syllables fall through to the
;; lisp_lh_condition / lisp_hl_condition / lisp_valley_condition tests.
;; Leaves hold (percent pitch) entries.  PRE or POST in the first slot
;; marks the unstarred tone of a two-tone accent (as in L+H* and L*+H);
;; V1, V2 and V3 mark valley targets; (NONE) and (UNKNOWN) are markers.
(set! ttt_accent_tree
|
| 646 |
+
'
|
| 647 |
+
((tobi_accent is "H*" ) ; H*
|
| 648 |
+
((lisp_ip_final = 1)
|
| 649 |
+
((lisp_ip_one_syllable_case = 1)
|
| 650 |
+
((((50 100))))
|
| 651 |
+
((((25 100)))))
|
| 652 |
+
((lisp_hstar_weak_target = 1)
|
| 653 |
+
((((60 70))))
|
| 654 |
+
((lisp_ip_initial = 1)
|
| 655 |
+
((((85 100))))
|
| 656 |
+
((((60 100)))))))
|
| 657 |
+
|
| 658 |
+
((tobi_accent is "!H*" ) ; !H*
|
| 659 |
+
((lisp_ip_final = 1)
|
| 660 |
+
((lisp_ip_one_syllable_case = 1)
|
| 661 |
+
((((50 DHIGH))))
|
| 662 |
+
((((25 DHIGH)))))
|
| 663 |
+
((lisp_ip_initial = 1)
|
| 664 |
+
((((85 DHIGH))))
|
| 665 |
+
((((60 DHIGH))))))
|
| 666 |
+
|
| 667 |
+
((tobi_accent is "L*" ) ; L*
|
| 668 |
+
((lisp_ip_final = 1)
|
| 669 |
+
((lisp_ip_one_syllable_case = 1)
|
| 670 |
+
((((50 0))))
|
| 671 |
+
((((25 0)))))
|
| 672 |
+
((lisp_ip_initial = 1)
|
| 673 |
+
((((85 0))))
|
| 674 |
+
((((60 0))))))
|
| 675 |
+
|
| 676 |
+
((tobi_accent is "L+H*") ; L+H*
|
| 677 |
+
((lisp_ip_final = 1)
|
| 678 |
+
((lisp_ip_one_syllable_case = 1)
|
| 679 |
+
((((PRE 20) (50 100)))) ; JMD estimated 70
|
| 680 |
+
((((PRE 20) (25 100)))))
|
| 681 |
+
((lisp_ip_initial = 1)
|
| 682 |
+
((((PRE 20) (90 100))))
|
| 683 |
+
((((PRE 20) (75 100))))))
|
| 684 |
+
|
| 685 |
+
((tobi_accent is "L+!H*") ; L+!H*
|
| 686 |
+
((lisp_ip_final = 1)
|
| 687 |
+
((lisp_ip_one_syllable_case = 1)
|
| 688 |
+
((((PRE 20) (70 DHIGH))))
|
| 689 |
+
((((PRE 20) (25 DHIGH)))))
|
| 690 |
+
((lisp_ip_initial = 1)
|
| 691 |
+
((((PRE 20) (90 DHIGH))))
|
| 692 |
+
((((PRE 20) (75 DHIGH))))))
|
| 693 |
+
|
| 694 |
+
((tobi_accent is "L*+H") ; L*+H
|
| 695 |
+
((lisp_ip_final = 1)
|
| 696 |
+
((lisp_ip_one_syllable_case = 1)
|
| 697 |
+
((((35 0) (80 100)))) ; POST would clobber endtones
|
| 698 |
+
((((15 0) (40 100))))) ; POST would clobber endtones - MDS
|
| 699 |
+
((lisp_ip_initial = 1)
|
| 700 |
+
((((55 0) (POST 100))))
|
| 701 |
+
((((40 0) (POST 100))))))
|
| 702 |
+
|
| 703 |
+
((tobi_accent is "L*+!H") ; L*+!H
|
| 704 |
+
((lisp_ip_final = 1)
|
| 705 |
+
((lisp_ip_one_syllable_case = 1)
|
| 706 |
+
((((35 0) (80 DHIGH)))) ; POST would clobber endtones - MDS
|
| 707 |
+
((((15 0) (40 DHIGH))))) ; POST would clobber endtones - MDS
|
| 708 |
+
((lisp_ip_initial = 1)
|
| 709 |
+
((((55 0) (POST DHIGH))))
|
| 710 |
+
((((40 0) (POST DHIGH))))))
|
| 711 |
+
|
| 712 |
+
((tobi_accent is "H+!H*") ; H+!H*
|
| 713 |
+
((lisp_ip_final = 1)
|
| 714 |
+
((lisp_ip_one_syllable_case = 1)
|
| 715 |
+
((((PRE 143) (60 DHIGH)))) ; the 143 is a hack to level out the downstep
|
| 716 |
+
((((PRE 143) (20 DHIGH)))))
|
| 717 |
+
((lisp_ip_initial = 1)
|
| 718 |
+
((((PRE 143) (90 DHIGH))))
|
| 719 |
+
((((PRE 143) (60 DHIGH))))))
|
| 720 |
+
|
| 721 |
+
((lisp_lh_condition = 1)
|
| 722 |
+
((((100 75))))
|
| 723 |
+
((lisp_lh_condition = 2)
|
| 724 |
+
((((0 90))))
|
| 725 |
+
((lisp_hl_condition = 1)
|
| 726 |
+
((((100 25))))
|
| 727 |
+
((lisp_valley_condition = 1)
|
| 728 |
+
((((V1 85))))
|
| 729 |
+
((lisp_valley_condition = 2)
|
| 730 |
+
((((V2 70))))
|
| 731 |
+
((lisp_valley_condition = 3)
|
| 732 |
+
((((V3 70))))
|
| 733 |
+
((tobi_accent is NONE) ; usually we find no accent
|
| 734 |
+
((((NONE))))
|
| 735 |
+
((((UNKNOWN)))))))))))))))))))) ; UNKNOWN TARGET FOUND
|
| 736 |
+
|
| 737 |
+
;;; Cart tree to "predict" pitch range
|
| 738 |
+
;;; Right now just accesses a feature
|
| 739 |
+
;;; "register" following Moehler & Mayer 2001.
|
| 740 |
+
;;; Register must be one of
|
| 741 |
+
;;; H - primary high register (default): 133% lowest, 92% highest
|
| 742 |
+
;;; H-H - expanded high register: 134% lowest, 100% highest
|
| 743 |
+
;;; H-L - lowered high register: 128% lowest, 87% highest
|
| 744 |
+
;;; L - primary low register: 100% lowest, 73% highest
|
| 745 |
+
;;; L-L and HL-L - low compressed: 100% lowest, 66% highest
|
| 746 |
+
;;; HL - expanded register: 100% lowest, 84% highest
|
| 747 |
+
;;; HL-H - complete register: 100% lowest, 96% highest
|
| 748 |
+
;;; For their speaker, BASELINE was 42% of PEAK
|
| 749 |
+
|
| 750 |
+
;; Maps the register feature of the SylStructure parent to a single
;; number per leaf.  Any register without a branch here (including
;; "HL-L") falls through to the final leaf, 92, the same value as "H".
(set! ttt_topline_tree
|
| 751 |
+
'
|
| 752 |
+
((R:SylStructure.parent.register is "H")
|
| 753 |
+
(92)
|
| 754 |
+
((R:SylStructure.parent.register is "H-H")
|
| 755 |
+
(100)
|
| 756 |
+
((R:SylStructure.parent.register is "H-L")
|
| 757 |
+
(87)
|
| 758 |
+
((R:SylStructure.parent.register is "L")
|
| 759 |
+
(73)
|
| 760 |
+
((R:SylStructure.parent.register is "L-L")
|
| 761 |
+
(66)
|
| 762 |
+
((R:SylStructure.parent.register is "HL")
|
| 763 |
+
(84)
|
| 764 |
+
((R:SylStructure.parent.register is "HL-H")
|
| 765 |
+
(96)
|
| 766 |
+
(92)))))))))
|
| 767 |
+
|
| 768 |
+
;; Maps the register feature of the SylStructure parent to a single
;; number per leaf.  Any register without a branch here (including
;; "HL-L") falls through to the final leaf, 133, the same value as "H".
(set! ttt_baseline_tree
|
| 769 |
+
'
|
| 770 |
+
((R:SylStructure.parent.register is "H")
|
| 771 |
+
(133)
|
| 772 |
+
((R:SylStructure.parent.register is "H-H")
|
| 773 |
+
(134)
|
| 774 |
+
((R:SylStructure.parent.register is "H-L")
|
| 775 |
+
(128)
|
| 776 |
+
((R:SylStructure.parent.register is "L")
|
| 777 |
+
(100)
|
| 778 |
+
((R:SylStructure.parent.register is "L-L")
|
| 779 |
+
(100)
|
| 780 |
+
((R:SylStructure.parent.register is "HL")
|
| 781 |
+
(100)
|
| 782 |
+
((R:SylStructure.parent.register is "HL-H")
|
| 783 |
+
(100)
|
| 784 |
+
(133)))))))))
|
| 785 |
+
|
| 786 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 787 |
+
;;;;;;
|
| 788 |
+
;;;;;; Lisp Feature functions.
|
| 789 |
+
;;;;;;
|
| 790 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 791 |
+
|
| 792 |
+
(define (valley_condition syl)
  "(valley_condition syl)
Function to determine if a lowered target between two high target points
is needed in this syllable.  Only unaccented syllables lying between a
high last accent and a high next accent or end tone are considered.
Returns:  0 - no target required
          1 - the single target case
          2 - the first of the two target case
          3 - the second of the two target case
"
  (if (>= printdebug 4)
      (begin (print "Entering valley_condition")))
  (cond
   ((and (eq 0 (item.feat syl 'accented))
         (string-matches (next_accent_type syl)
            "\\(H\\*\\|H\\-\\|H\\-L\\%\\|H\\-H\\%\\|\\!H\\*\\|\\!H\\-\\|\\!H\\-L\\%\\|\\!H\\-H\\%\\)")
         ;; The alternatives L*+H and !H* are separated by a plain \\|
         ;; here.  A doubled backslash at this point would turn the
         ;; separator into literal text, so that neither accent could
         ;; match on its own.
         (string-matches (last_accent_type syl)
            "\\(H\\*\\|L\\+H\\*\\|L\\*\\+H\\|\\!H\\*\\|L\\+\\!H\\*\\|L\\*\\+\\!H\\)"))
         ;GM: excluded %H (returns nil for last target)
    (let ((nas (next_accent_start syl))
          (syls (item.feat syl 'syllable_start))
          (syle (item.feat syl 'syllable_end))
          (las (ttt_last_target_time syl)))
      (if (>= printdebug 3)
          (begin (print (format nil "nas: %l syls: %l syle %l las %l" nas syls syle las))))
      ;; gap is the time between the last target and the next accent;
      ;; las is reused below instead of searching for the last target again
      (let ((gap (- nas las)))
        (cond
         ;; short gap: one valley at its midpoint, if that falls in SYL
         ((and (< gap 0.5)
               (> gap 0.25)
               (< syls (+ (/ gap 2.0) las))
               (> syle (+ (/ gap 2.0) las))) 1)
         ;; long gap: first valley 0.25 after the last target
         ((and (> gap 0.5)
               (< syls (+ las 0.25))
               (> syle (+ las 0.25))) 2)
         ;; long gap: second valley 0.25 before the next accent
         ((and (> gap 0.5)
               (< syls (- nas 0.25))
               (> syle (- nas 0.25))) 3)
         (t 0)))))
   (t 0)))
|
| 829 |
+
|
| 830 |
+
|
| 831 |
+
|
| 832 |
+
(define (lh_condition syl)
  "(lh_condition SYL)
Decides whether an extra target is needed on the unaccented syllable
SYL between a preceding L* and a following H*, H-, H-L% or H-H%.
Returns:  1 - first extra target (the L* is one syllable back and the
              next accent more than two ahead)
          2 - second extra target (the L* is more than two syllables
              back and the next accent one ahead)
          0 - no target required.
"
  (if (>= printdebug 4)
      (print "Entering LH_condition"))
  (if (and (eq 0 (item.feat syl 'accented))
           (string-matches (last_accent_type syl) "\\(L\\*\\)")
           (string-matches (next_accent_type syl)
                           "\\(H\\*\\|H\\-\\|H\\-L\\%\\|H\\-H\\%\\)"))
      (if (and (eq 1 (last_accent syl))
               (< 2 (next_accent syl)))
          1
          (if (and (< 2 (last_accent syl))
                   (eq 1 (next_accent syl)))
              2
              0))
      0))
|
| 853 |
+
|
| 854 |
+
(define (hl_condition syl)
  "(hl_condition syl)
Function to determine the need for extra target points between an H and an L
Returns:  1 - extra target required
          0 - no target required.
"
  (if (>= printdebug 4)
      (begin (print "Entering HL_condition")))
  (cond
   ((and (eq 0 (item.feat syl 'accented))
         (string-matches (next_accent_type syl)
            "\\(L\\*\\|L\\+H\\*\\|L\\+\\!H\\*\\|L\\*\\+H\\|L\\*\\+!H\\|L\\-\\|L\\-L\\%\\|L-H\\%\\)")
         ;; The alternatives L*+H and !H* are separated by a plain \\|
         ;; here.  A doubled backslash at this point would turn the
         ;; separator into literal text, so that neither accent could
         ;; match on its own.
         (string-matches (last_accent_type syl)
            "\\(H\\*\\|L\\+H\\*\\|L\\*\\+H\\|\\!H\\*\\|L\\+\\!H\\*\\|L\\*\\+\\!H\\|\\%H\\)")
         ;MDS: added !H's
         ;; the high accent must be on the immediately preceding syllable
         (eq 1 (last_accent syl))
         ;; fall faster! -MDS
         (<= 2 (next_accent syl))) 1)
   (t 0)))
|
| 874 |
+
|
| 875 |
+
(define (next_accent syl)
  "(next_accent SYL)
Distance in syllables from SYL to the next accent:
  0 - there is no next accent
  1 - it is on the next syllable
  2 - it is on the syllable after that
and so on.  This is the next_accent feature shifted up by one."
  (if (>= printdebug 4)
      (print "Entering next_accent"))
  (if (eq 0 (next_accent_type syl))
      0
      (+ 1 (item.feat syl 'next_accent))))
|
| 888 |
+
|
| 889 |
+
;; Fixed bug that crashed complex phrase tones. - MDS
|
| 890 |
+
;; Not sure how else to get a big number...
|
| 891 |
+
;; Value last_accent returns when there is no previous accent.
;; NOTE(review): assumes the interpreter evaluates (/ 1 0) to a very
;; large number rather than signalling an error - confirm.
(define infinity (/ 1 0))
|
| 892 |
+
|
| 893 |
+
;; Modified to include current accent as well -MDS
|
| 894 |
+
|
| 895 |
+
(define (last_accent syl)
  "(last_accent SYL)
Distance in syllables from SYL back to the most recent accent:
  0        - SYL itself is accented (its tobi_accent is not NONE)
  1        - the accent is on the previous syllable
  2        - it is on the syllable before that
and so on.  Returns infinity when last_accent_type finds no earlier
accent."
  (if (>= printdebug 4)
      (print "Entering last_accent"))
  (if (equal? "NONE" (item.feat syl 'tobi_accent))
      (if (equal? 0 (last_accent_type syl))
          infinity
          ;; the last_accent feature, shifted up by one
          (+ 1 (item.feat syl 'last_accent)))
      0))
|
| 910 |
+
|
| 911 |
+
(define (next_accent_type syl)
  "(next_accent_type SYL)
Returns the name of the first Intonation daughter found on a syllable
after SYL, or 0 if syl_out reaches 0 before one is found."
  (let ((following (item.feat syl "n.R:Intonation.daughter1.name")))
    (if (eq 0 following)
        (if (eq 0 (item.feat syl 'syl_out)) ;;GM real ip_final would be better
            0
            (next_accent_type (item.relation.next syl 'Syllable)))
        following)))
|
| 919 |
+
|
| 920 |
+
(define (last_accent_type syl)
  "(last_accent_type syl)
Returns the type of the last (previous) accent: the end tone of the
nearest earlier syllable that has one, otherwise its accent.  Returns
0 if syl_in reaches 0 before anything is found."
  (if (>= printdebug 4)
      (begin (print "Entering last_accent_type")))
  (cond
   ((not (equal? "NONE" (item.feat syl 'p.tobi_endtone)))
    (item.feat syl 'R:Syllable.p.tobi_endtone))
   ((not (equal? "NONE" (item.feat syl 'p.tobi_accent)))
    (item.feat syl 'R:Syllable.p.tobi_accent))
   ((eq 0 (item.feat syl 'syl_in)) 0)  ;;GM real ip_initial would be better
   ;; Step back through the Syllable relation explicitly, as the other
   ;; functions in this file do; item.prev takes only the item, so a
   ;; relation name passed to it has no effect.
   (t (last_accent_type (item.relation.prev syl 'Syllable)))))
|
| 932 |
+
|
| 933 |
+
(define (next_accent_start syl)
  "(next_accent_start SYL)
Returns the syllable_start of the first syllable after SYL that has an
Intonation daughter, or 0 if syl_out reaches 0 before one is found."
  (if (>= printdebug 4)
      (print "Entering next_accent_start"))
  (if (eq 0 (item.feat syl "n.R:Intonation.daughter1.name"))
      (if (eq 0 (item.feat syl 'syl_out))
          0
          (next_accent_start (item.relation.next syl 'Syllable)))
      ;;GM vowel start would be better
      (item.feat syl "R:Syllable.n.syllable_start")))
|
| 943 |
+
|
| 944 |
+
; new features (not used yet)
|
| 945 |
+
|
| 946 |
+
(define (ip_final syl)
  "(ip_final SYL)
Returns 1 if SYL is the final syllable of an ip (intermediate phrase),
that is, if its syl_out is 0 or its tobi_endtone is L-, H- or !H-.
Returns 0 otherwise."
  (if (or (equal? 0 (item.feat syl "syl_out"))
          (member (item.feat syl "tobi_endtone")
                  (list "L-" "H-" "!H-")))
      1
      0))
|
| 956 |
+
|
| 957 |
+
(define (ip_initial syl)
  "(ip_initial SYL)
Returns 1 if SYL is the initial syllable of an ip (intermediate
phrase), that is, if its syl_in is 0 or the previous syllable is
ip_final.  Returns 0 otherwise."
  (if (or (equal? 0 (item.feat syl "syl_in"))
          (equal? 1 (ip_final (item.relation.prev syl 'Syllable))))
      1
      0))
|
| 967 |
+
|
| 968 |
+
;; NEXT TWO FUNCTIONS ARE NEW - MDS
|
| 969 |
+
(define (ip_one_syllable_case syl)
  "(ip_one_syllable_case SYL)
Returns 1 if the syllable is the initial syllable of an
ip (intermediate phrase) and doesn't itself contain a complex
tone that starts opposite this syllable's accent.  Returns 0
otherwise."
  (if (eqv? 0 (ip_initial syl))
      0
      (let ((accent (item.feat syl "tobi_accent"))
            (tone (item.feat syl "tobi_endtone")))
        (cond
         ;; L-H% after any of the accents listed below
         ((and (equal? tone "L-H%")
               (or (equal? accent "H*")
                   (equal? accent "!H*")
                   (equal? accent "L+H*")
                   (equal? accent "L+!H*")
                   (equal? accent "L*+H")
                   ;; spelled as in ttt_accent_tree, without a trailing star
                   (equal? accent "L*+!H")
                   (equal? accent "H+!H*")))
          0)
         ;; H-L% or !H-L% after a low accent
         ((and (or (equal? tone "H-L%")
                   (equal? tone "!H-L%"))
               (equal? accent "L*"))
          0)
         (t
          1)))))
|
| 994 |
+
|
| 995 |
+
(define (hstar_weak_target syl)
  "(hstar_weak_target SYL)
Returns 1 when SYL's asyl_in feature is 0 and the next accent type is
one of L*, L*+H, L*+!H, L+H* or L+!H*.  Returns 0 otherwise."
  (cond
   ((not (equal? 0 (item.feat syl 'asyl_in))) 0)
   ((member (next_accent_type syl)
            (list "L*" "L*+H" "L*+!H" "L+H*" "L+!H*"))
    1)
   (t 0)))
|
| 1001 |
+
|
| 1002 |
+
;; Announce this file under the name tobi_rules.
;; NOTE(review): presumably pairs with a (require 'tobi_rules) elsewhere - confirm.
(provide 'tobi_rules)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/token.scm
ADDED
|
@@ -0,0 +1,815 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Various tokenizing functions and customization
|
| 35 |
+
|
| 36 |
+
(define (Token utt)
  "(Token UTT)
Turn the Token stream of UTT into a Word stream, expanding compound
words, numbers etc. into words.  A non-nil result from applying
Token_Method is returned as is; otherwise the Language Parameter
selects which token to word module is run."
  (let ((method_result (apply_method 'Token_Method utt)) ;; may be unset
        (lang (Parameter.get 'Language)))
    (if method_result
        method_result                    ;; newer style hook produced a value
        (if (or (string-equal "english" lang)
                (string-equal "britishenglish" lang)
                (string-equal "americanenglish" lang))
            (Token_English utt)
            (if (string-equal "welsh" lang)
                (Token_Welsh utt)
                ;; every other language uses the generic module
                (Token_Any utt))))))
|
| 53 |
+
|
| 54 |
+
(define (remove_leadtrail_underscores name)
  "(remove_leadtrail_underscores name)
Get rid of leading and trailing underscores that may be used for emphasis.
Note this is called when there are underscores at the beginning and end but
there may not be an equal number of them."
  (let ((se (symbolexplode name)))
    ;; Drop leading underscores.  The explicit SE test ends the loop once
    ;; the list is exhausted (NAME made only of underscores) instead of
    ;; relying on what car returns for an empty list.
    (while (and se (string-equal "_" (car se)))
      (set! se (cdr se)))
    ;; Reverse so that the trailing underscores are now at the front.
    (set! se (reverse se))
    (while (and se (string-equal "_" (car se)))
      (set! se (cdr se)))
    ;; Restore the original order and glue the characters back together.
    (apply string-append (reverse se))))
|
| 66 |
+
|
| 67 |
+
(define (english_token_to_words token name)
"(english_token_to_words TOKEN NAME)
Returns a list of words for NAME from TOKEN. This allows the
user to customize various non-local, multi-word, context dependent
translations of tokens into words. If this function is unset only
the builtin translation rules are used, if this is set the builtin
rules are not used unless explicitly called. [see Token to word rules]"
 ;; One big rule table: the first clause whose test succeeds decides the
 ;; expansion, so the order of the clauses matters.  Anything not handled
 ;; here falls through to builtin_english_token_to_words at the end.
 (cond
  ;; --- fixed expansions for particular names and acronyms ---
  ((string-equal (downcase name) "al'ar")
   (list "Allar"))
  ((string-equal name "POA")
   (list "P" "O" (list '(name "A") (list 'pos token.letter_pos))))
  ((string-equal name "US")
   (list "U" "S"))
  ((string-equal name "IT")
   (list (list '(name "I") (list 'pos token.letter_pos)) "T"))
  ((string-equal name "AI")
   (list (list '(name "A") (list 'pos token.letter_pos)) "I"))
  ((string-equal (downcase name) "openai")
   (list "Open" (list '(name "A") (list 'pos token.letter_pos)) "I"))
  ((string-equal name "EB")
   (list "E" "B"))
  ((string-equal (downcase name) "ios")
   (list "I" "O" "S"))
  ;; --- homograph hints driven by the neighbouring tokens ---
  ((string-equal (downcase name) "content")
   ;; Tagged jj when preceded by an auxiliary / form of "be" and followed
   ;; by "with", nn otherwise.  "was" and "were" carry the same optional
   ;; n't group as the other auxiliaries so the plain forms match too.
   (if (and (string-equal (downcase (item.feat token "n.name")) "with")
            (string-matches (downcase (item.feat token "p.name")) "be\\|been\\|being\\|is\\(n'?t\\)?\\|are\\(n'?t\\)?\\|was\\(n'?t\\)?\\|were\\(n'?t\\)?\\|not\\|will\\|won'?t\\|may\\|would\\(n'?t\\)?\\|can\\(not\\)?\\|could\\(n'?t\\)?\\|should\\(n'?t\\)?\\|must\\(n'?t\\)?\\|might"))
       (list (list (list 'name name)(list 'pos 'jj)))
       (list (list (list 'name name)(list 'pos 'nn)))))
  ((string-equal name "/") (list ""))
  ((and (string-equal (downcase name) "refuse")
        (string-equal (downcase (item.feat token "n.name")) "to"))
   (list (list (list 'name name)(list 'pos 'vb))))
  ((string-equal (downcase name) "live")
   (cond
    ((or (string-matches (downcase (item.feat token "n.name")) "nation\\|tv\\|news\\|broadcasts?\\|streams?\\|stream\\(ing\\)?\\|shows?\\|events?\\|games?\\|match\\(es\\)?\\|scores?")
         (string-matches (downcase (item.feat token "nn.name")) "games?\\|match\\(es\\)?\\|scores?"))
     (list (list (list 'name name)(list 'pos 'jj))))
    ((string-equal (item.feat token "p.name") "I")
     (list (list (list 'name name)(list 'pos 'vb))))
    ((not (string-equal (item.feat token "punc") 0))
     ;; The previous name is downcased before matching, so every
     ;; alternative in the pattern has to be lower case as well.
     (if (string-matches (downcase (item.feat token "p.name")) "to\\|i\\|we\\|you\\|they\\|he\\|she\\|will\\|may\\|would\\|can\\|could\\|should\\|must\\|might")
         (list (list (list 'name name)(list 'pos 'vb)))
         (list (list (list 'name name)(list 'pos 'jj)))))
    (t
     (builtin_english_token_to_words token name))))
  ;; --- money ---
  ((string-equal name "#") (list "pound"))
  ((string-equal name "$") (list "dollar"))
  ((string-matches name "[A-Z]*[\\$#][0-9,]+\\(\\.[0-9]+\\)?")
   ;; Some form of money (pounds or type of dollars)
   (let (amount type currency)
     ;; Split NAME around the currency sign: TYPE is the text before it,
     ;; AMOUNT the text after it.
     (cond
      ((string-matches name ".*\\$.*")
       (set! amount (string-after name "$"))
       (set! type (string-before name "$"))
       (set! currency "dollar"))
      ((string-matches name ".*#.*")
       (set! amount (string-after name "#"))
       (set! type (string-before name "#"))
       (set! currency "pound"))
      (t
       ;; who knows
       (set! amount (string-after name "$"))
       (set! type (string-before name "$"))
       (set! currency "dollar")))
     (cond
      ((string-matches (item.feat token "n.name")
                       ".*illion.?")
       (append ;; "billions and billions" - Sagan
        (builtin_english_token_to_words token amount)
        (list (item.feat token "n.name")) ;; illion
        (token_money_expand type)
        (list (string-append currency "s"))))
      ((string-matches amount ".*\\...$")
       ;; exactly two characters after the point
       (cond
        ((string-equal amount "0.00")
         (append
          (list "zero")
          (list (string-append currency "s"))))
        ((string-equal amount "0.01")
         (append
          (list "one")
          (if (string-equal currency "dollar")
              (list "cent")
              (list "penny"))))
        ((string-matches amount "0\\.[0-9][0-9]")
         (append
          (builtin_english_token_to_words
           token (remove_leading_zeros (string-after amount ".")))
          (if (string-equal currency "dollar")
              (list "cents")
              (list "pence"))))
        (t
         (append
          (builtin_english_token_to_words token (string-before amount "."))
          (token_money_expand type)
          (if (string-matches amount "1\\..*")
              (list currency)
              (list (string-append currency "s")))
          (cond
           ((string-matches name ".*\\.00$")
            nil)
           ((string-matches name ".*\\.01$")
            (append
             (list "and" "one")
             (if (string-equal currency "dollar")
                 (list "cent")
                 (list "penny"))))
           (t
            (append
             (list "and")
             (builtin_english_token_to_words
              token (remove_leading_zeros (string-after amount ".")))
             (if (string-equal currency "dollar")
                 (list "cents")
                 (list "pence")))))))))
      (t
       (append ;; nothing after point or lots after point
        (builtin_english_token_to_words token amount)
        (token_money_expand type)
        (if (or (string-matches amount "1")
                (string-equal currency "yuan"))
            (list currency)
            (list (string-append currency "s"))))))))
  ((and (string-matches name ".*illion.?")
        (string-matches (item.feat token "p.name")
                        "[A-Z]*[\\$#][0-9,]+\\(\\.[0-9]+\\)?"))
   nil ;; dealt with on the previous symbol
   )
  ;; --- fractions ---
  ((string-matches name "[1-9][0-9]*/[1-9][0-9]*")
   (let ((numerator (string-before name "/"))
         (denominator (string-after name "/"))
         )
     (cond
      ((string-matches name "1/2")
       (list "half"))
      ((string-matches denominator "4")
       (append
        (builtin_english_token_to_words token numerator)
        (list "quarter")
        ;; plural marker only when there is more than one quarter,
        ;; matching the general fraction rule below
        (if (string-equal numerator "1")
            nil
            (list '((name "'s")(pos nnp))))))
      (t
       (append
        (builtin_english_token_to_words token numerator)
        (begin
          ;; the denominator is read as an ordinal ("third", "fifth", ...)
          (item.set_feat token "token_pos" "ordinal")
          (builtin_english_token_to_words token denominator))
        (if (string-equal numerator "1")
            nil
            (list '((name "'s")(pos nnp)))))))))
  ((and (string-matches name "No")
        (item.next token)
        (string-matches (item.feat token "n.name")
                        "[0-9]+"))
   (list
    "number"))
  ((string-matches name ".*%$")
   (append
    (token_to_words token (string-before name "%"))
    (list "percent")))
  ((string-matches name "[0-9]+s") ;; e.g. 1950s
   (item.set_feat token "token_pos" "year") ;; reasonable guess
   (append
    (builtin_english_token_to_words token (string-before name "s"))
    (list '((name "'s")(pos nnp))) ;; will get assimilated by postlexical rules
    ))
  ((string-matches name "[0-9]+'s") ;; e.g. 1950's
   (item.set_feat token "token_pos" "year") ;; reasonable guess
   (append
    (builtin_english_token_to_words token (string-before name "'s"))
    (list '((name "'s")(pos nnp))) ;; will get assimilated by postlexical rules
    ))
  ((and (string-matches name ".*s$")
        (string-equal (item.feat token "punc") "'"))
   ;; potential possessive or may be end of a quote
   (if (token_no_starting_quote token)
       (item.set_feat token "punc" ""))
   (builtin_english_token_to_words token name))
  ;; --- "A": letter or determiner ---
  ((and (string-equal name "A") ;; letter or determiner
        (or (and (string-matches (item.feat token "p.name") "[A-Za-z].*")
                 (string-equal (item.feat token "p.punc") 0))
            (string-matches (item.feat token "n.name") "[A-Z].*")))
   (list (list '(name "a")(list 'pos token.letter_pos))))
  ((and (or (string-equal name "A") (string-equal name "a")) ;; letter or determiner
        (and (string-equal (item.feat token "p.name") 0)
             (string-equal (item.feat token "n.name") 0)))
   (list (list '(name "a")(list 'pos token.letter_pos))))
  ((member_string name english_homographs)
   (list (list (list 'name name)
               (list 'hg_pos (item.feat token "token_pos")))))
  ((string-matches name "__*[^_][^_]*_*_") ;; _emphasis_
   ;; strip the emphasis markers and run the bare name through the rules
   (english_token_to_words
    token
    (remove_leadtrail_underscores name)
    ))
  ((string-matches name "\\(EW\\|NS\\|NE\\|CC\\|DT\\|TE\\|JS\\|JW\\|JE\\|CR\\|CP\\)[1-4][0-9]") ;; Singapore MRT station IDs
   (let (lname num)
     ;; LNAME is the two-letter line code, NUM whatever follows it.
     (cond
      ((string-matches name "EW.*")
       (set! lname "EW"))
      ((string-matches name "NS.*")
       (set! lname "NS"))
      ((string-matches name "NE.*")
       (set! lname "NE"))
      ((string-matches name "CC.*")
       (set! lname "CC"))
      ((string-matches name "DT.*")
       (set! lname "DT"))
      ((string-matches name "TE.*")
       (set! lname "TE"))
      ((string-matches name "JS.*")
       (set! lname "JS"))
      ((string-matches name "JW.*")
       (set! lname "JW"))
      ((string-matches name "JE.*")
       (set! lname "JE"))
      ((string-matches name "CR.*")
       (set! lname "CR"))
      (t
       (set! lname "CP")))
     (set! num (string-after name lname))
     (append
      (symbolexplode lname)
      (builtin_english_token_to_words token num))))
  ;; --- times ---
  ((string-matches name "\\([1-9]\\|10\\|11\\|12\\)[AaPp][Mm]") ;; time
   (let (hour apm (atime (downcase name)))
     (if (string-matches atime ".*am")
         (set! apm "am")
         (set! apm "pm"))
     (set! hour (string-before atime apm))
     (append
      (builtin_english_token_to_words token hour)
      (if (string-equal apm "am")
          (builtin_english_token_to_words token "A.M")
          (builtin_english_token_to_words token "P.M")))))
  ((string-matches name "[0-9]?[0-9][:\\.][0-9][0-9][AaPp][Mm]") ;; time
   ;; must be am/pm present for . to be acceptable separator
   (let (hours mins sep (ttime (downcase name)))
     (if (string-matches ttime ".*:.*")
         (set! sep ":")
         (set! sep "."))
     (set! hours (string-before ttime sep))
     (set! mins (string-after ttime sep))
     ;; SEP is reused from here on to hold the am/pm marker
     (if (string-matches ttime ".*am")
         (set! sep "am")
         (set! sep "pm"))
     (set! mins (string-before mins sep))
     (append
      (builtin_english_token_to_words token hours)
      (cond
       ((string-equal mins "00")
        nil)
       ((string-matches mins "0.")
        (cons
         "oh"
         (builtin_english_token_to_words token (string-after mins "0"))))
       (t
        (builtin_english_token_to_words token mins)))
      (if (string-equal sep "am")
          (builtin_english_token_to_words token "A.M")
          (builtin_english_token_to_words token "P.M")))))
  ((string-matches name "[0-9]?[0-9]:[0-9][0-9]") ;; time
   (append
    (builtin_english_token_to_words
     token (remove_leading_zeros (string-before name ":")))
    (cond
     ((string-equal "00" (string-after name ":"))
      nil)
     ((string-matches (string-after name ":") "0.")
      (cons
       "oh"
       (builtin_english_token_to_words
        token
        (remove_leading_zeros (string-after name ":")))))
     (t
      (builtin_english_token_to_words
       token
       (string-after name ":"))))))
  ((string-matches name "[0-9][0-9]:[0-9][0-9]:[0-9][0-9]") ;; exact time
   (append
    (builtin_english_token_to_words
     token (remove_leading_zeros (string-before name ":")))
    (list "hours")
    (builtin_english_token_to_words
     token (remove_leading_zeros
            (string-before (string-after name ":") ":")))
    (list "minutes" "and")
    (builtin_english_token_to_words
     token (remove_leading_zeros
            (string-after (string-after name ":") ":")))
    (list "seconds")))
  ;; --- dates ---
  ((string-matches name "[12][0-9][0-9][0-9]/\\(1[012]\\|0[1-9]\\)/\\(0[1-9]\\|[12][0-9]\\|3[01]\\)")
   ;; year/month/day: said as "<month> the <day> <year>"
   (let ((year (string-before name "/"))
         (num1 (string-before (string-after name "/") "/"))
         (num2 (string-after (string-after name "/") "/"))
         month day)
     (cond
      ((string-equal num1 "01")
       (set! month "january"))
      ((string-equal num1 "02")
       (set! month "february"))
      ((string-equal num1 "03")
       (set! month "march"))
      ((string-equal num1 "04")
       (set! month "april"))
      ((string-equal num1 "05")
       (set! month "may"))
      ((string-equal num1 "06")
       (set! month "june"))
      ((string-equal num1 "07")
       (set! month "july"))
      ((string-equal num1 "08")
       (set! month "august"))
      ((string-equal num1 "09")
       (set! month "september"))
      ((string-equal num1 "10")
       (set! month "october"))
      ((string-equal num1 "11")
       (set! month "november"))
      ((string-equal num1 "12")
       (set! month "december"))
      (t
       (set! month (tok_string_as_letters num1))))
     (item.set_feat token "token_pos" "ordinal")
     (set! day (builtin_english_token_to_words token num2))
     (item.set_feat token "token_pos" "year")
     (append
      (list month)
      (list "the")
      day
      (builtin_english_token_to_words token year))))
  ((string-matches name "[0-9][0-9]?/[0-9][0-9]?/[0-9][0-9]\\([0-9][0-9]\\)?")
   ;; date, say it as numbers to avoid American/British problem
   (let ((num1 (string-before name "/"))
         (num2 (string-before (string-after name "/") "/"))
         (year (string-after (string-after name "/") "/"))
         day month)
     (item.set_feat token "token_pos" "cardinal")
     (set! day (builtin_english_token_to_words token num1))
     (set! month (builtin_english_token_to_words token num2))
     (item.set_feat token "token_pos" "year")
     (append
      day
      month
      (list '((name ",")(pbreak_scale 0.9)))
      (builtin_english_token_to_words token year))))
  ;; --- phone numbers and numeric ranges ---
  ((string-matches name "[0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]")
   (item.set_feat token "token_pos" "digits") ;; canonical phone number
   (append
    (builtin_english_token_to_words token (string-before name "-"))
    (list '((name ",")(pbreak_scale 1.0)))
    (builtin_english_token_to_words token (string-after name "-"))))
  ((string-matches name "[0-9]+-[0-9]+-[-0-9]+")
   ;; long distance number
   ;; R starts with a dummy head so append always has a list to extend;
   ;; the head is dropped again by the final cdr.
   (let ((r '(dummy)) (remainder name))
     (item.set_feat token "token_pos" "digits")
     (while (> (length remainder) 0)
       (if (string-matches remainder "[0-9]+")
           (set! r (append r
                           (builtin_english_token_to_words
                            token remainder)))
           (set! r (append r
                           (builtin_english_token_to_words
                            token (string-before remainder "-")))))
       (set! remainder (string-after remainder "-"))
       (if (> (length remainder) 0)
           (set! r (append r (list '((name ",")(pbreak_scale 1.0)))))))
     (cdr r))
   )
  ((and (string-matches name "[0-9][0-9][0-9]")
        (string-matches (item.feat token "n.name")
                        "[0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]"))
   (item.set_feat token "token_pos" "digits")
   (builtin_english_token_to_words token name))
  ((string-matches name "[0-9]+-[0-9]+")
   (let ((tokpos))
     ;; Rename the token to its first number so the "[0-9]+" CART tree
     ;; can classify it; the predicted token_pos is then used for both
     ;; halves of the range.
     (item.set_name token (string-before name "-"))
     (set! tokpos (wagon token
                         (car (cdr (assoc "[0-9]+" token_pos_cart_trees)))))
     (item.set_feat token "token_pos" (car tokpos))
     (append
      (builtin_english_token_to_words token (string-before name "-"))
      (list "to")
      (builtin_english_token_to_words token (string-after name "-")))))
  ((string-matches name "\\(iii?\\|II?I?\\|IV\\|VI?I?I?\\|IX\\|X[VIX]*\\)")
   ;; Roman numerals
   (let ((tp (item.feat token "token_pos")))
     (cond
      ((string-matches tp "century");; always believe this
       (item.set_feat token "token_pos" "ordinal")
       (if (or (string-equal "1" (tok_rex token))
               (item.feat token "p.lisp_tok_rex_names"))
           (append
            (list "the")
            (builtin_english_token_to_words
             token (tok_roman_to_numstring name)))
           (builtin_english_token_to_words
            token (tok_roman_to_numstring name))))
      ((string-matches name "[IVX]");; be *very* wary of this one
       (if (and (string-equal
                 "1" (item.feat token "p.lisp_tok_section_name"))
                (string-matches tp "number")
                (string-equal (item.feat token "p.punc") 0))
           (builtin_english_token_to_words
            token (tok_roman_to_numstring name))
           (tok_string_as_letters name)))
      ((or (string-matches tp "number")
           (string-matches name "iii?\\|III?"))
       (item.set_feat token "token_pos" "cardinal")
       (builtin_english_token_to_words
        token (tok_roman_to_numstring name)))
      (t;; else its a letter
       (tok_string_as_letters name)))))
  ;; --- abbreviations and typographic conventions ---
  ((and (string-matches name "pp")
        (string-matches (item.feat token "n.name")
                        "[0-9]+-[0-9]+"))
   (list "pages"))
  ((and (string-matches name "ss")
        (string-matches (item.feat token "n.name")
                        "[0-9]+-[0-9]+"))
   (list "sections"))
  ((string-matches name "_____+")
   (list "line" "of" "underscores"))
  ((string-matches name "=====+")
   (list "line" "of" "equals"))
  ((string-matches name "-----+")
   (list "line" "of" "hyphens"))
  ((string-matches name "\\*\\*\\*\\*\\*+")
   (list "line" "of" "asterisks"))
  ((string-matches name "--+")
   (list '((name ",")(pbreak_scale 1.0))))
  ((string-matches name ".*--+.*")
   (append
    (builtin_english_token_to_words token (string-before name "--"))
    (list '((name ",")(pbreak_scale 1.0)))
    (builtin_english_token_to_words token (string-after name "--"))))
  ((string-matches name "[A-Z][A-Z]?&[A-Z][A-Z]?")
   (append
    (tok_string_as_letters (string-before name "&"))
    (list "and")
    (tok_string_as_letters (string-after name "&"))))
  ((and (string-equal name "Ms")
        (string-matches (item.feat token "n.name") "[A-Z][^A-Z]*"))
   (list "mizz"))
  ((string-matches name "[A-Z]+'s")
   (append
    (builtin_english_token_to_words token (string-before name "'s"))
    (list '((name "'s")(pos nnp))) ;; will get assimilated by postlexical rules
    ))
  ((or (string-matches name "[A-Z][A-Z]+s")
       (string-matches name "[BCDEFGHJKLMNOPQRSTVWXYZ]+s"))
   (append
    (builtin_english_token_to_words token (string-before name "s"))
    (list '((name "'s")(pos nnp))) ;; will get assimilated by postlexical rules
    ))
  ((string-matches name "<.*@.*>") ;; quoted e-mail
   (append
    (builtin_english_token_to_words
     token (string-after (string-before name "@") "<"))
    (list "at")
    (builtin_english_token_to_words
     token (string-before (string-after name "@") ">"))))
  ((string-matches name "\\(Dr\\|St\\)")
   (if (string-equal (item.feat token "token_pos") "street")
       (if (string-matches name "Dr")
           (list "drive")
           (list "street"))
       (if (string-matches name "Dr") ;; default on title side
           (list "doctor")
           (list "saint"))))
  ((string-matches name "[Cc]alif") ;; hopelessly specific ...
   (list
    "california"))
  (t
   ;; no custom rule applies
   (builtin_english_token_to_words token name))))
|
| 546 |
+
|
| 547 |
+
;;; english_token_to_words is set as the default value of token_to_words.
;;; The percent rule inside english_token_to_words calls back through
;;; token_to_words, so rebinding this variable affects that rule as well.
(defvar token_to_words english_token_to_words)
|
| 549 |
+
|
| 550 |
+
;; The docstring names the feature as this file reads it:
;; (item.feat token "punc"), documented further down as Token.punc.
(defvar token.punctuation "\"'`.,:;!?(){}[]"
  "token.punctuation
A string of characters which are to be treated as punctuation when
tokenizing text. Punctuation symbols will be removed from the text
of the token and made available through the \"punc\" feature.
[see Tokenizing]")
|
| 556 |
+
;; Quote characters and opening brackets only; every character listed
;; here also appears in token.punctuation.
(defvar token.prepunctuation "\"'`({["
  "token.prepunctuation
A string of characters which are to be treated as preceding punctuation
when tokenizing text. Prepunctuation symbols will be removed from the text
of the token and made available through the \"prepunctuation\" feature.
[see Tokenizing]")
|
| 562 |
+
;; Characters that separate tokens.  The whitespace that preceded a token
;; stays available on the token as described in the docstring.
(defvar token.whitespace " \t\n\r "
  "token.whitespace
A string of characters which are to be treated as whitespace when
tokenizing text. Whitespace is treated as a separator and removed
from the text of a token and made available through the \"whitespace\"
feature. [see Tokenizing]")
|
| 568 |
+
(defvar token.singlecharsymbols ""
|
| 569 |
+
"token.singlecharsymbols
|
| 570 |
+
Characters which always have to be split off as tokens. This would be
|
| 571 |
+
unusual in standard text, but is useful in parsing some types of
|
| 572 |
+
file. [see Tokenizing]")
|
| 573 |
+
|
| 574 |
+
(defvar token.letter_pos 'nn
|
| 575 |
+
"token.letter_pos
|
| 576 |
+
The part of speech tag (valid for your part of speech tagger) for
|
| 577 |
+
individual letters. When the tokenizer decides to pronounce a token
|
| 578 |
+
as a list of letters this tag is added to each letter in the list.
|
| 579 |
+
Note this should be from the part of speech set used in your tagger
|
| 580 |
+
which may not be the same one that appears in the actual lexical
|
| 581 |
+
entry (if you map them afterwards). This specifically allows \"a\"
|
| 582 |
+
to come out as ae rather than @.")
|
| 583 |
+
|
| 584 |
+
(defvar token.unknown_word_name "unknown"
|
| 585 |
+
"token.unknown_word_name
|
| 586 |
+
When all else fails and a pronunciation for a word or character can't
|
| 587 |
+
be found this word will be said instead. If you make this \"\" then
|
| 588 |
+
the unknown word will simply be omitted. This will only
|
| 589 |
+
really be called when there is a bug in the lexicon and characters
|
| 590 |
+
are missing from the lexicon. Note this word should be in the lexicon.")
|
| 591 |
+
|
| 592 |
+
(def_feature_docstring
|
| 593 |
+
'Token.punc
|
| 594 |
+
"Token.punc
|
| 595 |
+
Succeeding punctuation symbol found after token in original
|
| 596 |
+
string/file.")
|
| 597 |
+
(def_feature_docstring
|
| 598 |
+
'Token.whitespace
|
| 599 |
+
"Token.whitespace
|
| 600 |
+
Whitespace found before token in original string/file.")
|
| 601 |
+
(def_feature_docstring
|
| 602 |
+
'Token.prepunctuation
|
| 603 |
+
"Token.prepunctuation
|
| 604 |
+
Preceding punctuation symbol found before token in original string/file.")
|
| 605 |
+
|
| 606 |
+
(require 'tokenpos)
|
| 607 |
+
;;;
|
| 608 |
+
;;; Token pos are gross level part of speech tags which help decide
|
| 609 |
+
;;; pronunciation of tokens (particular expansion of Tokens into words)
|
| 610 |
+
;;; The most obvious example is identifying number types (ordinals,
|
| 611 |
+
;;; years, digits or numbers).
|
| 612 |
+
;;;
|
| 613 |
+
(defvar english_token_pos_cart_trees
|
| 614 |
+
'(
|
| 615 |
+
;; Format is (Regex Tree)
|
| 616 |
+
("[0-9]+"
|
| 617 |
+
((lisp_num_digits < 3.8)
|
| 618 |
+
((p.lisp_token_pos_guess is month)
|
| 619 |
+
((lisp_month_range is 0) ((cardinal)) ((ordinal)))
|
| 620 |
+
((n.lisp_token_pos_guess is month)
|
| 621 |
+
((lisp_month_range is 0) ((cardinal)) ((ordinal)))
|
| 622 |
+
((n.lisp_token_pos_guess is numeric)
|
| 623 |
+
((lisp_num_digits < 2)
|
| 624 |
+
((p.lisp_token_pos_guess is numeric)
|
| 625 |
+
((pp.lisp_token_pos_guess is sym) ((digits)) ((cardinal)))
|
| 626 |
+
((cardinal)))
|
| 627 |
+
((nn.lisp_token_pos_guess is sym) ((cardinal)) ((digits))))
|
| 628 |
+
((lisp_num_digits < 2)
|
| 629 |
+
((nn.lisp_token_pos_guess is numeric)
|
| 630 |
+
((n.lisp_token_pos_guess is sym)
|
| 631 |
+
((lisp_month_range is 0) ((digits)) ((cardinal)))
|
| 632 |
+
((cardinal)))
|
| 633 |
+
((cardinal)))
|
| 634 |
+
((name < 302.3)
|
| 635 |
+
((p.lisp_token_pos_guess is flight)
|
| 636 |
+
((digits))
|
| 637 |
+
((n.lisp_token_pos_guess is sym)
|
| 638 |
+
((p.lisp_token_pos_guess is sym) ((digits)) ((cardinal)))
|
| 639 |
+
((cardinal))))
|
| 640 |
+
((p.lisp_token_pos_guess is a)
|
| 641 |
+
((digits))
|
| 642 |
+
((n.lisp_token_pos_guess is sym)
|
| 643 |
+
((nn.lisp_token_pos_guess is sym)
|
| 644 |
+
((name < 669.2) ((digits)) ((cardinal)))
|
| 645 |
+
((cardinal)))
|
| 646 |
+
((name < 373.2)
|
| 647 |
+
((cardinal))
|
| 648 |
+
((name < 436.2)
|
| 649 |
+
((name < 392.6) ((digits)) ((cardinal)))
|
| 650 |
+
((name < 716.5)
|
| 651 |
+
((cardinal))
|
| 652 |
+
((name < 773.6)
|
| 653 |
+
((p.lisp_token_pos_guess is _other_) ((digits)) ((cardinal)))
|
| 654 |
+
((cardinal)))))))))))))
|
| 655 |
+
((p.lisp_token_pos_guess is numeric)
|
| 656 |
+
((pp.lisp_token_pos_guess is month)
|
| 657 |
+
((year))
|
| 658 |
+
((nn.lisp_token_pos_guess is numeric)
|
| 659 |
+
((cardinal))
|
| 660 |
+
((p.lisp_token_pos_precise_val is year)
|
| 661 |
+
((cardinal))
|
| 662 |
+
((p.lisp_token_pos_precise_val is ordinal)
|
| 663 |
+
((cardinal))
|
| 664 |
+
((digits))))))
|
| 665 |
+
((nn.lisp_token_pos_guess is numeric)
|
| 666 |
+
((n.lisp_token_pos_guess is month)
|
| 667 |
+
((cardinal))
|
| 668 |
+
((n.lisp_token_pos_guess is numeric)
|
| 669 |
+
((digits))
|
| 670 |
+
((p.lisp_token_pos_guess is _other_) ((cardinal)) ((year)))))
|
| 671 |
+
((p.lisp_token_pos_guess is _other_)
|
| 672 |
+
((lisp_num_digits < 4.4)
|
| 673 |
+
((name < 2959.6)
|
| 674 |
+
((name < 1773.4) ((cardinal)) ((year)))
|
| 675 |
+
((cardinal)))
|
| 676 |
+
((pp.lisp_token_pos_guess is _other_) ((digits)) ((cardinal))))
|
| 677 |
+
((n.lisp_token_pos_guess is to)
|
| 678 |
+
((year))
|
| 679 |
+
((p.lisp_token_pos_guess is sym)
|
| 680 |
+
((pp.lisp_token_pos_guess is sym)
|
| 681 |
+
((cardinal))
|
| 682 |
+
((lisp_num_digits < 4.6) ((year)) ((digits))))
|
| 683 |
+
((lisp_num_digits < 4.8)
|
| 684 |
+
((name < 2880)
|
| 685 |
+
((name < 1633.2)
|
| 686 |
+
((name < 1306.4) ((cardinal)) ((year)))
|
| 687 |
+
((year)))
|
| 688 |
+
((cardinal)))
|
| 689 |
+
((cardinal)))))))))
|
| 690 |
+
)
|
| 691 |
+
("\\(II?I?\\|IV\\|VI?I?I?\\|IX\\|X[VIX]*\\)";; Roman numerals
|
| 692 |
+
((p.lisp_tok_rex_names is 0)
|
| 693 |
+
((lisp_num_digits is 5)
|
| 694 |
+
((number))
|
| 695 |
+
((lisp_num_digits is 4)
|
| 696 |
+
((number))
|
| 697 |
+
((nn.lisp_num_digits is 13)
|
| 698 |
+
((number))
|
| 699 |
+
((p.lisp_num_digits is 7)
|
| 700 |
+
((number))
|
| 701 |
+
((p.lisp_tok_section_name is 0)
|
| 702 |
+
((lisp_tok_rex is 0)
|
| 703 |
+
((lisp_num_digits is 3)
|
| 704 |
+
((p.lisp_num_digits is 4)
|
| 705 |
+
((number))
|
| 706 |
+
((nn.lisp_num_digits is 4)
|
| 707 |
+
((number))
|
| 708 |
+
((n.lisp_num_digits is 4)
|
| 709 |
+
((number))
|
| 710 |
+
((pp.lisp_num_digits is 3)
|
| 711 |
+
((number))
|
| 712 |
+
((p.lisp_num_digits is 2)
|
| 713 |
+
((letter))
|
| 714 |
+
((nn.lisp_num_digits is 2)
|
| 715 |
+
((letter))
|
| 716 |
+
((n.cap is 0) ((letter)) ((number)))))))))
|
| 717 |
+
((nn.lisp_num_digits is 11)
|
| 718 |
+
((letter))
|
| 719 |
+
((lisp_num_digits is 1)
|
| 720 |
+
((pp.lisp_num_digits is 9)
|
| 721 |
+
((letter))
|
| 722 |
+
((p.lisp_num_digits is 9)
|
| 723 |
+
((letter))
|
| 724 |
+
((n.lisp_num_digits is 6)
|
| 725 |
+
((letter))
|
| 726 |
+
((pp.lisp_num_digits is 6)
|
| 727 |
+
((letter))
|
| 728 |
+
((pp.cap is 0)
|
| 729 |
+
((n.cap is 0)
|
| 730 |
+
((p.lisp_num_digits is 1)
|
| 731 |
+
((letter))
|
| 732 |
+
((n.lisp_num_digits is 4) ((letter)) ((letter))))
|
| 733 |
+
((letter)))
|
| 734 |
+
((letter)))))))
|
| 735 |
+
((p.lisp_num_digits is 10)
|
| 736 |
+
((number))
|
| 737 |
+
((n.lisp_num_digits is 8)
|
| 738 |
+
((number))
|
| 739 |
+
((pp.lisp_num_digits is 9)
|
| 740 |
+
((number))
|
| 741 |
+
((nn.lisp_num_digits is 5)
|
| 742 |
+
((number))
|
| 743 |
+
((n.lisp_num_digits is 4) ((number)) ((letter))))))))))
|
| 744 |
+
((letter)))
|
| 745 |
+
((number)))))))
|
| 746 |
+
((century))))
|
| 747 |
+
("\\(Dr\\|St\\)"
|
| 748 |
+
((n.name is 0)
|
| 749 |
+
((p.cap is 1)
|
| 750 |
+
((street))
|
| 751 |
+
((p.name matches "[0-9]*\\(1[sS][tT]\\|2[nN][dD]\\|3[rR][dD]\\|[0-9][tT][hH]\\)")
|
| 752 |
+
((street))
|
| 753 |
+
((title))))
|
| 754 |
+
((punc matches ".*,.*")
|
| 755 |
+
((street))
|
| 756 |
+
((p.punc matches ".*,.*")
|
| 757 |
+
((title))
|
| 758 |
+
((n.cap is 0)
|
| 759 |
+
((street))
|
| 760 |
+
((p.cap is 0)
|
| 761 |
+
((p.name matches "[0-9]*\\(1[sS][tT]\\|2[nN][dD]\\|3[rR][dD]\\|[0-9][tT][hH]\\)")
|
| 762 |
+
((street))
|
| 763 |
+
((title)))
|
| 764 |
+
((pp.name matches "[1-9][0-9]+")
|
| 765 |
+
((street))
|
| 766 |
+
((title)))))))))
|
| 767 |
+
("lead"
|
| 768 |
+
((p.name in (was were have had been having has is are))
|
| 769 |
+
((led))
|
| 770 |
+
((liid))))
|
| 771 |
+
("read"
|
| 772 |
+
((p.name in ("was" "wasn't" "were" "weren't" "have" "haven't" "had" "hadn't" "been" "having" "has" "hasn't" "is" "isn't" "are" "aren't" "He" "She"))
|
| 773 |
+
((red))
|
| 774 |
+
((riid))))
|
| 775 |
+
;("read"
|
| 776 |
+
; ((p.name matches "[Pp]lease\\|PLEASE\\|[Ll]et'?s\\|LET'?S\\|[Tt][Oo]\\|[Ww]ill\\|WILL\\|[Ww]on'?t\\|WON'?T\\|[Mm]ay\\|MAY\\|[Cc]an\\(not\\|'?t\\)?\\|CAN\\(NOT\\|'?T\\)?\\|[Mm]ight\\|MIGHT\\|\\([Ww]ould\\|[Cc]ould\\|[Ss]hould\\|[Mm]ust\\)\\(n'?t\\)?\\|\\(WOULD\\|COULD\\|SHOULD\\|MUST\\)\\(N'?T\\)?")
|
| 777 |
+
; ((riid))
|
| 778 |
+
; ((p.name is 0)
|
| 779 |
+
; ((riid))
|
| 780 |
+
; ((red)))))
|
| 781 |
+
("and"
|
| 782 |
+
((n.name is 0)
|
| 783 |
+
((aend))
|
| 784 |
+
((punc is 0)
|
| 785 |
+
((ahnd))
|
| 786 |
+
((aend)))))
|
| 787 |
+
("for"
|
| 788 |
+
((n.name is 0)
|
| 789 |
+
((faor))
|
| 790 |
+
((punc is 0)
|
| 791 |
+
((fer))
|
| 792 |
+
((faor)))))
|
| 793 |
+
("to"
|
| 794 |
+
((n.name is 0)
|
| 795 |
+
((tuu))
|
| 796 |
+
((punc is 0)
|
| 797 |
+
((to))
|
| 798 |
+
((tuu)))))
|
| 799 |
+
))
|
| 800 |
+
|
| 801 |
+
(defvar english_homographs
|
| 802 |
+
'("lead" "read" "and" "for" "to")
|
| 803 |
+
"english_homographs
|
| 804 |
+
A list of tokens that are dealt with by a homograph disambiguation tree
|
| 805 |
+
in english_token_pos_cart_trees.")
|
| 806 |
+
|
| 807 |
+
(defvar token_pos_cart_trees
|
| 808 |
+
english_token_pos_cart_trees
|
| 809 |
+
"token_pos_cart_trees
|
| 810 |
+
This is a list of pairs of regex plus CART tree. Tokens that match
|
| 811 |
+
the regex will have the CART tree applied, setting the result as
|
| 812 |
+
the token_pos feature on the token. The list is checked in order
|
| 813 |
+
and only the first match will be applied.")
|
| 814 |
+
|
| 815 |
+
(provide 'token)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/tokenpos.scm
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Functions used in identifying token types.
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
(defvar token_most_common
|
| 38 |
+
'(
|
| 39 |
+
sym numeric month to day in the of on and writes a years from
|
| 40 |
+
for jst at million by is was gmt page he that than more since as when
|
| 41 |
+
with but after about or his i has it date no died number bst who miles
|
| 42 |
+
university some people an only w year have ago were are pages up days
|
| 43 |
+
months hours minutes through out had which least hi last now ft this
|
| 44 |
+
all one its there between cents until over will before past they
|
| 45 |
+
nearly times tim message so lbs just if age we during she billion then
|
| 46 |
+
other be time new her first states not you members under would many
|
| 47 |
+
says degrees two next fax week while bush been around including back
|
| 48 |
+
campaign american within publisher flight points even early later
|
| 49 |
+
world countries every edt can president most could their what them
|
| 50 |
+
former began women killed another also received long americans pounds
|
| 51 |
+
do dear said km made into did dead war tel still old x took total men
|
| 52 |
+
like f am less c well late down weeks end chapter among place house
|
| 53 |
+
away him election death almost students state soviet where version
|
| 54 |
+
summer man s nation because washington top though m id est these spent
|
| 55 |
+
seats gnu estimated those lost ian high each copies children acres
|
| 56 |
+
tons son per my found won off seconds power nations federal born
|
| 57 |
+
presidential much city begin p name different whose three home hello
|
| 58 |
+
million-euro)
|
| 59 |
+
|
| 60 |
+
"token_most_common
|
| 61 |
+
A list of (English) words which were found to be most common in
|
| 62 |
+
an text database and are used as discriminators in token analysis.")
|
| 63 |
+
|
| 64 |
+
(define (token_pos_precise_val sc)
  "(token_pos_precise_val SC)
Collapses the token_pos feature already present on SC into one of
three symbols: year when the downcased feature value is \"year\",
ordinal when it is \"ordinal\", and _other_ for anything else."
  (let ((tpos (downcase (item.feat sc "token_pos"))))
    (if (string-equal tpos "year")
        'year
        (if (string-equal tpos "ordinal")
            'ordinal
            '_other_))))
|
| 83 |
+
|
| 84 |
+
(define (token_pos_guess sc)
  "(token_pos_guess SC)
Returns a coarse class for SC's downcased name.  The checks below are
tried in order and the first one that succeeds decides the result:
  numeric   name is all digits
  number    digits with a decimal point, or comma grouped digits
  NAME      the downcased name itself when it is in token_most_common
  sym       name contains at least one non alphanumeric character
  month     name is a month name (or abbrev)
  day       name is a day name (or abbrev)
  _other_   none of the above
Note this can be used to find token_pos but isn't used directly as
it is not discriminatory enough."
  (let ((lname (downcase (item.name sc)))
        (month_names '(jan january feb february mar march
                       apr april may jun june
                       jul july aug august sep sept september
                       oct october nov november dec december))
        (day_names '(sun sunday mon monday tue tues tuesday
                     wed wednesday thu thurs thursday
                     fri friday sat saturday)))
    (cond
     ((string-matches lname "[0-9]+")
      'numeric)
     ;; floats and comma'd numbers share one class
     ((string-matches lname "[0-9]+\\.[0-9]+")
      'number)
     ((string-matches
       lname
       "[0-9][0-9]?[0-9]?,\\([0-9][0-9][0-9],\\)*[0-9][0-9][0-9]")
      'number)
     ;; very common words act as their own class
     ((member_string lname token_most_common)
      lname)
     ((string-matches lname ".*[^A-Za-z0-9].*")
      'sym)
     ((member_string lname month_names)
      'month)
     ((member_string lname day_names)
      'day)
     (t
      '_other_))))
|
| 118 |
+
|
| 119 |
+
(define (token_no_starting_quote token)
  "(token_no_starting_quote TOKEN)
Walks back through the Token relation from TOKEN and returns nil as
soon as a token is found whose prepunctuation matches a single quote
or backquote; returns t if the start is reached without finding one.
This is used to disambiguate ending single quote as possessive or
end quote."
  (let ((tok token)
        (opened nil))
    (while (and tok (not opened))
      (if (string-matches (item.feat tok "prepunctuation") "[`']")
          (set! opened t)
          (set! tok (item.relation.prev tok "Token"))))
    (not opened)))
|
| 131 |
+
|
| 132 |
+
(define (token_zerostart sc)
  "(token_zerostart SC)
Returns the string \"1\" if the first char of SC's name is 0, and the
string \"0\" otherwise."
  (cond
   ((string-matches (item.name sc) "^0.*")
    "1")
   (t
    "0")))
|
| 138 |
+
|
| 139 |
+
(define (tok_roman_to_numstring roman)
  "(tok_roman_to_numstring ROMAN)
Takes a string of roman numerals and converts it to a number and
then returns the printed string of that.  Only the numerals I, V and
X are understood (with IV and IX treated as subtractive pairs); any
other character is ignored, so values needing L or above are not
handled."
  (let ((total 0)
        (remaining (symbolexplode (upcase roman)))
        (cur nil)
        (nxt nil))
    (while remaining
      (set! cur (car remaining))
      (set! nxt (car (cdr remaining)))
      (cond
       ((equal? cur 'X)
        (set! total (+ 10 total)))
       ((equal? cur 'V)
        (set! total (+ 5 total)))
       ;; IV and IX consume the following numeral as well
       ((and (equal? cur 'I) (equal? nxt 'V))
        (set! total (+ 4 total))
        (set! remaining (cdr remaining)))
       ((and (equal? cur 'I) (equal? nxt 'X))
        (set! total (+ 9 total))
        (set! remaining (cdr remaining)))
       ((equal? cur 'I)
        (set! total (+ 1 total))))
      (set! remaining (cdr remaining)))
    (format nil "%d" total)))
|
| 162 |
+
|
| 163 |
+
(define (num_digits sc)
  "(num_digits SC)
Returns the length in characters of SC's name, which is the number of
digits when the name is purely numeric."
  (let ((printed (format nil "%s" (item.name sc))))
    (string-length printed)))
|
| 167 |
+
|
| 168 |
+
(define (month_range sc)
  "(month_range SC)
Returns \"1\" if SC's name, parsed as a number, is > 0 and < 32,
and \"0\" otherwise."
  (let ((num (parse-number (item.name sc))))
    (cond
     ((not (> num 0))
      "0")
     ((< num 32)
      "1")
     (t
      "0"))))
|
| 175 |
+
|
| 176 |
+
(define (remove_leading_zeros name)
  "(remove_leading_zeros NAME)
Returns NAME with its leading zeros removed.  A zero is only dropped
while at least one more character follows it, so a name that is just
\"0\" is returned unchanged."
  (if (string-matches name "^0..*")
      ;; the name starts with 0, so this drops exactly that first char
      (remove_leading_zeros (string-after name "0"))
      name))
|
| 183 |
+
|
| 184 |
+
(define (token_money_expand type)
  "(token_money_expand TYPE)
Convert shortened form of money identifier to words if of a known
type.  \"HK\", \"C\" and \"A\" become Hong Kong, Canadian and
Australian.  Any other TYPE shorter than four characters is spelled
out letter by letter, each letter tagged with token.letter_pos.
Anything longer is returned as a one word list."
  (cond
   ((string-equal type "HK")
    (list "Hong" "Kong"))
   ((string-equal type "C")
    (list "Canadian"))
   ((string-equal type "A")
    (list "Australian"))
   ((< (length type) 4)
    ;; tok_string_as_letters builds exactly the name/pos entries that
    ;; used to be constructed inline here
    (tok_string_as_letters type))
   (t
    (list type))))
|
| 202 |
+
|
| 203 |
+
(define (find_month_from_number token string-number)
  "(find_month_from_number TOKEN STRING-NUMBER)
Find the textual representation of the month from the given string
number.  Values 1 to 12 give a one word list holding the month name.
Any other value gives the word \"month\" followed by the expansion of
STRING-NUMBER by builtin_english_token_to_words."
  (let ((nnum (parse-number string-number))
        (names '("January" "February" "March" "April" "May" "June"
                 "July" "August" "September" "October" "November"
                 "December"))
        (idx 1)
        (found nil))
    ;; walk the table until the position equals the parsed number
    (while (and names (not found))
      (if (equal? idx nnum)
          (set! found (list (car names)))
          (begin
            (set! idx (+ 1 idx))
            (set! names (cdr names)))))
    (if found
        found
        (cons "month"
              (builtin_english_token_to_words token string-number)))))
|
| 223 |
+
|
| 224 |
+
(define (tok_allcaps sc)
  "(tok_allcaps SC)
Returns \"1\" if SC's name is all capitals, \"0\" otherwise."
  (cond
   ((string-matches (item.name sc) "[A-Z]+")
    "1")
   (t
    "0")))
|
| 230 |
+
|
| 231 |
+
(define (tok_section_name sc)
  "(tok_section_name SC)
Returns \"1\" if SC's downcased name is in the list of things that
are section/chapter like, \"0\" otherwise."
  (let ((section_words '(chapter section part article phrase verse
                         scene act book volume chap sect art vol
                         war fortran saturn trek))
        (lname (downcase (item.name sc))))
    (cond
     ((member_string lname section_words)
      "1")
     (t
      "0"))))
|
| 242 |
+
|
| 243 |
+
(define (tok_string_as_letters name)
  "(tok_string_as_letters NAME)
Explodes NAME into its individual characters and returns one word
description per character, each with the character as its name and
token.letter_pos as its part of speech."
  (let ((as_letter_word
         (lambda (ch)
           (list (list 'name ch)
                 (list 'pos token.letter_pos)))))
    (mapcar as_letter_word (symbolexplode name))))
|
| 252 |
+
|
| 253 |
+
(define (tok_rex sc)
  "(tok_rex SC)
Returns \"1\" if a King like title is the name of the token two
before, three before or immediately after SC in the Token relation,
and \"0\" otherwise."
  ;; NOTE(review): ceasar looks like a misspelling of caesar, so a
  ;; correctly spelled token would not match.  Kept as is because the
  ;; trees using this feature may depend on it -- confirm before fixing.
  (let ((titles '(king queen pope duke tsar emperor shah ceasar
                  duchess tsarina empress baron baroness
                  count countess))
        (paths '("R:Token.pp.name"
                 "R:Token.pp.p.name"
                 "R:Token.n.name"))
        (seen nil))
    ;; stop at the first neighbouring token that is a title
    (while (and paths (not seen))
      (if (member_string (downcase (item.feat sc (car paths))) titles)
          (set! seen t))
      (set! paths (cdr paths)))
    (if seen
        "1"
        "0")))
|
| 270 |
+
|
| 271 |
+
(define (tok_rex_names sc)
  "(tok_rex_names SC)
Returns \"1\" if SC's downcased name is a King-like name and its punc
feature is empty or \"0\", and returns \"0\" otherwise."
  ;; NOTE(review): napolean is presumably meant to be napoleon; left
  ;; unchanged here -- confirm before correcting.
  (let ((royal_names
         '(louis henry charles philip george edward pius william richard
           ptolemy john paul peter nicholas
           alexander frederick james alfonso ivan napolean leo
           gregory catherine alexandria pierre elizabeth mary)))
    (cond
     ((not (member_string (downcase (item.name sc)) royal_names))
      "0")
     ((string-equal "" (item.feat sc "punc"))
      "1")
     ((string-equal "0" (item.feat sc "punc"))
      "1")
     (t
      "0"))))
|
| 285 |
+
|
| 286 |
+
(provide 'tokenpos)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/tts.scm
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Various tts functions and hooks
|
| 35 |
+
|
| 36 |
+
;;; Once the utterance is built these functions synth and play it
|
| 37 |
+
(defvar tts_hooks (list utt.synth utt.play)
|
| 38 |
+
"tts_hooks
|
| 39 |
+
Function or list of functions to be called during text to speech.
|
| 40 |
+
The function tts_file, chunks data into Utterances of type Token and
|
| 41 |
+
applies this hook to the utterance. This typically contains the utt.synth
|
| 42 |
+
function and utt.play. [see TTS]")
|
| 43 |
+
|
| 44 |
+
;;; This is used to define utterance breaks in tts on files
|
| 45 |
+
(defvar eou_tree
|
| 46 |
+
'((lisp_max_num_tokens > 200)
|
| 47 |
+
((1))
|
| 48 |
+
((n.whitespace matches ".*\n.*\n\\(.\\|\n\\)*");; significant break (2 nls)
|
| 49 |
+
((1))
|
| 50 |
+
((name matches "--+")
|
| 51 |
+
((1))
|
| 52 |
+
((punc matches ".*[\\?:!;].*")
|
| 53 |
+
((1))
|
| 54 |
+
((punc matches ".*\\..*")
|
| 55 |
+
((punc matches "..+");; longer punctuation string
|
| 56 |
+
((punc matches "\\..*,") ;; for U.S.S.R., like tokens
|
| 57 |
+
((0))
|
| 58 |
+
((1)))
|
| 59 |
+
;; This is to distinguish abbreviations vs periods
|
| 60 |
+
;; These are heuristics
|
| 61 |
+
((name matches "\\(.*\\..*\\|[A-Z][A-Za-z]?[A-Za-z]?\\|etc\\)");; an abbreviation
|
| 62 |
+
((n.whitespace is " ")
|
| 63 |
+
((0));; if abbrev single space isn't enough for break
|
| 64 |
+
((n.name matches "[A-Z].*")
|
| 65 |
+
((1))
|
| 66 |
+
((0))))
|
| 67 |
+
((n.whitespace is " ");; if it doesn't look like an abbreviation
|
| 68 |
+
((n.name matches "[A-Z].*");; single space and non-cap is no break
|
| 69 |
+
((1))
|
| 70 |
+
((0)))
|
| 71 |
+
((1)))))
|
| 72 |
+
((0)))))))
|
| 73 |
+
"eou_tree
|
| 74 |
+
End of utterance tree. A decision tree used to determine if the given
|
| 75 |
+
token marks the end of an utterance. It may look one token ahead to
|
| 76 |
+
do this. [see Utterance chunking]")
|
| 77 |
+
|
| 78 |
+
(define (max_num_tokens x)
  "(max_num_tokens X)
Counts back from X through item.prev to the start and returns that
count plus one (the counter starts at 1).  This is probably
controversial, but it is good to have a maximum number of tokens in
an utterance.  You really don't want to wait on very long utterances,
some utts can be thousands of words long, these maybe shouldn't be
spoken, but we do have to deal with them."
  (let ((count 1)
        (tok x))
    (while tok
      (set! tok (item.prev tok))
      (set! count (+ count 1)))
    count))
|
| 89 |
+
|
| 90 |
+
;;; The program used to parse stml files
|
| 91 |
+
;;; Needs version 1.0 to allow -D option to work
|
| 92 |
+
(defvar sgml_parse_progname "nsgmls-1.0"
|
| 93 |
+
"sgml_parse_progname
|
| 94 |
+
The name of the program to use to parse SGML files. Typically this is
|
| 95 |
+
nsgmls-1.0 from the sp SGML package. [see XML/SGML requirements]")
|
| 96 |
+
|
| 97 |
+
;;; When PHRASE elements are specified in an utterance in STML
|
| 98 |
+
;;; no other method for phrase prediction is to be used, so we
|
| 99 |
+
;;; use the following tree
|
| 100 |
+
(set! stml_phrase_cart_tree
|
| 101 |
+
'((R:Token.parent.pbreak is B)
|
| 102 |
+
((B))
|
| 103 |
+
((n.name is 0)
|
| 104 |
+
((B))
|
| 105 |
+
((NB)))))
|
| 106 |
+
|
| 107 |
+
(define (xxml_synth utt)
  "(xxml_synth UTT)
Applies xxml_hooks (mode specific) and then tts_hooks to UTT, provided
UTT is non-nil and has a Token relation with something in it.  The
local reference to UTT is then dropped and a garbage collection is
run, so the value returned is always nil.  This function should be
called from xxml element definitions that signal an utterance
boundary."
  (if (and utt
           (utt.relation utt 'Token))
      (begin
        (apply_hooks xxml_hooks utt)
        (apply_hooks tts_hooks utt)
        (set! utt nil) ;; not enough ...
        (gc)
        utt)
      ;; nothing to synthesize: no utterance or no tokens
      nil))
|
| 123 |
+
|
| 124 |
+
(define (xxml_attval ATTNAME ATTLIST)
  "(xxml_attval ATTNAME ATTLIST)
Returns the value (second element) of the first entry in ATTLIST whose
name is string-equal to ATTNAME, or nil if there is no such entry."
  (let ((attrs ATTLIST)
        (found nil)
        (value nil))
    (while (and attrs (not found))
      (if (string-equal ATTNAME (car (car attrs)))
          (begin
            (set! value (car (cdr (car attrs))))
            (set! found t))
          (set! attrs (cdr attrs))))
    value))
|
| 135 |
+
|
| 136 |
+
(defvar xxml_word_features nil
|
| 137 |
+
"xxml_word_features
|
| 138 |
+
An assoc list of features to be added to the current word when
|
| 139 |
+
in xxml parse mode.")
|
| 140 |
+
|
| 141 |
+
(defvar xxml_token_hooks nil
|
| 142 |
+
"xxml_token_hooks
|
| 143 |
+
Functions to apply to each token.")
|
| 144 |
+
|
| 145 |
+
(defvar xxml_hooks nil
|
| 146 |
+
"xxml_hooks
|
| 147 |
+
Function or list of functions to be applied to an utterance when
|
| 148 |
+
parsed with xxML, before tts_hooks.")
|
| 149 |
+
|
| 150 |
+
(defvar xxml_elements nil
|
| 151 |
+
"xxml_elements
|
| 152 |
+
List of Scheme actions to perform on finding xxML tags.")
|
| 153 |
+
|
| 154 |
+
(defvar xml_dtd_dir libdir
|
| 155 |
+
"xml_dtd_dir
|
| 156 |
+
The directory holding standard DTDs for the xml parser.")
|
| 157 |
+
|
| 158 |
+
(set! tts_fnum 1)
|
| 159 |
+
(define (save_tts_output utt)
  "(save_tts_output UTT)
Saves UTT's waveform in a file named tts_file_N.wav, where N is the
current value of tts_fnum, reports the file name on stderr and then
increments tts_fnum.  Returns UTT."
  (let ((wavname (string-append "tts_file_" tts_fnum ".wav")))
    (format stderr "festival: saving waveform in %s\n" wavname)
    (utt.save.wave utt wavname)
    (set! tts_fnum (+ tts_fnum 1))
    utt))
|
| 165 |
+
|
| 166 |
+
(define (save_waves_during_tts)
  "(save_waves_during_tts)
Save each waveform in the current directory in files \"tts_file_XXX.wav\".
This appends save_tts_output to tts_hooks unless it is already there.
Use (save_waves_during_tts_STOP) to stop saving waveforms."
  ;; only add the hook when it is not already a member
  (or (member save_tts_output tts_hooks)
      (set! tts_hooks (append tts_hooks (list save_tts_output))))
  t)
|
| 173 |
+
|
| 174 |
+
(define (save_waves_during_tts_STOP)
"(save_waves_during_tts_STOP)
Remove save_tts_output from tts_hooks, if present, so waveforms are no
longer saved when doing tts.  Always returns t."
  ;; Only touch tts_hooks when the hook is actually installed.
  (and (member save_tts_output tts_hooks)
       (set! tts_hooks (delq save_tts_output tts_hooks)))
  t)
|
| 180 |
+
|
| 181 |
+
(define (tts file mode)
"(tts FILE MODE)
Convert FILE to speech.  This is a front end to tts_file that first
puts the system in async audio mode.  MODE identifies any special
treatment necessary for FILE; when MODE is nil the mode is looked up by
matching FILE against auto-text-mode-alist.  [see TTS]"
  (audio_mode 'async)
  ;; Audio mode is left as async afterwards: switching back to sync
  ;; here was tried and judged to be probably bad.
  (tts_file file
            (or mode
                (tts_find_text_mode file auto-text-mode-alist))))
|
| 192 |
+
|
| 193 |
+
(define (tts_text string mode)
"(tts_text STRING MODE)
Apply tts to STRING: segment it into utterances and apply tts_hooks to
each one.  This is done naively by writing STRING to a temporary file,
switching to async audio mode and calling tts_file on that file in
MODE; the temporary file is deleted afterwards.  This differs from
SayText, which constructs a single utterance for the whole given text."
  (let* ((tmpfile (make_tmp_filename))
         (fd (fopen tmpfile "wb")))
    ;; Dump the text verbatim so tts_file can read it back.
    (format fd "%s" string)
    (fclose fd)
    (audio_mode 'async)
    (tts_file tmpfile mode)
    (delete-file tmpfile)))
|
| 207 |
+
|
| 208 |
+
(define (save_record_wave utt)
"(save_record_wave UTT)
Save the waveform of UTT in a temporary file and record the file name
at the front of the global list wavefiles, so that the pieces can be
joined into a single waveform at the end.  Returns UTT."
  (let ((wavfile (make_tmp_filename)))
    (utt.save.wave utt wavfile)
    ;; Newest file goes first; combine_waves reverses the list.
    (set! wavefiles (cons wavfile wavefiles)))
  utt)
|
| 215 |
+
|
| 216 |
+
(define (combine_waves)
"(combine_waves)
Join all the waves recorded in wavefiles into one new utterance, in the
order they were recorded, deleting each intermediate file once it has
been imported.  Returns the utterance holding the joined waveform."
  (let ((wholeutt (Utterance Text ""))
        ;; wavefiles is built newest-first, so reverse it.
        (pending (reverse wavefiles)))
    (while pending
      ;; Third argument t: presumably "append to the existing wave".
      (utt.import.wave wholeutt (car pending) t)
      (delete-file (car pending))
      (set! pending (cdr pending)))
    wholeutt))
|
| 227 |
+
|
| 228 |
+
(define (tts_textall string mode)
"(tts_textall STRING MODE)
Apply tts to STRING and send the resulting waveform to the client.
This function is specifically designed for use in server mode so a
single function call may synthesize the string; its name may be added
to the server safe functions.
If MODE is the string \"nil\", STRING is synthesized as a single Text
utterance.  Otherwise STRING is written to a temporary file and run
through tts_file in MODE with tts_hooks temporarily set to synthesize
and record each utterance; the recorded waves are then joined by
combine_waves and sent.  The previous value of tts_hooks is restored
once tts_file has returned."
  (if (not (string-equal mode "nil"))
      ;; A mode has been specified so go through tts_file.
      (let ((tmpfile (make_tmp_filename))
            (saved_hooks tts_hooks)
            (fd))
        (set! fd (fopen tmpfile "wb"))
        (format fd "%s" string)
        (fclose fd)
        ;; Record each utterance's wave instead of playing it.
        (set! tts_hooks (list utt.synth save_record_wave))
        (set! wavefiles nil)
        (tts_file tmpfile mode)
        (delete-file tmpfile)
        ;; Put the caller's hooks back so this call does not
        ;; permanently replace the global tts_hooks.
        (set! tts_hooks saved_hooks)
        (utt.send.wave.client (combine_waves)))
      ;; Simple fundamental mode
      (utt.send.wave.client
       (utt.synth
        (eval (list 'Utterance 'Text string))))))
|
| 251 |
+
|
| 252 |
+
;; Function to interface with app_festival for asterisk
|
| 253 |
+
;; See http://www.asterisk.org
|
| 254 |
+
(define (tts_textasterisk string mode)
"(tts_textasterisk STRING MODE)
Synthesize STRING as a single Text utterance and send the waveform with
utt.send.wave.asterisk.  This function is specifically designed for use
in server mode so a single function call may synthesize the string; its
name may be added to the server safe functions.  MODE is accepted but
not used."
  (let ((utt (eval (list 'Utterance 'Text string))))
    (utt.send.wave.asterisk (utt.synth utt))))
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
(define (tts_return_to_client)
"(tts_return_to_client)
This function is called by clients who wish to have the waveforms of
their text samples returned asynchronously.  Unless
utt.send.wave.client is already in tts_hooks, utt.play is removed from
tts_hooks and utt.send.wave.client is appended."
  (if (member utt.send.wave.client tts_hooks)
      ;; Already set up to return waveforms; nothing to do.
      nil
      (set! tts_hooks
            (append (delq utt.play tts_hooks)
                    (list utt.send.wave.client)))))
|
| 274 |
+
|
| 275 |
+
(defvar tts_text_modes nil
|
| 276 |
+
"tts_text_modes
|
| 277 |
+
An a-list of text modes data for file type specific tts functions.
|
| 278 |
+
See the manual for an example. [see Text modes]")
|
| 279 |
+
|
| 280 |
+
(define (tts_find_text_mode file alist)
"(tts_find_text_mode FILE ALIST)
Search through ALIST, a list of (REGEX . MODE) pairs, for the first
entry whose REGEX matches somewhere in FILE and return its MODE.
Returns nil if nothing matches."
  (cond
   ((null alist) nil)  ;; can't find a match
   ;; string-matches is given ".*" on both sides of the regex so that
   ;; the regex may match any part of the file name.
   ((string-matches file (string-append ".*" (car (car alist)) ".*"))
    (cdr (car alist)))
   (t
    (tts_find_text_mode file (cdr alist)))))
|
| 290 |
+
|
| 291 |
+
(defvar auto-text-mode-alist
|
| 292 |
+
(list
|
| 293 |
+
(cons "\\.sable$" 'sable)
|
| 294 |
+
(cons "\\.ogi" 'ogimarkup)
|
| 295 |
+
(cons "\\.email" 'email)
|
| 296 |
+
(cons "" 'fundamental)
|
| 297 |
+
)
|
| 298 |
+
"auto-text-mode-alist
|
| 299 |
+
Following Emacs' auto-mode-alist this provides a mechanism for auto
|
| 300 |
+
selecting a TTS text mode based on the filename being analyzed. Its
|
| 301 |
+
format is exactly the same as Emacs in that it consists of an alist of
|
| 302 |
+
dotted pairs of regular expression and text mode name.")
|
| 303 |
+
|
| 304 |
+
(provide 'tts)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/unilex_phones.scm
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 2003, 2004 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; unilex phoneset
|
| 35 |
+
;;;
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
(defPhoneSet
|
| 39 |
+
unilex
|
| 40 |
+
;;; Phone Features
|
| 41 |
+
(;; vowel or consonant
|
| 42 |
+
(vc + -)
|
| 43 |
+
;; vowel length: short long diphthong schwa
|
| 44 |
+
(vlng s l d a 0)
|
| 45 |
+
;; vowel height: high mid low
|
| 46 |
+
(vheight 1 2 3 0)
|
| 47 |
+
;; vowel frontness: front mid back
|
| 48 |
+
(vfront 1 2 3 0)
|
| 49 |
+
;; lip rounding
|
| 50 |
+
(vrnd + - 0)
|
| 51 |
+
;; consonant type: stop fricative affricate nasal liquid approximant
|
| 52 |
+
(ctype s f a n l t r 0)
|
| 53 |
+
;; place of articulation: labial alveolar palatal labio-dental
|
| 54 |
+
;; dental velar glottal
|
| 55 |
+
(cplace l a p b d v g 0)
|
| 56 |
+
;; consonant voicing
|
| 57 |
+
(cvox + - 0)
|
| 58 |
+
)
|
| 59 |
+
(
|
| 60 |
+
(SIL - 0 0 0 0 0 0 -) ;; silence ...
|
| 61 |
+
(# - 0 0 0 0 0 0 -) ;; silence ...
|
| 62 |
+
(B_10 - 0 0 0 0 0 0 -) ;; Pauses
|
| 63 |
+
(B_20 - 0 0 0 0 0 0 -) ;; Pauses
|
| 64 |
+
(B_30 - 0 0 0 0 0 0 -) ;; Pauses
|
| 65 |
+
(B_40 - 0 0 0 0 0 0 -) ;; Pauses
|
| 66 |
+
(B_50 - 0 0 0 0 0 0 -) ;; Pauses
|
| 67 |
+
(B_100 - 0 0 0 0 0 0 -) ;; Pauses
|
| 68 |
+
(B_150 - 0 0 0 0 0 0 -) ;; Pauses
|
| 69 |
+
(B_200 - 0 0 0 0 0 0 -) ;; Pauses
|
| 70 |
+
(B_250 - 0 0 0 0 0 0 -) ;; Pauses
|
| 71 |
+
(B_300 - 0 0 0 0 0 0 -) ;; Pauses
|
| 72 |
+
(B_400 - 0 0 0 0 0 0 -) ;; Pauses
|
| 73 |
+
(IGNORE - 0 0 0 0 0 0 -) ;; Pauses
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
;; insert the phones here, see examples in
|
| 77 |
+
;; festival/lib/*_phones.scm
|
| 78 |
+
|
| 79 |
+
;(name vc vlng vheight vfront vrnd ctype cplace cvox)
|
| 80 |
+
|
| 81 |
+
;;; Rob guessed these values for Edinburgh English
|
| 82 |
+
;;; Not to be taken too seriously.
|
| 83 |
+
|
| 84 |
+
(p - 0 0 0 0 s l -)
|
| 85 |
+
(t - 0 0 0 0 s a -)
|
| 86 |
+
(? - 0 0 0 0 s g +) ;;; ???
|
| 87 |
+
(t^ - 0 0 0 0 t a +) ;;; ???
|
| 88 |
+
(k - 0 0 0 0 s v -)
|
| 89 |
+
(x - 0 0 0 0 f v -)
|
| 90 |
+
(b - 0 0 0 0 s l +)
|
| 91 |
+
(d - 0 0 0 0 s a +)
|
| 92 |
+
(g - 0 0 0 0 s v +)
|
| 93 |
+
(ch - 0 0 0 0 a p -)
|
| 94 |
+
(jh - 0 0 0 0 a p +)
|
| 95 |
+
(s - 0 0 0 0 f a -)
|
| 96 |
+
(z - 0 0 0 0 f a +)
|
| 97 |
+
(sh - 0 0 0 0 f p -)
|
| 98 |
+
(zh - 0 0 0 0 f p +)
|
| 99 |
+
(f - 0 0 0 0 f b -)
|
| 100 |
+
(v - 0 0 0 0 f b +)
|
| 101 |
+
(th - 0 0 0 0 f d -)
|
| 102 |
+
(dh - 0 0 0 0 f d +)
|
| 103 |
+
(h - 0 0 0 0 f 0 -) ;;; ???
|
| 104 |
+
(m - 0 0 0 0 n l +)
|
| 105 |
+
(m! - 0 0 0 0 n l +)
|
| 106 |
+
(n - 0 0 0 0 n a +)
|
| 107 |
+
(n! - 0 0 0 0 n a +)
|
| 108 |
+
(ng - 0 0 0 0 n v +)
|
| 109 |
+
(l - 0 0 0 0 r a +)
|
| 110 |
+
(ll - 0 0 0 0 r a +)
|
| 111 |
+
(lw - 0 0 0 0 r a +)
|
| 112 |
+
(l! - 0 0 0 0 r a +)
|
| 113 |
+
(r - 0 0 0 0 r a +)
|
| 114 |
+
(y - 0 0 0 0 l p +)
|
| 115 |
+
(w - 0 0 0 0 l l +)
|
| 116 |
+
(hw - 0 0 0 0 l l +)
|
| 117 |
+
(e + s 2 1 - 0 0 0)
|
| 118 |
+
(ao + s 3 1 - 0 0 0)
|
| 119 |
+
(a + s 3 1 - 0 0 0)
|
| 120 |
+
(ah + s 3 1 - 0 0 0)
|
| 121 |
+
(oa + s 3 1 - 0 0 0)
|
| 122 |
+
(aa + s 3 1 - 0 0 0)
|
| 123 |
+
(ar + s 3 1 - 0 0 0)
|
| 124 |
+
(eh + s 3 1 - 0 0 0) ;;; ?
|
| 125 |
+
(oul + d 2 3 + 0 0 0) ;;; ?
|
| 126 |
+
(ou + d 2 3 + 0 0 0)
|
| 127 |
+
(ouw + d 2 3 + 0 0 0)
|
| 128 |
+
(oou + l 3 3 + 0 0 0)
|
| 129 |
+
(o + l 3 3 + 0 0 0)
|
| 130 |
+
(au + l 3 3 + 0 0 0)
|
| 131 |
+
(oo + l 3 3 + 0 0 0)
|
| 132 |
+
(or + l 3 3 + 0 0 0)
|
| 133 |
+
(our + d 2 3 + 0 0 0)
|
| 134 |
+
(ii + l 1 1 - 0 0 0)
|
| 135 |
+
(ihr + s 1 1 - 0 0 0)
|
| 136 |
+
(iy + l 1 1 - 0 0 0)
|
| 137 |
+
(i + s 1 1 - 0 0 0)
|
| 138 |
+
(ie + l 1 1 - 0 0 0) ;;; ?
|
| 139 |
+
(iii + s 1 1 - 0 0 0) ;;; was ii;
|
| 140 |
+
(@r + a 2 2 - r a +)
|
| 141 |
+
(@ + a 2 2 - 0 0 0)
|
| 142 |
+
(uh + s 2 2 - 0 0 0)
|
| 143 |
+
(uhr + s 2 2 - 0 0 0)
|
| 144 |
+
(u + l 1 3 + 0 0 0)
|
| 145 |
+
(uu + l 1 3 + 0 0 0)
|
| 146 |
+
(iu + l 1 3 + 0 0 0)
|
| 147 |
+
(uuu + l 1 3 + 0 0 0) ;;; was uu;
|
| 148 |
+
(uw + l 1 3 + 0 0 0) ;;; ???
|
| 149 |
+
(uul + l 1 3 + 0 0 0) ;;; ???
|
| 150 |
+
(ei + d 2 1 - 0 0 0)
|
| 151 |
+
(ee + d 2 1 - 0 0 0)
|
| 152 |
+
(ai + d 3 2 - 0 0 0) ;;; ???
|
| 153 |
+
(ae + d 3 2 - 0 0 0) ;;; ???
|
| 154 |
+
(aer + d 3 2 - 0 0 0) ;;; ???
|
| 155 |
+
(aai + d 3 2 - 0 0 0) ;;; ???
|
| 156 |
+
(oi + d 2 3 + 0 0 0) ;;; ???
|
| 157 |
+
(oir + d 2 3 + 0 0 0) ;;; ???
|
| 158 |
+
(ow + d 3 2 - 0 0 0)
|
| 159 |
+
(owr + d 3 2 - 0 0 0) ;;; ???
|
| 160 |
+
(oow + d 3 2 - 0 0 0) ;;; ???
|
| 161 |
+
(i@ + l 1 1 - 0 0 0) ;;; iy + @ ?
|
| 162 |
+
(ir + s 1 1 - 0 0 0)
|
| 163 |
+
(irr + s 1 1 - 0 0 0) ;;; was ir;
|
| 164 |
+
(iir + s 1 1 - 0 0 0)
|
| 165 |
+
(@@r + a 2 2 - 0 0 0)
|
| 166 |
+
(er + s 2 1 - 0 0 0)
|
| 167 |
+
(eir + s 2 1 - 0 0 0) ;;; ???
|
| 168 |
+
(ur + s 1 3 + 0 0 0) ;;; ???
|
| 169 |
+
(urr + s 1 3 + 0 0 0) ;;; ???
|
| 170 |
+
(iur + s 1 3 + 0 0 0) ;;; ???
|
| 171 |
+
)
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
(PhoneSet.silences '( # SIL))
|
| 175 |
+
|
| 176 |
+
(define (unilex::select_phoneset)
"(unilex::select_phoneset)
Make unilex the active phone set: record it in the PhoneSet parameter
and select it with PhoneSet.select."
  (Parameter.set 'PhoneSet 'unilex)
  (PhoneSet.select 'unilex))
|
| 182 |
+
|
| 183 |
+
(define (unilex::reset_phoneset)
"(unilex::reset_phoneset)
Reset phone set for unilex.  There is nothing to undo, so this simply
returns t."
  t)
|
| 188 |
+
|
| 189 |
+
(provide 'unilex_phones)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/voices.scm
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 2 |
+
;;; ;;
|
| 3 |
+
;;; Centre for Speech Technology Research ;;
|
| 4 |
+
;;; University of Edinburgh, UK ;;
|
| 5 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 6 |
+
;;; All Rights Reserved. ;;
|
| 7 |
+
;;; ;;
|
| 8 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 9 |
+
;;; this software and its documentation without restriction, including ;;
|
| 10 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 11 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 12 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 13 |
+
;;; the following conditions: ;;
|
| 14 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 15 |
+
;;; conditions and the following disclaimer. ;;
|
| 16 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 17 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 18 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 19 |
+
;;; derived from this software without specific prior written ;;
|
| 20 |
+
;;; permission. ;;
|
| 21 |
+
;;; ;;
|
| 22 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 23 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 24 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 25 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 26 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 27 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 28 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 29 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 30 |
+
;;; THIS SOFTWARE. ;;
|
| 31 |
+
;;; ;;
|
| 32 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 33 |
+
;;;
|
| 34 |
+
;;; Prepare to access voices. Searches down a path of places.
|
| 35 |
+
;;;
|
| 36 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 37 |
+
|
| 38 |
+
(define current-voice nil
|
| 39 |
+
"current-voice
|
| 40 |
+
The name of the current voice.")
|
| 41 |
+
|
| 42 |
+
;; The path to search for voices is created from the load-path with
|
| 43 |
+
;; an extra list of directories appended.
|
| 44 |
+
|
| 45 |
+
(defvar system-voice-path '( )
|
| 46 |
+
"system-voice-path
|
| 47 |
+
Additional directory not near the load path where voices can be
|
| 48 |
+
found, this can be redefined in lib/sitevars.scm if desired.")
|
| 49 |
+
|
| 50 |
+
(defvar system-voice-path-multisyn '( )
|
| 51 |
+
"system-voice-path-multisyn
|
| 52 |
+
Additional directory not near the load path where multisyn voices can be
|
| 53 |
+
found, this can be redefined in lib/sitevars.scm if desired.")
|
| 54 |
+
|
| 55 |
+
(defvar voice-path
|
| 56 |
+
(remove-duplicates
|
| 57 |
+
(append (mapcar (lambda (d) (path-append d "voices/")) load-path)
|
| 58 |
+
(mapcar (lambda (d) (path-as-directory d)) system-voice-path)
|
| 59 |
+
))
|
| 60 |
+
|
| 61 |
+
"voice-path
|
| 62 |
+
List of places to look for voices. If not set it is initialised from
|
| 63 |
+
load-path by appending \"voices/\" to each directory with
|
| 64 |
+
system-voice-path appended.")
|
| 65 |
+
|
| 66 |
+
(defvar voice-path-multisyn
|
| 67 |
+
(remove-duplicates
|
| 68 |
+
(append (mapcar (lambda (d) (path-append d "voices-multisyn/")) load-path)
|
| 69 |
+
(mapcar (lambda (d) (path-as-directory d)) system-voice-path-multisyn)
|
| 70 |
+
))
|
| 71 |
+
|
| 72 |
+
"voice-path-multisyn
|
| 73 |
+
List of places to look for multisyn voices. If not set it is initialised from
|
| 74 |
+
load-path by appending \"voices-multisyn/\" to each directory with
|
| 75 |
+
system-voice-path-multisyn appended.")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
;; Declaration of voices. When we declare a voice we record the
|
| 79 |
+
;; directory and set up an autoload for the voice-selecting function
|
| 80 |
+
|
| 81 |
+
(defvar voice-locations ()
|
| 82 |
+
"voice-locations
|
| 83 |
+
Association list recording where voices were found.")
|
| 84 |
+
|
| 85 |
+
(defvar voice-location-trace nil
|
| 86 |
+
"voice-location-trace
|
| 87 |
+
Set t to print voice locations as they are found")
|
| 88 |
+
|
| 89 |
+
(define (voice-location name dir doc)
"(voice-location NAME DIR DOCSTRING)
Record the location of a voice.  NAME is added to voice-locations
paired with DIR, and the function voice_NAME is set up to autoload from
DIR/festvox/NAME with DOCSTRING as its documentation.  Called for each
voice found on voice-path.  Can be called in site-init or .festivalrc
for additional voices which exist elsewhere."
  (let* ((func_name (intern (string-append "voice_" name)))
         (voice_sym (intern name)))
    ;; Newest registration goes at the front of the list.
    (set! voice-locations (cons (cons voice_sym dir) voice-locations))
    (eval (list 'autoload
                func_name
                (path-append dir "festvox/" voice_sym)
                doc))
    (if voice-location-trace
        (format t "Voice: %s %s\n" voice_sym dir))))
|
| 105 |
+
|
| 106 |
+
(define (voice-location-multisyn name rootname dir doc)
"(voice-location-multisyn NAME ROOTNAME DIR DOCSTRING)
Record the location of a multisyn voice.  NAME is added to
voice-locations paired with DIR, and the function voice_NAME is set up
to autoload from DIR/festvox/ROOTNAME with DOCSTRING as its
documentation.  ROOTNAME is separate from NAME because one definition
file may define several voices.  Called for each voice found on
voice-path-multisyn.  Can be called in site-init or .festivalrc for
additional voices which exist elsewhere."
  (let ((func_name (intern (string-append "voice_" name)))
        )

    (set! name (intern name))
    (set! voice-locations (cons (cons name dir) voice-locations))
    ;; The autoload file is named after ROOTNAME, not NAME.
    (eval (list 'autoload func_name (path-append dir "festvox/" rootname) doc))
    (if voice-location-trace
        (format t "Voice: %s %s\n" name dir)
        )
    )
  )
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
(define (current_voice_reset)
|
| 126 |
+
"(current_voice_reset)
|
| 127 |
+
This function is called at the start of defining any new voice.
|
| 128 |
+
It is design to allow the previous voice to reset any global
|
| 129 |
+
values it has messed with. If this variable value is nil then
|
| 130 |
+
the function wont be called.")
|
| 131 |
+
|
| 132 |
+
(define (voice_reset)
"(voice_reset)
This resets all variables that may affect voice generation back to
acceptable values.  This function should always be called at the start
of any function defining a voice.  In addition to resetting standard
variables, the function current_voice_reset is called if it is non-nil
and is then set to nil.  A voice definition function should always set
current_voice_reset (even if it does nothing) so that voice specific
changes can be undone when a new voice is selected.  Unfortunately
there is no way to force this to be used."
  (Parameter.set 'Duration_Stretch 1.0)
  (set! after_synth_hooks default_after_synth_hooks)

  ;; The following are reset to allow existing voices to continue
  ;; to work; new voices should be setting these explicitly.
  (Parameter.set 'Token_Method 'Token_English)
  (Parameter.set 'POS_Method Classic_POS)
  (Parameter.set 'Phrasify_Method Classic_Phrasify)
  (Parameter.set 'Word_Method Classic_Word)
  (Parameter.set 'Pause_Method Classic_Pauses)
  (Parameter.set 'PostLex_Method Classic_PostLex)

  (set! diphone_module_hooks nil)
  (set! UniSyn_module_hooks nil)

  ;; Let the outgoing voice undo its own changes, then clear the hook
  ;; so it is not run again for the next voice.
  (and current_voice_reset
       (current_voice_reset))
  (set! current_voice_reset nil))
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
(defvar Voice_descriptions nil
|
| 164 |
+
"Internal variable containing list of voice descriptions as
|
| 165 |
+
described by proclaim_voice.")
|
| 166 |
+
|
| 167 |
+
(define (proclaim_voice name description)
"(proclaim_voice NAME DESCRIPTION)
Describe a voice to the system.  NAME should be an atomic name, that
conventionally will have voice_ prepended to name the basic selection
function.  DESCRIPTION is an assoc list of feature and value and must
have at least features for language, gender, dialect and
description.  The first three of these are atomic, while the description
is a text string describing the voice.  If NAME has already been
proclaimed its stored description is replaced."
  (let ((voxdesc (assoc name Voice_descriptions)))
    (if voxdesc
        ;; Entries are (NAME DESCRIPTION): overwrite the second element.
        (set-car! (cdr voxdesc) description)
        (set! Voice_descriptions
              (cons (list name description) Voice_descriptions))))
  )
|
| 181 |
+
|
| 182 |
+
(define (voice.description name)
"(voice.description NAME)
Return the description entry (NAME DESCRIPTION) of the named voice as
recorded by proclaim_voice.  If the named voice is not yet loaded it is
loaded by selecting it, after which the previously current voice (if
any) is selected again.  Prints a message and returns nil if the voice
is unknown."
  (let ((voxdesc (assoc name Voice_descriptions))
        (cv current-voice))
    (if (null voxdesc)
        ;; Errors while loading the voice are caught here so that an
        ;; unknown voice is reported below rather than aborting.
        (unwind-protect
         (begin
           (voice.select name)
           ;; Pick up the description before switching back, so that a
           ;; failure to reselect the old voice cannot lose it.
           (set! voxdesc (assoc name Voice_descriptions))
           ;; Only switch back when there was a current voice;
           ;; selecting nil would raise an error.
           (if cv
               (voice.select cv)))))
    (if voxdesc
        voxdesc
        (begin
          (format t "SIOD: unknown voice %s\n" name)
          nil))))
|
| 199 |
+
|
| 200 |
+
(define (voice.select name)
"(voice.select NAME)
Call the function that sets up voice NAME.  The function name is formed
by prepending voice_ to NAME, and it is called with no arguments."
  (let ((selector (intern (string-append "voice_" name))))
    (eval (list selector))))
|
| 205 |
+
|
| 206 |
+
(define (voice.describe name)
"(voice.describe NAME)
Describe voice NAME by saying its description.  Although it would be
nice to say the description in the voice itself, that is not going to
work across languages, so the current voice is used.  This assumes
voices describe themselves in English, which is pretty anglo-centric,
shitsurei shimasu."
  (let* ((voxdesc (voice.description name))
         ;; voxdesc is (NAME DESCRIPTION); pull the value of the
         ;; description feature out of the DESCRIPTION assoc list.
         (desc (car (cdr (assoc 'description (car (cdr voxdesc)))))))
    (cond
     (desc
      (tts_text desc nil))
     (voxdesc
      (SayText
       (format nil "A voice called %s exist but it has no description"
               name)))
     (t
      (SayText
       (format nil "There is no voice called %s defined" name))))))
|
| 224 |
+
|
| 225 |
+
(define (voice.list)
"(voice.list)
List of all (potential) voices in the system.  This returns the names
recorded in voice-locations, the list of potential voices found by
scanning the voice-path at start up time.  These names can be used as
arguments to voice.description and voice.describe."
  (mapcar (lambda (entry) (car entry)) voice-locations))
|
| 232 |
+
|
| 233 |
+
;; Voices are found on the voice-path if they are in directories of the form
|
| 234 |
+
;; DIR/LANGUAGE/NAME
|
| 235 |
+
|
| 236 |
+
(define (search-for-voices)
"(search-for-voices)
Search down voice-path to locate voices.  Each entry of the form
DIR/LANGUAGE/NAME below a directory on voice-path is registered with
voice-location, except entries whose name contains a dot."
  (let ((dirs voice-path)
        (dir nil)
        languages language
        voices voicedir voice)
    (while dirs
      (set! dir (car dirs))
      (set! languages (directory-entries dir t))
      (while languages
        (set! language (car languages))
        (set! voices (directory-entries (path-append dir language) t))
        (while voices
          (set! voicedir (car voices))
          (set! voice (path-basename voicedir))
          ;; Names containing a dot are not treated as voices.
          (if (string-matches voicedir ".*\\..*")
              nil
              (voice-location
               voice
               (path-as-directory (path-append dir language voicedir))
               "voice found on path"))
          (set! voices (cdr voices)))
        (set! languages (cdr languages)))
      (set! dirs (cdr dirs)))))
|
| 269 |
+
|
| 270 |
+
;; A single file is allowed to define multiple multisyn voices, so this has
|
| 271 |
+
;; been adapted for this. Rob thinks this is just evil, but couldn't think
|
| 272 |
+
;; of a better way.
|
| 273 |
+
(define (search-for-voices-multisyn)
"(search-for-voices-multisyn)
Search down voice-path-multisyn to locate multisyn voices.  For each
entry of the form DIR/LANGUAGE/NAME (whose name contains no dot) the
file DIR/LANGUAGE/NAME/festvox/NAME.scm is read without being
evaluated, and every proclaim_voice form found in it is registered with
voice-location-multisyn."
  (let ((dirs voice-path-multisyn)
        (dir nil)
        languages language
        voices voicedir
        ;; Bound locally so the scan does not leave a global behind.
        voice-def-file)
    (while dirs
      (set! dir (car dirs))
      (set! languages (directory-entries dir t))
      (while languages
        (set! language (car languages))
        (set! voices (directory-entries (path-append dir language) t))
        (while voices
          (set! voicedir (car voices))
          ;; Names containing a dot are not treated as voices.
          (if (string-matches voicedir ".*\\..*")
              nil
              (begin
                ;; load the voice definition file, but don't evaluate it!
                (set! voice-def-file
                      (load (path-append dir language voicedir "festvox"
                                         (string-append voicedir ".scm"))
                            t))
                ;; now find the "proclaim_voice" forms and register
                ;; these voices.
                (mapcar
                 (lambda (line)
                   (if (string-matches (car line) "proclaim_voice")
                       (voice-location-multisyn
                        ;; second element of the form's first argument;
                        ;; presumably the quoted voice name
                        (intern (cadr (cadr line)))
                        voicedir
                        (path-append dir language voicedir)
                        "registerd multisyn voice")))
                 voice-def-file)))
          (set! voices (cdr voices)))
        (set! languages (cdr languages)))
      (set! dirs (cdr dirs)))))
|
| 306 |
+
|
| 307 |
+
(search-for-voices)
|
| 308 |
+
(search-for-voices-multisyn)
|
| 309 |
+
|
| 310 |
+
;; We select the default voice from a list of possibilities. One of these
|
| 311 |
+
;; had better exist in every installation.
|
| 312 |
+
|
| 313 |
+
(define (no_voice_error)
"(no_voice_error)
Print a warning that no default voice was found on voice-path.  This is
the initial value of voice_default, used when no voice is available."
  (format t "\nWARNING\n")
  ;; %l prints the list of directories that were searched.
  (format t "No default voice found in %l\n" voice-path)
  (format t "either no voices unpacked or voice-path is wrong\n")
  (format t "Scheme interpreter will work, but there is no voice to speak with.\n")
  (format t "WARNING\n\n"))
|
| 319 |
+
|
| 320 |
+
(defvar voice_default 'no_voice_error
|
| 321 |
+
"voice_default
|
| 322 |
+
A variable whose value is a function name that is called on start up to
|
| 323 |
+
select the default voice. [see Site initialization]")
|
| 324 |
+
|
| 325 |
+
(defvar default-voice-priority-list
|
| 326 |
+
'(kal_diphone
|
| 327 |
+
cmu_us_slt_cg
|
| 328 |
+
cmu_us_rms_cg
|
| 329 |
+
cmu_us_bdl_cg
|
| 330 |
+
cmu_us_jmk_cg
|
| 331 |
+
cmu_us_awb_cg
|
| 332 |
+
; cstr_rpx_nina_multisyn ; restricted license (lexicon)
|
| 333 |
+
; cstr_rpx_jon_multisyn ; restricted license (lexicon)
|
| 334 |
+
; cstr_edi_awb_arctic_multisyn ; restricted license (lexicon)
|
| 335 |
+
; cstr_us_awb_arctic_multisyn
|
| 336 |
+
ked_diphone
|
| 337 |
+
don_diphone
|
| 338 |
+
rab_diphone
|
| 339 |
+
en1_mbrola
|
| 340 |
+
us1_mbrola
|
| 341 |
+
us2_mbrola
|
| 342 |
+
us3_mbrola
|
| 343 |
+
gsw_diphone ;; not publically distributed
|
| 344 |
+
el_diphone
|
| 345 |
+
)
|
| 346 |
+
"default-voice-priority-list
|
| 347 |
+
List of voice names. The first of them available becomes the default voice.")
|
| 348 |
+
|
| 349 |
+
;; Walk default-voice-priority-list and make the first voice that was
;; found on the voice path the default.  The loop stops as soon as
;; voice_default no longer names no_voice_error.
(let ((candidates default-voice-priority-list))
  (while (and candidates (eq voice_default 'no_voice_error))
    (if (assoc (car candidates) voice-locations)
        (set! voice_default
              (intern (string-append "voice_" (car candidates)))))
    (set! candidates (cdr candidates))))
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
(provide 'voices)
|
pretrained_models/CosyVoice-ttsfrd/resource/festival/web.scm
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 6 |
+
;;; DO NOT EDIT THIS FILE ON PAIN OF MORE PAIN.
|
| 7 |
+
;;;
|
| 8 |
+
;;; The master copy of this file is in ../../speech_tools/lib/siod/web.scm
|
| 9 |
+
;;; and is copied here at build time.
|
| 10 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 24 |
+
;;; ;;
|
| 25 |
+
;;; Centre for Speech Technology Research ;;
|
| 26 |
+
;;; University of Edinburgh, UK ;;
|
| 27 |
+
;;; Copyright (c) 1996,1997 ;;
|
| 28 |
+
;;; All Rights Reserved. ;;
|
| 29 |
+
;;; ;;
|
| 30 |
+
;;; Permission is hereby granted, free of charge, to use and distribute ;;
|
| 31 |
+
;;; this software and its documentation without restriction, including ;;
|
| 32 |
+
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
|
| 33 |
+
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
|
| 34 |
+
;;; permit persons to whom this work is furnished to do so, subject to ;;
|
| 35 |
+
;;; the following conditions: ;;
|
| 36 |
+
;;; 1. The code must retain the above copyright notice, this list of ;;
|
| 37 |
+
;;; conditions and the following disclaimer. ;;
|
| 38 |
+
;;; 2. Any modifications must be clearly marked as such. ;;
|
| 39 |
+
;;; 3. Original authors' names are not deleted. ;;
|
| 40 |
+
;;; 4. The authors' names are not used to endorse or promote products ;;
|
| 41 |
+
;;; derived from this software without specific prior written ;;
|
| 42 |
+
;;; permission. ;;
|
| 43 |
+
;;; ;;
|
| 44 |
+
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
|
| 45 |
+
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
|
| 46 |
+
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
|
| 47 |
+
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
|
| 48 |
+
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
|
| 49 |
+
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
|
| 50 |
+
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
|
| 51 |
+
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
|
| 52 |
+
;;; THIS SOFTWARE. ;;
|
| 53 |
+
;;; ;;
|
| 54 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 55 |
+
;;;
|
| 56 |
+
;;; Some things for dealing with the web.
|
| 57 |
+
;;;
|
| 58 |
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
| 59 |
+
|
| 60 |
+
(define (get_url url filename)
  "(get_url URL OUTFILE)
Get URL and put contents in OUTFILE. Currently only http, and file
type URLs are supported.  If the input cannot be opened the string
\"can't open in\" is returned; if OUTFILE cannot be opened the input
is closed again and \"can't open out\" is returned."
  (let ((infile (fopen (parse_url url) "rb")))
    (if infile
        (let ((outfile (fopen filename "wb")))
          (if outfile
              ;; The length of BUFFER is the chunk size of each fread.
              ;; NOTE(review): confirm the intended length of this literal;
              ;; runs of spaces may have been collapsed in this copy.
              (let ((buffer " ") n)
                (while (set! n (fread buffer infile))
                  ;; A short read fills only the first N characters, so
                  ;; write just that prefix.  BUFFER itself is left at
                  ;; full size so later reads are not forced to be small.
                  (fwrite (if (< n (length buffer))
                              (substring buffer 0 n)
                              buffer)
                          outfile))
                (fclose infile)
                (fclose outfile))
              (begin
                ;; Do not leak the already opened input stream.
                (fclose infile)
                "can't open out")))
        "can't open in")))
|
| 84 |
+
|
| 85 |
+
(define (socket_open host port how)
  "(socket_open HOST PORT HOW)
Open a file descriptor to the BSD socket on HOST at PORT. HOW may
be \"r\" or \"w\" for a read only or write only filedescriptor. If
HOW is unspecified or NIL, \"w\" is assumed. If HOW is \"rw\" or \"r+\"
then a list of two file descriptors is returned, the first for reading
the second for writing. Take care when using the bidirectional socket
that deadlock doesn't occur."
  ;; The socket is opened through fopen using a (\"tcp\" HOST PORT \"\") list.
  (let ((file (fopen (list "tcp" host port "") how)))
    ;; Both read/write spellings are compared with the same predicate.
    (if (or (equal? how "rw") (equal? how "r+"))
        ;; The same descriptor object fills both the read and write slot.
        (list file file)
        file)))
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
;; Announce the 'web feature (presumably so that a later require of
;; 'web does not load this file again -- confirm against the loader).
(provide 'web)
|
| 103 |
+
|
pretrained_models/CosyVoice-ttsfrd/resource/jprsc/COPYING
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Copyright (c) 2009, Nara Institute of Science and Technology, Japan.
|
| 2 |
+
|
| 3 |
+
All rights reserved.
|
| 4 |
+
|
| 5 |
+
Redistribution and use in source and binary forms, with or without
|
| 6 |
+
modification, are permitted provided that the following conditions are
|
| 7 |
+
met:
|
| 8 |
+
|
| 9 |
+
Redistributions of source code must retain the above copyright notice,
|
| 10 |
+
this list of conditions and the following disclaimer.
|
| 11 |
+
Redistributions in binary form must reproduce the above copyright
|
| 12 |
+
notice, this list of conditions and the following disclaimer in the
|
| 13 |
+
documentation and/or other materials provided with the distribution.
|
| 14 |
+
Neither the name of the Nara Institute of Science and Technology
|
| 15 |
+
(NAIST) nor the names of its contributors may be used to endorse or
|
| 16 |
+
promote products derived from this software without specific prior
|
| 17 |
+
written permission.
|
| 18 |
+
|
| 19 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 20 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 21 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 22 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
| 23 |
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
| 24 |
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
| 25 |
+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
| 26 |
+
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
| 27 |
+
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
| 28 |
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
| 29 |
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 30 |
+
|
| 31 |
+
Copyright (c) 2011-2017, The UniDic Consortium
|
| 32 |
+
All rights reserved.
|
| 33 |
+
|
| 34 |
+
Redistribution and use in source and binary forms, with or without
|
| 35 |
+
modification, are permitted provided that the following conditions are
|
| 36 |
+
met:
|
| 37 |
+
|
| 38 |
+
* Redistributions of source code must retain the above copyright
|
| 39 |
+
notice, this list of conditions and the following disclaimer.
|
| 40 |
+
|
| 41 |
+
* Redistributions in binary form must reproduce the above copyright
|
| 42 |
+
notice, this list of conditions and the following disclaimer in the
|
| 43 |
+
documentation and/or other materials provided with the
|
| 44 |
+
distribution.
|
| 45 |
+
|
| 46 |
+
* Neither the name of the UniDic Consortium nor the names of its
|
| 47 |
+
contributors may be used to endorse or promote products derived
|
| 48 |
+
from this software without specific prior written permission.
|
| 49 |
+
|
| 50 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 51 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 52 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 53 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 54 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 55 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 56 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 57 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 58 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 59 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 60 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 61 |
+
|
| 62 |
+
/* ----------------------------------------------------------------- */
|
| 63 |
+
/* The Japanese TTS System "Open JTalk" */
|
| 64 |
+
/* developed by HTS Working Group */
|
| 65 |
+
/* http://open-jtalk.sourceforge.net/ */
|
| 66 |
+
/* ----------------------------------------------------------------- */
|
| 67 |
+
/* */
|
| 68 |
+
/* Copyright (c) 2008-2016 Nagoya Institute of Technology */
|
| 69 |
+
/* Department of Computer Science */
|
| 70 |
+
/* */
|
| 71 |
+
/* All rights reserved. */
|
| 72 |
+
/* */
|
| 73 |
+
/* Redistribution and use in source and binary forms, with or */
|
| 74 |
+
/* without modification, are permitted provided that the following */
|
| 75 |
+
/* conditions are met: */
|
| 76 |
+
/* */
|
| 77 |
+
/* - Redistributions of source code must retain the above copyright */
|
| 78 |
+
/* notice, this list of conditions and the following disclaimer. */
|
| 79 |
+
/* - Redistributions in binary form must reproduce the above */
|
| 80 |
+
/* copyright notice, this list of conditions and the following */
|
| 81 |
+
/* disclaimer in the documentation and/or other materials provided */
|
| 82 |
+
/* with the distribution. */
|
| 83 |
+
/* - Neither the name of the HTS working group nor the names of its */
|
| 84 |
+
/* contributors may be used to endorse or promote products derived */
|
| 85 |
+
/* from this software without specific prior written permission. */
|
| 86 |
+
/* */
|
| 87 |
+
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
|
| 88 |
+
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
| 89 |
+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
| 90 |
+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
| 91 |
+
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
|
| 92 |
+
/* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
|
| 93 |
+
/* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
|
| 94 |
+
/* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
|
| 95 |
+
/* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
|
| 96 |
+
/* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
|
| 97 |
+
/* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
|
| 98 |
+
/* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
| 99 |
+
/* POSSIBILITY OF SUCH DAMAGE. */
|
| 100 |
+
/* ----------------------------------------------------------------- */
|