Dionyssos committed on
Commit
d3db00d
·
1 Parent(s): b2772ad

T5 shorter descriptions

Browse files
Files changed (2) hide show
  1. app.py +7 -7
  2. textual.py +7 -7
app.py CHANGED
@@ -63,20 +63,20 @@ def audionar_tts(text=None,
63
  x = np.zeros(4 * 16000, dtype=np.float32) # If no txt 4s of audiogen
64
 
65
  elif lang not in language_names: # text exists / StyleTTS2
66
-
67
  text = only_greek_or_only_latin(text, lang='eng')
68
 
69
  x = _tts.inference(text,
70
  ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
71
-
72
  if x.shape[0] > 10:
73
 
74
  x = audresample.resample(signal=x.astype(np.float32),
75
  original_rate=24000,
76
  target_rate=16000)[0, :] # 16 KHz
77
-
78
  else: # VITS
79
-
80
  lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
81
 
82
  global cached_lang_code, cached_net_g, cached_tokenizer
@@ -111,19 +111,19 @@ def audionar_tts(text=None,
111
 
112
  if soundscape and soundscape.strip():
113
 
114
-
115
  speech_duration_secs = len(x) / 16000
116
  target_duration = max(speech_duration_secs + 0.74, 2.0)
117
 
118
 
119
  background_audio = audiogen.generate(
120
- soundscape,
121
  duration=target_duration,
122
  max_tokens=min( max(7, int(max_tokens)), 288 ) # limit sounds tokens (clone beyond)
123
  ).numpy()
124
 
125
  # PAD
126
-
127
  len_speech = len(x)
128
  len_background = len(background_audio)
129
 
 
63
  x = np.zeros(4 * 16000, dtype=np.float32) # If no txt 4s of audiogen
64
 
65
  elif lang not in language_names: # text exists / StyleTTS2
66
+
67
  text = only_greek_or_only_latin(text, lang='eng')
68
 
69
  x = _tts.inference(text,
70
  ref_s='wav/' + lang + '.wav')[0, 0, :].numpy() # 24 Khz
71
+
72
  if x.shape[0] > 10:
73
 
74
  x = audresample.resample(signal=x.astype(np.float32),
75
  original_rate=24000,
76
  target_rate=16000)[0, :] # 16 KHz
77
+
78
  else: # VITS
79
+
80
  lang_code = lang_map.get(lang.lower(), lang.lower().split()[0].strip())
81
 
82
  global cached_lang_code, cached_net_g, cached_tokenizer
 
111
 
112
  if soundscape and soundscape.strip():
113
 
114
+
115
  speech_duration_secs = len(x) / 16000
116
  target_duration = max(speech_duration_secs + 0.74, 2.0)
117
 
118
 
119
  background_audio = audiogen.generate(
120
+ soundscape[:64], # cap the prompt at 64 characters so the T5 token count (and with it the cross-attention shape) stays small
121
  duration=target_duration,
122
  max_tokens=min( max(7, int(max_tokens)), 288 ) # limit sounds tokens (clone beyond)
123
  ).numpy()
124
 
125
  # PAD
126
+
127
  len_speech = len(x)
128
  len_background = len(background_audio)
129
 
textual.py CHANGED
@@ -41,12 +41,12 @@ def only_greek_or_only_latin(text, lang='grc'):
41
  # 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
42
  # 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
43
  # 'я': 'ya',
44
- # ----------------
45
- 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ж': 'zh',
46
  'з': 'z', 'и': 'i', 'ј': 'j', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
47
- 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f',
48
- 'х': 'h', 'ц': 'c', 'ч': 'ts', 'ш': 'sch',
49
- 'ђ': 'd', 'љ': 'lj', 'њ': 'nj', 'ћ': 'c', 'џ': 'dsz',
50
  'ё': 'yo', 'й': 'y', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '',
51
  'э': 'e', 'ю': 'io', 'я': 'ia',
52
  'ѓ': 'y', 'ѕ': 's', 'ќ': 'k',
@@ -166,7 +166,7 @@ def fix_vocals(text, lang='ron'):
166
  '^': ' la puterea ',
167
  '+': ' plus ',
168
  ' - ': ' minus ', # only replace if standalone so to not say minus if is a-b-c
169
- '*': ' ori ', # times
170
  '/': ' împărțit la ', # divided by
171
  '=': ' egal cu ', # equals
172
  'pi': ' pi ',
@@ -199,7 +199,7 @@ def fix_vocals(text, lang='ron'):
199
  '^': ' to the power of ',
200
  '+': ' plus ',
201
  ' - ': ' minus ',
202
- '*': ' times ',
203
  ' / ': ' divided by ',
204
  '=': ' equals ',
205
  'pi': ' pi ',
 
41
  # 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u', 'ф': 'f', 'х': 'kh', 'ц': 'ts',
42
  # 'ч': 'ch', 'ш': 'sh', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu',
43
  # 'я': 'ya',
44
+ # ----------------кључеви
45
+ 'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'di', 'е': 'e', 'ж': 'zu',
46
  'з': 'z', 'и': 'i', 'ј': 'j', 'к': 'k', 'л': 'l', 'м': 'm', 'н': 'n',
47
+ 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'ou', 'ф': 'f',
48
+ 'х': 'h', 'ц': 'ts', 'ч': 'ts', 'ш': 'sch',
49
+ 'ђ': 'd', 'љ': 'li', 'њ': 'nj', 'ћ': 'c', 'џ': 'dsz',
50
  'ё': 'yo', 'й': 'y', 'щ': 'shch', 'ъ': '', 'ы': 'y', 'ь': '',
51
  'э': 'e', 'ю': 'io', 'я': 'ia',
52
  'ѓ': 'y', 'ѕ': 's', 'ќ': 'k',
 
166
  '^': ' la puterea ',
167
  '+': ' plus ',
168
  ' - ': ' minus ', # only replace if standalone so to not say minus if is a-b-c
169
+ # '*': ' ori ', # times
170
  '/': ' împărțit la ', # divided by
171
  '=': ' egal cu ', # equals
172
  'pi': ' pi ',
 
199
  '^': ' to the power of ',
200
  '+': ' plus ',
201
  ' - ': ' minus ',
202
+ # '*': ' times ',
203
  ' / ': ' divided by ',
204
  '=': ' equals ',
205
  'pi': ' pi ',