marks committed on
Commit
697ec60
·
1 Parent(s): c405952

Scrubbed letters for tts

Browse files
Files changed (1) hide show
  1. tts.py +45 -3
tts.py CHANGED
@@ -1,13 +1,55 @@
1
- def text_to_speech(text, api_key):
2
- import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
 
 
 
 
4
  url = "https://api.elevenlabs.io/v1/text-to-speech"
5
  headers = {
6
  "Authorization": f"Bearer {api_key}",
7
  "Content-Type": "application/json"
8
  }
9
  data = {
10
- "text": text,
11
  "voice": "en_us_male", # Specify the desired voice
12
  "output_format": "mp3" # Specify the desired output format
13
  }
 
1
+ import re
2
+ import requests
3
+
4
# Matches http/https URLs. NOTE: the `$-_` range inside the character class is
# broad (it spans most ASCII punctuation, digits and upper-case letters), so
# punctuation that directly follows a URL is absorbed into the match.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Single-character translation table. Spoken replacements are padded with
# spaces so they never fuse with neighbouring words ("50%" -> "50 percent",
# not "50percent"); the surplus whitespace is collapsed afterwards.
_SPEECH_TRANSLATION = str.maketrans({
    '*': '',
    '#': ' hashtag ',
    '@': ' at ',
    '&': ' and ',
    '%': ' percent ',
    '+': ' plus ',
    '=': ' equals ',
    '/': ' or ',
    '\\': ' ',
    '|': ' ',
    '_': ' ',
    '>': ' greater than ',
    '<': ' less than ',
    '`': '',
    '~': '',
    '[': '',
    ']': '',
    '{': '',
    '}': '',
    '(': '',
    ')': '',
})


def clean_text_for_speech(text):
    """Return *text* rewritten so it reads naturally when spoken aloud.

    Processing steps, in order:

    1. Every http/https URL is replaced by the word "link".
    2. Symbols are either dropped (markdown markers, brackets), turned into
       a space (backslash, pipe, underscore) or replaced by a spoken word
       ("@" -> "at", "%" -> "percent", ...).
    3. Runs of whitespace are collapsed to single spaces and the result is
       stripped.

    Args:
        text: The raw string to clean. ``None`` or an empty string yields
            an empty string.

    Returns:
        The cleaned string.
    """
    # Guard against None / empty input instead of failing inside re.sub.
    if not text:
        return ''

    # URLs must be handled first: the symbol table would otherwise break
    # them apart ("/" -> " or ") before they can be recognised.
    text = _URL_PATTERN.sub(' link ', text)

    # One pass over the string instead of one .replace() call per symbol.
    text = text.translate(_SPEECH_TRANSLATION)

    # Collapse the extra whitespace introduced by the padded replacements.
    return ' '.join(text.split())
41
 
42
+ def text_to_speech(text, api_key):
43
+ # Clean text before sending to API
44
+ cleaned_text = clean_text_for_speech(text)
45
+
46
  url = "https://api.elevenlabs.io/v1/text-to-speech"
47
  headers = {
48
  "Authorization": f"Bearer {api_key}",
49
  "Content-Type": "application/json"
50
  }
51
  data = {
52
+ "text": cleaned_text, # Use cleaned text instead of original
53
  "voice": "en_us_male", # Specify the desired voice
54
  "output_format": "mp3" # Specify the desired output format
55
  }