Bushra-KB committed on
Commit
69ab4fe
·
verified ·
1 Parent(s): d1dc89e

Update backend/app.py

Browse files
Files changed (1) hide show
  1. backend/app.py +4 -7
backend/app.py CHANGED
@@ -17,7 +17,6 @@ from gtts.tts import gTTSError
17
  mms_model = None
18
  mms_tokenizer = None
19
  # Define a writable cache directory for Hugging Face models
20
- os.environ["HOME"] = "/tmp" # Set HOME to /tmp for writable cache on Spaces
21
  CACHE_DIR = os.environ.get("TRANSFORMERS_CACHE")
22
 
23
 
@@ -104,12 +103,10 @@ def text_to_speech():
104
  import torch
105
  import soundfile as sf
106
 
107
- # Import uroman for romanization
108
- from uroman import uroman
109
-
110
- # Romanize the text to handle non-Roman characters
111
- text = uroman(text)
112
- print(f"Romanized text: {text}")
113
 
114
  inputs = mms_tokenizer(text, return_tensors="pt")
115
  try:
 
17
  mms_model = None
18
  mms_tokenizer = None
19
  # Define a writable cache directory for Hugging Face models
 
20
  CACHE_DIR = os.environ.get("TRANSFORMERS_CACHE")
21
 
22
 
 
103
  import torch
104
  import soundfile as sf
105
 
106
+ # The transformers tokenizer will automatically use uroman if it's installed.
107
+ # No explicit call is needed.
108
+ if re.search(r"[^A-Za-z0-9\s\.,\?!;:'\"\-]", text):
109
+ print("Text contains non-Roman characters. Relying on tokenizer's automatic romanization.")
 
 
110
 
111
  inputs = mms_tokenizer(text, return_tensors="pt")
112
  try: