clone

Build error

App Files Files Community

nikkmitra committed on Oct 11, 2024

Commit

726d00d

verified ·

1 Parent(s): 0855517

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -19

app.py CHANGED Viewed

@@ -57,15 +57,7 @@ def check_voice_files():
     else:
         return "**All voice files are present.** 🎉"
-# Initialize Hindi tokenizer
-def load_hindi_tokenizer():
-    """
-    Loads a pre-trained Hindi tokenizer from Hugging Face.
-    """
-    tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-bert", use_fast=True)
-    return tokenizer
-hindi_tokenizer = load_hindi_tokenizer()
 # New function to split text into chunks of 100 tokens using the Hindi tokenizer
 def split_text_into_chunks(text, max_tokens=100, language="en"):
@@ -74,26 +66,16 @@ def split_text_into_chunks(text, max_tokens=100, language="en"):
     Inserts a newline after each chunk.
     Uses a specialized tokenizer for Hindi language.
     """
-    if language == "hi":
-        tokens = hindi_tokenizer.tokenize(text)
-    else:
-        tokens = text.split()  # Fallback to simple splitting for other languages
     chunks = []
     for i in range(0, len(tokens), max_tokens):
-        if language == "hi":
-            # Convert tokens back to string for Hindi
-            chunk = hindi_tokenizer.convert_tokens_to_string(tokens[i:i + max_tokens])
-        else:
-            chunk = ' '.join(tokens[i:i + max_tokens])
         chunks.append(chunk)
     return '\n'.join(chunks)
 @spaces.GPU(duration=120)
 def tts_generate(text, voice, language):
     # Check for Hindi language and split text if necessary
-    if language == "hi":
-        text = split_text_into_chunks(text, max_tokens=100, language=language)
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
         temp_audio_path = temp_audio.name

     else:
         return "**All voice files are present.** 🎉"
 # New function to split text into chunks of 100 tokens using the Hindi tokenizer
 def split_text_into_chunks(text, max_tokens=100, language="en"):
     Inserts a newline after each chunk.
     Uses a specialized tokenizer for Hindi language.
     """
     chunks = []
     for i in range(0, len(tokens), max_tokens):
+        chunk = ' '.join(tokens[i:i + max_tokens])
         chunks.append(chunk)
     return '\n'.join(chunks)
 @spaces.GPU(duration=120)
 def tts_generate(text, voice, language):
     # Check for Hindi language and split text if necessary
     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
         temp_audio_path = temp_audio.name