| from indicvoice import IndicPipeline, IndicModel |
| import torch |
| from scipy.io import wavfile |
|
|
def save_audio(audio: torch.Tensor, filename: str, sample_rate: int = 24000) -> None:
    """Write an audio tensor to ``filename`` as a WAV file.

    Prints a confirmation on success, or a notice when there is nothing to
    write.

    Args:
        audio: Samples to write, or ``None`` when no audio was generated
            (in which case no file is created).
        filename: Destination path passed to ``scipy.io.wavfile.write``.
        sample_rate: Sample rate in Hz recorded in the WAV header. Defaults
            to 24000, the value this helper previously hard-coded.
    """
    if audio is None:
        print("No audio was generated")
        return

    # detach() first: Tensor.numpy() raises on a tensor that requires grad.
    samples = audio.detach().cpu().numpy()
    wavfile.write(filename, sample_rate, samples)
    print(f"Audio saved as '{filename}'")
|
|
def main():
    """Demonstrate ``generate_from_tokens`` with two kinds of input.

    Example 1 passes a raw phoneme string and saves the first result to
    ``phoneme_output_new.wav``. Example 2 converts text with
    ``pipeline.g2p``, feeds the resulting tokens to ``generate_from_tokens``,
    prints per-token timestamps when the tokens carry them, and saves each
    result to ``token_output_<index>.wav``.

    Any exception raised along the way is reported on stdout rather than
    propagated, since this is the top-level entry point of a demo script.
    """
    pipeline = IndicPipeline(lang_code='a')

    phonemes = "hˌW ɑɹ ju tədˈA? ˌI ɐm dˈuɪŋ ɹˈizənəbli wˈɛl, θˈæŋk ju fɔɹ ˈæskɪŋ"

    try:
        print("\nExample 1: Using generate_from_tokens with raw phonemes")
        results = list(pipeline.generate_from_tokens(
            tokens=phonemes,
            voice="af_bella",
            speed=1.0,
        ))
        if results:
            save_audio(results[0].audio, 'phoneme_output_new.wav')
        else:
            # Make an empty generator visible instead of silently moving on.
            print("No audio was generated")

        print("\nExample 2: Using generate_from_tokens with pre-processed tokens")

        text = "How are you today? I am doing reasonably well, thank you for asking"
        _, tokens = pipeline.g2p(text)

        generated = pipeline.generate_from_tokens(
            tokens=tokens,
            voice="af_bella",
            speed=1.0,
        )
        for index, result in enumerate(generated):
            # Timestamps are optional: print them only for tokens that have both.
            for token in result.tokens or ():
                if hasattr(token, 'start_ts') and hasattr(token, 'end_ts'):
                    print(f"Token: {token.text} ({token.start_ts:.2f}s - {token.end_ts:.2f}s)")
            # Name files by position: hash() of a str is salted per process,
            # so it would produce different (and possibly negative) names on
            # every run.
            save_audio(result.audio, f'token_output_{index}.wav')

    except Exception as e:
        print(f"An error occurred: {e}")
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()