Spaces:
Build error
Build error
| import re | |
| import numpy as np | |
| import soundfile as sf | |
| import os | |
| import tempfile | |
| from pydub import AudioSegment | |
| import io | |
class ChatterboxScriptProcessor:
    """Chunk long scripts, synthesize each chunk with an engine, and merge audio.

    The class also contains helpers for saving generated audio and for
    preparing / validating reference audio used for voice cloning.
    """

    # Sample rate reported when no chunk produced any audio.
    _FALLBACK_SAMPLE_RATE = 22050

    def __init__(self, engine):
        """Store the engine; it must expose ``generate(text, **options)``
        returning an ``(audio, sample_rate)`` pair."""
        self.engine = engine

    @staticmethod
    def _split_oversized(sentence, max_chars):
        """Break one sentence into pieces of at most ``max_chars`` characters.

        Pieces are cut on spaces where possible; a single word longer than
        ``max_chars`` is hard-sliced.
        """
        if len(sentence) <= max_chars:
            return [sentence]

        pieces = []
        current = ""
        for word in sentence.split(" "):
            # A word that cannot fit in any piece is sliced into full-size
            # pieces; only its remainder takes part in normal packing.
            while len(word) > max_chars:
                if current:
                    pieces.append(current)
                    current = ""
                pieces.append(word[:max_chars])
                word = word[max_chars:]
            if not word:
                continue
            candidate = f"{current} {word}" if current else word
            if len(candidate) <= max_chars:
                current = candidate
            else:
                pieces.append(current)
                current = word
        if current:
            pieces.append(current)
        return pieces

    def split_text_into_chunks(self, text, max_chars=800):
        """Split ``text`` into chunks of at most ``max_chars`` characters.

        Chunks are built from whole sentences (split after ``.``, ``!`` or
        ``?``). A sentence that is itself longer than ``max_chars`` is broken
        on word boundaries so that no chunk exceeds the limit.

        Args:
            text: The script to split. Newlines and repeated whitespace are
                collapsed to single spaces.
            max_chars: Maximum length of each returned chunk.

        Returns:
            A list of non-empty chunk strings; ``[]`` for empty or
            whitespace-only input.

        Raises:
            ValueError: If ``max_chars`` is smaller than 1.
        """
        if max_chars < 1:
            raise ValueError("max_chars must be at least 1")
        if not text:
            return []

        # Collapse every whitespace run (newlines, tabs, "\r\n") to one space.
        text = " ".join(text.split())
        if not text:
            return []

        # The lookbehind keeps the punctuation attached to its sentence.
        sentences = re.split(r"(?<=[.!?]) +", text)

        chunks = []
        current = ""
        for sentence in sentences:
            for piece in self._split_oversized(sentence, max_chars):
                candidate = f"{current} {piece}" if current else piece
                if len(candidate) <= max_chars:
                    current = candidate
                else:
                    chunks.append(current)
                    current = piece
        if current:
            chunks.append(current)
        return chunks

    def process_long_script(self, text, voice, speed=1.0, lang='en', custom_voice_path=None,
                            exaggeration=0.5, cfg_weight=0.5, seed=None, temperature=1.0):
        """Generate audio for a long script chunk by chunk and merge the result.

        Every chunk is passed to ``self.engine.generate`` with the same
        options (including the same ``seed``). Chunks for which the engine
        returns ``None`` audio are skipped.

        Returns:
            ``(audio, sample_rate)`` where ``audio`` is a 1-D numpy array of
            all chunk audio concatenated in order and ``sample_rate`` is the
            rate reported for the first chunk that produced audio. If no
            chunk produced audio, returns ``(None, 22050)``.
        """
        chunks = self.split_text_into_chunks(text)
        print(f"Split script into {len(chunks)} chunks.")

        combined_audio = []
        sample_rate = None
        for i, chunk in enumerate(chunks):
            print(f"Processing chunk {i+1}/{len(chunks)}...")
            audio, sr = self.engine.generate(
                chunk,
                voice=voice,
                speed=speed,
                lang=lang,
                custom_voice_path=custom_voice_path,
                exaggeration=exaggeration,
                cfg_weight=cfg_weight,
                seed=seed,
                temperature=temperature,
            )
            if audio is None:
                continue
            combined_audio.append(audio)
            if sample_rate is None:
                sample_rate = sr

        if not combined_audio:
            return None, self._FALLBACK_SAMPLE_RATE

        # Flatten each piece so (1, n) and (n,) outputs concatenate cleanly.
        final_audio = np.concatenate([np.asarray(a).ravel() for a in combined_audio])
        return final_audio, sample_rate

    def save_audio(self, audio_data, sample_rate, output_path):
        """Write ``audio_data`` to ``output_path`` with ``soundfile``.

        Returns:
            ``output_path`` on success, ``None`` if writing failed (the error
            is printed rather than raised).
        """
        try:
            sf.write(output_path, audio_data, sample_rate)
            return output_path
        except Exception as e:
            print(f"Error saving audio: {str(e)}")
            return None

    def process_audio_upload(self, audio_file):
        """Convert an uploaded reference clip to a temporary WAV file.

        Args:
            audio_file: Path or file object accepted by
                ``AudioSegment.from_file``; ``None`` means "no upload".

        Returns:
            Path of the newly created ``.wav`` file (the caller owns it and is
            responsible for deleting it), or ``None`` when there was no upload
            or processing failed.
        """
        if audio_file is None:
            return None

        tmp_path = None
        try:
            # Decode first so an unreadable upload never creates a temp file.
            audio = AudioSegment.from_file(audio_file)

            # Reserve a path and close our handle before exporting: writing to
            # a path that is still open elsewhere fails on Windows.
            fd, tmp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)

            # export() returns the open output handle; close it explicitly.
            exported = audio.export(tmp_path, format="wav")
            if exported is not None:
                exported.close()

            # Short references still work, but clips under 5 seconds tend to
            # clone poorly, so warn without rejecting.
            duration_seconds = len(audio) / 1000.0
            print(f"Audio duration: {duration_seconds:.2f} seconds")
            if duration_seconds < 5:
                print("Warning: Audio reference is shorter than 5 seconds. Voice cloning quality may be reduced.")
            return tmp_path
        except Exception as e:
            print(f"Error processing audio upload: {str(e)}")
            # Do not leave a half-written temp file behind on failure.
            if tmp_path is not None:
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass
            return None

    def validate_audio_file(self, audio_path):
        """Check that a reference clip is usable for voice cloning.

        The clip must exist, last between 3 and 60 seconds, and have a sample
        rate of at least 16 kHz.

        Returns:
            ``(is_valid, message)`` - a bool and a human-readable reason.
        """
        try:
            if not audio_path or not os.path.isfile(audio_path):
                return False, "Audio file not found"

            audio = AudioSegment.from_file(audio_path)

            # len() of an AudioSegment is its duration in milliseconds.
            duration = len(audio) / 1000.0
            if duration < 3:
                return False, "Audio too short (minimum 3 seconds required)"
            if duration > 60:
                return False, "Audio too long (maximum 60 seconds recommended)"

            frame_rate = getattr(audio, 'frame_rate', None)
            if frame_rate is not None and frame_rate < 16000:
                return False, "Audio quality too low (minimum 16kHz recommended)"

            return True, "Audio file is suitable for voice cloning"
        except Exception as e:
            return False, f"Error validating audio: {str(e)}"