# Podcast generator: turns a text article into a two-speaker audio conversation
# (dialogue generated with distilgpt2, speech synthesized offline with pyttsx3).
# Standard library.
import os
import re
import json
import shutil

# Third-party: offline TTS, audio concatenation, text generation, env loading.
import pyttsx3
from pydub import AudioSegment
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
# --- Text-generation model ---
# distilgpt2 is a small distilled GPT-2 checkpoint; both tokenizer and model
# are loaded once at import time and reused by generate_conversation().
model_name = "distilgpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
# Instruction prepended to the article text when prompting the model for
# a two-speaker dialogue.
system_prompt = """Generate a conversation between Sascha and Marina based on the article content provided.
Sascha is the article writer, and Marina is the interviewer. Make it engaging and emotional, with natural pauses (like "uh")
to make it sound conversational. This is for a podcast called "The Machine Learning Engineer"."""

# TTS backend per speaker. Both currently map to the same offline engine;
# kept as a map so a per-speaker backend/voice could be configured later.
speaker_voice_map = {
    "Sascha": "pyttsx3",  # Sascha will use pyttsx3 for offline TTS
    "Marina": "pyttsx3"   # Marina uses pyttsx3 for offline TTS
}

# Single shared pyttsx3 engine for offline speech synthesis.
engine = pyttsx3.init()
engine.setProperty('rate', 150)    # Speed of speech
engine.setProperty('volume', 0.9)  # Volume (0.0 to 1.0)
# Pyttsx3 TTS function for offline TTS
def synthesize_speech_pyttsx3(text, speaker, index):
    """Render `text` to an audio file using the shared pyttsx3 engine.

    The file is written as "audio-files/<index>_<speaker>.mp3" so that a
    natural sort of the directory restores conversation order.
    """
    filename = f"audio-files/{index}_{speaker}.mp3"
    # NOTE(review): pyttsx3 writes the platform-native audio format regardless
    # of the .mp3 extension — confirm pydub decodes these files downstream.
    engine.save_to_file(text, filename)
    engine.runAndWait()
    # Bug fix: the message previously printed a hard-coded placeholder
    # instead of interpolating the actual output path.
    print(f'Audio content written to file "{filename}"')
# Dispatch one conversation turn to the configured TTS backend.
def synthesize_speech(text, speaker, index):
    """Synthesize `text` for `speaker`; currently always uses pyttsx3."""
    synthesize_speech_pyttsx3(text, speaker, index)
| # Function to sort filenames naturally | |
| def natural_sort_key(filename): | |
| return [int(text) if text.isdigit() else text for text in re.split(r'(\d+)', filename)] | |
# Concatenate every audio clip in a folder into a single mp3.
def merge_audios(audio_folder, output_file):
    """Merge all .mp3/.wav files in `audio_folder` (natural-sort order)
    into one mp3 written to `output_file`."""
    names = [n for n in os.listdir(audio_folder) if n.endswith((".mp3", ".wav"))]
    names.sort(key=natural_sort_key)

    merged = AudioSegment.empty()
    for name in names:
        clip_path = os.path.join(audio_folder, name)
        print(f"Processing: {clip_path}")
        merged = merged + AudioSegment.from_file(clip_path)

    merged.export(output_file, format="mp3")
    print(f"Merged audio saved as {output_file}")
# Function to generate conversation using distilgpt2
def generate_conversation(article):
    """Generate a Sascha/Marina dialogue about `article`.

    Returns a list of {"speaker": ..., "text": ...} dicts (speaker labels
    keep their trailing colon, as produced by the split below).
    """
    input_text = f"{system_prompt}\n\n{article}\n\nSascha: "
    inputs = tokenizer.encode(input_text, return_tensors="pt")
    # NOTE(review): temperature has no effect unless do_sample=True is passed,
    # so generation is greedy as written — confirm this is intended.
    outputs = model.generate(inputs, max_length=1024, num_return_sequences=1, temperature=1.0)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Split on the capturing group so speaker labels stay in the list,
    # alternating label/text; drop the preamble before the first label.
    lines = re.split(r'(Sascha:|Marina:)', generated_text)[1:]
    # Bug fix: stop at len(lines) - 1 so a trailing speaker label with no
    # following text no longer raises IndexError on lines[i + 1].
    conversation = [
        {"speaker": lines[i].strip(), "text": lines[i + 1].strip()}
        for i in range(0, len(lines) - 1, 2)
    ]
    formatted_json = json.dumps(conversation, indent=4)
    print(formatted_json)
    return conversation
# Turn a parsed conversation into per-line audio clips, then a single podcast file.
def generate_audio(conversation):
    """Synthesize every conversation turn into audio-files/ and merge the
    clips into podcast.mp3. The working directory is rebuilt on each run."""
    workdir = 'audio-files'
    if os.path.exists(workdir):
        shutil.rmtree(workdir)
    os.makedirs(workdir, exist_ok=True)

    for turn_index, turn in enumerate(conversation):
        synthesize_speech(turn['text'], turn['speaker'], turn_index)

    output_file = "podcast.mp3"
    merge_audios(workdir, output_file)
    return output_file
# Read the article from the file.
# Fix: request UTF-8 explicitly — the platform-dependent default encoding
# could mis-decode the article on some systems.
with open('function-calling.txt', 'r', encoding='utf-8') as file:
    article = file.read()

# Generate conversation and audio
conversation = generate_conversation(article)
generate_audio(conversation)