# -*- coding: utf-8 -*-
"""Speech2Video.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1CcYNY0wwS05Ml7UVv4oY7cHjlVrhTbIq
"""

# Environment setup. The notebook ran these as IPython "!" shell commands,
# which are not valid syntax in a plain .py file; run them in a notebook cell
# or a shell before importing this module:
#   apt-get install python3-pyaudio
#   pip install SpeechRecognition
#   pip install pydub
#   pip install modelscope==1.4.2
#   pip install open_clip_torch
#   pip install pytorch-lightning

import re

import nltk
import speech_recognition as sr
from google.colab import drive
from modelscope.outputs import OutputKeys
from modelscope.pipelines import pipeline
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize
from pydub import AudioSegment

# The default audio paths below live on Google Drive, so mount it first.
drive.mount('/content/drive')

# Resources used by word_tokenize and WordNetLemmatizer.
nltk.download('punkt')
nltk.download('wordnet')

# Text-to-video pipeline, built once at import time and reused by main().
p = pipeline('text-to-video-synthesis', 'damo/text-to-video-synthesis')

# Default locations used when main() is called without arguments.
DEFAULT_INPUT_FILE = "/content/drive/MyDrive/IV II PROJECT/WhatsApp Audio 2024-03-24 at 8.52.04 AM.ogg"
DEFAULT_OUTPUT_FILE = "/content/drive/MyDrive/IV II PROJECT/converted_audio.wav"


def convert_to_wav(input_file, output_file):
    """Convert an OGG audio file to WAV.

    Args:
        input_file: Path of the source ``.ogg`` file.
        output_file: Path the ``.wav`` export is written to.
    """
    audio = AudioSegment.from_ogg(input_file)
    audio.export(output_file, format="wav")


def speech_to_text(audio_file):
    """Transcribe an audio file to text with ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood (``sr.UnknownValueError``) or the recognition request
        failed (``sr.RequestError``). Both failures are reported on stdout
        rather than raised.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Sorry, could not understand audio")
    except sr.RequestError as e:
        print(f"Error fetching results; {e}")
    return ""


def preprocess_text(text):
    """Clean and lemmatize text before it is used as a video prompt.

    Every character that is not an ASCII letter or whitespace is removed,
    the remainder is tokenized with ``word_tokenize``, each token is
    lemmatized with ``WordNetLemmatizer``, and the tokens are joined with
    single spaces. No lowercasing is applied here.

    Args:
        text: Raw input text.

    Returns:
        The lemmatized tokens joined by single spaces (empty string when no
        tokens remain).
    """
    # Remove non-alphabetic characters
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Tokenize the text
    tokens = word_tokenize(text)

    # Only the lemmatized form is returned; the stemmed token list that was
    # previously computed here was never used, so it has been dropped.
    lemmatizer = WordNetLemmatizer()
    return ' '.join(lemmatizer.lemmatize(token) for token in tokens)


def main(input_file=DEFAULT_INPUT_FILE, output_file=DEFAULT_OUTPUT_FILE):
    """Run the speech-to-video flow: OGG -> WAV -> text -> video -> download.

    Args:
        input_file: Path of the ``.ogg`` recording to process.
        output_file: Path where the intermediate ``.wav`` file is written.
    """
    # Convert .ogg to .wav
    convert_to_wav(input_file, output_file)

    # Convert audio to text
    text = speech_to_text(output_file)
    print("Text from audio:", text)

    # Preprocess text
    preprocessed_text = preprocess_text(text)
    print("Preprocessed text:", preprocessed_text)

    # speech_to_text returns "" on failure; do not run the video model on an
    # empty prompt.
    if not preprocessed_text:
        print("No usable text was extracted from the audio; skipping video generation.")
        return

    test_text = {
        'text': preprocessed_text,
    }
    output_video_path = p(test_text,)[OutputKeys.OUTPUT_VIDEO]
    print('output_video_path:', output_video_path)

    # Imported here because it is only needed for the final browser download.
    from google.colab import files
    files.download(output_video_path)


if __name__ == "__main__":
    main()