Re1th
/

speech2viddeo

Model card Files Files and versions

Re1th committed on Mar 24, 2024

Commit

d646ee8

·

verified ·

1 Parent(s): a7ae30b

Upload speech2video.py

Files changed (1) hide show

speech2video.py +100 -0

speech2video.py ADDED Viewed

	@@ -0,0 +1,100 @@

+# -*- coding: utf-8 -*-
+"""Speech2Video.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1CcYNY0wwS05Ml7UVv4oY7cHjlVrhTbIq
+"""
+# Mount Google Drive at /content/drive (requires the Colab runtime).
+from google.colab import drive
+drive.mount('/content/drive')
+# NOTE: lines starting with `!` are IPython/Colab shell escapes. They are not
+# valid syntax for a plain Python interpreter, so this file runs only as a
+# notebook cell (or after these lines are removed or commented out).
+!apt-get install python3-pyaudio
+!pip install SpeechRecognition
+!pip install pydub
+from pydub import AudioSegment
+import speech_recognition as sr
+import re
+import nltk
+from nltk.stem import PorterStemmer, WordNetLemmatizer
+from nltk.tokenize import word_tokenize
+# Fetch the NLTK resources named 'punkt' and 'wordnet' at import time.
+nltk.download('punkt')
+nltk.download('wordnet')
+# modelscope is pinned to 1.4.2; the other two packages are installed unpinned.
+!pip install modelscope==1.4.2
+!pip install open_clip_torch
+!pip install pytorch-lightning
+from modelscope.pipelines import pipeline
+from modelscope.outputs import OutputKeys
+# Build the text-to-video pipeline once at import time; main() calls it as `p`.
+# Presumably the second argument is the model identifier - confirm against modelscope docs.
+p = pipeline('text-to-video-synthesis', 'damo/text-to-video-synthesis')
+def convert_to_wav(input_file, output_file):
+    audio = AudioSegment.from_ogg(input_file)
+    audio.export(output_file, format="wav")
+# Function to convert audio file to text
+def speech_to_text(audio_file):
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_file) as source:
+        audio = recognizer.record(source)
+        try:
+            text = recognizer.recognize_google(audio)
+            return text
+        except sr.UnknownValueError:
+            print("Sorry, could not understand audio")
+            return ""
+        except sr.RequestError as e:
+            print("Error fetching results; {0}".format(e))
+            return ""
+# Function to preprocess text
+def preprocess_text(text):
+    # Remove non-alphabetic characters
+    text = re.sub(r'[^a-zA-Z\s]', '', text)
+    # Tokenize the text
+    tokens = word_tokenize(text)
+    porter_stemmer = PorterStemmer()
+    lemmatizer = WordNetLemmatizer()
+    stemmed_tokens = [porter_stemmer.stem(token) for token in tokens]
+    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
+    lemmatized_text = ' '.join(lemmatized_tokens)
+    return lemmatized_text
+# Main function
+def main():
+    # Input and output file paths
+    input_file = "/content/drive/MyDrive/IV II PROJECT/WhatsApp Audio 2024-03-24 at 8.52.04 AM.ogg"
+    output_file = "/content/drive/MyDrive/IV II PROJECT/converted_audio.wav"
+    # Convert .ogg to .wav
+    convert_to_wav(input_file, output_file)
+    # Convert audio to text
+    text = speech_to_text(output_file)
+    print("Text from audio:", text)
+    # Preprocess text
+    preprocessed_text = preprocess_text(text)
+    print("Preprocessed text:", preprocessed_text)
+    test_text = {
+        'text': preprocessed_text,
+    }
+    output_video_path = p(test_text,)[OutputKeys.OUTPUT_VIDEO]
+    print('output_video_path:', output_video_path)
+    from google.colab import files
+    files.download(output_video_path)
+# Run the pipeline only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    main()