Re1th committed on
Commit
d646ee8
·
verified ·
1 Parent(s): a7ae30b

Upload speech2video.py

Browse files
Files changed (1) hide show
  1. speech2video.py +100 -0
speech2video.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Speech2Video.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1CcYNY0wwS05Ml7UVv4oY7cHjlVrhTbIq
8
+ """
9
+
10
+ from google.colab import drive
11
+ drive.mount('/content/drive')
12
+
13
+ !apt-get install python3-pyaudio
14
+ !pip install SpeechRecognition
15
+ !pip install pydub
16
+
17
+ from pydub import AudioSegment
18
+ import speech_recognition as sr
19
+ import re
20
+ import nltk
21
+ from nltk.stem import PorterStemmer, WordNetLemmatizer
22
+ from nltk.tokenize import word_tokenize
23
+
24
+ nltk.download('punkt')
25
+ nltk.download('wordnet')
26
+
27
+ !pip install modelscope==1.4.2
28
+ !pip install open_clip_torch
29
+ !pip install pytorch-lightning
30
+
31
+ from modelscope.pipelines import pipeline
32
+ from modelscope.outputs import OutputKeys
33
+
34
+ p = pipeline('text-to-video-synthesis', 'damo/text-to-video-synthesis')  # module-level pipeline object; main() calls it as p({...}) and reads OutputKeys.OUTPUT_VIDEO from the result
35
+
36
def convert_to_wav(input_file, output_file):
    """Decode the Ogg audio at *input_file* and write it to *output_file* as WAV.

    Args:
        input_file: Path of the source ``.ogg`` file.
        output_file: Path the WAV export is written to.
    """
    AudioSegment.from_ogg(input_file).export(output_file, format="wav")
39
+
40
+ # Function to convert audio file to text
41
def speech_to_text(audio_file):
    """Transcribe *audio_file* with ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of an audio file that ``sr.AudioFile`` can open.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition request failed (a message is printed
        in both failure cases).
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as wav:
        # Read the whole file into memory as one audio clip.
        captured = engine.record(wav)

    try:
        return engine.recognize_google(captured)
    except sr.UnknownValueError:
        print("Sorry, could not understand audio")
    except sr.RequestError as err:
        print("Error fetching results; {0}".format(err))
    # Both failure paths fall through to the same empty result.
    return ""
54
+
55
+ # Function to preprocess text
56
def preprocess_text(text):
    """Strip non-letter characters from *text* and lemmatize what remains.

    Args:
        text: Raw transcript string.

    Returns:
        The lemmatized tokens joined by single spaces; an empty string when
        no tokens are left after cleaning.
    """
    # Keep only ASCII letters and whitespace; digits and punctuation are dropped.
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)

    tokens = word_tokenize(letters_only)

    # Only the lemmatized form feeds the result. A Porter-stemming pass that
    # was computed here and then thrown away has been removed as dead work.
    lemmatizer = WordNetLemmatizer()
    return ' '.join(lemmatizer.lemmatize(token) for token in tokens)
72
+
73
+ # Main function
74
def main(
    input_file="/content/drive/MyDrive/IV II PROJECT/WhatsApp Audio 2024-03-24 at 8.52.04 AM.ogg",
    output_file="/content/drive/MyDrive/IV II PROJECT/converted_audio.wav",
):
    """Run the speech-to-video flow: ogg -> wav -> text -> prompt -> video.

    Args:
        input_file: Path of the source ``.ogg`` recording. Defaults to the
            recording on the mounted Drive that the script was written for.
        output_file: Path where the intermediate WAV file is written.

    Returns:
        None. The generated video path is printed and handed to
        ``google.colab.files.download``. When no usable text is recognized
        the function returns early without invoking the video model.
    """
    # Convert .ogg to .wav so the speech recognizer can read it.
    convert_to_wav(input_file, output_file)

    # Convert audio to text.
    text = speech_to_text(output_file)
    print("Text from audio:", text)

    # Preprocess text into the prompt passed to the model.
    preprocessed_text = preprocess_text(text)
    print("Preprocessed text:", preprocessed_text)

    # speech_to_text returns "" on failure, and cleaning can also strip
    # everything; an empty prompt gives the model nothing to render, so stop
    # before the expensive synthesis step instead of generating from nothing.
    if not preprocessed_text.strip():
        print("No usable text recognized; skipping video generation")
        return

    test_text = {
        'text': preprocessed_text,
    }
    output_video_path = p(test_text,)[OutputKeys.OUTPUT_VIDEO]
    print('output_video_path:', output_video_path)

    # Imported here because it is only needed for the final download step
    # and only exists inside a Colab runtime.
    from google.colab import files
    files.download(output_video_path)
97
+
98
+ if __name__ == "__main__":  # run main() only when this module is the __main__ module, not when it is imported
99
+ main()
100
+