youtube_video_transcriber.py

#1
Files changed (1) hide show
  1. youtube_video_transcriber.py +142 -0
youtube_video_transcriber.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import whisper
3
+ from pytube import YouTube
4
+ import gradio as gr
5
+ import os
6
+ import re
7
+ import logging
8
+ from easygoogletranslate import EasyGoogleTranslate
9
+ import nltk
10
+ from nltk.corpus import stopwords
11
+ from nltk.tokenize import word_tokenize, sent_tokenize
12
+ from typing import Tuple, Optional, List
13
+
14
class VideoTranscriber:
    """
    Transcribes the audio of a YouTube video with Whisper, translates the
    transcript and summarizes it, and exposes the pipeline through Gradio.
    """

    # Upper bound (in bytes) on the downloaded audio file; larger files are
    # rejected instead of being transcribed.
    MAX_AUDIO_BYTES = 30000000

    # Target language used when the UI dropdown was left empty.
    DEFAULT_LANGUAGE = "en"

    def __init__(self) -> None:
        """
        Initializes the VideoTranscriber class by loading the whisper model and downloading NLTK resources.
        """
        self.model = whisper.load_model("base")
        nltk.download("punkt")
        nltk.download("stopwords")

    @staticmethod
    def text_summarizer(text: str, num_sentences: int = 4) -> str:
        """
        Summarizes the given text using a simple algorithm based on word frequency.

        Declared as a static method so that it can be called both as
        ``self.text_summarizer(text)`` and ``VideoTranscriber.text_summarizer(text)``.

        Args:
            text (str): The input text to be summarized.
            num_sentences (int): Maximum number of sentences kept in the summary.

        Returns:
            str: The highest-scoring sentences joined by single spaces, best
                first. Empty when no sentence contains a scored word.
        """
        sentences = sent_tokenize(text)
        stop_words = set(stopwords.words("english"))

        # Count every meaningful word under its lower-cased form. The sentence
        # scoring below looks words up in lower case, so the keys must match.
        word_frequency = {}
        for word in word_tokenize(text):
            token = word.lower()
            if token in stop_words:
                continue
            # Skip pure punctuation so that commas and periods do not
            # dominate the sentence scores.
            if not any(ch.isalnum() for ch in token):
                continue
            word_frequency[token] = word_frequency.get(token, 0) + 1

        # A sentence's score is the summed frequency of the words it contains.
        sentence_scores = {}
        for sentence in sentences:
            for token in word_tokenize(sentence.lower()):
                if token in word_frequency:
                    sentence_scores[sentence] = (
                        sentence_scores.get(sentence, 0) + word_frequency[token]
                    )

        # Keep the best-scoring sentences; a negative limit is treated as zero.
        limit = max(num_sentences, 0)
        summary_sentences = sorted(
            sentence_scores, key=sentence_scores.get, reverse=True
        )[:limit]

        return " ".join(summary_sentences)

    def get_text(self, url: str, language: str) -> Tuple[str, str]:
        """
        Transcribes the audio of a YouTube video and translates the result to the specified language.

        Args:
            url (str): The URL of the YouTube video.
            language (str): The target language for translation. Falls back to
                ``DEFAULT_LANGUAGE`` when empty or None.

        Returns:
            Tuple[str, str]: A tuple containing the translated transcript and
                its summary. When the video cannot be processed (no URL, no
                audio stream, audio file too large) the first element is an
                explanatory message and the summary is empty.
        """
        if not url or not url.strip():
            return "Please enter a YouTube URL.", ""

        target_language = language or self.DEFAULT_LANGUAGE

        yt = YouTube(url.strip())
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            logging.error("No audio-only stream found for %s", url)
            return "No audio stream is available for this video.", ""

        out_file = audio_stream.download(output_path=".")
        try:
            # The byte size of the audio file is used as a proxy for the
            # video length when deciding whether to transcribe it.
            audio_bytes = os.stat(out_file).st_size
            logging.info(f'Size of audio file in Bytes: {audio_bytes}')

            if audio_bytes > self.MAX_AUDIO_BYTES:
                logging.error(
                    "Audio file of %d bytes exceeds the limit of %d bytes",
                    audio_bytes,
                    self.MAX_AUDIO_BYTES,
                )
                return (
                    "The audio track of this video is too large to transcribe.",
                    "",
                )

            result = self.model.transcribe(out_file)
        finally:
            # Never leave downloaded audio behind, whatever happened above.
            if os.path.exists(out_file):
                os.remove(out_file)

        text = result['text'].strip()

        # NOTE(review): the source language is fixed to English although the
        # transcript may be in another language - confirm whether the language
        # detected during transcription should be used instead.
        translator = EasyGoogleTranslate(
            source_language='en',
            target_language=target_language,
            timeout=10
        )
        translated = translator.translate(text)
        summary = self.text_summarizer(translated)
        return translated, summary

    def gradio_interface(self) -> None:
        """
        Sets up and launches the Gradio interface for YouTube video transcription.
        """
        with gr.Blocks(css="style.css", theme='freddyaboulton/test-blue') as demo:
            with gr.Column(elem_id="col-container"):
                gr.HTML("""<center><h1 style="color:#fff">YouTube Video Transcriber </h1></center>""")
                with gr.Row():
                    with gr.Column(scale=0.4):
                        language = gr.Dropdown(
                            ["en", "ta", "te", "hi", "ml"], label="Select Language"
                        )
                    with gr.Column(scale=0.6):
                        input_text_url = gr.Textbox(placeholder='Youtube video URL', label='YouTube URL', elem_classes="textbox")
                with gr.Row():
                    result_button_transcribe = gr.Button('Transcribe')

                with gr.Row():
                    output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript', lines=10)

                with gr.Row():
                    output_text_summary = gr.Textbox(placeholder='Summary of the YouTube video transcript.', label='Summary', lines=5)

                # Wire the button to the pipeline: (url, language) in,
                # (transcript, summary) out.
                result_button_transcribe.click(
                    self.get_text,
                    inputs=[input_text_url, language],
                    outputs=[output_text_transcribe, output_text_summary],
                )

        demo.launch(share=True)
139
+
140
if __name__ == "__main__":
    # Script entry point: build the transcriber and start the web UI.
    VideoTranscriber().gradio_interface()