David Li committed on
Commit
d903faf
·
1 Parent(s): 625eaf3

fix: try again

Browse files
Files changed (2) hide show
  1. app.py +141 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import whisper
2
+ import gradio as gr
3
+ import ffmpeg
4
+ import youtube_dl
5
+ import os
6
+
7
# Numeric YouTube format ids, compared against the ``format_id`` field of the
# format entries returned by youtube_dl.
# NOTE(review): presumably the ids YouTube exposes for live streams - confirm.
youtube_livestream_codes = [91, 92, 93, 94, 95, 96, 300, 301]
# Format ids accepted as regular (non-live) mp4 downloads.
youtube_mp4_codes = [298, 18, 22, 140, 133, 134]
25
+
26
+ import sys
27
+
28
def get_video_metadata(video_url: str = "https://www.youtube.com/watch?v=21X5lGlDOfg&ab_channel=NASA") -> dict:
    """Return the youtube_dl info dict for ``video_url`` without downloading it.

    The video title and uploader id are printed as a one-line progress note.
    """
    ydl_options = {'outtmpl': '%(id)s.%(ext)s'}
    with youtube_dl.YoutubeDL(ydl_options) as downloader:
        # download=False: only the metadata is extracted.
        metadata = downloader.extract_info(video_url, download=False)
        title = metadata.get('title', None)
        uploader = metadata.get('uploader_id', None)
        print(f"[youtube] {title}: {uploader}")
    return metadata
35
+
36
+
37
def parse_metadata(metadata, mp4_codes=None, livestream_codes=None) -> dict:
    """
    Pick the format id to download from a youtube_dl info dict.

    After a video is done recording it has both the livestream formats and
    the regular mp4 formats; a regular mp4 format is preferred when present.

    Args:
        metadata: info dict holding a ``"formats"`` list of dicts with
            ``"format_id"`` and ``"ext"`` entries.
        mp4_codes: accepted regular mp4 format ids; defaults to the
            module-level ``youtube_mp4_codes``.
        livestream_codes: accepted livestream format ids; defaults to the
            module-level ``youtube_livestream_codes``.

    Returns:
        ``{"selected_id": int | None, "is_livestream": bool}``.
        ``selected_id`` is the smallest matching id, or ``None`` when no
        known format is available. ``is_livestream`` is ``False`` only when
        a regular mp4 format was selected.
    """
    if mp4_codes is None:
        mp4_codes = youtube_mp4_codes
    if livestream_codes is None:
        livestream_codes = youtube_livestream_codes

    formats = metadata.get("formats") or []

    def numeric_ids(entries):
        # Format ids arrive as strings and are not always numeric; ids that
        # cannot be parsed can never match a numeric code, so skip them.
        ids = set()
        for entry in entries:
            try:
                ids.add(int(entry.get("format_id", "")))
            except (TypeError, ValueError):
                continue
        return ids

    mp4_ids = numeric_ids(f for f in formats if f.get("ext", "") == "mp4")
    video_entries = sorted(mp4_ids.intersection(mp4_codes))
    if video_entries:
        # use video format id over livestream id if available
        return {"selected_id": video_entries[0], "is_livestream": False}

    # No regular mp4 format: fall back to a livestream format so that
    # ``selected_id`` is always defined.
    livestream_entries = sorted(numeric_ids(formats).intersection(livestream_codes))
    selected_id = livestream_entries[0] if livestream_entries else None
    return {
        "selected_id": selected_id,
        "is_livestream": True,
    }
61
+
62
def get_video(url: str, config: dict):
    """
    Record the media at ``url`` into a local file using ffmpeg.

    Args:
        url: direct media URL handed to ffmpeg as its input.
        config: options dict; ``"filename"`` is the output path (default
            ``"livestream01.mp4"``) and ``"end"`` is passed to ffmpeg as the
            input ``t`` option (default ``"00:15:00"``).
    """
    # could delay start time by a few seconds to just sync up and capture the full video length
    # but would need to time how long it takes to fetch the video using youtube-dl and other adjustments and start a bit before
    filename = config.get("filename", "livestream01.mp4")
    end = config.get("end", "00:15:00")
    (
        ffmpeg
        .input(url, t=end)
        .output(filename)
        # Without this ffmpeg refuses to replace an existing output file, so
        # every run after the first one to the same filename would fail.
        .overwrite_output()
        .run()
    )
78
+
79
def get_all_files(url: str, end: str = "00:15:00"):
    """
    Download the selected format of the video at ``url`` to ``temp.mp4``.

    Args:
        url: page URL of the video, passed to ``get_video_metadata``.
        end: value forwarded to ``get_video`` as the ``"end"`` option.

    Returns:
        The name of the written file (``"temp.mp4"``).

    Raises:
        ValueError: if no usable format id is selected, the selected id is
            not among the video's formats, or that format has no URL.
    """
    metadata = get_video_metadata(url)
    selected_id = parse_metadata(metadata).get("selected_id")
    if selected_id is None:
        raise ValueError(f"no supported format found for {url!r}")

    # format ids are strings in the metadata, the selected id is numeric
    wanted = str(selected_id)
    selected_format = next(
        (f for f in metadata.get("formats", []) if f.get("format_id", "") == wanted),
        None,
    )
    if selected_format is None:
        raise ValueError(f"format {wanted} is not available for {url!r}")

    format_url = selected_format.get("url", "")
    if not format_url:
        raise ValueError(f"format {wanted} of {url!r} has no download url")

    filename = "temp.mp4"
    get_video(format_url, {"filename": filename, "end": end})
    return filename
89
+
90
# Loaded Whisper models keyed by model name, so repeated requests reuse the
# model instead of loading it again on every call.
_WHISPER_MODELS = {}


def second_to_timecode(x: float) -> str:
    """Format a time offset in seconds as an SRT timecode (``HH:MM:SS,mmm``)."""
    total_ms = max(0, round(float(x) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"


def _load_whisper_model(name: str):
    """Return the Whisper model called ``name``, loading it on first use."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def _segments_to_srt(segments) -> str:
    """Render transcription segments as SRT text (cue numbers start at 1)."""
    lines = []
    for index, segment in enumerate(segments, start=1):
        start = second_to_timecode(segment.get("start", 0))
        end = second_to_timecode(segment.get("end", 0))
        lines.extend((str(index), f"{start} --> {end}", segment.get("text", "").strip(), ""))
    return "\n".join(lines)


def _burn_subtitles(input_file: str, srt_path: str, output_path: str):
    """Render ``srt_path`` onto ``input_file``; return ``output_path``, or None if the input has no video stream."""
    stream_types = {s.get("codec_type") for s in ffmpeg.probe(input_file).get("streams", [])}
    if "video" not in stream_types:
        # The subtitles filter needs a video stream to draw on.
        return None
    source = ffmpeg.input(input_file)
    outputs = [source.video.filter("subtitles", srt_path)]
    if "audio" in stream_types:
        # Filtering only the video would drop the audio unless it is mapped too.
        outputs.append(source.audio)
    ffmpeg.output(*outputs, output_path, vcodec="libx264").overwrite_output().run()
    return output_path


def get_text_from_mp3_whisper(inputType: str, mp3_file: str, url_path: str, taskName: str, srcLanguage: str) -> tuple:
    """
    Transcribe or translate media with Whisper and burn the result in as subtitles.

    Args:
        inputType: ``"url"`` to download the media via ``get_all_files``;
            any other value uses ``mp3_file``.
        mp3_file: path of a local media file (used when ``inputType`` is not
            ``"url"``).
        url_path: video page URL (used when ``inputType`` is ``"url"``).
        taskName: Whisper task; an empty value falls back to ``"transcribe"``.
        srcLanguage: language passed to Whisper; an empty value is passed as
            ``None``.

    Returns:
        ``(segments, srt_text, video_path)``: the Whisper segments, the SRT
        text (also written to ``transcript.srt``), and ``"subtitled.mp4"``,
        or ``None`` for the video when the input has no video stream.

    Raises:
        ValueError: if the input selected by ``inputType`` is missing.
    """
    if inputType == "url":
        if not url_path:
            raise ValueError("a video URL is required when the input type is 'url'")
        input_file = get_all_files(url_path)
    else:
        if not mp3_file:
            raise ValueError("a media file is required when the input type is not 'url'")
        input_file = mp3_file

    model = _load_whisper_model("medium")
    result = model.transcribe(
        input_file,
        task=taskName or "transcribe",
        language=srcLanguage or None,
    )
    segments = result.get("segments") or []
    words = _segments_to_srt(segments)

    # ffmpeg reads the subtitles from disk, so the SRT text has to be written
    # out before rendering.
    srt_path = "transcript.srt"
    with open(srt_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(words)

    output_video = _burn_subtitles(input_file, srt_path, "subtitled.mp4")
    return segments, words, output_video
127
+
128
# Gradio UI. The inputs are passed positionally to get_text_from_mp3_whisper:
# input type, media file path, video URL, Whisper task, source language.
demo = gr.Interface(
    title='Download Video From url and extract text from audio',
    fn=get_text_from_mp3_whisper,
    inputs=[
        gr.Dropdown(["url", "file"]),
        # Top-level components, matching gr.Dropdown, instead of the
        # deprecated gr.inputs namespace.
        gr.Audio(type="filepath"),
        gr.Textbox(),
        gr.Dropdown(["translate", "transcribe"]),
        gr.Dropdown(["Japanese", "English"]),
    ],
    # "video" is the component shortcut for the rendered file; "mp4" is not
    # a component name.
    outputs=["json", "text", "video"],
    # NOTE(review): live=True re-runs the function on every input change,
    # which is expensive with a Whisper model - confirm this is intended.
    live=True,
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai-whisper
2
+ youtube_dl
3
+ ffmpeg-python
4
+ gradio