Spaces:
Runtime error
Runtime error
David Li committed on
Commit ·
d903faf
1
Parent(s): 625eaf3
fix: try again
Browse files- app.py +141 -0
- requirements.txt +4 -0
app.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import whisper
|
| 2 |
+
import gradio as gr
|
| 3 |
+
import ffmpeg
|
| 4 |
+
import youtube_dl
|
| 5 |
+
import os
|
| 6 |
+
|
| 7 |
+
# Numeric format ids treated as "livestream" variants.
# NOTE(review): presumably YouTube itag values for HLS live streams -- confirm.
youtube_livestream_codes = [*range(91, 97), 300, 301]
|
| 17 |
+
# Numeric format ids accepted as regular (non-livestream) mp4 downloads;
# parse_metadata intersects the ids found in the metadata with this list.
youtube_mp4_codes = [298, 18, 22, 140, 133, 134]
|
| 25 |
+
|
| 26 |
+
import sys
|
| 27 |
+
|
| 28 |
+
def get_video_metadata(video_url: str = "https://www.youtube.com/watch?v=21X5lGlDOfg&ab_channel=NASA")-> dict:
    """
    Fetch the youtube-dl info dict for a video without downloading it.

    Prints the video title and uploader id, then returns the full info dict
    produced by ``YoutubeDL.extract_info(..., download=False)``.
    """
    ydl_options = {'outtmpl': '%(id)s.%(ext)s'}
    with youtube_dl.YoutubeDL(ydl_options) as downloader:
        metadata = downloader.extract_info(video_url, download=False)
    title = metadata.get('title')
    uploader = metadata.get('uploader_id')
    print(f"[youtube] {title}: {uploader}")
    return metadata
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def parse_metadata(metadata) -> dict:
    """
    Choose which format id to download from a youtube-dl info dict.

    After a video is done recording, it will have both the livestream format
    and the mp4 format; a regular mp4 format id (one listed in
    ``youtube_mp4_codes``) is preferred over a livestream id when available.

    Returns a dict with:
      - "selected_id": the chosen numeric format id, or None when neither an
        mp4 code nor a livestream code is present in the metadata.
      - "is_livestream": False when an mp4 code was chosen, True otherwise.
    """
    formats = metadata.get("formats") or []

    def numeric_ids(entries):
        # Format ids are strings; skip any that are not plain integers
        # instead of letting int() abort the whole lookup.
        ids = set()
        for entry in entries:
            try:
                ids.add(int(entry.get("format_id", 0)))
            except (TypeError, ValueError):
                continue
        return ids

    # filter for ext = mp4
    mp4_ids = numeric_ids(f for f in formats if f.get("ext", "") == "mp4")
    video_entries = sorted(mp4_ids.intersection(youtube_mp4_codes))
    if video_entries:
        # use video format id over livestream id if available
        return {
            "selected_id": video_entries[0],
            "is_livestream": False,
        }

    # No regular mp4 format: fall back to a livestream format id so that
    # "selected_id" is always defined (previously this path raised
    # UnboundLocalError).
    livestream_entries = sorted(
        numeric_ids(formats).intersection(youtube_livestream_codes)
    )
    return {
        "selected_id": livestream_entries[0] if livestream_entries else None,
        "is_livestream": True,
    }
|
| 61 |
+
|
| 62 |
+
def get_video(url: str, config: dict):
    """
    Record a media URL to a local file with ffmpeg.

    Args:
        url: Direct media URL passed to ffmpeg as the input.
        config: Options dict. "filename" is the output path (default
            "livestream01.mp4"); "end" is passed as the input ``t`` option,
            i.e. the duration to read from the input (default "00:15:00").
    """
    # could delay start time by a few seconds to just sync up and capture the full video length
    # but would need to time how long it takes to fetch the video using youtube-dl and other adjustments and start a bit before
    filename = config.get("filename", "livestream01.mp4")
    end = config.get("end", "00:15:00")
    (
        ffmpeg
        .input(url, t=end)
        .output(filename)
        # Overwrite an existing file: callers reuse a fixed output name, and
        # without this ffmpeg stops on the "file exists" prompt.
        .run(overwrite_output=True)
    )
|
| 78 |
+
|
| 79 |
+
def get_all_files(url: str, end: str = "00:15:00"):
    """
    Download the selected format of a video URL to "temp.mp4".

    Looks up the video metadata, picks a format id via ``parse_metadata``,
    and records that format's direct URL with ``get_video``.

    Args:
        url: Video page URL understood by youtube-dl.
        end: Duration to record, forwarded to ``get_video`` as "end".

    Returns:
        The local filename the video was written to ("temp.mp4").

    Raises:
        ValueError: If no format in the metadata matches the selected id, or
            the matching format has no download URL.
    """
    metadata = get_video_metadata(url)
    selected_id = parse_metadata(metadata).get("selected_id", 0)
    wanted_id = str(selected_id)
    formats = metadata.get("formats") or []

    # Take the first format whose id matches; fail with a clear message
    # instead of an IndexError on an empty list.
    selected_format = next(
        (f for f in formats if f.get("format_id", "") == wanted_id),
        None,
    )
    if selected_format is None:
        raise ValueError(f"No downloadable format with id {wanted_id!r} found for {url!r}")

    format_url = selected_format.get("url", "")
    if not format_url:
        raise ValueError(f"Format {wanted_id!r} for {url!r} has no download URL")

    filename = "temp.mp4"
    get_video(format_url, {"filename": filename, "end": end})
    return filename
|
| 89 |
+
|
| 90 |
+
def second_to_timecode(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timecode (``HH:MM:SS,mmm``)."""
    total_ms = max(0, int(round(float(seconds or 0) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _segments_to_srt(segments) -> str:
    """Render transcription segments (dicts with start/end/text) as SRT text."""
    lines = []
    # SRT cue numbers start at 1.
    for index, segment in enumerate(segments, start=1):
        lines.append(f"{index}")
        lines.append(
            f"{second_to_timecode(segment.get('start'))} --> "
            f"{second_to_timecode(segment.get('end'))}"
        )
        lines.append(segment.get("text", "").strip())
        lines.append('')
    return '\n'.join(lines)


def _burn_subtitles(input_file: str, srt_file: str, output_file: str):
    """Burn srt_file into input_file's video; return output_file, or None if there is no video stream."""
    streams = ffmpeg.probe(input_file).get("streams", [])
    stream_kinds = {stream.get("codec_type") for stream in streams}
    if "video" not in stream_kinds:
        # Audio-only input (e.g. an uploaded mp3): nothing to draw subtitles on.
        return None

    source = ffmpeg.input(input_file)
    # Equivalent of: ffmpeg -i input -vf subtitles=transcript.srt output
    outputs = [source.video.filter("subtitles", srt_file)]
    if "audio" in stream_kinds:
        # Filtering the video drops the implicit stream mapping, so the
        # audio has to be passed through explicitly.
        outputs.append(source.audio)
    ffmpeg.output(*outputs, output_file, vcodec="libx264").run(overwrite_output=True)
    return output_file


def get_text_from_mp3_whisper(inputType: str, mp3_file: str, url_path: str, taskName: str, srcLanguage: str) -> tuple:
    """
    Transcribe or translate media with Whisper and build SRT subtitles.

    Args:
        inputType: "url" to download ``url_path`` first; any other value
            uses ``mp3_file`` directly.
        mp3_file: Path to a local media file (used when inputType is not "url").
        url_path: Video URL to download (used when inputType is "url").
        taskName: Whisper task ("transcribe" or "translate"); defaults to
            "transcribe" when empty.
        srcLanguage: Source language passed to Whisper; when empty, no
            language is forced.

    Returns:
        A tuple ``(segments, srt_text, subtitled_video)`` where ``segments``
        is Whisper's segment list, ``srt_text`` is the transcript in SRT
        format (also written to "transcript.srt"), and ``subtitled_video`` is
        "subtitled.mp4" or None when the input has no video stream.
    """
    model = whisper.load_model("medium")
    transcribe_options = {
        "task": taskName or "transcribe",
        "language": srcLanguage or None,
    }

    # Resolve the media file once so both branches share the same variable.
    input_file = get_all_files(url_path) if inputType == "url" else mp3_file
    result = model.transcribe(input_file, **transcribe_options)

    segments = result.get("segments") or []
    words = _segments_to_srt(segments)

    # The subtitles filter reads from disk, so the SRT text must be written
    # out before ffmpeg runs; UTF-8 keeps non-ASCII transcripts intact.
    srt_file = "transcript.srt"
    with open(srt_file, "w", encoding="utf-8") as handle:
        handle.write(words)

    subtitled_video = _burn_subtitles(input_file, srt_file, "subtitled.mp4")
    return segments, words, subtitled_video
|
| 127 |
+
|
| 128 |
+
# Gradio UI wiring: the five inputs map positionally onto the parameters of
# get_text_from_mp3_whisper (inputType, mp3_file, url_path, taskName,
# srcLanguage); the three outputs map onto its returned tuple.
gr.Interface(
    title='Download Video From url and extract text from audio',
    fn=get_text_from_mp3_whisper,
    inputs=[
        gr.Dropdown(["url", "file"]),
        # Top-level components replace the removed gr.inputs.* namespace.
        gr.Audio(type="filepath"),
        gr.Textbox(),
        gr.Dropdown(["translate", "transcribe"]),
        gr.Dropdown(["Japanese", "English"]),
    ],
    outputs=[
        # "video" is the component shortcut for a video file; "mp4" is not one.
        "json", "text", "video"
    ],
    live=True,
).launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openai-whisper
|
| 2 |
+
youtube_dl
|
| 3 |
+
ffmpeg-python
|
| 4 |
+
gradio
|