# YT_SRT / app.py
# (Hugging Face Space file-view residue preserved as a comment:
#  author "youngtsai", commit "verify_password(password_secret, correct_password)",
#  revision 0d03f45)
import os
import random
import string
import gradio as gr
import yt_dlp as ydlp
from openai import OpenAI
import re
OPEN_AI_KEY = os.environ.get("OPEN_AI_KEY") # 從環境變量中獲取默認的OpenAI key
PASSWORD = os.environ.get("PASSWORD_SECRET")
def verify_password(input_password, correct_password):
if input_password == correct_password:
return True
else:
raise gr.Error("密碼錯誤")
def ms_to_srt_time(ms):
sec, ms = divmod(ms, 1000)
min, sec = divmod(sec, 60)
hr, min = divmod(min, 60)
return f"{hr:02}:{min:02}:{sec:02},{ms:03}"
# 產生隨機文件名
def random_filename(length=10):
letters = string.ascii_lowercase
result_str = ''.join(random.choice(letters) for i in range(length))
return result_str
def get_video_duration(url):
ydl_opts = {
'quiet': True,
'no_warnings': True,
'forcetitle': True,
'format': 'bestaudio/best',
'skip_download': True
}
with ydlp.YoutubeDL(ydl_opts) as ydl:
info_dict = ydl.extract_info(url, download=False)
return info_dict.get('duration', 0)
def process_video(yt_id_or_url, openAI_key=None, password_secret=None):
# Extract the yt_id from the URL if a full URL is given
yt_id_match = re.search(r"(?<=v=)[a-zA-Z0-9_-]+", yt_id_or_url)
yt_id = yt_id_match.group(0) if yt_id_match else yt_id_or_url
# Check if openAI_key is provided or validate using secret
if not openAI_key:
correct_password = PASSWORD
verify_password(password_secret, correct_password)
# yt_id = "90BAlvlLvE0"
url = f"https://www.youtube.com/watch?v={yt_id}"
# Check the video duration
video_duration = get_video_duration(url)
if video_duration > 3600: # 10 minutes in seconds
return None, "影片超過 60 分鐘." # Modified this line to show the error on Gradio
file_name = random_filename()
ydl_opts = {
'format': 'bestaudio/best',
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'wav',
'preferredquality': '192', # 儘管此設置對.wav格式可能不太重要,但仍然可以根據需要進行調整
}],
'outtmpl': file_name, # 設置輸出文件名
}
with ydlp.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
print("=========# 抓音檔切片==========")
# 抓音檔切片
from pydub.silence import detect_nonsilent
from pydub import AudioSegment
# 讀取音頻文件
audio = AudioSegment.from_wav(file_name + ".wav")
# 先找到所有非靜音片段的開始和結束時間
nonsilent_ranges = detect_nonsilent(audio, min_silence_len=200, silence_thresh=-40)
def merge_short_ranges(ranges, min_duration=1500, max_duration=3000):
"""
Merge consecutive short durations into the previous range if merging doesn't exceed max_duration.
Args:
ranges (List[Tuple[int, int]]): List of start and end times.
min_duration (int): Minimum duration for a range to be considered valid.
max_duration (int): Maximum duration for a merged range.
Returns:
List[Tuple[int, int]]: Modified list of start and end times.
"""
merged_ranges = []
for start, end in ranges:
if merged_ranges:
prev_start, prev_end = merged_ranges[-1]
# Check if current range is short and if merging doesn't exceed max_duration
if end - start < min_duration and (end - prev_start) <= max_duration:
# Modify the end time of the last range in the list
merged_ranges[-1] = (prev_start, end)
else:
merged_ranges.append((start, end))
else:
merged_ranges.append((start, end))
return merged_ranges
def filter_short_ranges(ranges, min_duration=100): # 0.1秒等於100毫秒
"""
Filter out short durations.
Args:
ranges (List[Tuple[int, int]]): List of start and end times.
min_duration (int): Minimum duration for a range to be considered valid.
Returns:
List[Tuple[int, int]]: Filtered list of start and end times.
"""
return [r for r in ranges if (r[1] - r[0]) >= min_duration]
nonsilent_ranges = merge_short_ranges(nonsilent_ranges)
nonsilent_ranges = filter_short_ranges(nonsilent_ranges)
print(nonsilent_ranges)
# Initialize OpenAI API client
client = OpenAI(api_key = openAI_key)
srt_content = ""
counter = 1
for start, end in nonsilent_ranges:
chunk = audio[start:end]
chunk.export("temp_chunk.wav", format="wav")
with open("temp_chunk.wav", "rb") as audio_file:
transcript = client.audio.transcriptions.create(
model="whisper-1",
file=audio_file,
response_format="text",
prompt="if chinese, please use zh-TW"
)
srt_content += f"{counter}\n"
srt_content += f"{ms_to_srt_time(start)} --> {ms_to_srt_time(end)}\n"
srt_content += f"{transcript}\n\n"
counter += 1
# 列印SRT
print(srt_content)
# Save the SRT content to a file
srt_filename = "output_" + random_filename() + ".txt"
with open(srt_filename, 'w', encoding="utf-8") as f:
f.write(srt_content)
# Generate large SRT by open AI
large_scope_srt_request_payload = {
"model": "gpt-4-turbo-preview",
"messages": [
{
"role": "user",
"content": f"""
這是一個很細的逐字稿
我希望可以將這些字幕合併成一個完整的段落
最好一段大約 20 - 30 秒
字句可以訂正錯字或是錯誤的詞(例如 You, 請訂閱頻道 等)
但不要大幅度刪減
請用中文 zh-TW
不需要覆述規則,不需要幫我總結
直接給我大範圍逐字稿文字
大範圍逐字稿文字格式是:
頭部的 「分:秒」 - 尾部的 「分:秒」 (主題)
逐字稿的片段整合
輸出請省略 小時 跟 毫秒
一段大約 20 - 30 秒
------------------------
{srt_content}
"""
}
]
}
large_scope_srt_response = client.chat.completions.create(**large_scope_srt_request_payload)
large_scope_srt = large_scope_srt_response.choices[0].message.content.strip()
print("=========# 生成大範圍逐字稿==========")
print(large_scope_srt)
# Extract the text from the SRT content without the timeline
srt_lines = srt_content.split("\n")
transcript_without_time = "\n".join([line for line in srt_lines if not re.match(r"(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3})", line) and not line.isdigit() and line.strip()])
# Generate summary using OpenAI chat-based approach
summary_content = f"""
請根據 {srt_content},判斷這份文本
請提估影片內容,告訴學生這部影片的意義,
小範圍切出不同段落的相對應時間軸的重點摘要,最多不超過五段
注意不要遺漏任何一段時間軸的內容
格式為 【start - end】: 摘要
以及可能的結論與結尾延伸小問題提供學生作反思
整體格式為:
🗂️ 1. 內容類型:?
📚 2. 整體摘要
🔖 3. 條列式重點
🔑 4. 關鍵時刻(段落摘要)
💡 5. 結論反思(為什麼我們要學這個?)
❓ 6. 延伸小問題
"""
summary_request_payload = {
"model": "gpt-4-turbo-preview",
"messages": [
{
"role": "user",
"content": summary_content
}
]
}
summary_response = client.chat.completions.create(**summary_request_payload)
summary = summary_response.choices[0].message.content.strip()
print("=========# 生成摘要==========")
print(summary)
# Generate mind map in markdown format using OpenAI chat-based approach
mind_map_content = f"""
{srt_content} \n 請根據以上逐字稿,生出心智圖的 markdown,請用中文(zh-tw),大標題用 #
次標題用 ##
內容用 - 分段
如果內容分段太多,請用 ### 做小節
注意:不需要前後文敘述,直接給出 markdown 文本即可,這對我很重要
"""
mind_map_request_payload = {
"model": "gpt-4-1106-preview",
"messages": [
{
"role": "user",
"content": mind_map_content
}
]
}
mind_map_response = client.chat.completions.create(**mind_map_request_payload)
mind_map = mind_map_response.choices[0].message.content.strip()
print("=========# 生成思維導圖==========")
print(mind_map)
return (srt_filename, srt_content, large_scope_srt, summary, mind_map)
with gr.Blocks() as demo:
with gr.Row():
video_id = gr.Textbox(label="YouTube Video ID")
openai_key = gr.Textbox(label="OpenAI Key (optional)")
password_secret = gr.Textbox(label="Password Secret (optional)")
with gr.Row():
download_srt = gr.File(label="Download SRT")
srt_content = gr.Textbox(label="SRT Content", show_copy_button=True)
large_scope_srt = gr.Textbox(label="Large Scope SRT", show_copy_button=True)
video_summary = gr.Textbox(label="Video Summary", show_copy_button=True)
mind_map = gr.Textbox(label="Mind Map", show_copy_button=True)
gr.Markdown("Generate SRT, Summary and Mind Map from YouTube video(限額 60 min)")
gr.Markdown("YouTube to SRT, Summary & Mind Map")
submit_btn = gr.Button("Process Video")
submit_btn.click(
fn=process_video,
inputs=[video_id, openai_key, password_secret],
outputs=[download_srt, srt_content, large_scope_srt, video_summary, mind_map]
)
demo.launch()