| import os |
| import random |
| import string |
| import gradio as gr |
| import yt_dlp as ydlp |
| from openai import OpenAI |
| import re |
|
|
|
|
# Server-side OpenAI API key, used when the caller does not supply their own.
OPEN_AI_KEY = os.environ.get("OPEN_AI_KEY")
# Shared password gating use of the server-side key (checked by verify_password).
PASSWORD = os.environ.get("PASSWORD_SECRET")
|
|
def verify_password(input_password, correct_password):
    """Check a submitted password against the expected one.

    Args:
        input_password: Password supplied by the user.
        correct_password: Expected password value.

    Returns:
        bool: True when the passwords match.

    Raises:
        gr.Error: On mismatch; surfaced to the user in the Gradio UI.
    """
    if input_password != correct_password:
        raise gr.Error("密碼錯誤")
    return True
|
|
def ms_to_srt_time(ms):
    """Convert a duration in milliseconds to an SRT timestamp.

    Args:
        ms (int): Non-negative duration in milliseconds.

    Returns:
        str: Timestamp formatted as ``HH:MM:SS,mmm``.
    """
    # Renamed locals: the original shadowed the builtin ``min``.
    seconds, millis = divmod(ms, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
|
|
| |
def random_filename(length=10):
    """Generate a random lowercase ASCII string usable as a file name stem.

    Args:
        length (int): Number of characters to generate (default 10).

    Returns:
        str: ``length`` random lowercase letters.
    """
    return ''.join(random.choices(string.ascii_lowercase, k=length))
|
|
def get_video_duration(url):
    """Fetch the duration of a YouTube video in seconds without downloading.

    Args:
        url (str): Full YouTube watch URL.

    Returns:
        int: Duration in seconds; 0 when yt-dlp reports none.
    """
    opts = {
        'quiet': True,
        'no_warnings': True,
        'forcetitle': True,
        'format': 'bestaudio/best',
        'skip_download': True,
    }
    # Metadata-only extraction: download=False keeps this to one info request.
    with ydlp.YoutubeDL(opts) as downloader:
        info = downloader.extract_info(url, download=False)
    return info.get('duration', 0)
|
|
def process_video(yt_id_or_url, openAI_key=None, password_secret=None):
    """Transcribe a YouTube video and derive study material from it.

    Downloads the audio with yt-dlp, slices it on silence, transcribes each
    slice with Whisper, then asks GPT for a merged paragraph transcript, a
    summary, and a Markdown mind map.

    Args:
        yt_id_or_url (str): YouTube video id or full watch URL.
        openAI_key (str | None): Caller-supplied OpenAI API key. When omitted,
            the server-side key is used after password verification.
        password_secret (str | None): Password compared against the PASSWORD
            env secret when no API key is supplied.

    Returns:
        tuple: (srt_filename, srt_content, large_scope_srt, summary,
        mind_map). For over-length videos the file slot is None, the SRT slot
        carries an error message, and the remaining slots are empty strings.

    Raises:
        gr.Error: When password verification fails.
    """
    # Accept either a bare video id or a full URL containing "v=".
    yt_id_match = re.search(r"(?<=v=)[a-zA-Z0-9_-]+", yt_id_or_url)
    yt_id = yt_id_match.group(0) if yt_id_match else yt_id_or_url

    # Without a user-supplied key, gate usage of the server-side key.
    if not openAI_key:
        verify_password(password_secret, PASSWORD)

    url = f"https://www.youtube.com/watch?v={yt_id}"

    # Refuse videos longer than one hour to bound cost and runtime.
    video_duration = get_video_duration(url)
    if video_duration > 3600:
        # BUGFIX: must return five values to match the five Gradio outputs;
        # the original returned only two, which errored in the UI binding.
        return None, "影片超過 60 分鐘.", "", "", ""

    file_name = random_filename()
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
        'outtmpl': file_name,
    }

    with ydlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    print("=========# 抓音檔切片==========")

    # Imported lazily: pydub is only needed once a download has succeeded.
    from pydub.silence import detect_nonsilent
    from pydub import AudioSegment

    audio = AudioSegment.from_wav(file_name + ".wav")

    # Silence-based segmentation so each Whisper request carries one utterance.
    nonsilent_ranges = detect_nonsilent(audio, min_silence_len=200, silence_thresh=-40)

    def merge_short_ranges(ranges, min_duration=1500, max_duration=3000):
        """Merge consecutive short ranges into the previous one.

        A range shorter than min_duration is folded into its predecessor as
        long as the merged span does not exceed max_duration.

        Args:
            ranges (List[Tuple[int, int]]): List of (start_ms, end_ms) pairs.
            min_duration (int): Ranges shorter than this are merge candidates.
            max_duration (int): Upper bound on a merged range's length.

        Returns:
            List[Tuple[int, int]]: Merged list of (start_ms, end_ms) pairs.
        """
        merged_ranges = []
        for start, end in ranges:
            if merged_ranges:
                prev_start, prev_end = merged_ranges[-1]
                if end - start < min_duration and (end - prev_start) <= max_duration:
                    merged_ranges[-1] = (prev_start, end)
                else:
                    merged_ranges.append((start, end))
            else:
                merged_ranges.append((start, end))
        return merged_ranges

    def filter_short_ranges(ranges, min_duration=100):
        """Drop ranges shorter than min_duration milliseconds.

        Args:
            ranges (List[Tuple[int, int]]): List of (start_ms, end_ms) pairs.
            min_duration (int): Minimum length for a range to be kept.

        Returns:
            List[Tuple[int, int]]: Filtered list of (start_ms, end_ms) pairs.
        """
        return [r for r in ranges if (r[1] - r[0]) >= min_duration]

    nonsilent_ranges = merge_short_ranges(nonsilent_ranges)
    nonsilent_ranges = filter_short_ranges(nonsilent_ranges)

    print(nonsilent_ranges)

    # BUGFIX: fall back to the server-side key when the caller supplied none
    # (the password check above has already passed in that case). The
    # original passed openAI_key directly, which is None on the password path.
    client = OpenAI(api_key=openAI_key or OPEN_AI_KEY)

    srt_content = ""
    counter = 1

    # Transcribe each non-silent chunk and accumulate numbered SRT entries.
    for start, end in nonsilent_ranges:
        chunk = audio[start:end]
        chunk.export("temp_chunk.wav", format="wav")

        with open("temp_chunk.wav", "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
                prompt="if chinese, please use zh-TW"
            )

        srt_content += f"{counter}\n"
        srt_content += f"{ms_to_srt_time(start)} --> {ms_to_srt_time(end)}\n"
        srt_content += f"{transcript}\n\n"
        counter += 1

    print(srt_content)

    srt_filename = "output_" + random_filename() + ".txt"
    with open(srt_filename, 'w', encoding="utf-8") as f:
        f.write(srt_content)

    # Ask GPT to merge the fine-grained subtitles into ~20-30s paragraphs.
    large_scope_srt_request_payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {
                "role": "user",
                "content": f"""
這是一個很細的逐字稿
我希望可以將這些字幕合併成一個完整的段落
最好一段大約 20 - 30 秒
字句可以訂正錯字或是錯誤的詞(例如 You, 請訂閱頻道 等)
但不要大幅度刪減
請用中文 zh-TW
不需要覆述規則,不需要幫我總結
直接給我大範圍逐字稿文字

大範圍逐字稿文字格式是:
頭部的 「分:秒」 - 尾部的 「分:秒」 (主題)
逐字稿的片段整合

輸出請省略 小時 跟 毫秒
一段大約 20 - 30 秒
------------------------
{srt_content}
"""
            }
        ]
    }

    large_scope_srt_response = client.chat.completions.create(**large_scope_srt_request_payload)
    large_scope_srt = large_scope_srt_response.choices[0].message.content.strip()

    print("=========# 生成大範圍逐字稿==========")
    print(large_scope_srt)

    # Ask GPT for a structured summary with per-segment highlights.
    summary_content = f"""
請根據 {srt_content},判斷這份文本
請提估影片內容,告訴學生這部影片的意義,
小範圍切出不同段落的相對應時間軸的重點摘要,最多不超過五段
注意不要遺漏任何一段時間軸的內容
格式為 【start - end】: 摘要
以及可能的結論與結尾延伸小問題提供學生作反思

整體格式為:
🗂️ 1. 內容類型:?
📚 2. 整體摘要
🔖 3. 條列式重點
🔑 4. 關鍵時刻(段落摘要)
💡 5. 結論反思(為什麼我們要學這個?)
❓ 6. 延伸小問題
"""
    summary_request_payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {
                "role": "user",
                "content": summary_content
            }
        ]
    }
    summary_response = client.chat.completions.create(**summary_request_payload)
    summary = summary_response.choices[0].message.content.strip()

    print("=========# 生成摘要==========")
    print(summary)

    # Ask GPT for a Markdown mind map of the transcript.
    mind_map_content = f"""
{srt_content} \n 請根據以上逐字稿,生出心智圖的 markdown,請用中文(zh-tw),大標題用 #
次標題用 ##
內容用 - 分段
如果內容分段太多,請用 ### 做小節

注意:不需要前後文敘述,直接給出 markdown 文本即可,這對我很重要
"""
    mind_map_request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": [
            {
                "role": "user",
                "content": mind_map_content
            }
        ]
    }

    mind_map_response = client.chat.completions.create(**mind_map_request_payload)
    mind_map = mind_map_response.choices[0].message.content.strip()
    print("=========# 生成思維導圖==========")
    print(mind_map)

    # Best-effort cleanup of intermediate audio files (the original leaked
    # both the downloaded wav and the reusable chunk buffer).
    for temp_path in ("temp_chunk.wav", file_name + ".wav"):
        try:
            os.remove(temp_path)
        except OSError:
            pass

    return (srt_filename, srt_content, large_scope_srt, summary, mind_map)
|
|
|
|
|
|
|
|
# Gradio UI: one row of inputs, one row of outputs, and a submit button
# wired to process_video. Component creation order defines the layout.
with gr.Blocks() as demo:
    with gr.Row():
        url_input = gr.Textbox(label="YouTube Video ID")
        key_input = gr.Textbox(label="OpenAI Key (optional)")
        password_input = gr.Textbox(label="Password Secret (optional)")
    with gr.Row():
        srt_file_output = gr.File(label="Download SRT")
        srt_text_output = gr.Textbox(label="SRT Content", show_copy_button=True)
        paragraph_output = gr.Textbox(label="Large Scope SRT", show_copy_button=True)
        summary_output = gr.Textbox(label="Video Summary", show_copy_button=True)
        mind_map_output = gr.Textbox(label="Mind Map", show_copy_button=True)

    gr.Markdown("Generate SRT, Summary and Mind Map from YouTube video(限額 60 min)")
    gr.Markdown("YouTube to SRT, Summary & Mind Map")
    process_button = gr.Button("Process Video")
    process_button.click(
        fn=process_video,
        inputs=[url_input, key_input, password_input],
        outputs=[srt_file_output, srt_text_output, paragraph_output,
                 summary_output, mind_map_output]
    )


demo.launch()
|
|