| import os |
| import random |
| import string |
| import gradio as gr |
| import yt_dlp as ydlp |
| from openai import OpenAI |
| import re |
|
|
|
|
# Server-side OpenAI API key, used when the caller does not supply their own.
OPEN_AI_KEY = os.environ.get("OPEN_AI_KEY")
# Shared password gating use of the server-side key (checked by verify_password).
PASSWORD = os.environ.get("PASSWORD_SECRET")
|
|
def verify_password(input_password, correct_password):
    """Check a submitted password against the expected one.

    Args:
        input_password: Password supplied by the user.
        correct_password: Expected password value.

    Returns:
        bool: True when the passwords match.

    Raises:
        gr.Error: On mismatch; surfaced to the user in the Gradio UI.
    """
    if input_password != correct_password:
        raise gr.Error("密碼錯誤")
    return True
|
|
def ms_to_srt_time(ms):
    """Convert a duration in milliseconds to an SRT timestamp.

    Args:
        ms (int): Non-negative duration in milliseconds.

    Returns:
        str: Timestamp formatted as ``HH:MM:SS,mmm``.
    """
    # Renamed locals: the original shadowed the builtin ``min``.
    seconds, millis = divmod(ms, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"
|
|
| |
def random_filename(length=10):
    """Generate a random lowercase ASCII string usable as a file name stem.

    Args:
        length (int): Number of characters to generate (default 10).

    Returns:
        str: ``length`` random lowercase letters.
    """
    return ''.join(random.choices(string.ascii_lowercase, k=length))
|
|
def get_video_duration(url):
    """Fetch the duration of a YouTube video in seconds without downloading.

    Args:
        url (str): Full YouTube watch URL.

    Returns:
        int: Duration in seconds; 0 when yt-dlp reports none.
    """
    opts = {
        'quiet': True,
        'no_warnings': True,
        'forcetitle': True,
        'format': 'bestaudio/best',
        'skip_download': True,
    }
    # Metadata-only extraction: download=False keeps this to one info request.
    with ydlp.YoutubeDL(opts) as downloader:
        info = downloader.extract_info(url, download=False)
    return info.get('duration', 0)
|
|
def process_video(yt_id_or_url, openAI_key=None, password_secret=None):
    """Transcribe a YouTube video and derive study material from it.

    Downloads the audio with yt-dlp, slices it on silence, transcribes each
    slice with Whisper, then asks GPT for a merged paragraph transcript, a
    summary, and a Markdown mind map.

    Args:
        yt_id_or_url (str): YouTube video id or full watch URL.
        openAI_key (str | None): Caller-supplied OpenAI API key. When omitted,
            the server-side key is used after password verification.
        password_secret (str | None): Password compared against the PASSWORD
            env secret when no API key is supplied.

    Returns:
        tuple: (srt_filename, srt_content, large_scope_srt, summary,
        mind_map). For over-length videos the file slot is None, the SRT slot
        carries an error message, and the remaining slots are empty strings.

    Raises:
        gr.Error: When password verification fails.
    """
    # Accept either a bare video id or a full URL containing "v=".
    yt_id_match = re.search(r"(?<=v=)[a-zA-Z0-9_-]+", yt_id_or_url)
    yt_id = yt_id_match.group(0) if yt_id_match else yt_id_or_url

    # Without a user-supplied key, gate usage of the server-side key.
    if not openAI_key:
        verify_password(password_secret, PASSWORD)

    url = f"https://www.youtube.com/watch?v={yt_id}"

    # Refuse videos longer than one hour to bound cost and runtime.
    video_duration = get_video_duration(url)
    if video_duration > 3600:
        # BUGFIX: must return five values to match the five Gradio outputs;
        # the original returned only two, which errored in the UI binding.
        return None, "影片超過 60 分鐘.", "", "", ""

    file_name = random_filename()
    ydl_opts = {
        'format': 'bestaudio/best',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
        'outtmpl': file_name,
    }

    with ydlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    print("=========# 抓音檔切片==========")

    # Imported lazily: pydub is only needed once a download has succeeded.
    from pydub.silence import detect_nonsilent
    from pydub import AudioSegment

    audio = AudioSegment.from_wav(file_name + ".wav")

    # Silence-based segmentation so each Whisper request carries one utterance.
    nonsilent_ranges = detect_nonsilent(audio, min_silence_len=200, silence_thresh=-40)

    def merge_short_ranges(ranges, min_duration=1500, max_duration=3000):
        """Merge consecutive short ranges into the previous one.

        A range shorter than min_duration is folded into its predecessor as
        long as the merged span does not exceed max_duration.

        Args:
            ranges (List[Tuple[int, int]]): List of (start_ms, end_ms) pairs.
            min_duration (int): Ranges shorter than this are merge candidates.
            max_duration (int): Upper bound on a merged range's length.

        Returns:
            List[Tuple[int, int]]: Merged list of (start_ms, end_ms) pairs.
        """
        merged_ranges = []
        for start, end in ranges:
            if merged_ranges:
                prev_start, prev_end = merged_ranges[-1]
                if end - start < min_duration and (end - prev_start) <= max_duration:
                    merged_ranges[-1] = (prev_start, end)
                else:
                    merged_ranges.append((start, end))
            else:
                merged_ranges.append((start, end))
        return merged_ranges

    def filter_short_ranges(ranges, min_duration=100):
        """Drop ranges shorter than min_duration milliseconds.

        Args:
            ranges (List[Tuple[int, int]]): List of (start_ms, end_ms) pairs.
            min_duration (int): Minimum length for a range to be kept.

        Returns:
            List[Tuple[int, int]]: Filtered list of (start_ms, end_ms) pairs.
        """
        return [r for r in ranges if (r[1] - r[0]) >= min_duration]

    nonsilent_ranges = merge_short_ranges(nonsilent_ranges)
    nonsilent_ranges = filter_short_ranges(nonsilent_ranges)

    print(nonsilent_ranges)

    # BUGFIX: fall back to the server-side key when the caller supplied none
    # (the password check above has already passed in that case). The
    # original passed openAI_key directly, which is None on the password path.
    client = OpenAI(api_key=openAI_key or OPEN_AI_KEY)

    srt_content = ""
    counter = 1

    # Transcribe each non-silent chunk and accumulate numbered SRT entries.
    for start, end in nonsilent_ranges:
        chunk = audio[start:end]
        chunk.export("temp_chunk.wav", format="wav")

        with open("temp_chunk.wav", "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
                prompt="if chinese, please use zh-TW"
            )

        srt_content += f"{counter}\n"
        srt_content += f"{ms_to_srt_time(start)} --> {ms_to_srt_time(end)}\n"
        srt_content += f"{transcript}\n\n"
        counter += 1

    print(srt_content)

    srt_filename = "output_" + random_filename() + ".txt"
    with open(srt_filename, 'w', encoding="utf-8") as f:
        f.write(srt_content)

    # Ask GPT to merge the fine-grained subtitles into ~20-30s paragraphs.
    large_scope_srt_request_payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {
                "role": "user",
                "content": f"""
這是一個很細的逐字稿
我希望可以將這些字幕合併成一個完整的段落
最好一段大約 20 - 30 秒
字句可以訂正錯字或是錯誤的詞(例如 You, 請訂閱頻道 等)
但不要大幅度刪減
請用中文 zh-TW
不需要覆述規則,不需要幫我總結
直接給我大範圍逐字稿文字

大範圍逐字稿文字格式是:
頭部的 「分:秒」 - 尾部的 「分:秒」 (主題)
逐字稿的片段整合

輸出請省略 小時 跟 毫秒
一段大約 20 - 30 秒
------------------------
{srt_content}
"""
            }
        ]
    }

    large_scope_srt_response = client.chat.completions.create(**large_scope_srt_request_payload)
    large_scope_srt = large_scope_srt_response.choices[0].message.content.strip()

    print("=========# 生成大範圍逐字稿==========")
    print(large_scope_srt)

    # Ask GPT for a structured summary with per-segment highlights.
    summary_content = f"""
請根據 {srt_content},判斷這份文本
請提估影片內容,告訴學生這部影片的意義,
小範圍切出不同段落的相對應時間軸的重點摘要,最多不超過五段
注意不要遺漏任何一段時間軸的內容
格式為 【start - end】: 摘要
以及可能的結論與結尾延伸小問題提供學生作反思

整體格式為:
🗂️ 1. 內容類型:?
📚 2. 整體摘要
🔖 3. 條列式重點
🔑 4. 關鍵時刻(段落摘要)
💡 5. 結論反思(為什麼我們要學這個?)
❓ 6. 延伸小問題
"""
    summary_request_payload = {
        "model": "gpt-4-turbo-preview",
        "messages": [
            {
                "role": "user",
                "content": summary_content
            }
        ]
    }
    summary_response = client.chat.completions.create(**summary_request_payload)
    summary = summary_response.choices[0].message.content.strip()

    print("=========# 生成摘要==========")
    print(summary)

    # Ask GPT for a Markdown mind map of the transcript.
    mind_map_content = f"""
{srt_content} \n 請根據以上逐字稿,生出心智圖的 markdown,請用中文(zh-tw),大標題用 #
次標題用 ##
內容用 - 分段
如果內容分段太多,請用 ### 做小節

注意:不需要前後文敘述,直接給出 markdown 文本即可,這對我很重要
"""
    mind_map_request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": [
            {
                "role": "user",
                "content": mind_map_content
            }
        ]
    }

    mind_map_response = client.chat.completions.create(**mind_map_request_payload)
    mind_map = mind_map_response.choices[0].message.content.strip()
    print("=========# 生成思維導圖==========")
    print(mind_map)

    # Best-effort cleanup of intermediate audio files (the original leaked
    # both the downloaded wav and the reusable chunk buffer).
    for temp_path in ("temp_chunk.wav", file_name + ".wav"):
        try:
            os.remove(temp_path)
        except OSError:
            pass

    return (srt_filename, srt_content, large_scope_srt, summary, mind_map)
|
|
|
|
|
|
|
|
# Gradio UI: one row of inputs, one row of outputs, and a submit button
# wired to process_video. Component creation order defines the layout.
with gr.Blocks() as demo:
    with gr.Row():
        url_input = gr.Textbox(label="YouTube Video ID")
        key_input = gr.Textbox(label="OpenAI Key (optional)")
        password_input = gr.Textbox(label="Password Secret (optional)")
    with gr.Row():
        srt_file_output = gr.File(label="Download SRT")
        srt_text_output = gr.Textbox(label="SRT Content", show_copy_button=True)
        paragraph_output = gr.Textbox(label="Large Scope SRT", show_copy_button=True)
        summary_output = gr.Textbox(label="Video Summary", show_copy_button=True)
        mind_map_output = gr.Textbox(label="Mind Map", show_copy_button=True)

    gr.Markdown("Generate SRT, Summary and Mind Map from YouTube video(限額 60 min)")
    gr.Markdown("YouTube to SRT, Summary & Mind Map")
    process_button = gr.Button("Process Video")
    process_button.click(
        fn=process_video,
        inputs=[url_input, key_input, password_input],
        outputs=[srt_file_output, srt_text_output, paragraph_output,
                 summary_output, mind_map_output]
    )


demo.launch()
|
|