Spaces:
Sleeping
Sleeping
def get_questions(video_id, df_string):
Browse files
app.py
CHANGED
|
@@ -51,6 +51,7 @@ from urllib.parse import urlparse, parse_qs
|
|
| 51 |
OUTPUT_PATH = 'videos'
|
| 52 |
TRANSCRIPTS = []
|
| 53 |
CURRENT_INDEX = 0
|
|
|
|
| 54 |
|
| 55 |
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
|
| 56 |
client = OpenAI(api_key=OPEN_AI_KEY)
|
|
@@ -295,6 +296,10 @@ def process_youtube_link(link):
|
|
| 295 |
# 使用 YouTube API 获取逐字稿
|
| 296 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
| 297 |
video_id = extract_youtube_id(link)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
| 299 |
|
| 300 |
try:
|
|
@@ -335,8 +340,7 @@ def process_youtube_link(link):
|
|
| 335 |
TRANSCRIPTS = formatted_transcript
|
| 336 |
|
| 337 |
# 基于逐字稿生成其他所需的输出
|
| 338 |
-
|
| 339 |
-
questions = ["", "", ""]
|
| 340 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
| 341 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
| 342 |
summary = summary_json["summary"]
|
|
@@ -566,8 +570,27 @@ def generate_questions(df_string):
|
|
| 566 |
|
| 567 |
return questions
|
| 568 |
|
| 569 |
-
def get_questions(df_string):
|
| 570 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
q1 = questions[0] if len(questions) > 0 else ""
|
| 572 |
q2 = questions[1] if len(questions) > 1 else ""
|
| 573 |
q3 = questions[2] if len(questions) > 2 else ""
|
|
@@ -724,7 +747,7 @@ with gr.Blocks() as demo:
|
|
| 724 |
btn_2.click(respond, inputs=[btn_2, df_string_output, chatbot], outputs=[msg, chatbot])
|
| 725 |
btn_3.click(respond, inputs=[btn_3, df_string_output, chatbot], outputs=[msg, chatbot])
|
| 726 |
|
| 727 |
-
btn_create_question.click(get_questions, inputs = [df_string_output], outputs = [btn_1, btn_2, btn_3])
|
| 728 |
|
| 729 |
# file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
|
| 730 |
file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
|
|
|
| 51 |
OUTPUT_PATH = 'videos'
|
| 52 |
TRANSCRIPTS = []
|
| 53 |
CURRENT_INDEX = 0
|
| 54 |
+
VIDEO_ID = ""
|
| 55 |
|
| 56 |
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
|
| 57 |
client = OpenAI(api_key=OPEN_AI_KEY)
|
|
|
|
| 296 |
# 使用 YouTube API 获取逐字稿
|
| 297 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
| 298 |
video_id = extract_youtube_id(link)
|
| 299 |
+
global VIDEO_ID
|
| 300 |
+
VIDEO_ID = video_id
|
| 301 |
+
|
| 302 |
+
|
| 303 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
| 304 |
|
| 305 |
try:
|
|
|
|
| 340 |
TRANSCRIPTS = formatted_transcript
|
| 341 |
|
| 342 |
# 基于逐字稿生成其他所需的输出
|
| 343 |
+
questions = get_questions(video_id, formatted_simple_transcript)
|
|
|
|
| 344 |
formatted_transcript_json = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
| 345 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
| 346 |
summary = summary_json["summary"]
|
|
|
|
| 570 |
|
| 571 |
return questions
|
| 572 |
|
| 573 |
+
def get_questions(video_id, df_string):
|
| 574 |
+
# Check Google Drive for whether <video_id>_questions.json already exists
|
| 575 |
+
print("===get_questions===")
|
| 576 |
+
service = init_drive_service()
|
| 577 |
+
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
| 578 |
+
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
| 579 |
+
file_name = f'{video_id}_questions.json'
|
| 580 |
+
|
| 581 |
+
# 检查檔案是否存在
|
| 582 |
+
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 583 |
+
if not exists:
|
| 584 |
+
questions = generate_questions(df_string)
|
| 585 |
+
questions_text = json.dumps(questions, ensure_ascii=False, indent=2)
|
| 586 |
+
upload_content_directly(service, file_name, folder_id, questions_text)
|
| 587 |
+
print("questions已上傳到Google Drive")
|
| 588 |
+
else:
|
| 589 |
+
# The questions file already exists; download its contents
|
| 590 |
+
print("questions已存在于Google Drive中")
|
| 591 |
+
questions_text = download_file_as_string(service, file_id)
|
| 592 |
+
questions = json.loads(questions_text)
|
| 593 |
+
|
| 594 |
q1 = questions[0] if len(questions) > 0 else ""
|
| 595 |
q2 = questions[1] if len(questions) > 1 else ""
|
| 596 |
q3 = questions[2] if len(questions) > 2 else ""
|
|
|
|
| 747 |
btn_2.click(respond, inputs=[btn_2, df_string_output, chatbot], outputs=[msg, chatbot])
|
| 748 |
btn_3.click(respond, inputs=[btn_3, df_string_output, chatbot], outputs=[msg, chatbot])
|
| 749 |
|
| 750 |
+
btn_create_question.click(get_questions, inputs = [VIDEO_ID, df_string_output], outputs = [btn_1, btn_2, btn_3])
|
| 751 |
|
| 752 |
# file_upload.change(process_file, inputs=file_upload, outputs=df_string_output)
|
| 753 |
file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|