Spaces:
Sleeping
Sleeping
summary = get_video_id_summary(video_id, formatted_simple_transcript)
Browse files
app.py
CHANGED
|
@@ -316,8 +316,7 @@ def process_youtube_link(link):
|
|
| 316 |
# questions = generate_questions(formatted_simple_transcript)
|
| 317 |
questions = ["","",""]
|
| 318 |
df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
| 319 |
-
|
| 320 |
-
df_summarise = "..."
|
| 321 |
|
| 322 |
global TRANSCRIPTS
|
| 323 |
TRANSCRIPTS = formatted_transcript
|
|
@@ -330,7 +329,7 @@ def process_youtube_link(link):
|
|
| 330 |
questions[1] if len(questions) > 1 else "", \
|
| 331 |
questions[2] if len(questions) > 2 else "", \
|
| 332 |
df_string_output, \
|
| 333 |
-
|
| 334 |
html_content, \
|
| 335 |
first_image, \
|
| 336 |
first_text
|
|
@@ -375,14 +374,6 @@ def screenshot_youtube_video(youtube_id, snapshot_sec):
|
|
| 375 |
|
| 376 |
return screenshot_path
|
| 377 |
|
| 378 |
-
# def get_screenshot_from_video(video_link, start_time):
|
| 379 |
-
# # 实现从视频中提取帧的逻辑
|
| 380 |
-
# # 由于这需要服务器端处理,你可能需要一种方法来下载视频,
|
| 381 |
-
# # 并使用 ffmpeg 或类似工具提取特定时间点的帧
|
| 382 |
-
# # 这里只是一个示意性的函数实现
|
| 383 |
-
# screenshot_url = f"[逻辑以提取视频 {video_link} 在 {start_time} 秒时的截图]"
|
| 384 |
-
# return screenshot_url
|
| 385 |
-
|
| 386 |
def process_web_link(link):
|
| 387 |
# 抓取和解析网页内容
|
| 388 |
response = requests.get(link)
|
|
@@ -390,6 +381,28 @@ def process_web_link(link):
|
|
| 390 |
return soup.get_text()
|
| 391 |
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
def generate_df_summarise(df_string):
|
| 394 |
# 使用 OpenAI 生成基于上传数据的问题
|
| 395 |
sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
|
|
@@ -434,6 +447,7 @@ def generate_df_summarise(df_string):
|
|
| 434 |
|
| 435 |
return df_summarise
|
| 436 |
|
|
|
|
| 437 |
def generate_questions(df_string):
|
| 438 |
# 使用 OpenAI 生成基于上传数据的问题
|
| 439 |
|
|
|
|
| 316 |
# questions = generate_questions(formatted_simple_transcript)
|
| 317 |
questions = ["","",""]
|
| 318 |
df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
| 319 |
+
summary = get_video_id_summary(video_id, formatted_simple_transcript)
|
|
|
|
| 320 |
|
| 321 |
global TRANSCRIPTS
|
| 322 |
TRANSCRIPTS = formatted_transcript
|
|
|
|
| 329 |
questions[1] if len(questions) > 1 else "", \
|
| 330 |
questions[2] if len(questions) > 2 else "", \
|
| 331 |
df_string_output, \
|
| 332 |
+
summary, \
|
| 333 |
html_content, \
|
| 334 |
first_image, \
|
| 335 |
first_text
|
|
|
|
| 374 |
|
| 375 |
return screenshot_path
|
| 376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
def process_web_link(link):
|
| 378 |
# 抓取和解析网页内容
|
| 379 |
response = requests.get(link)
|
|
|
|
| 381 |
return soup.get_text()
|
| 382 |
|
| 383 |
|
| 384 |
+
|
| 385 |
+
# Return the content of "<video_id>_summary.json"; generate and upload it first if the file does not exist yet.
|
| 386 |
+
def get_video_id_summary(video_id, df_string):
    """Return the summary for ``video_id``, reusing a stored copy when present.

    The summary is kept in a file named ``<video_id>_summary.json`` inside the
    folder returned by ``create_folder_if_not_exists`` for this video (looked
    up under a fixed parent folder id). The log messages indicate the storage
    is Google Drive.

    Args:
        video_id: Identifier used both as the folder name and as the prefix
            of the summary file name.
        df_string: Source text handed to ``generate_df_summarise`` when no
            stored summary is found.

    Returns:
        The stored summary as returned by ``download_file_as_string`` when
        the file already exists; otherwise the freshly generated summary
        (after it has been uploaded).
    """
    drive = init_drive_service()
    video_folder = create_folder_if_not_exists(
        drive, video_id, '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    )
    summary_name = f'{video_id}_summary.json'

    # Check whether a summary file for this video has already been stored.
    found, stored_file_id = check_file_exists(drive, video_folder, summary_name)

    if found:
        # Cache hit: reuse the stored summary instead of regenerating it.
        print("summary已存在于Google Drive中")
        return download_file_as_string(drive, stored_file_id)

    # Cache miss: generate the summary, store it for next time, then return it.
    fresh_summary = generate_df_summarise(df_string)
    upload_content_directly(drive, summary_name, video_folder, fresh_summary)
    print("summary已上传到Google Drive")
    return fresh_summary
|
| 404 |
+
|
| 405 |
+
|
| 406 |
def generate_df_summarise(df_string):
|
| 407 |
# 使用 OpenAI 生成基于上传数据的问题
|
| 408 |
sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
|
|
|
|
| 447 |
|
| 448 |
return df_summarise
|
| 449 |
|
| 450 |
+
|
| 451 |
def generate_questions(df_string):
|
| 452 |
# 使用 OpenAI 生成基于上传数据的问题
|
| 453 |
|