Spaces:
Sleeping
Sleeping
has_keywords_added = True
Browse files
- app.py +3 -0
- chatbot.py +15 -29
app.py
CHANGED
|
@@ -1088,11 +1088,14 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
|
|
| 1088 |
print("===keywords===")
|
| 1089 |
print(key_moment["keywords"])
|
| 1090 |
print("===keywords===")
|
|
|
|
| 1091 |
if has_keywords_added:
|
| 1092 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
|
| 1093 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
|
| 1094 |
key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
|
| 1095 |
key_moments_json = json.loads(key_moments_text)
|
|
|
|
|
|
|
| 1096 |
|
| 1097 |
elif source == "drive":
|
| 1098 |
print("===get_key_moments on drive===")
|
|
|
|
| 1088 |
print("===keywords===")
|
| 1089 |
print(key_moment["keywords"])
|
| 1090 |
print("===keywords===")
|
| 1091 |
+
has_keywords_added = True
|
| 1092 |
if has_keywords_added:
|
| 1093 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
|
| 1094 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
|
| 1095 |
key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
|
| 1096 |
key_moments_json = json.loads(key_moments_text)
|
| 1097 |
+
print("======key_moments_json=====")
|
| 1098 |
+
print(key_moments_json)
|
| 1099 |
|
| 1100 |
elif source == "drive":
|
| 1101 |
print("===get_key_moments on drive===")
|
chatbot.py
CHANGED
|
@@ -31,6 +31,8 @@ class Chatbot:
|
|
| 31 |
# key_moments_json remove images
|
| 32 |
for moment in key_moments_json:
|
| 33 |
moment.pop('images', None)
|
|
|
|
|
|
|
| 34 |
|
| 35 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
|
| 36 |
return key_moments_text
|
|
@@ -54,38 +56,22 @@ class Chatbot:
|
|
| 54 |
socratic_mode = str(socratic_mode)
|
| 55 |
ai_name = self.ai_name
|
| 56 |
|
| 57 |
-
# string with maximum length 32768
|
| 58 |
-
# if transcript_text is too long, replace by key_moments_text
|
| 59 |
-
if len(transcript_text) > 25000:
|
| 60 |
-
content_text = key_moments_text
|
| 61 |
-
print("=== transcript_text is too long, replace by key_moments_text ===")
|
| 62 |
-
else:
|
| 63 |
-
content_text = transcript_text
|
| 64 |
-
print("=== transcript_text is used ===")
|
| 65 |
-
|
| 66 |
system_prompt = f"""
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-------------------------------------
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
- 請一定要用繁體中文回答 zh-TW,並用台灣人的口語表達,回答時不用特別說明這是台灣人的語氣,也不用說這是「台語的說法」
|
| 80 |
-
- 請用 {content_grade} 的學生能懂的方式回答,不用提到「逐字稿」這個詞
|
| 81 |
-
- 如果學生問了一些問題你無法判斷,請告訴學生你無法判斷,並建議學生可以問其他問題
|
| 82 |
-
- 或者你可以反問學生一些問題,幫助學生更好的理解資料,字數在100字以內
|
| 83 |
-
- 如果學生的問題與資料文本無關,請告訴學生你「無法回答超出影片範圍的問題」,並告訴他可以怎麼問什麼樣的問題(一個就好)
|
| 84 |
-
- 回答範圍一定要在逐字稿資料內,不要引用其他資料,請嚴格執行
|
| 85 |
-
- 並給予學生鼓勵,讓學生有學習的動力
|
| 86 |
-
- 回答時數學式請用數學符號代替文字(Latex 用 $ 字號 render)
|
| 87 |
-
- 只要是參考逐字稿資料,please use the timestamp format and give only one reference, example:【參考資料:00:00:00】
|
| 88 |
"""
|
|
|
|
|
|
|
| 89 |
|
| 90 |
return system_prompt
|
| 91 |
|
|
|
|
| 31 |
# key_moments_json remove images
|
| 32 |
for moment in key_moments_json:
|
| 33 |
moment.pop('images', None)
|
| 34 |
+
moment.pop('end', None)
|
| 35 |
+
moment.pop('transcript', None)
|
| 36 |
|
| 37 |
key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
|
| 38 |
return key_moments_text
|
|
|
|
| 56 |
socratic_mode = str(socratic_mode)
|
| 57 |
ai_name = self.ai_name
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
system_prompt = f"""
|
| 60 |
+
subject: {content_subject}
|
| 61 |
+
grade: {content_grade}
|
| 62 |
+
context: {key_moments_text}
|
| 63 |
-------------------------------------
|
| 64 |
+
Role: {content_subject} teacher, {content_grade} th-grade student.
|
| 65 |
+
Method: Socratic style, guide thinking, no direct answers. this is very important, please be seriously following.
|
| 66 |
+
Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
|
| 67 |
+
Response: Single question, under 100 characters, include math symbols (use LaTeX $), hint with video timestamp which format 【00:00:00】.
|
| 68 |
+
Sometimes encourage user by Taiwanese tone.
|
| 69 |
+
if user ask questions not include in key_moments_text,
|
| 70 |
+
just tell them to ask the question in context and give them example question.
|
| 71 |
+
Restrictions: Answer within video content, no external references
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
"""
|
| 73 |
+
print("====system_prompt====")
|
| 74 |
+
print(system_prompt)
|
| 75 |
|
| 76 |
return system_prompt
|
| 77 |
|