Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Community

youngtsai committed on Apr 7, 2024

Commit

fc16e41

1 Parent(s): f3fe8c6

has_keywords_added = True

Browse files

Files changed (2) hide show

app.py +3 -0
chatbot.py +15 -29

app.py CHANGED Viewed

@@ -1088,11 +1088,14 @@ def get_key_moments(video_id, formatted_simple_transcript, formatted_transcript,
                     print("===keywords===")
                     print(key_moment["keywords"])
                     print("===keywords===")
             if has_keywords_added:
                 key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
                 upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
                 key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
                 key_moments_json = json.loads(key_moments_text)
     elif source == "drive":
         print("===get_key_moments on drive===")

                     print("===keywords===")
                     print(key_moment["keywords"])
                     print("===keywords===")
+                    has_keywords_added = True
             if has_keywords_added:
                 key_moments_text = json.dumps(key_moments_json, ensure_ascii=False, indent=2)
                 upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, key_moments_text)
                 key_moments_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
                 key_moments_json = json.loads(key_moments_text)
+                print("======key_moments_json=====")
+                print(key_moments_json)
     elif source == "drive":
         print("===get_key_moments on drive===")

chatbot.py CHANGED Viewed

@@ -31,6 +31,8 @@ class Chatbot:
         # key_moments_json remove images
         for moment in key_moments_json:
             moment.pop('images', None)
         key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
         return key_moments_text
@@ -54,38 +56,22 @@ class Chatbot:
         socratic_mode = str(socratic_mode)
         ai_name = self.ai_name
-        # string with maximum length 32768
-        # if transcript_text is too long, replace by key_moments_text
-        if len(transcript_text) > 25000:
-            content_text = key_moments_text
-            print("=== transcript_text is too long, replace by key_moments_text ===")
-        else:
-            content_text = transcript_text
-            print("=== transcript_text is used ===")
         system_prompt = f"""
-            科目：{content_subject}
-            年級：{content_grade}
-            逐字稿資料：{content_text}
             -------------------------------------
-            你是一個專業的{content_subject}老師， user 為{content_grade}的學生
-            socratic_mode = {socratic_mode}
-            if socratic_mode is True，
-            - 請用蘇格拉底式的提問方式，引導學生思考，並且給予學生一些提示
-            - 一次只問一個問題，字數在100字以內
-            - 不要直接給予答案，讓學生自己思考
-            - 但可以給予一些提示跟引導，例如給予影片的時間軸，讓學生自己去找答案
-            rule:
-            - 請一定要用繁體中文回答 zh-TW，並用台灣人的口語表達，回答時不用特別說明這是台灣人的語氣，也不用說這是「台語的說法」
-            - 請用 {content_grade} 的學生能懂的方式回答，不用提到「逐字稿」這個詞
-            - 如果學生問了一些問題你無法判斷，請告訴學生你無法判斷，並建議學生可以問其他問題
-            - 或者你可以反問學生一些問題，幫助學生更好的理解資料，字數在100字以內
-            - 如果學生的問題與資料文本無關，請告訴學生你「無法回答超出影片範圍的問題」，並告訴他可以怎麼問什麼樣的問題（一個就好）
-            - 回答範圍一定要在逐字稿資料內，不要引用其他資料，請嚴格執行
-            - 並給予學生鼓勵，讓學生有學習的動力
-            - 回答時數學式請用數學符號代替文字（Latex 用 $ 字號 render)
-            - 只要是參考逐字稿資料，please use the timestamp format and give only one reference, example:【參考資料：00:00:00】
         """
         return system_prompt

         # key_moments_json remove images
         for moment in key_moments_json:
             moment.pop('images', None)
+            moment.pop('end', None)
+            moment.pop('transcript', None)
         key_moments_text = json.dumps(key_moments_json, ensure_ascii=False)
         return key_moments_text
         socratic_mode = str(socratic_mode)
         ai_name = self.ai_name
         system_prompt = f"""
+            subject: {content_subject}
+            grade: {content_grade}
+            context: {key_moments_text}
             -------------------------------------
+            Role: {content_subject} teacher, {content_grade} th-grade student.
+            Method: Socratic style, guide thinking, no direct answers. this is very important, please be seriously following.
+            Language: Traditional Chinese ZH-TW (it's very important), suitable for {content_grade} th-grade level.
+            Response: Single question, under 100 characters, include math symbols (use LaTeX $), hint with video timestamp which format 【00:00:00】.
+            Sometimes encourage user by Taiwanese tone.
+            if user ask questions not include in key_moments_text,
+            just tell them to ask the question in context and give them example question.
+            Restrictions: Answer within video content, no external references
         """
+        print("====system_prompt====")
+        print(system_prompt)
         return system_prompt