Spaces:
Runtime error
Runtime error
Commit
·
66b707b
1
Parent(s):
c88c1d9
feat/formatter
Browse files
- app.py +9 -3
- utils/chatbot.py +58 -41
- utils/utils.py +1 -0
- utils/work_flow_controller.py +12 -4
app.py
CHANGED
|
@@ -34,7 +34,9 @@ with gr.Blocks() as demo:
|
|
| 34 |
)
|
| 35 |
upload_to_db = gr.CheckboxGroup(
|
| 36 |
["Upload to Database"],
|
| 37 |
-
label="是否上傳至資料庫",
|
|
|
|
|
|
|
| 38 |
)
|
| 39 |
|
| 40 |
with gr.Row():
|
|
@@ -85,7 +87,6 @@ with gr.Blocks() as demo:
|
|
| 85 |
**bot_args
|
| 86 |
).then(lambda: gr.update(interactive=True), None, [user_input], queue=False)
|
| 87 |
|
| 88 |
-
|
| 89 |
# defining workflow of clear state
|
| 90 |
clear_state_args = dict(
|
| 91 |
fn=clear_state,
|
|
@@ -122,7 +123,12 @@ with gr.Blocks() as demo:
|
|
| 122 |
**change_md_args
|
| 123 |
)
|
| 124 |
|
| 125 |
-
video_text_input.submit(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
if __name__ == "__main__":
|
| 128 |
demo.launch()
|
|
|
|
| 34 |
)
|
| 35 |
upload_to_db = gr.CheckboxGroup(
|
| 36 |
["Upload to Database"],
|
| 37 |
+
label="是否上傳至資料庫",
|
| 38 |
+
info="將資料上傳至資料庫時,資料庫會自動建立索引,下次使用時可以直接檢索,預設為僅作這次使用",
|
| 39 |
+
scale=1,
|
| 40 |
)
|
| 41 |
|
| 42 |
with gr.Row():
|
|
|
|
| 87 |
**bot_args
|
| 88 |
).then(lambda: gr.update(interactive=True), None, [user_input], queue=False)
|
| 89 |
|
|
|
|
| 90 |
# defining workflow of clear state
|
| 91 |
clear_state_args = dict(
|
| 92 |
fn=clear_state,
|
|
|
|
| 123 |
**change_md_args
|
| 124 |
)
|
| 125 |
|
| 126 |
+
video_text_input.submit(
|
| 127 |
+
video_bot,
|
| 128 |
+
[test_video_chabot, video_text_input],
|
| 129 |
+
video_text_output,
|
| 130 |
+
api_name="video_bot",
|
| 131 |
+
)
|
| 132 |
|
| 133 |
if __name__ == "__main__":
|
| 134 |
demo.launch()
|
utils/chatbot.py
CHANGED
|
@@ -19,6 +19,7 @@ from .work_flow_controller import WorkFlowController
|
|
| 19 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 20 |
openai.api_key = OPENAI_API_KEY
|
| 21 |
|
|
|
|
| 22 |
class Chatbot:
|
| 23 |
def __init__(self) -> None:
|
| 24 |
self.history = []
|
|
@@ -55,7 +56,7 @@ class Chatbot:
|
|
| 55 |
continue
|
| 56 |
self.knowledge_base = db
|
| 57 |
self.upload_state = "done"
|
| 58 |
-
|
| 59 |
def __get_local_knowledge_base(self):
|
| 60 |
with open(self.csv_result_path, "r", encoding="UTF-8") as fp:
|
| 61 |
knowledge_base = pd.read_csv(fp)
|
|
@@ -71,16 +72,22 @@ class Chatbot:
|
|
| 71 |
# db.to_csv(f"{self.uid}_knowledge_base.csv", index=False)
|
| 72 |
cur_content.to_csv(f"{self.uid}_knowledge_base.csv", index=False)
|
| 73 |
media = MediaFileUpload(f"{self.uid}_knowledge_base.csv", resumable=True)
|
| 74 |
-
request =
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
def __init_drive_service(self):
|
| 77 |
-
SCOPES = [
|
| 78 |
SERVICE_ACCOUNT_FILE = os.getenv("CREDENTIALS")
|
| 79 |
|
| 80 |
-
creds = Credentials.from_service_account_file(
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
| 84 |
def __read_db(self, service):
|
| 85 |
request = service.files().get_media(fileId="1m3ozrphHP221hhdCFMFX9-10nzSDfNyW")
|
| 86 |
fh = io.BytesIO()
|
|
@@ -95,13 +102,13 @@ class Chatbot:
|
|
| 95 |
fh.seek(0)
|
| 96 |
|
| 97 |
return pd.read_csv(fh)
|
| 98 |
-
|
| 99 |
def __read_file(self, service, filename) -> pd.DataFrame:
|
| 100 |
query = f"name='{filename}'"
|
| 101 |
results = service.files().list(q=query).execute()
|
| 102 |
-
files = results.get(
|
| 103 |
|
| 104 |
-
file_id = files[0][
|
| 105 |
|
| 106 |
request = service.files().get_media(fileId=file_id)
|
| 107 |
fh = io.BytesIO()
|
|
@@ -116,25 +123,33 @@ class Chatbot:
|
|
| 116 |
fh.seek(0)
|
| 117 |
|
| 118 |
return pd.read_csv(fh)
|
| 119 |
-
|
| 120 |
def __upload_file(self, service):
|
| 121 |
results = service.files().list(pageSize=10).execute()
|
| 122 |
-
items = results.get(
|
| 123 |
if not items:
|
| 124 |
-
print(
|
| 125 |
else:
|
| 126 |
-
print(
|
| 127 |
for item in items:
|
| 128 |
print(f"{item['name']} ({item['id']})")
|
| 129 |
|
| 130 |
media = MediaFileUpload(self.csv_result_path, resumable=True)
|
| 131 |
-
filename_prefix =
|
| 132 |
-
filename = filename_prefix + self.uid +
|
| 133 |
-
request =
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
def clear_state(self):
|
| 140 |
self.context = None
|
|
@@ -240,11 +255,11 @@ class VideoChatbot:
|
|
| 240 |
def __init__(self) -> None:
|
| 241 |
self.metadata_keys = ["標題", "逐字稿", "摘要", "關鍵字"]
|
| 242 |
self.metadata = {
|
| 243 |
-
"c2fK-hxnPSY":{
|
| 244 |
-
"標題": "可汗學院的創新教學:學生與老師模式解析",
|
| 245 |
"逐字稿": "0:00\n這裡是一個關於西班牙美洲戰爭和AP美國歷史的練習\n0:04\n在可汗學院,我們以學生模式開始,並注意到如果學生要求解釋\n0:11\n它不只是給出答案,它會像一個好的導師一樣,只是試圖引導\n0:15\n學生朝正確的方向前進,並且還注意到老師可以看到\n0:21\n學生正在互動的內容作為安全措施,現在如果我們關閉學生模式,我們\n0:27\n進入老師模式,我們看到當老師要求解釋時,它非常不同,就像\n0:32\n有了老師的指南,它會給出如你所見的非常詳細的解釋,如果老師\n0:39\n想要它的教案,他們只需要要求,他們就會得到一個非常詳細的\n0:44\n教案,包括目標、活動和家庭作業要做的事情,然後如果老師\n0:52\n說太好了,Khanmigo,你說��一個講義或者作為家庭作業給一個反思\n0:58\n實際上給了反思作業,然後它會再次為老師構建那個\n1:03\n如果老師喜歡,他們可以要求自定義這些教案或這些提示或者這些\n1:08\n反思,讓它們更符合他們的學生正在做的事情,這是老師們通常花費\n1:13\n每天好幾個小時工作的事情,我們希望能夠節省\n1:17\n他們很多時間和精力,以利他們自己的健康和他們的學生。",
|
| 246 |
"摘要": "這段文字描述了一個關於西班牙美洲戰爭和AP美國歷史的教學練習。練習首先展示學生模式,強調良好的教導方式並提到教師可以監控學生互動情況作為安全措施。隨後,進入老師模式,提供了詳細的解釋和教案,包括目標、活動和家庭作業。另外,還有一個自定義教案的選項,使其更符合學生的需求。整個過程旨在節省教師的時間和精力,並有助於他們的健康和學生的學習。",
|
| 247 |
-
"關鍵字": ["AP美國歷史", "學生模式", "老師模式", "教案設計", "自定義教學"]
|
| 248 |
}
|
| 249 |
}
|
| 250 |
|
|
@@ -261,16 +276,17 @@ class VideoChatbot:
|
|
| 261 |
你是一個知識檢索系統,我會給你一份文件,請幫我依照文件內容回答問題,並用繁體中文回答。以下是文件內容
|
| 262 |
"""
|
| 263 |
messages = [
|
| 264 |
-
{"role": "system", "content": f"{system_prompt} + '\n' '''{context}'''"},
|
| 265 |
-
|
|
|
|
| 266 |
try:
|
| 267 |
response = openai.ChatCompletion.create(
|
| 268 |
-
model=
|
| 269 |
messages=messages,
|
| 270 |
temperature=1,
|
| 271 |
max_tokens=2048,
|
| 272 |
frequency_penalty=0,
|
| 273 |
-
presence_penalty
|
| 274 |
)
|
| 275 |
bot_answer = response["choices"][0]["message"]["content"]
|
| 276 |
|
|
@@ -281,7 +297,7 @@ class VideoChatbot:
|
|
| 281 |
|
| 282 |
def compute_similariy(self, user_message):
|
| 283 |
threshold = 0.5
|
| 284 |
-
|
| 285 |
user_message_embedding = openai.Embedding.create(
|
| 286 |
input=user_message, engine="text-embedding-ada-002"
|
| 287 |
)["data"][0]["embedding"]
|
|
@@ -290,26 +306,27 @@ class VideoChatbot:
|
|
| 290 |
|
| 291 |
for index in self.metadata_keys:
|
| 292 |
index_embedding[index] = openai.Embedding.create(
|
| 293 |
-
input=self.metadata[self.video_id][index],
|
|
|
|
| 294 |
)["data"][0]["embedding"]
|
| 295 |
|
| 296 |
# turn index_embedding into a dataframe
|
| 297 |
-
index_embedding = pd.DataFrame(
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
| 301 |
|
| 302 |
-
index_embedding[
|
| 303 |
user_message_embedding,
|
| 304 |
-
index_embedding[
|
| 305 |
distance_metric="cosine",
|
| 306 |
)
|
| 307 |
|
| 308 |
-
index_embedding = index_embedding.sort_values(
|
| 309 |
-
by="distance", ascending=True
|
| 310 |
-
)
|
| 311 |
|
| 312 |
if index_embedding["distance"].values[0] > threshold:
|
| 313 |
return None
|
| 314 |
else:
|
| 315 |
-
return index_embedding[
|
|
|
|
| 19 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 20 |
openai.api_key = OPENAI_API_KEY
|
| 21 |
|
| 22 |
+
|
| 23 |
class Chatbot:
|
| 24 |
def __init__(self) -> None:
|
| 25 |
self.history = []
|
|
|
|
| 56 |
continue
|
| 57 |
self.knowledge_base = db
|
| 58 |
self.upload_state = "done"
|
| 59 |
+
|
| 60 |
def __get_local_knowledge_base(self):
|
| 61 |
with open(self.csv_result_path, "r", encoding="UTF-8") as fp:
|
| 62 |
knowledge_base = pd.read_csv(fp)
|
|
|
|
| 72 |
# db.to_csv(f"{self.uid}_knowledge_base.csv", index=False)
|
| 73 |
cur_content.to_csv(f"{self.uid}_knowledge_base.csv", index=False)
|
| 74 |
media = MediaFileUpload(f"{self.uid}_knowledge_base.csv", resumable=True)
|
| 75 |
+
request = (
|
| 76 |
+
service.files()
|
| 77 |
+
.update(fileId="1m3ozrphHP221hhdCFMFX9-10nzSDfNyW", media_body=media)
|
| 78 |
+
.execute()
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
def __init_drive_service(self):
|
| 82 |
+
SCOPES = ["https://www.googleapis.com/auth/drive"]
|
| 83 |
SERVICE_ACCOUNT_FILE = os.getenv("CREDENTIALS")
|
| 84 |
|
| 85 |
+
creds = Credentials.from_service_account_file(
|
| 86 |
+
SERVICE_ACCOUNT_FILE, scopes=SCOPES
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
return build("drive", "v3", credentials=creds)
|
| 90 |
+
|
| 91 |
def __read_db(self, service):
|
| 92 |
request = service.files().get_media(fileId="1m3ozrphHP221hhdCFMFX9-10nzSDfNyW")
|
| 93 |
fh = io.BytesIO()
|
|
|
|
| 102 |
fh.seek(0)
|
| 103 |
|
| 104 |
return pd.read_csv(fh)
|
| 105 |
+
|
| 106 |
def __read_file(self, service, filename) -> pd.DataFrame:
|
| 107 |
query = f"name='{filename}'"
|
| 108 |
results = service.files().list(q=query).execute()
|
| 109 |
+
files = results.get("files", [])
|
| 110 |
|
| 111 |
+
file_id = files[0]["id"]
|
| 112 |
|
| 113 |
request = service.files().get_media(fileId=file_id)
|
| 114 |
fh = io.BytesIO()
|
|
|
|
| 123 |
fh.seek(0)
|
| 124 |
|
| 125 |
return pd.read_csv(fh)
|
| 126 |
+
|
| 127 |
def __upload_file(self, service):
|
| 128 |
results = service.files().list(pageSize=10).execute()
|
| 129 |
+
items = results.get("files", [])
|
| 130 |
if not items:
|
| 131 |
+
print("No files found.")
|
| 132 |
else:
|
| 133 |
+
print("Files:")
|
| 134 |
for item in items:
|
| 135 |
print(f"{item['name']} ({item['id']})")
|
| 136 |
|
| 137 |
media = MediaFileUpload(self.csv_result_path, resumable=True)
|
| 138 |
+
filename_prefix = "ex_bot_database_"
|
| 139 |
+
filename = filename_prefix + self.uid + ".csv"
|
| 140 |
+
request = (
|
| 141 |
+
service.files()
|
| 142 |
+
.create(
|
| 143 |
+
media_body=media,
|
| 144 |
+
body={
|
| 145 |
+
"name": filename,
|
| 146 |
+
"parents": [
|
| 147 |
+
"1Lp21EZlVlqL-c27VQBC6wTbUC1YpKMsG"
|
| 148 |
+
], # Optional, to place the file in a specific folder
|
| 149 |
+
},
|
| 150 |
+
)
|
| 151 |
+
.execute()
|
| 152 |
+
)
|
| 153 |
|
| 154 |
def clear_state(self):
|
| 155 |
self.context = None
|
|
|
|
| 255 |
def __init__(self) -> None:
|
| 256 |
self.metadata_keys = ["標題", "逐字稿", "摘要", "關鍵字"]
|
| 257 |
self.metadata = {
|
| 258 |
+
"c2fK-hxnPSY": {
|
| 259 |
+
"標題": "可汗學院的創新教學:學生與老師模式解析",
|
| 260 |
"逐字稿": "0:00\n這裡是一個關於西班牙美洲戰爭和AP美國歷史的練習\n0:04\n在可汗學院,我們以學生模式開始,並注意到如果學生要求解釋\n0:11\n它不只是給出答案,它會像一個好的導師一樣,只是試圖引導\n0:15\n學生朝正確的方向前進,並且還注意到老師可以看到\n0:21\n學生正在互動的內容作為安全措施,現在如果我們關閉學生模式,我們\n0:27\n進入老師模式,我們看到當老師要求解釋時,它非常不同,就像\n0:32\n有了老師的指南,它會給出如你所見的非常詳細的解釋,如果老師\n0:39\n想要它的教案,他們只需要要求,他們就會得到一個非常詳細的\n0:44\n教案,包括目標、活動和家庭作業要做的事情,然後如果老師\n0:52\n說太好了,Khanmigo,你說��一個講義或者作為家庭作業給一個反思\n0:58\n實際上給了反思作業,然後它會再次為老師構建那個\n1:03\n如果老師喜歡,他們可以要求自定義這些教案或這些提示或者這些\n1:08\n反思,讓它們更符合他們的學生正在做的事情,這是老師們通常花費\n1:13\n每天好幾個小時工作的事情,我們希望能夠節省\n1:17\n他們很多時間和精力,以利他們自己的健康和他們的學生。",
|
| 261 |
"摘要": "這段文字描述了一個關於西班牙美洲戰爭和AP美國歷史的教學練習。練習首先展示學生模式,強調良好的教導方式並提到教師可以監控學生互動情況作為安全措施。隨後,進入老師模式,提供了詳細的解釋和教案,包括目標、活動和家庭作業。另外,還有一個自定義教案的選項,使其更符合學生的需求。整個過程旨在節省教師的時間和精力,並有助於他們的健康和學生的學習。",
|
| 262 |
+
"關鍵字": ["AP美國歷史", "學生模式", "老師模式", "教案設計", "自定義教學"],
|
| 263 |
}
|
| 264 |
}
|
| 265 |
|
|
|
|
| 276 |
你是一個知識檢索系統,我會給你一份文件,請幫我依照文件內容回答問題,並用繁體中文回答。以下是文件內容
|
| 277 |
"""
|
| 278 |
messages = [
|
| 279 |
+
{"role": "system", "content": f"{system_prompt} + '\n' '''{context}'''"},
|
| 280 |
+
{"role": "user", "content": user_message},
|
| 281 |
+
]
|
| 282 |
try:
|
| 283 |
response = openai.ChatCompletion.create(
|
| 284 |
+
model="gpt-3.5-turbo",
|
| 285 |
messages=messages,
|
| 286 |
temperature=1,
|
| 287 |
max_tokens=2048,
|
| 288 |
frequency_penalty=0,
|
| 289 |
+
presence_penalty=0.6,
|
| 290 |
)
|
| 291 |
bot_answer = response["choices"][0]["message"]["content"]
|
| 292 |
|
|
|
|
| 297 |
|
| 298 |
def compute_similariy(self, user_message):
|
| 299 |
threshold = 0.5
|
| 300 |
+
|
| 301 |
user_message_embedding = openai.Embedding.create(
|
| 302 |
input=user_message, engine="text-embedding-ada-002"
|
| 303 |
)["data"][0]["embedding"]
|
|
|
|
| 306 |
|
| 307 |
for index in self.metadata_keys:
|
| 308 |
index_embedding[index] = openai.Embedding.create(
|
| 309 |
+
input=self.metadata[self.video_id][index],
|
| 310 |
+
engine="text-embedding-ada-002",
|
| 311 |
)["data"][0]["embedding"]
|
| 312 |
|
| 313 |
# turn index_embedding into a dataframe
|
| 314 |
+
index_embedding = pd.DataFrame(
|
| 315 |
+
{
|
| 316 |
+
"title": [list(index_embedding.keys())[0]],
|
| 317 |
+
"embedding": [list(index_embedding.values())[0]],
|
| 318 |
+
}
|
| 319 |
+
)
|
| 320 |
|
| 321 |
+
index_embedding["distance"] = distances_from_embeddings(
|
| 322 |
user_message_embedding,
|
| 323 |
+
index_embedding["embedding"].values,
|
| 324 |
distance_metric="cosine",
|
| 325 |
)
|
| 326 |
|
| 327 |
+
index_embedding = index_embedding.sort_values(by="distance", ascending=True)
|
|
|
|
|
|
|
| 328 |
|
| 329 |
if index_embedding["distance"].values[0] > threshold:
|
| 330 |
return None
|
| 331 |
else:
|
| 332 |
+
return index_embedding["title"][0]
|
utils/utils.py
CHANGED
|
@@ -25,5 +25,6 @@ def user(chatbot, *args):
|
|
| 25 |
def bot(chatbot, *args):
|
| 26 |
return chatbot.bot(*args)
|
| 27 |
|
|
|
|
| 28 |
def video_bot(video_chatbot, *args):
|
| 29 |
return video_chatbot.answer_question(*args)
|
|
|
|
| 25 |
def bot(chatbot, *args):
|
| 26 |
return chatbot.bot(*args)
|
| 27 |
|
| 28 |
+
|
| 29 |
def video_bot(video_chatbot, *args):
|
| 30 |
return video_chatbot.answer_question(*args)
|
utils/work_flow_controller.py
CHANGED
|
@@ -109,13 +109,17 @@ class WorkFlowController:
|
|
| 109 |
|
| 110 |
def __dump_to_json(self):
|
| 111 |
with open(
|
| 112 |
-
os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.json"),
|
|
|
|
|
|
|
| 113 |
) as f:
|
| 114 |
print(
|
| 115 |
"Dumping to json, the path is: "
|
| 116 |
+ os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.json")
|
| 117 |
)
|
| 118 |
-
self.json_result_path = os.path.join(
|
|
|
|
|
|
|
| 119 |
json.dump(self.files_info, f, indent=4, ensure_ascii=False)
|
| 120 |
|
| 121 |
def __construct_knowledge_base_dataframe(self):
|
|
@@ -141,12 +145,16 @@ class WorkFlowController:
|
|
| 141 |
|
| 142 |
def __dump_to_csv(self):
|
| 143 |
df = self.__construct_knowledge_base_dataframe()
|
| 144 |
-
df.to_csv(
|
|
|
|
|
|
|
| 145 |
print(
|
| 146 |
"Dumping to csv, the path is: "
|
| 147 |
+ os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.csv")
|
| 148 |
)
|
| 149 |
-
self.csv_result_path = os.path.join(
|
|
|
|
|
|
|
| 150 |
|
| 151 |
def __get_file_name(self, file_src):
|
| 152 |
file_paths = [x.name for x in file_src]
|
|
|
|
| 109 |
|
| 110 |
def __dump_to_json(self):
|
| 111 |
with open(
|
| 112 |
+
os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.json"),
|
| 113 |
+
"w",
|
| 114 |
+
encoding="utf-8",
|
| 115 |
) as f:
|
| 116 |
print(
|
| 117 |
"Dumping to json, the path is: "
|
| 118 |
+ os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.json")
|
| 119 |
)
|
| 120 |
+
self.json_result_path = os.path.join(
|
| 121 |
+
os.getcwd(), f"{self.uid}_knowledge_base.json"
|
| 122 |
+
)
|
| 123 |
json.dump(self.files_info, f, indent=4, ensure_ascii=False)
|
| 124 |
|
| 125 |
def __construct_knowledge_base_dataframe(self):
|
|
|
|
| 145 |
|
| 146 |
def __dump_to_csv(self):
|
| 147 |
df = self.__construct_knowledge_base_dataframe()
|
| 148 |
+
df.to_csv(
|
| 149 |
+
os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.csv"), index=False
|
| 150 |
+
)
|
| 151 |
print(
|
| 152 |
"Dumping to csv, the path is: "
|
| 153 |
+ os.path.join(os.getcwd(), f"{self.uid}_knowledge_base.csv")
|
| 154 |
)
|
| 155 |
+
self.csv_result_path = os.path.join(
|
| 156 |
+
os.getcwd(), f"{self.uid}_knowledge_base.csv"
|
| 157 |
+
)
|
| 158 |
|
| 159 |
def __get_file_name(self, file_src):
|
| 160 |
file_paths = [x.name for x in file_src]
|