Kevin Hu
committed on
Commit
·
1275b47
1
Parent(s):
8069189
Fix out of boundary. (#3786)
Browse files
### What problem does this PR solve?
#3769
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- api/apps/sdk/session.py +24 -22
- deepdoc/parser/pdf_parser.py +1 -1
api/apps/sdk/session.py
CHANGED
|
@@ -35,7 +35,7 @@ from api.db.services.llm_service import LLMBundle
|
|
| 35 |
|
| 36 |
@manager.route('/chats/<chat_id>/sessions', methods=['POST'])
|
| 37 |
@token_required
|
| 38 |
-
def create(tenant_id,chat_id):
|
| 39 |
req = request.json
|
| 40 |
req["dialog_id"] = chat_id
|
| 41 |
dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
|
|
@@ -77,7 +77,7 @@ def create_agent_session(tenant_id, agent_id):
|
|
| 77 |
conv = {
|
| 78 |
"id": get_uuid(),
|
| 79 |
"dialog_id": cvs.id,
|
| 80 |
-
"user_id": req.get("usr_id","") if isinstance(req, dict) else "",
|
| 81 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
| 82 |
"source": "agent"
|
| 83 |
}
|
|
@@ -88,11 +88,11 @@ def create_agent_session(tenant_id, agent_id):
|
|
| 88 |
|
| 89 |
@manager.route('/chats/<chat_id>/sessions/<session_id>', methods=['PUT'])
|
| 90 |
@token_required
|
| 91 |
-
def update(tenant_id,chat_id,session_id):
|
| 92 |
req = request.json
|
| 93 |
req["dialog_id"] = chat_id
|
| 94 |
conv_id = session_id
|
| 95 |
-
conv = ConversationService.query(id=conv_id,dialog_id=chat_id)
|
| 96 |
if not conv:
|
| 97 |
return get_error_data_result(message="Session does not exist")
|
| 98 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
|
@@ -123,12 +123,12 @@ def completion(tenant_id, chat_id):
|
|
| 123 |
return get_error_data_result(message="`name` can not be empty.")
|
| 124 |
ConversationService.save(**conv)
|
| 125 |
e, conv = ConversationService.get_by_id(conv["id"])
|
| 126 |
-
session_id=conv.id
|
| 127 |
else:
|
| 128 |
session_id = req.get("session_id")
|
| 129 |
if not req.get("question"):
|
| 130 |
return get_error_data_result(message="Please input your question.")
|
| 131 |
-
conv = ConversationService.query(id=session_id,dialog_id=chat_id)
|
| 132 |
if not conv:
|
| 133 |
return get_error_data_result(message="Session does not exist")
|
| 134 |
conv = conv[0]
|
|
@@ -182,18 +182,18 @@ def completion(tenant_id, chat_id):
|
|
| 182 |
chunk_list.append(new_chunk)
|
| 183 |
reference["chunks"] = chunk_list
|
| 184 |
ans["id"] = message_id
|
| 185 |
-
ans["session_id"]=session_id
|
| 186 |
|
| 187 |
def stream():
|
| 188 |
nonlocal dia, msg, req, conv
|
| 189 |
try:
|
| 190 |
for ans in chat(dia, msg, **req):
|
| 191 |
fillin_conv(ans)
|
| 192 |
-
yield "data:" + json.dumps({"code": 0,
|
| 193 |
ConversationService.update_by_id(conv.id, conv.to_dict())
|
| 194 |
except Exception as e:
|
| 195 |
yield "data:" + json.dumps({"code": 500, "message": str(e),
|
| 196 |
-
"data": {"answer": "**ERROR**: " + str(e),"reference": []}},
|
| 197 |
ensure_ascii=False) + "\n\n"
|
| 198 |
yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
|
| 199 |
|
|
@@ -235,7 +235,7 @@ def agent_completion(tenant_id, agent_id):
|
|
| 235 |
conv = {
|
| 236 |
"id": session_id,
|
| 237 |
"dialog_id": cvs.id,
|
| 238 |
-
"user_id": req.get("user_id",""),
|
| 239 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
| 240 |
"source": "agent"
|
| 241 |
}
|
|
@@ -251,9 +251,9 @@ def agent_completion(tenant_id, agent_id):
|
|
| 251 |
question = req.get("question")
|
| 252 |
if not question:
|
| 253 |
return get_error_data_result("`question` is required.")
|
| 254 |
-
question={
|
| 255 |
-
"role":"user",
|
| 256 |
-
"content":question,
|
| 257 |
"id": str(uuid4())
|
| 258 |
}
|
| 259 |
messages.append(question)
|
|
@@ -375,7 +375,7 @@ def agent_completion(tenant_id, agent_id):
|
|
| 375 |
|
| 376 |
@manager.route('/chats/<chat_id>/sessions', methods=['GET'])
|
| 377 |
@token_required
|
| 378 |
-
def list_session(chat_id,tenant_id):
|
| 379 |
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
| 380 |
return get_error_data_result(message=f"You don't own the assistant {chat_id}.")
|
| 381 |
id = request.args.get("id")
|
|
@@ -387,7 +387,7 @@ def list_session(chat_id,tenant_id):
|
|
| 387 |
desc = False
|
| 388 |
else:
|
| 389 |
desc = True
|
| 390 |
-
convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name)
|
| 391 |
if not convs:
|
| 392 |
return get_result(data=[])
|
| 393 |
for conv in convs:
|
|
@@ -429,7 +429,7 @@ def list_session(chat_id,tenant_id):
|
|
| 429 |
|
| 430 |
@manager.route('/chats/<chat_id>/sessions', methods=["DELETE"])
|
| 431 |
@token_required
|
| 432 |
-
def delete(tenant_id,chat_id):
|
| 433 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
| 434 |
return get_error_data_result(message="You don't own the chat")
|
| 435 |
req = request.json
|
|
@@ -437,21 +437,22 @@ def delete(tenant_id,chat_id):
|
|
| 437 |
if not req:
|
| 438 |
ids = None
|
| 439 |
else:
|
| 440 |
-
ids=req.get("ids")
|
| 441 |
|
| 442 |
if not ids:
|
| 443 |
conv_list = []
|
| 444 |
for conv in convs:
|
| 445 |
conv_list.append(conv.id)
|
| 446 |
else:
|
| 447 |
-
conv_list=ids
|
| 448 |
for id in conv_list:
|
| 449 |
-
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
| 450 |
if not conv:
|
| 451 |
return get_error_data_result(message="The chat doesn't own the session")
|
| 452 |
ConversationService.delete_by_id(id)
|
| 453 |
return get_result()
|
| 454 |
|
|
|
|
| 455 |
@manager.route('/sessions/ask', methods=['POST'])
|
| 456 |
@token_required
|
| 457 |
def ask_about(tenant_id):
|
|
@@ -460,17 +461,18 @@ def ask_about(tenant_id):
|
|
| 460 |
return get_error_data_result("`question` is required.")
|
| 461 |
if not req.get("dataset_ids"):
|
| 462 |
return get_error_data_result("`dataset_ids` is required.")
|
| 463 |
-
if not isinstance(req.get("dataset_ids"),list):
|
| 464 |
return get_error_data_result("`dataset_ids` should be a list.")
|
| 465 |
-
req["kb_ids"]=req.pop("dataset_ids")
|
| 466 |
for kb_id in req["kb_ids"]:
|
| 467 |
-
if not KnowledgebaseService.accessible(kb_id,tenant_id):
|
| 468 |
return get_error_data_result(f"You don't own the dataset {kb_id}.")
|
| 469 |
kbs = KnowledgebaseService.query(id=kb_id)
|
| 470 |
kb = kbs[0]
|
| 471 |
if kb.chunk_num == 0:
|
| 472 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
| 473 |
uid = tenant_id
|
|
|
|
| 474 |
def stream():
|
| 475 |
nonlocal req, uid
|
| 476 |
try:
|
|
|
|
| 35 |
|
| 36 |
@manager.route('/chats/<chat_id>/sessions', methods=['POST'])
|
| 37 |
@token_required
|
| 38 |
+
def create(tenant_id, chat_id):
|
| 39 |
req = request.json
|
| 40 |
req["dialog_id"] = chat_id
|
| 41 |
dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
|
|
|
|
| 77 |
conv = {
|
| 78 |
"id": get_uuid(),
|
| 79 |
"dialog_id": cvs.id,
|
| 80 |
+
"user_id": req.get("usr_id", "") if isinstance(req, dict) else "",
|
| 81 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
| 82 |
"source": "agent"
|
| 83 |
}
|
|
|
|
| 88 |
|
| 89 |
@manager.route('/chats/<chat_id>/sessions/<session_id>', methods=['PUT'])
|
| 90 |
@token_required
|
| 91 |
+
def update(tenant_id, chat_id, session_id):
|
| 92 |
req = request.json
|
| 93 |
req["dialog_id"] = chat_id
|
| 94 |
conv_id = session_id
|
| 95 |
+
conv = ConversationService.query(id=conv_id, dialog_id=chat_id)
|
| 96 |
if not conv:
|
| 97 |
return get_error_data_result(message="Session does not exist")
|
| 98 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
|
|
|
| 123 |
return get_error_data_result(message="`name` can not be empty.")
|
| 124 |
ConversationService.save(**conv)
|
| 125 |
e, conv = ConversationService.get_by_id(conv["id"])
|
| 126 |
+
session_id = conv.id
|
| 127 |
else:
|
| 128 |
session_id = req.get("session_id")
|
| 129 |
if not req.get("question"):
|
| 130 |
return get_error_data_result(message="Please input your question.")
|
| 131 |
+
conv = ConversationService.query(id=session_id, dialog_id=chat_id)
|
| 132 |
if not conv:
|
| 133 |
return get_error_data_result(message="Session does not exist")
|
| 134 |
conv = conv[0]
|
|
|
|
| 182 |
chunk_list.append(new_chunk)
|
| 183 |
reference["chunks"] = chunk_list
|
| 184 |
ans["id"] = message_id
|
| 185 |
+
ans["session_id"] = session_id
|
| 186 |
|
| 187 |
def stream():
|
| 188 |
nonlocal dia, msg, req, conv
|
| 189 |
try:
|
| 190 |
for ans in chat(dia, msg, **req):
|
| 191 |
fillin_conv(ans)
|
| 192 |
+
yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
|
| 193 |
ConversationService.update_by_id(conv.id, conv.to_dict())
|
| 194 |
except Exception as e:
|
| 195 |
yield "data:" + json.dumps({"code": 500, "message": str(e),
|
| 196 |
+
"data": {"answer": "**ERROR**: " + str(e), "reference": []}},
|
| 197 |
ensure_ascii=False) + "\n\n"
|
| 198 |
yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
|
| 199 |
|
|
|
|
| 235 |
conv = {
|
| 236 |
"id": session_id,
|
| 237 |
"dialog_id": cvs.id,
|
| 238 |
+
"user_id": req.get("user_id", ""),
|
| 239 |
"message": [{"role": "assistant", "content": canvas.get_prologue()}],
|
| 240 |
"source": "agent"
|
| 241 |
}
|
|
|
|
| 251 |
question = req.get("question")
|
| 252 |
if not question:
|
| 253 |
return get_error_data_result("`question` is required.")
|
| 254 |
+
question = {
|
| 255 |
+
"role": "user",
|
| 256 |
+
"content": question,
|
| 257 |
"id": str(uuid4())
|
| 258 |
}
|
| 259 |
messages.append(question)
|
|
|
|
| 375 |
|
| 376 |
@manager.route('/chats/<chat_id>/sessions', methods=['GET'])
|
| 377 |
@token_required
|
| 378 |
+
def list_session(chat_id, tenant_id):
|
| 379 |
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
| 380 |
return get_error_data_result(message=f"You don't own the assistant {chat_id}.")
|
| 381 |
id = request.args.get("id")
|
|
|
|
| 387 |
desc = False
|
| 388 |
else:
|
| 389 |
desc = True
|
| 390 |
+
convs = ConversationService.get_list(chat_id, page_number, items_per_page, orderby, desc, id, name)
|
| 391 |
if not convs:
|
| 392 |
return get_result(data=[])
|
| 393 |
for conv in convs:
|
|
|
|
| 429 |
|
| 430 |
@manager.route('/chats/<chat_id>/sessions', methods=["DELETE"])
|
| 431 |
@token_required
|
| 432 |
+
def delete(tenant_id, chat_id):
|
| 433 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
| 434 |
return get_error_data_result(message="You don't own the chat")
|
| 435 |
req = request.json
|
|
|
|
| 437 |
if not req:
|
| 438 |
ids = None
|
| 439 |
else:
|
| 440 |
+
ids = req.get("ids")
|
| 441 |
|
| 442 |
if not ids:
|
| 443 |
conv_list = []
|
| 444 |
for conv in convs:
|
| 445 |
conv_list.append(conv.id)
|
| 446 |
else:
|
| 447 |
+
conv_list = ids
|
| 448 |
for id in conv_list:
|
| 449 |
+
conv = ConversationService.query(id=id, dialog_id=chat_id)
|
| 450 |
if not conv:
|
| 451 |
return get_error_data_result(message="The chat doesn't own the session")
|
| 452 |
ConversationService.delete_by_id(id)
|
| 453 |
return get_result()
|
| 454 |
|
| 455 |
+
|
| 456 |
@manager.route('/sessions/ask', methods=['POST'])
|
| 457 |
@token_required
|
| 458 |
def ask_about(tenant_id):
|
|
|
|
| 461 |
return get_error_data_result("`question` is required.")
|
| 462 |
if not req.get("dataset_ids"):
|
| 463 |
return get_error_data_result("`dataset_ids` is required.")
|
| 464 |
+
if not isinstance(req.get("dataset_ids"), list):
|
| 465 |
return get_error_data_result("`dataset_ids` should be a list.")
|
| 466 |
+
req["kb_ids"] = req.pop("dataset_ids")
|
| 467 |
for kb_id in req["kb_ids"]:
|
| 468 |
+
if not KnowledgebaseService.accessible(kb_id, tenant_id):
|
| 469 |
return get_error_data_result(f"You don't own the dataset {kb_id}.")
|
| 470 |
kbs = KnowledgebaseService.query(id=kb_id)
|
| 471 |
kb = kbs[0]
|
| 472 |
if kb.chunk_num == 0:
|
| 473 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
| 474 |
uid = tenant_id
|
| 475 |
+
|
| 476 |
def stream():
|
| 477 |
nonlocal req, uid
|
| 478 |
try:
|
deepdoc/parser/pdf_parser.py
CHANGED
|
@@ -152,7 +152,7 @@ class RAGFlowPdfParser:
|
|
| 152 |
max(len(up["text"]), len(down["text"])),
|
| 153 |
len(tks_all) - len(tks_up) - len(tks_down),
|
| 154 |
len(tks_down) - len(tks_up),
|
| 155 |
-
tks_down[-1] == tks_up[-1],
|
| 156 |
max(down["in_row"], up["in_row"]),
|
| 157 |
abs(down["in_row"] - up["in_row"]),
|
| 158 |
len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,
|
|
|
|
| 152 |
max(len(up["text"]), len(down["text"])),
|
| 153 |
len(tks_all) - len(tks_up) - len(tks_down),
|
| 154 |
len(tks_down) - len(tks_up),
|
| 155 |
+
tks_down[-1] == tks_up[-1] if tks_down and tks_up else False,
|
| 156 |
max(down["in_row"], up["in_row"]),
|
| 157 |
abs(down["in_row"] - up["in_row"]),
|
| 158 |
len(tks_down) == 1 and rag_tokenizer.tag(tks_down[0]).find("n") >= 0,
|