liuhua
liuhua
committed on
Commit
·
533089d
1
Parent(s):
4b50c07
Fix bugs in API (#3103)
Browse files
### What problem does this PR solve?
Fix bugs in API
- [x] Bug Fix (non-breaking change which fixes an issue)
Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
- api/apps/sdk/chat.py +26 -5
- api/apps/sdk/dataset.py +37 -22
- api/apps/sdk/doc.py +38 -18
- api/apps/sdk/session.py +18 -7
- api/utils/api_utils.py +1 -1
- sdk/python/ragflow/modules/chat.py +1 -1
- sdk/python/ragflow/ragflow.py +3 -3
api/apps/sdk/chat.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
from flask import request
|
| 17 |
-
|
| 18 |
from api.db import StatusEnum
|
| 19 |
from api.db.services.dialog_service import DialogService
|
| 20 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
@@ -40,6 +40,10 @@ def create(tenant_id):
|
|
| 40 |
kb=kbs[0]
|
| 41 |
if kb.chunk_num == 0:
|
| 42 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
req["kb_ids"] = ids
|
| 44 |
# llm
|
| 45 |
llm = req.get("llm")
|
|
@@ -149,6 +153,8 @@ def update(tenant_id,chat_id):
|
|
| 149 |
return get_error_data_result(retmsg='You do not own the chat')
|
| 150 |
req =request.json
|
| 151 |
ids = req.get("dataset_ids")
|
|
|
|
|
|
|
| 152 |
if "dataset_ids" in req:
|
| 153 |
if not ids:
|
| 154 |
return get_error_data_result("`datasets` can't be empty")
|
|
@@ -160,6 +166,12 @@ def update(tenant_id,chat_id):
|
|
| 160 |
kb = kbs[0]
|
| 161 |
if kb.chunk_num == 0:
|
| 162 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
req["kb_ids"] = ids
|
| 164 |
llm = req.get("llm")
|
| 165 |
if llm:
|
|
@@ -225,10 +237,18 @@ def update(tenant_id,chat_id):
|
|
| 225 |
@token_required
|
| 226 |
def delete(tenant_id):
|
| 227 |
req = request.json
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
| 229 |
if not ids:
|
| 230 |
-
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
| 233 |
return get_error_data_result(retmsg=f"You don't own the chat {id}")
|
| 234 |
temp_dict = {"status": StatusEnum.INVALID.value}
|
|
@@ -260,7 +280,8 @@ def list_chat(tenant_id):
|
|
| 260 |
"quote": "show_quote",
|
| 261 |
"system": "prompt",
|
| 262 |
"rerank_id": "rerank_model",
|
| 263 |
-
"vector_similarity_weight": "keywords_similarity_weight"
|
|
|
|
| 264 |
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
|
| 265 |
for res in chats:
|
| 266 |
for key, value in res["prompt_config"].items():
|
|
|
|
| 14 |
# limitations under the License.
|
| 15 |
#
|
| 16 |
from flask import request
|
| 17 |
+
from api.settings import RetCode
|
| 18 |
from api.db import StatusEnum
|
| 19 |
from api.db.services.dialog_service import DialogService
|
| 20 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
|
|
| 40 |
kb=kbs[0]
|
| 41 |
if kb.chunk_num == 0:
|
| 42 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
| 43 |
+
kbs = KnowledgebaseService.get_by_ids(ids)
|
| 44 |
+
embd_count = list(set(kb.embd_id for kb in kbs))
|
| 45 |
+
if embd_count != 1:
|
| 46 |
+
return get_result(retmsg='Datasets use different embedding models."',retcode=RetCode.AUTHENTICATION_ERROR)
|
| 47 |
req["kb_ids"] = ids
|
| 48 |
# llm
|
| 49 |
llm = req.get("llm")
|
|
|
|
| 153 |
return get_error_data_result(retmsg='You do not own the chat')
|
| 154 |
req =request.json
|
| 155 |
ids = req.get("dataset_ids")
|
| 156 |
+
if "show_quotation" in req:
|
| 157 |
+
req["do_refer"]=req.pop("show_quotation")
|
| 158 |
if "dataset_ids" in req:
|
| 159 |
if not ids:
|
| 160 |
return get_error_data_result("`datasets` can't be empty")
|
|
|
|
| 166 |
kb = kbs[0]
|
| 167 |
if kb.chunk_num == 0:
|
| 168 |
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
| 169 |
+
kbs = KnowledgebaseService.get_by_ids(ids)
|
| 170 |
+
embd_count=list(set(kb.embd_id for kb in kbs))
|
| 171 |
+
if embd_count != 1 :
|
| 172 |
+
return get_result(
|
| 173 |
+
retmsg='Datasets use different embedding models."',
|
| 174 |
+
retcode=RetCode.AUTHENTICATION_ERROR)
|
| 175 |
req["kb_ids"] = ids
|
| 176 |
llm = req.get("llm")
|
| 177 |
if llm:
|
|
|
|
| 237 |
@token_required
|
| 238 |
def delete(tenant_id):
|
| 239 |
req = request.json
|
| 240 |
+
if not req:
|
| 241 |
+
ids=None
|
| 242 |
+
else:
|
| 243 |
+
ids=req.get("ids")
|
| 244 |
if not ids:
|
| 245 |
+
id_list = []
|
| 246 |
+
dias=DialogService.query(tenant_id=tenant_id,status=StatusEnum.VALID.value)
|
| 247 |
+
for dia in dias:
|
| 248 |
+
id_list.append(dia.id)
|
| 249 |
+
else:
|
| 250 |
+
id_list=ids
|
| 251 |
+
for id in id_list:
|
| 252 |
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
| 253 |
return get_error_data_result(retmsg=f"You don't own the chat {id}")
|
| 254 |
temp_dict = {"status": StatusEnum.INVALID.value}
|
|
|
|
| 280 |
"quote": "show_quote",
|
| 281 |
"system": "prompt",
|
| 282 |
"rerank_id": "rerank_model",
|
| 283 |
+
"vector_similarity_weight": "keywords_similarity_weight",
|
| 284 |
+
"do_refer":"show_quotation"}
|
| 285 |
key_list = ["similarity_threshold", "vector_similarity_weight", "top_n", "rerank_id"]
|
| 286 |
for res in chats:
|
| 287 |
for key, value in res["prompt_config"].items():
|
api/apps/sdk/dataset.py
CHANGED
|
@@ -21,7 +21,7 @@ from api.db.services.document_service import DocumentService
|
|
| 21 |
from api.db.services.file2document_service import File2DocumentService
|
| 22 |
from api.db.services.file_service import FileService
|
| 23 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 24 |
-
from api.db.services.llm_service import TenantLLMService
|
| 25 |
from api.db.services.user_service import TenantService
|
| 26 |
from api.settings import RetCode
|
| 27 |
from api.utils import get_uuid
|
|
@@ -68,9 +68,12 @@ def create(tenant_id):
|
|
| 68 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
| 69 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
| 70 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
| 71 |
-
|
| 72 |
-
|
| 73 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
|
|
|
|
|
|
|
|
|
| 74 |
key_mapping = {
|
| 75 |
"chunk_num": "chunk_count",
|
| 76 |
"doc_num": "document_count",
|
|
@@ -92,25 +95,32 @@ def create(tenant_id):
|
|
| 92 |
@token_required
|
| 93 |
def delete(tenant_id):
|
| 94 |
req = request.json
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
| 96 |
if not ids:
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
|
| 101 |
if not kbs:
|
| 102 |
return get_error_data_result(retmsg=f"You don't own the dataset {id}")
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
return get_error_data_result(
|
| 106 |
-
retmsg="
|
| 107 |
-
|
| 108 |
-
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
| 109 |
-
File2DocumentService.delete_by_document_id(doc.id)
|
| 110 |
-
if not KnowledgebaseService.delete_by_id(id):
|
| 111 |
-
return get_error_data_result(
|
| 112 |
-
retmsg="Delete dataset error.(Database error)")
|
| 113 |
-
return get_result(retcode=RetCode.SUCCESS)
|
| 114 |
|
| 115 |
@manager.route('/datasets/<dataset_id>', methods=['PUT'])
|
| 116 |
@token_required
|
|
@@ -139,8 +149,9 @@ def update(tenant_id,dataset_id):
|
|
| 139 |
retmsg="Can't change `tenant_id`.")
|
| 140 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
| 141 |
if "parser_config" in req:
|
| 142 |
-
|
| 143 |
-
|
|
|
|
| 144 |
if "chunk_count" in req:
|
| 145 |
if req["chunk_count"] != kb.chunk_num:
|
| 146 |
return get_error_data_result(
|
|
@@ -157,7 +168,8 @@ def update(tenant_id,dataset_id):
|
|
| 157 |
retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
|
| 158 |
req['parser_id'] = req.pop('chunk_method')
|
| 159 |
if req['parser_id'] != kb.parser_id:
|
| 160 |
-
req
|
|
|
|
| 161 |
if "embedding_model" in req:
|
| 162 |
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
| 163 |
return get_error_data_result(
|
|
@@ -168,9 +180,12 @@ def update(tenant_id,dataset_id):
|
|
| 168 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
| 169 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
| 170 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
| 171 |
-
|
| 172 |
-
|
| 173 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
|
|
|
|
|
|
|
|
|
| 174 |
req['embd_id'] = req.pop('embedding_model')
|
| 175 |
if "name" in req:
|
| 176 |
req["name"] = req["name"].strip()
|
|
|
|
| 21 |
from api.db.services.file2document_service import File2DocumentService
|
| 22 |
from api.db.services.file_service import FileService
|
| 23 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
| 24 |
+
from api.db.services.llm_service import TenantLLMService,LLMService
|
| 25 |
from api.db.services.user_service import TenantService
|
| 26 |
from api.settings import RetCode
|
| 27 |
from api.utils import get_uuid
|
|
|
|
| 68 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
| 69 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
| 70 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
| 71 |
+
embd_model=LLMService.query(llm_name=req["embedding_model"],model_type="embedding")
|
| 72 |
+
if not embd_model:
|
| 73 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
| 74 |
+
if embd_model:
|
| 75 |
+
if req["embedding_model"] not in valid_embedding_models and not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
|
| 76 |
+
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
| 77 |
key_mapping = {
|
| 78 |
"chunk_num": "chunk_count",
|
| 79 |
"doc_num": "document_count",
|
|
|
|
| 95 |
@token_required
|
| 96 |
def delete(tenant_id):
|
| 97 |
req = request.json
|
| 98 |
+
if not req:
|
| 99 |
+
ids=None
|
| 100 |
+
else:
|
| 101 |
+
ids=req.get("ids")
|
| 102 |
if not ids:
|
| 103 |
+
id_list = []
|
| 104 |
+
kbs=KnowledgebaseService.query(tenant_id=tenant_id)
|
| 105 |
+
for kb in kbs:
|
| 106 |
+
id_list.append(kb.id)
|
| 107 |
+
else:
|
| 108 |
+
id_list=ids
|
| 109 |
+
for id in id_list:
|
| 110 |
kbs = KnowledgebaseService.query(id=id, tenant_id=tenant_id)
|
| 111 |
if not kbs:
|
| 112 |
return get_error_data_result(retmsg=f"You don't own the dataset {id}")
|
| 113 |
+
for doc in DocumentService.query(kb_id=id):
|
| 114 |
+
if not DocumentService.remove_document(doc, tenant_id):
|
| 115 |
+
return get_error_data_result(
|
| 116 |
+
retmsg="Remove document error.(Database error)")
|
| 117 |
+
f2d = File2DocumentService.get_by_document_id(doc.id)
|
| 118 |
+
FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
|
| 119 |
+
File2DocumentService.delete_by_document_id(doc.id)
|
| 120 |
+
if not KnowledgebaseService.delete_by_id(id):
|
| 121 |
return get_error_data_result(
|
| 122 |
+
retmsg="Delete dataset error.(Database error)")
|
| 123 |
+
return get_result(retcode=RetCode.SUCCESS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
@manager.route('/datasets/<dataset_id>', methods=['PUT'])
|
| 126 |
@token_required
|
|
|
|
| 149 |
retmsg="Can't change `tenant_id`.")
|
| 150 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
| 151 |
if "parser_config" in req:
|
| 152 |
+
temp_dict=kb.parser_config
|
| 153 |
+
temp_dict.update(req["parser_config"])
|
| 154 |
+
req["parser_config"] = temp_dict
|
| 155 |
if "chunk_count" in req:
|
| 156 |
if req["chunk_count"] != kb.chunk_num:
|
| 157 |
return get_error_data_result(
|
|
|
|
| 168 |
retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
|
| 169 |
req['parser_id'] = req.pop('chunk_method')
|
| 170 |
if req['parser_id'] != kb.parser_id:
|
| 171 |
+
if not req.get("parser_config"):
|
| 172 |
+
req["parser_config"] = get_parser_config(chunk_method, parser_config)
|
| 173 |
if "embedding_model" in req:
|
| 174 |
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
| 175 |
return get_error_data_result(
|
|
|
|
| 180 |
"BAAI/bge-small-zh-v1.5","jinaai/jina-embeddings-v2-base-en","jinaai/jina-embeddings-v2-small-en",
|
| 181 |
"nomic-ai/nomic-embed-text-v1.5","sentence-transformers/all-MiniLM-L6-v2","text-embedding-v2",
|
| 182 |
"text-embedding-v3","maidalun1020/bce-embedding-base_v1"]
|
| 183 |
+
embd_model=LLMService.query(llm_name=req["embedding_model"],model_type="embedding")
|
| 184 |
+
if not embd_model:
|
| 185 |
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
| 186 |
+
if embd_model:
|
| 187 |
+
if req["embedding_model"] not in valid_embedding_models and not TenantLLMService.query(tenant_id=tenant_id,model_type="embedding", llm_name=req.get("embedding_model")):
|
| 188 |
+
return get_error_data_result(f"`embedding_model` {req.get('embedding_model')} doesn't exist")
|
| 189 |
req['embd_id'] = req.pop('embedding_model')
|
| 190 |
if "name" in req:
|
| 191 |
req["name"] = req["name"].strip()
|
api/apps/sdk/doc.py
CHANGED
|
@@ -46,6 +46,9 @@ from rag.utils.es_conn import ELASTICSEARCH
|
|
| 46 |
from rag.utils.storage_factory import STORAGE_IMPL
|
| 47 |
import os
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
@manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
|
| 51 |
@token_required
|
|
@@ -58,11 +61,21 @@ def upload(dataset_id, tenant_id):
|
|
| 58 |
if file_obj.filename == '':
|
| 59 |
return get_result(
|
| 60 |
retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
| 63 |
if not e:
|
| 64 |
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
| 65 |
-
err, files
|
| 66 |
if err:
|
| 67 |
return get_result(
|
| 68 |
retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
|
@@ -140,6 +153,7 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
| 140 |
if not e:
|
| 141 |
return get_error_data_result(retmsg="Document not found!")
|
| 142 |
req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
|
|
|
|
| 143 |
if doc.token_num > 0:
|
| 144 |
e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
|
| 145 |
doc.process_duation * -1)
|
|
@@ -210,10 +224,10 @@ def list_docs(dataset_id, tenant_id):
|
|
| 210 |
}
|
| 211 |
renamed_doc = {}
|
| 212 |
for key, value in doc.items():
|
| 213 |
-
if key =="run":
|
| 214 |
-
renamed_doc["run"]=run_mapping.get(str(value))
|
| 215 |
new_key = key_mapping.get(key, key)
|
| 216 |
renamed_doc[new_key] = value
|
|
|
|
|
|
|
| 217 |
renamed_doc_list.append(renamed_doc)
|
| 218 |
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
| 219 |
|
|
@@ -280,14 +294,11 @@ def parse(tenant_id,dataset_id):
|
|
| 280 |
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
| 281 |
if not doc:
|
| 282 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
| 283 |
-
if doc[0].progress != 0.0:
|
| 284 |
-
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
| 285 |
info = {"run": "1", "progress": 0}
|
| 286 |
info["progress_msg"] = ""
|
| 287 |
info["chunk_num"] = 0
|
| 288 |
info["token_num"] = 0
|
| 289 |
DocumentService.update_by_id(id, info)
|
| 290 |
-
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
| 291 |
ELASTICSEARCH.deleteByQuery(
|
| 292 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
| 293 |
TaskService.filter_delete([Task.doc_id == id])
|
|
@@ -312,10 +323,8 @@ def stop_parsing(tenant_id,dataset_id):
|
|
| 312 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
| 313 |
if doc[0].progress == 100.0 or doc[0].progress == 0.0:
|
| 314 |
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
| 315 |
-
info = {"run": "2", "progress": 0}
|
| 316 |
DocumentService.update_by_id(id, info)
|
| 317 |
-
# if str(req["run"]) == TaskStatus.CANCEL.value:
|
| 318 |
-
tenant_id = DocumentService.get_tenant_id(id)
|
| 319 |
ELASTICSEARCH.deleteByQuery(
|
| 320 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
| 321 |
return get_result()
|
|
@@ -355,10 +364,10 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
| 355 |
doc=doc.to_dict()
|
| 356 |
renamed_doc = {}
|
| 357 |
for key, value in doc.items():
|
| 358 |
-
if key == "run":
|
| 359 |
-
renamed_doc["run"] = run_mapping.get(str(value))
|
| 360 |
new_key = key_mapping.get(key, key)
|
| 361 |
renamed_doc[new_key] = value
|
|
|
|
|
|
|
| 362 |
res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
|
| 363 |
origin_chunks = []
|
| 364 |
sign = 0
|
|
@@ -398,12 +407,17 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
| 398 |
"content_with_weight": "content",
|
| 399 |
"doc_id": "document_id",
|
| 400 |
"important_kwd": "important_keywords",
|
| 401 |
-
"img_id": "image_id"
|
|
|
|
| 402 |
}
|
| 403 |
renamed_chunk = {}
|
| 404 |
for key, value in chunk.items():
|
| 405 |
new_key = key_mapping.get(key, key)
|
| 406 |
renamed_chunk[new_key] = value
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
res["chunks"].append(renamed_chunk)
|
| 408 |
return get_result(data=res)
|
| 409 |
|
|
@@ -441,7 +455,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
| 441 |
embd_id = DocumentService.get_embd_id(document_id)
|
| 442 |
embd_mdl = TenantLLMService.model_instance(
|
| 443 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
| 444 |
-
|
| 445 |
v, c = embd_mdl.encode([doc.name, req["content"]])
|
| 446 |
v = 0.1 * v[0] + 0.9 * v[1]
|
| 447 |
d["q_%d_vec" % len(v)] = v.tolist()
|
|
@@ -459,7 +473,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
| 459 |
"kb_id": "dataset_id",
|
| 460 |
"create_timestamp_flt": "create_timestamp",
|
| 461 |
"create_time": "create_time",
|
| 462 |
-
"document_keyword": "document"
|
| 463 |
}
|
| 464 |
renamed_chunk = {}
|
| 465 |
for key, value in d.items():
|
|
@@ -480,12 +494,18 @@ def rm_chunk(tenant_id,dataset_id,document_id):
|
|
| 480 |
return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
|
| 481 |
doc = doc[0]
|
| 482 |
req = request.json
|
| 483 |
-
if not req.get("chunk_ids"):
|
| 484 |
-
return get_error_data_result("`chunk_ids` is required")
|
| 485 |
query = {
|
| 486 |
"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
|
| 487 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
| 488 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 489 |
if chunk_id not in sres.ids:
|
| 490 |
return get_error_data_result(f"Chunk {chunk_id} not found")
|
| 491 |
if not ELASTICSEARCH.deleteByQuery(
|
|
|
|
| 46 |
from rag.utils.storage_factory import STORAGE_IMPL
|
| 47 |
import os
|
| 48 |
|
| 49 |
+
MAXIMUM_OF_UPLOADING_FILES = 256
|
| 50 |
+
|
| 51 |
+
|
| 52 |
|
| 53 |
@manager.route('/datasets/<dataset_id>/documents', methods=['POST'])
|
| 54 |
@token_required
|
|
|
|
| 61 |
if file_obj.filename == '':
|
| 62 |
return get_result(
|
| 63 |
retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
|
| 64 |
+
# total size
|
| 65 |
+
total_size = 0
|
| 66 |
+
for file_obj in file_objs:
|
| 67 |
+
file_obj.seek(0, os.SEEK_END)
|
| 68 |
+
total_size += file_obj.tell()
|
| 69 |
+
file_obj.seek(0)
|
| 70 |
+
MAX_TOTAL_FILE_SIZE=10*1024*1024
|
| 71 |
+
if total_size > MAX_TOTAL_FILE_SIZE:
|
| 72 |
+
return get_result(
|
| 73 |
+
retmsg=f'Total file size exceeds 10MB limit! ({total_size / (1024 * 1024):.2f} MB)',
|
| 74 |
+
retcode=RetCode.ARGUMENT_ERROR)
|
| 75 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
| 76 |
if not e:
|
| 77 |
raise LookupError(f"Can't find the dataset with ID {dataset_id}!")
|
| 78 |
+
err, files= FileService.upload_document(kb, file_objs, tenant_id)
|
| 79 |
if err:
|
| 80 |
return get_result(
|
| 81 |
retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
|
|
|
|
| 153 |
if not e:
|
| 154 |
return get_error_data_result(retmsg="Document not found!")
|
| 155 |
req["parser_config"] = get_parser_config(req["chunk_method"], req.get("parser_config"))
|
| 156 |
+
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
| 157 |
if doc.token_num > 0:
|
| 158 |
e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
|
| 159 |
doc.process_duation * -1)
|
|
|
|
| 224 |
}
|
| 225 |
renamed_doc = {}
|
| 226 |
for key, value in doc.items():
|
|
|
|
|
|
|
| 227 |
new_key = key_mapping.get(key, key)
|
| 228 |
renamed_doc[new_key] = value
|
| 229 |
+
if key =="run":
|
| 230 |
+
renamed_doc["run"]=run_mapping.get(value)
|
| 231 |
renamed_doc_list.append(renamed_doc)
|
| 232 |
return get_result(data={"total": tol, "docs": renamed_doc_list})
|
| 233 |
|
|
|
|
| 294 |
doc = DocumentService.query(id=id,kb_id=dataset_id)
|
| 295 |
if not doc:
|
| 296 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
|
|
|
|
|
|
| 297 |
info = {"run": "1", "progress": 0}
|
| 298 |
info["progress_msg"] = ""
|
| 299 |
info["chunk_num"] = 0
|
| 300 |
info["token_num"] = 0
|
| 301 |
DocumentService.update_by_id(id, info)
|
|
|
|
| 302 |
ELASTICSEARCH.deleteByQuery(
|
| 303 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
| 304 |
TaskService.filter_delete([Task.doc_id == id])
|
|
|
|
| 323 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
| 324 |
if doc[0].progress == 100.0 or doc[0].progress == 0.0:
|
| 325 |
return get_error_data_result("Can't stop parsing document with progress at 0 or 100")
|
| 326 |
+
info = {"run": "2", "progress": 0,"chunk_num":0}
|
| 327 |
DocumentService.update_by_id(id, info)
|
|
|
|
|
|
|
| 328 |
ELASTICSEARCH.deleteByQuery(
|
| 329 |
Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
|
| 330 |
return get_result()
|
|
|
|
| 364 |
doc=doc.to_dict()
|
| 365 |
renamed_doc = {}
|
| 366 |
for key, value in doc.items():
|
|
|
|
|
|
|
| 367 |
new_key = key_mapping.get(key, key)
|
| 368 |
renamed_doc[new_key] = value
|
| 369 |
+
if key == "run":
|
| 370 |
+
renamed_doc["run"] = run_mapping.get(str(value))
|
| 371 |
res = {"total": sres.total, "chunks": [], "doc": renamed_doc}
|
| 372 |
origin_chunks = []
|
| 373 |
sign = 0
|
|
|
|
| 407 |
"content_with_weight": "content",
|
| 408 |
"doc_id": "document_id",
|
| 409 |
"important_kwd": "important_keywords",
|
| 410 |
+
"img_id": "image_id",
|
| 411 |
+
"available_int":"available"
|
| 412 |
}
|
| 413 |
renamed_chunk = {}
|
| 414 |
for key, value in chunk.items():
|
| 415 |
new_key = key_mapping.get(key, key)
|
| 416 |
renamed_chunk[new_key] = value
|
| 417 |
+
if renamed_chunk["available"] == "0":
|
| 418 |
+
renamed_chunk["available"] = False
|
| 419 |
+
if renamed_chunk["available"] == "1":
|
| 420 |
+
renamed_chunk["available"] = True
|
| 421 |
res["chunks"].append(renamed_chunk)
|
| 422 |
return get_result(data=res)
|
| 423 |
|
|
|
|
| 455 |
embd_id = DocumentService.get_embd_id(document_id)
|
| 456 |
embd_mdl = TenantLLMService.model_instance(
|
| 457 |
tenant_id, LLMType.EMBEDDING.value, embd_id)
|
| 458 |
+
print(embd_mdl,flush=True)
|
| 459 |
v, c = embd_mdl.encode([doc.name, req["content"]])
|
| 460 |
v = 0.1 * v[0] + 0.9 * v[1]
|
| 461 |
d["q_%d_vec" % len(v)] = v.tolist()
|
|
|
|
| 473 |
"kb_id": "dataset_id",
|
| 474 |
"create_timestamp_flt": "create_timestamp",
|
| 475 |
"create_time": "create_time",
|
| 476 |
+
"document_keyword": "document"
|
| 477 |
}
|
| 478 |
renamed_chunk = {}
|
| 479 |
for key, value in d.items():
|
|
|
|
| 494 |
return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
|
| 495 |
doc = doc[0]
|
| 496 |
req = request.json
|
|
|
|
|
|
|
| 497 |
query = {
|
| 498 |
"doc_ids": [doc.id], "page": 1, "size": 1024, "question": "", "sort": True}
|
| 499 |
sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
|
| 500 |
+
if not req:
|
| 501 |
+
chunk_ids=None
|
| 502 |
+
else:
|
| 503 |
+
chunk_ids=req.get("chunk_ids")
|
| 504 |
+
if not chunk_ids:
|
| 505 |
+
chunk_list=sres.ids
|
| 506 |
+
else:
|
| 507 |
+
chunk_list=chunk_ids
|
| 508 |
+
for chunk_id in chunk_list:
|
| 509 |
if chunk_id not in sres.ids:
|
| 510 |
return get_error_data_result(f"Chunk {chunk_id} not found")
|
| 511 |
if not ELASTICSEARCH.deleteByQuery(
|
api/apps/sdk/session.py
CHANGED
|
@@ -100,7 +100,7 @@ def completion(tenant_id,chat_id):
|
|
| 100 |
return get_error_data_result(retmsg="Session does not exist")
|
| 101 |
conv = conv[0]
|
| 102 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
| 103 |
-
return get_error_data_result(retmsg="You do not own the
|
| 104 |
msg = []
|
| 105 |
question = {
|
| 106 |
"content": req.get("question"),
|
|
@@ -168,9 +168,6 @@ def list(chat_id,tenant_id):
|
|
| 168 |
return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
|
| 169 |
id = request.args.get("id")
|
| 170 |
name = request.args.get("name")
|
| 171 |
-
session = ConversationService.query(id=id,name=name,dialog_id=chat_id)
|
| 172 |
-
if not session:
|
| 173 |
-
return get_error_data_result(retmsg="The session doesn't exist")
|
| 174 |
page_number = int(request.args.get("page", 1))
|
| 175 |
items_per_page = int(request.args.get("page_size", 1024))
|
| 176 |
orderby = request.args.get("orderby", "create_time")
|
|
@@ -183,6 +180,10 @@ def list(chat_id,tenant_id):
|
|
| 183 |
return get_result(data=[])
|
| 184 |
for conv in convs:
|
| 185 |
conv['messages'] = conv.pop("message")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
conv["chat"] = conv.pop("dialog_id")
|
| 187 |
if conv["reference"]:
|
| 188 |
messages = conv["messages"]
|
|
@@ -218,10 +219,20 @@ def list(chat_id,tenant_id):
|
|
| 218 |
def delete(tenant_id,chat_id):
|
| 219 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
| 220 |
return get_error_data_result(retmsg="You don't own the chat")
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
if not ids:
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
| 226 |
if not conv:
|
| 227 |
return get_error_data_result(retmsg="The chat doesn't own the session")
|
|
|
|
| 100 |
return get_error_data_result(retmsg="Session does not exist")
|
| 101 |
conv = conv[0]
|
| 102 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
| 103 |
+
return get_error_data_result(retmsg="You do not own the chat")
|
| 104 |
msg = []
|
| 105 |
question = {
|
| 106 |
"content": req.get("question"),
|
|
|
|
| 168 |
return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
|
| 169 |
id = request.args.get("id")
|
| 170 |
name = request.args.get("name")
|
|
|
|
|
|
|
|
|
|
| 171 |
page_number = int(request.args.get("page", 1))
|
| 172 |
items_per_page = int(request.args.get("page_size", 1024))
|
| 173 |
orderby = request.args.get("orderby", "create_time")
|
|
|
|
| 180 |
return get_result(data=[])
|
| 181 |
for conv in convs:
|
| 182 |
conv['messages'] = conv.pop("message")
|
| 183 |
+
infos = conv["messages"]
|
| 184 |
+
for info in infos:
|
| 185 |
+
if "prompt" in info:
|
| 186 |
+
info.pop("prompt")
|
| 187 |
conv["chat"] = conv.pop("dialog_id")
|
| 188 |
if conv["reference"]:
|
| 189 |
messages = conv["messages"]
|
|
|
|
| 219 |
def delete(tenant_id,chat_id):
|
| 220 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
| 221 |
return get_error_data_result(retmsg="You don't own the chat")
|
| 222 |
+
req = request.json
|
| 223 |
+
convs = ConversationService.query(dialog_id=chat_id)
|
| 224 |
+
if not req:
|
| 225 |
+
ids = None
|
| 226 |
+
else:
|
| 227 |
+
ids=req.get("ids")
|
| 228 |
+
|
| 229 |
if not ids:
|
| 230 |
+
conv_list = []
|
| 231 |
+
for conv in convs:
|
| 232 |
+
conv_list.append(conv.id)
|
| 233 |
+
else:
|
| 234 |
+
conv_list=ids
|
| 235 |
+
for id in conv_list:
|
| 236 |
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
| 237 |
if not conv:
|
| 238 |
return get_error_data_result(retmsg="The chat doesn't own the session")
|
api/utils/api_utils.py
CHANGED
|
@@ -344,7 +344,7 @@ def get_parser_config(chunk_method,parser_config):
|
|
| 344 |
return parser_config
|
| 345 |
if not chunk_method:
|
| 346 |
chunk_method = "naive"
|
| 347 |
-
key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"
|
| 348 |
"qa":{"raptor":{"use_raptor":False}},
|
| 349 |
"resume":None,
|
| 350 |
"manual":{"raptor":{"use_raptor":False}},
|
|
|
|
| 344 |
return parser_config
|
| 345 |
if not chunk_method:
|
| 346 |
chunk_method = "naive"
|
| 347 |
+
key_mapping={"naive":{"chunk_token_num": 128, "delimiter": "\\n!?;。;!?", "html4excel": False,"layout_recognize": True, "raptor": {"use_raptor": False}},
|
| 348 |
"qa":{"raptor":{"use_raptor":False}},
|
| 349 |
"resume":None,
|
| 350 |
"manual":{"raptor":{"use_raptor":False}},
|
sdk/python/ragflow/modules/chat.py
CHANGED
|
@@ -68,7 +68,7 @@ class Chat(Base):
|
|
| 68 |
return result_list
|
| 69 |
raise Exception(res["message"])
|
| 70 |
|
| 71 |
-
def delete_sessions(self,ids):
|
| 72 |
res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
|
| 73 |
res = res.json()
|
| 74 |
if res.get("code") != 0:
|
|
|
|
| 68 |
return result_list
|
| 69 |
raise Exception(res["message"])
|
| 70 |
|
| 71 |
+
def delete_sessions(self,ids:List[str]=None):
|
| 72 |
res = self.rm(f"/chats/{self.id}/sessions", {"ids": ids})
|
| 73 |
res = res.json()
|
| 74 |
if res.get("code") != 0:
|
sdk/python/ragflow/ragflow.py
CHANGED
|
@@ -64,7 +64,7 @@ class RAGFlow:
|
|
| 64 |
return DataSet(self, res["data"])
|
| 65 |
raise Exception(res["message"])
|
| 66 |
|
| 67 |
-
def delete_datasets(self, ids: List[str]):
|
| 68 |
res = self.delete("/datasets",{"ids": ids})
|
| 69 |
res=res.json()
|
| 70 |
if res.get("code") != 0:
|
|
@@ -135,9 +135,9 @@ class RAGFlow:
|
|
| 135 |
return Chat(self, res["data"])
|
| 136 |
raise Exception(res["message"])
|
| 137 |
|
| 138 |
-
def delete_chats(self,ids: List[str] = None
|
| 139 |
res = self.delete('/chats',
|
| 140 |
-
{"ids":ids
|
| 141 |
res = res.json()
|
| 142 |
if res.get("code") != 0:
|
| 143 |
raise Exception(res["message"])
|
|
|
|
| 64 |
return DataSet(self, res["data"])
|
| 65 |
raise Exception(res["message"])
|
| 66 |
|
| 67 |
+
def delete_datasets(self, ids: List[str] = None):
|
| 68 |
res = self.delete("/datasets",{"ids": ids})
|
| 69 |
res=res.json()
|
| 70 |
if res.get("code") != 0:
|
|
|
|
| 135 |
return Chat(self, res["data"])
|
| 136 |
raise Exception(res["message"])
|
| 137 |
|
| 138 |
+
def delete_chats(self,ids: List[str] = None) -> bool:
|
| 139 |
res = self.delete('/chats',
|
| 140 |
+
{"ids":ids})
|
| 141 |
res = res.json()
|
| 142 |
if res.get("code") != 0:
|
| 143 |
raise Exception(res["message"])
|