| | from typing import List, Optional |
| | from pydantic import BaseModel |
| | from fastapi import APIRouter, Depends, HTTPException, status, Request, Query |
| | from fastapi.responses import StreamingResponse |
| | from fastapi.concurrency import run_in_threadpool |
| | import logging |
| | import io |
| | import zipfile |
| |
|
| | from sqlalchemy.orm import Session |
| | from open_webui.internal.db import get_session |
| | from open_webui.models.groups import Groups |
| | from open_webui.models.knowledge import ( |
| | KnowledgeFileListResponse, |
| | Knowledges, |
| | KnowledgeForm, |
| | KnowledgeResponse, |
| | KnowledgeUserResponse, |
| | ) |
| | from open_webui.models.files import Files, FileModel, FileMetadataResponse |
| | from open_webui.retrieval.vector.factory import VECTOR_DB_CLIENT |
| | from open_webui.routers.retrieval import ( |
| | process_file, |
| | ProcessFileForm, |
| | process_files_batch, |
| | BatchProcessFilesForm, |
| | ) |
| | from open_webui.storage.provider import Storage |
| |
|
| | from open_webui.constants import ERROR_MESSAGES |
| | from open_webui.utils.auth import get_verified_user, get_admin_user |
| | from open_webui.utils.access_control import has_permission |
| | from open_webui.models.access_grants import AccessGrants, has_public_read_access_grant |
| |
|
| |
|
| | from open_webui.config import BYPASS_ADMIN_ACCESS_CONTROL |
| | from open_webui.models.models import Models, ModelForm |
| |
|
| | log = logging.getLogger(__name__) |
| |
|
| | router = APIRouter() |
| |
|
| | |
| | |
| | |
| |
|
| | PAGE_ITEM_COUNT = 30 |
| |
|
| | |
| | |
| | |
| |
|
| | KNOWLEDGE_BASES_COLLECTION = "knowledge-bases" |
| |
|
| |
|
| | async def embed_knowledge_base_metadata( |
| | request: Request, |
| | knowledge_base_id: str, |
| | name: str, |
| | description: str, |
| | ) -> bool: |
| | """Generate and store embedding for knowledge base.""" |
| | try: |
| | content = f"{name}\n\n{description}" if description else name |
| | embedding = await request.app.state.EMBEDDING_FUNCTION(content) |
| | VECTOR_DB_CLIENT.upsert( |
| | collection_name=KNOWLEDGE_BASES_COLLECTION, |
| | items=[ |
| | { |
| | "id": knowledge_base_id, |
| | "text": content, |
| | "vector": embedding, |
| | "metadata": { |
| | "knowledge_base_id": knowledge_base_id, |
| | }, |
| | } |
| | ], |
| | ) |
| | return True |
| | except Exception as e: |
| | log.error(f"Failed to embed knowledge base {knowledge_base_id}: {e}") |
| | return False |
| |
|
| |
|
| | def remove_knowledge_base_metadata_embedding(knowledge_base_id: str) -> bool: |
| | """Remove knowledge base embedding.""" |
| | try: |
| | VECTOR_DB_CLIENT.delete( |
| | collection_name=KNOWLEDGE_BASES_COLLECTION, |
| | ids=[knowledge_base_id], |
| | ) |
| | return True |
| | except Exception as e: |
| | log.debug(f"Failed to remove embedding for {knowledge_base_id}: {e}") |
| | return False |
| |
|
| |
|
| | class KnowledgeAccessResponse(KnowledgeUserResponse): |
| | write_access: Optional[bool] = False |
| |
|
| |
|
| | class KnowledgeAccessListResponse(BaseModel): |
| | items: list[KnowledgeAccessResponse] |
| | total: int |
| |
|
| |
|
| | @router.get("/", response_model=KnowledgeAccessListResponse) |
| | async def get_knowledge_bases( |
| | page: Optional[int] = 1, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | page = max(page, 1) |
| | limit = PAGE_ITEM_COUNT |
| | skip = (page - 1) * limit |
| |
|
| | filter = {} |
| | groups = Groups.get_groups_by_member_id(user.id, db=db) |
| | user_group_ids = {group.id for group in groups} |
| |
|
| | if not user.role == "admin" or not BYPASS_ADMIN_ACCESS_CONTROL: |
| | if groups: |
| | filter["group_ids"] = [group.id for group in groups] |
| |
|
| | filter["user_id"] = user.id |
| |
|
| | result = Knowledges.search_knowledge_bases( |
| | user.id, filter=filter, skip=skip, limit=limit, db=db |
| | ) |
| |
|
| | |
| | knowledge_base_ids = [knowledge_base.id for knowledge_base in result.items] |
| | writable_knowledge_base_ids = AccessGrants.get_accessible_resource_ids( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_ids=knowledge_base_ids, |
| | permission="write", |
| | user_group_ids=user_group_ids, |
| | db=db, |
| | ) |
| |
|
| | return KnowledgeAccessListResponse( |
| | items=[ |
| | KnowledgeAccessResponse( |
| | **knowledge_base.model_dump(), |
| | write_access=( |
| | user.id == knowledge_base.user_id |
| | or (user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL) |
| | or knowledge_base.id in writable_knowledge_base_ids |
| | ), |
| | ) |
| | for knowledge_base in result.items |
| | ], |
| | total=result.total, |
| | ) |
| |
|
| |
|
| | @router.get("/search", response_model=KnowledgeAccessListResponse) |
| | async def search_knowledge_bases( |
| | query: Optional[str] = None, |
| | view_option: Optional[str] = None, |
| | page: Optional[int] = 1, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | page = max(page, 1) |
| | limit = PAGE_ITEM_COUNT |
| | skip = (page - 1) * limit |
| |
|
| | filter = {} |
| | if query: |
| | filter["query"] = query |
| | if view_option: |
| | filter["view_option"] = view_option |
| |
|
| | groups = Groups.get_groups_by_member_id(user.id, db=db) |
| | user_group_ids = {group.id for group in groups} |
| |
|
| | if not user.role == "admin" or not BYPASS_ADMIN_ACCESS_CONTROL: |
| | if groups: |
| | filter["group_ids"] = [group.id for group in groups] |
| |
|
| | filter["user_id"] = user.id |
| |
|
| | result = Knowledges.search_knowledge_bases( |
| | user.id, filter=filter, skip=skip, limit=limit, db=db |
| | ) |
| |
|
| | |
| | knowledge_base_ids = [knowledge_base.id for knowledge_base in result.items] |
| | writable_knowledge_base_ids = AccessGrants.get_accessible_resource_ids( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_ids=knowledge_base_ids, |
| | permission="write", |
| | user_group_ids=user_group_ids, |
| | db=db, |
| | ) |
| |
|
| | return KnowledgeAccessListResponse( |
| | items=[ |
| | KnowledgeAccessResponse( |
| | **knowledge_base.model_dump(), |
| | write_access=( |
| | user.id == knowledge_base.user_id |
| | or (user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL) |
| | or knowledge_base.id in writable_knowledge_base_ids |
| | ), |
| | ) |
| | for knowledge_base in result.items |
| | ], |
| | total=result.total, |
| | ) |
| |
|
| |
|
| | @router.get("/search/files", response_model=KnowledgeFileListResponse) |
| | async def search_knowledge_files( |
| | query: Optional[str] = None, |
| | page: Optional[int] = 1, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | page = max(page, 1) |
| | limit = PAGE_ITEM_COUNT |
| | skip = (page - 1) * limit |
| |
|
| | filter = {} |
| | if query: |
| | filter["query"] = query |
| |
|
| | groups = Groups.get_groups_by_member_id(user.id, db=db) |
| | if groups: |
| | filter["group_ids"] = [group.id for group in groups] |
| |
|
| | filter["user_id"] = user.id |
| |
|
| | return Knowledges.search_knowledge_files( |
| | filter=filter, skip=skip, limit=limit, db=db |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/create", response_model=Optional[KnowledgeResponse]) |
| | async def create_new_knowledge( |
| | request: Request, |
| | form_data: KnowledgeForm, |
| | user=Depends(get_verified_user), |
| | ): |
| | |
| | |
| | |
| | |
| | if user.role != "admin" and not has_permission( |
| | user.id, "workspace.knowledge", request.app.state.config.USER_PERMISSIONS |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_401_UNAUTHORIZED, |
| | detail=ERROR_MESSAGES.UNAUTHORIZED, |
| | ) |
| |
|
| | |
| | if ( |
| | user.role != "admin" |
| | and has_public_read_access_grant(form_data.access_grants) |
| | and not has_permission( |
| | user.id, |
| | "sharing.public_knowledge", |
| | request.app.state.config.USER_PERMISSIONS, |
| | ) |
| | ): |
| | form_data.access_grants = [] |
| |
|
| | knowledge = Knowledges.insert_new_knowledge(user.id, form_data) |
| |
|
| | if knowledge: |
| | |
| | await embed_knowledge_base_metadata( |
| | request, |
| | knowledge.id, |
| | knowledge.name, |
| | knowledge.description, |
| | ) |
| | return knowledge |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.FILE_EXISTS, |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/reindex", response_model=bool) |
| | async def reindex_knowledge_files( |
| | request: Request, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | if user.role != "admin": |
| | raise HTTPException( |
| | status_code=status.HTTP_401_UNAUTHORIZED, |
| | detail=ERROR_MESSAGES.UNAUTHORIZED, |
| | ) |
| |
|
| | knowledge_bases = Knowledges.get_knowledge_bases(db=db) |
| |
|
| | log.info(f"Starting reindexing for {len(knowledge_bases)} knowledge bases") |
| |
|
| | for knowledge_base in knowledge_bases: |
| | try: |
| | files = Knowledges.get_files_by_id(knowledge_base.id, db=db) |
| | try: |
| | if VECTOR_DB_CLIENT.has_collection(collection_name=knowledge_base.id): |
| | VECTOR_DB_CLIENT.delete_collection( |
| | collection_name=knowledge_base.id |
| | ) |
| | except Exception as e: |
| | log.error(f"Error deleting collection {knowledge_base.id}: {str(e)}") |
| | continue |
| |
|
| | failed_files = [] |
| | for file in files: |
| | try: |
| | await run_in_threadpool( |
| | process_file, |
| | request, |
| | ProcessFileForm( |
| | file_id=file.id, collection_name=knowledge_base.id |
| | ), |
| | user=user, |
| | db=db, |
| | ) |
| | except Exception as e: |
| | log.error( |
| | f"Error processing file {file.filename} (ID: {file.id}): {str(e)}" |
| | ) |
| | failed_files.append({"file_id": file.id, "error": str(e)}) |
| | continue |
| |
|
| | except Exception as e: |
| | log.error(f"Error processing knowledge base {knowledge_base.id}: {str(e)}") |
| | |
| | continue |
| |
|
| | if failed_files: |
| | log.warning( |
| | f"Failed to process {len(failed_files)} files in knowledge base {knowledge_base.id}" |
| | ) |
| | for failed in failed_files: |
| | log.warning(f"File ID: {failed['file_id']}, Error: {failed['error']}") |
| |
|
| | log.info(f"Reindexing completed.") |
| | return True |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/metadata/reindex", response_model=dict) |
| | async def reindex_knowledge_base_metadata_embeddings( |
| | request: Request, |
| | user=Depends(get_admin_user), |
| | ): |
| | """Batch embed all existing knowledge bases. Admin only. |
| | |
| | NOTE: We intentionally do NOT use Depends(get_session) here. |
| | This endpoint loops through ALL knowledge bases and calls embed_knowledge_base_metadata() |
| | for each one, making N external embedding API calls. Holding a session during |
| | this entire operation would exhaust the connection pool. |
| | """ |
| | knowledge_bases = Knowledges.get_knowledge_bases() |
| | log.info(f"Reindexing embeddings for {len(knowledge_bases)} knowledge bases") |
| |
|
| | success_count = 0 |
| | for kb in knowledge_bases: |
| | if await embed_knowledge_base_metadata(request, kb.id, kb.name, kb.description): |
| | success_count += 1 |
| |
|
| | log.info(f"Embedding reindex complete: {success_count}/{len(knowledge_bases)}") |
| | return {"total": len(knowledge_bases), "success": success_count} |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | class KnowledgeFilesResponse(KnowledgeResponse): |
| | files: Optional[list[FileMetadataResponse]] = None |
| | write_access: Optional[bool] = False |
| |
|
| |
|
| | @router.get("/{id}", response_model=Optional[KnowledgeFilesResponse]) |
| | async def get_knowledge_by_id( |
| | id: str, user=Depends(get_verified_user), db: Session = Depends(get_session) |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| |
|
| | if knowledge: |
| | if ( |
| | user.role == "admin" |
| | or knowledge.user_id == user.id |
| | or AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="read", |
| | db=db, |
| | ) |
| | ): |
| |
|
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | write_access=( |
| | user.id == knowledge.user_id |
| | or (user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL) |
| | or AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | ), |
| | ) |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_401_UNAUTHORIZED, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_404_NOT_FOUND, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/{id}/update", response_model=Optional[KnowledgeFilesResponse]) |
| | async def update_knowledge_by_id( |
| | request: Request, |
| | id: str, |
| | form_data: KnowledgeForm, |
| | user=Depends(get_verified_user), |
| | ): |
| | |
| | |
| | |
| | |
| | knowledge = Knowledges.get_knowledge_by_id(id=id) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| | |
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | |
| | if ( |
| | user.role != "admin" |
| | and has_public_read_access_grant(form_data.access_grants) |
| | and not has_permission( |
| | user.id, |
| | "sharing.public_knowledge", |
| | request.app.state.config.USER_PERMISSIONS, |
| | ) |
| | ): |
| | form_data.access_grants = [] |
| |
|
| | knowledge = Knowledges.update_knowledge_by_id(id=id, form_data=form_data) |
| | if knowledge: |
| | |
| | await embed_knowledge_base_metadata( |
| | request, |
| | knowledge.id, |
| | knowledge.name, |
| | knowledge.description, |
| | ) |
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(knowledge.id), |
| | ) |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ID_TAKEN, |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | class KnowledgeAccessGrantsForm(BaseModel): |
| | access_grants: list[dict] |
| |
|
| |
|
| | @router.post("/{id}/access/update", response_model=Optional[KnowledgeFilesResponse]) |
| | async def update_knowledge_access_by_id( |
| | request: Request, |
| | id: str, |
| | form_data: KnowledgeAccessGrantsForm, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_404_NOT_FOUND, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | |
| | if ( |
| | user.role != "admin" |
| | and has_public_read_access_grant(form_data.access_grants) |
| | and not has_permission( |
| | user.id, |
| | "sharing.public_knowledge", |
| | request.app.state.config.USER_PERMISSIONS, |
| | ) |
| | ): |
| | form_data.access_grants = [ |
| | grant |
| | for grant in form_data.access_grants |
| | if not ( |
| | grant.get("principal_type") == "user" |
| | and grant.get("principal_id") == "*" |
| | ) |
| | ] |
| |
|
| | AccessGrants.set_access_grants("knowledge", id, form_data.access_grants, db=db) |
| |
|
| | return KnowledgeFilesResponse( |
| | **Knowledges.get_knowledge_by_id(id=id, db=db).model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(id, db=db), |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.get("/{id}/files", response_model=KnowledgeFileListResponse) |
| | async def get_knowledge_files_by_id( |
| | id: str, |
| | query: Optional[str] = None, |
| | view_option: Optional[str] = None, |
| | order_by: Optional[str] = None, |
| | direction: Optional[str] = None, |
| | page: Optional[int] = 1, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| |
|
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if not ( |
| | user.role == "admin" |
| | or knowledge.user_id == user.id |
| | or AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="read", |
| | db=db, |
| | ) |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | page = max(page, 1) |
| |
|
| | limit = 30 |
| | skip = (page - 1) * limit |
| |
|
| | filter = {} |
| | if query: |
| | filter["query"] = query |
| | if view_option: |
| | filter["view_option"] = view_option |
| | if order_by: |
| | filter["order_by"] = order_by |
| | if direction: |
| | filter["direction"] = direction |
| |
|
| | return Knowledges.search_files_by_id( |
| | id, user.id, filter=filter, skip=skip, limit=limit, db=db |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | class KnowledgeFileIdForm(BaseModel): |
| | file_id: str |
| |
|
| |
|
| | @router.post("/{id}/file/add", response_model=Optional[KnowledgeFilesResponse]) |
| | def add_file_to_knowledge_by_id( |
| | request: Request, |
| | id: str, |
| | form_data: KnowledgeFileIdForm, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | file = Files.get_file_by_id(form_data.file_id, db=db) |
| | if not file: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| | if not file.data: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.FILE_NOT_PROCESSED, |
| | ) |
| |
|
| | |
| | try: |
| | process_file( |
| | request, |
| | ProcessFileForm(file_id=form_data.file_id, collection_name=id), |
| | user=user, |
| | db=db, |
| | ) |
| |
|
| | |
| | Knowledges.add_file_to_knowledge_by_id( |
| | knowledge_id=id, file_id=form_data.file_id, user_id=user.id, db=db |
| | ) |
| | except Exception as e: |
| | log.debug(e) |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=str(e), |
| | ) |
| |
|
| | if knowledge: |
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(knowledge.id, db=db), |
| | ) |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| |
|
| | @router.post("/{id}/file/update", response_model=Optional[KnowledgeFilesResponse]) |
| | def update_file_from_knowledge_by_id( |
| | request: Request, |
| | id: str, |
| | form_data: KnowledgeFileIdForm, |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| |
|
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | file = Files.get_file_by_id(form_data.file_id, db=db) |
| | if not file: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | |
| | VECTOR_DB_CLIENT.delete( |
| | collection_name=knowledge.id, filter={"file_id": form_data.file_id} |
| | ) |
| |
|
| | |
| | try: |
| | process_file( |
| | request, |
| | ProcessFileForm(file_id=form_data.file_id, collection_name=id), |
| | user=user, |
| | db=db, |
| | ) |
| | except Exception as e: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=str(e), |
| | ) |
| |
|
| | if knowledge: |
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(knowledge.id, db=db), |
| | ) |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/{id}/file/remove", response_model=Optional[KnowledgeFilesResponse]) |
| | def remove_file_from_knowledge_by_id( |
| | id: str, |
| | form_data: KnowledgeFileIdForm, |
| | delete_file: bool = Query(True), |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | file = Files.get_file_by_id(form_data.file_id, db=db) |
| | if not file: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | Knowledges.remove_file_from_knowledge_by_id( |
| | knowledge_id=id, file_id=form_data.file_id, db=db |
| | ) |
| |
|
| | |
| | try: |
| | VECTOR_DB_CLIENT.delete( |
| | collection_name=knowledge.id, filter={"file_id": form_data.file_id} |
| | ) |
| |
|
| | VECTOR_DB_CLIENT.delete( |
| | collection_name=knowledge.id, filter={"hash": file.hash} |
| | ) |
| | except Exception as e: |
| | log.debug("This was most likely caused by bypassing embedding processing") |
| | log.debug(e) |
| | pass |
| |
|
| | if delete_file: |
| | try: |
| | |
| | file_collection = f"file-{form_data.file_id}" |
| | if VECTOR_DB_CLIENT.has_collection(collection_name=file_collection): |
| | VECTOR_DB_CLIENT.delete_collection(collection_name=file_collection) |
| | except Exception as e: |
| | log.debug("This was most likely caused by bypassing embedding processing") |
| | log.debug(e) |
| | pass |
| |
|
| | |
| | Files.delete_file_by_id(form_data.file_id, db=db) |
| |
|
| | if knowledge: |
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(knowledge.id, db=db), |
| | ) |
| | else: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.delete("/{id}/delete", response_model=bool) |
| | async def delete_knowledge_by_id( |
| | id: str, user=Depends(get_verified_user), db: Session = Depends(get_session) |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | log.info(f"Deleting knowledge base: {id} (name: {knowledge.name})") |
| |
|
| | |
| | models = Models.get_all_models(db=db) |
| | log.info(f"Found {len(models)} models to check for knowledge base {id}") |
| |
|
| | |
| | for model in models: |
| | if model.meta and hasattr(model.meta, "knowledge"): |
| | knowledge_list = model.meta.knowledge or [] |
| | |
| | updated_knowledge = [k for k in knowledge_list if k.get("id") != id] |
| |
|
| | |
| | if len(updated_knowledge) != len(knowledge_list): |
| | log.info(f"Updating model {model.id} to remove knowledge base {id}") |
| | model.meta.knowledge = updated_knowledge |
| | |
| | model_form = ModelForm( |
| | id=model.id, |
| | name=model.name, |
| | base_model_id=model.base_model_id, |
| | meta=model.meta, |
| | params=model.params, |
| | access_grants=model.access_grants, |
| | is_active=model.is_active, |
| | ) |
| | Models.update_model_by_id(model.id, model_form, db=db) |
| |
|
| | |
| | try: |
| | VECTOR_DB_CLIENT.delete_collection(collection_name=id) |
| | except Exception as e: |
| | log.debug(e) |
| | pass |
| |
|
| | |
| | remove_knowledge_base_metadata_embedding(id) |
| |
|
| | result = Knowledges.delete_knowledge_by_id(id=id, db=db) |
| | return result |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/{id}/reset", response_model=Optional[KnowledgeResponse]) |
| | async def reset_knowledge_by_id( |
| | id: str, user=Depends(get_verified_user), db: Session = Depends(get_session) |
| | ): |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | try: |
| | VECTOR_DB_CLIENT.delete_collection(collection_name=id) |
| | except Exception as e: |
| | log.debug(e) |
| | pass |
| |
|
| | knowledge = Knowledges.reset_knowledge_by_id(id=id, db=db) |
| | return knowledge |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.post("/{id}/files/batch/add", response_model=Optional[KnowledgeFilesResponse]) |
| | async def add_files_to_knowledge_batch( |
| | request: Request, |
| | id: str, |
| | form_data: list[KnowledgeFileIdForm], |
| | user=Depends(get_verified_user), |
| | db: Session = Depends(get_session), |
| | ): |
| | """ |
| | Add multiple files to a knowledge base |
| | """ |
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | if ( |
| | knowledge.user_id != user.id |
| | and not AccessGrants.has_access( |
| | user_id=user.id, |
| | resource_type="knowledge", |
| | resource_id=knowledge.id, |
| | permission="write", |
| | db=db, |
| | ) |
| | and user.role != "admin" |
| | ): |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=ERROR_MESSAGES.ACCESS_PROHIBITED, |
| | ) |
| |
|
| | |
| | log.info(f"files/batch/add - {len(form_data)} files") |
| | file_ids = [form.file_id for form in form_data] |
| | files = Files.get_files_by_ids(file_ids, db=db) |
| |
|
| | |
| | found_ids = {file.id for file in files} |
| | missing_ids = [fid for fid in file_ids if fid not in found_ids] |
| | if missing_ids: |
| | raise HTTPException( |
| | status_code=status.HTTP_400_BAD_REQUEST, |
| | detail=f"File {missing_ids[0]} not found", |
| | ) |
| |
|
| | |
| | try: |
| | result = await process_files_batch( |
| | request=request, |
| | form_data=BatchProcessFilesForm(files=files, collection_name=id), |
| | user=user, |
| | db=db, |
| | ) |
| | except Exception as e: |
| | log.error( |
| | f"add_files_to_knowledge_batch: Exception occurred: {e}", exc_info=True |
| | ) |
| | raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) |
| |
|
| | |
| | successful_file_ids = [r.file_id for r in result.results if r.status == "completed"] |
| | for file_id in successful_file_ids: |
| | Knowledges.add_file_to_knowledge_by_id( |
| | knowledge_id=id, file_id=file_id, user_id=user.id, db=db |
| | ) |
| |
|
| | |
| | if result.errors: |
| | error_details = [f"{err.file_id}: {err.error}" for err in result.errors] |
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(knowledge.id, db=db), |
| | warnings={ |
| | "message": "Some files failed to process", |
| | "errors": error_details, |
| | }, |
| | ) |
| |
|
| | return KnowledgeFilesResponse( |
| | **knowledge.model_dump(), |
| | files=Knowledges.get_file_metadatas_by_id(knowledge.id, db=db), |
| | ) |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | @router.get("/{id}/export") |
| | async def export_knowledge_by_id( |
| | id: str, user=Depends(get_admin_user), db: Session = Depends(get_session) |
| | ): |
| | """ |
| | Export a knowledge base as a zip file containing .txt files. |
| | Admin only. |
| | """ |
| |
|
| | knowledge = Knowledges.get_knowledge_by_id(id=id, db=db) |
| | if not knowledge: |
| | raise HTTPException( |
| | status_code=status.HTTP_404_NOT_FOUND, |
| | detail=ERROR_MESSAGES.NOT_FOUND, |
| | ) |
| |
|
| | files = Knowledges.get_files_by_id(id, db=db) |
| |
|
| | |
| | zip_buffer = io.BytesIO() |
| | with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf: |
| | for file in files: |
| | content = file.data.get("content", "") if file.data else "" |
| | if content: |
| | |
| | filename = file.filename |
| | if not filename.endswith(".txt"): |
| | filename = f"{filename}.txt" |
| | zf.writestr(filename, content) |
| |
|
| | zip_buffer.seek(0) |
| |
|
| | |
| | safe_name = "".join(c if c.isalnum() or c in " -_" else "_" for c in knowledge.name) |
| | zip_filename = f"{safe_name}.zip" |
| |
|
| | return StreamingResponse( |
| | zip_buffer, |
| | media_type="application/zip", |
| | headers={"Content-Disposition": f"attachment; filename={zip_filename}"}, |
| | ) |
| |
|