# DiskCache library: directory that holds its cache files.
DISKCACHE_DIR = "diskcache_dir"

# SQLite: directory that holds the database files, and the database file name.
SQLITE_DB_DIR = "sqlite_dir"
SQLITE_DB_NAME = "mydatabase.sqlite3"

# Chroma vector database: directory that holds its files, and the name of
# the collection inside it.
CHROMA_DB_DIR = "chroma_dir"
CHROMA_COLLECTION_NAME = "mychroma_collection"

# Web crawling: upper bound on the number of concurrent requests.
MAX_CRAWL_PARALLEL_REQUEST = 5

# Names of the models used for embedding text, one constant per provider.
OPENAI_EMBEDDING_MODEL_NAME = "text-embedding-3-small"  # OpenAI
ZHIPUAI_EMBEDDING_MODEL_NAME = "embedding-2"  # ZhipuAI
OLLAMA_EMBEDDING_MODEL_NAME = "mxbai-embed-large"  # Ollama

# Text splitting: large documents are split into chunks of at most
# MAX_CHUNK_LENGTH, and consecutive chunks overlap by CHUNK_OVERLAP.
MAX_CHUNK_LENGTH = 1300
CHUNK_OVERLAP = 100

# Longest single query string that is allowed.
MAX_QUERY_LENGTH = 200

# Number of top documents to recall: for the initial retrieval of a search
# operation, and when re-ranking is used.
RECALL_TOP_K = 5
RERANK_RECALL_TOP_K = 10

# Model used for re-ranking. Available choices:
#   'ms-marco-TinyBERT-L-2-v2' - Nano (~4MB); blazing fast, competitive ranking precision.
#   'ms-marco-MiniLM-L-12-v2'  - Small (~34MB); slightly slower, best ranking precision.
RERANK_MODEL_NAME = "ms-marco-MiniLM-L-12-v2"

# Sessions: how many historical user sessions are retained, and how long
# (in seconds) a session lasts before it expires.
MAX_HISTORY_SESSION_LENGTH = 2
SESSION_EXPIRE_TIME = 30 * 60

# Static files are served from STATIC_DIR; media files are stored in the
# MEDIA_DIR sub-directory under it.
STATIC_DIR = "web"
MEDIA_DIR = "media_dir"

# Distributed lock in the DiskCache: its unique identifier and its
# expiration time in seconds.
DISTRIBUTED_LOCK_ID = "open_kf:distributed_lock"
DISTRIBUTED_LOCK_EXPIRE_TIME = 20

# Operation codes for sitemap content.
ADD_SITEMAP_CONTENT = 1  # add content to the sitemap
DELETE_SITEMAP_CONTENT = 2  # delete content from the sitemap
UPDATE_SITEMAP_CONTENT = 3  # update content in the sitemap

# Operation codes for isolated-URL content.
ADD_ISOLATED_URL_CONTENT = 1  # add content to isolated URLs
DELETE_ISOLATED_URL_CONTENT = 2  # delete content from isolated URLs

# Upper bound on the number of isolated URLs processed in one batch.
MAX_ISOLATED_URL_BATCH_LENGTH = 10

# Local files: directory in which downloaded files are stored.
LOCAL_FILE_DOWNLOAD_DIR = "download_dir"

# Upper bound on concurrent requests for file writing.
MAX_CONCURRENT_WRITES = 5

# Largest allowed file size, expressed in bytes (30 MB).
MAX_FILE_SIZE = 30 * 1024 ** 2

# Largest number of files in a single upload.
MAX_LOCAL_FILE_BATCH_LENGTH = 10

# File extensions that are supported. Each entry includes the leading dot.
FILE_LOADER_EXTENSIONS = {
    ".txt", ".md", ".csv",
    ".pdf", ".epub", ".mobi",
    ".html",
    ".docx", ".pptx", ".xlsx",
}

# Values of the `domain_status` column in t_sitemap_domain_tab.
DOMAIN_STATISTICS_GATHERING = 1  # domain statistics gathering
DOMAIN_STATISTICS_GATHERING_COLLECTED = 2  # domain statistics collected
DOMAIN_PROCESSING = 3  # domain processing
DOMAIN_PROCESSED = 4  # domain processed

# Values of the `doc_status` column in t_sitemap_url_tab.
SITEMAP_URL_PROCESS_FAILED = 0  # processing failed
SITEMAP_URL_RECORDED = 1  # sitemap web page recorded
SITEMAP_URL_CRAWLING = 2  # sitemap web page crawling
SITEMAP_URL_CRAWLING_COMPLETED = 3  # sitemap web page crawling completed
SITEMAP_URL_EMBEDDED = 4  # sitemap web text embedding stored in VectorDB
SITEMAP_URL_EXPIRED = 5  # sitemap web page expired and needs to be crawled again

# Values of the `doc_status` column in t_isolated_url_tab.
ISOLATED_URL_PROCESS_FAILED = 0  # processing failed
ISOLATED_URL_RECORDED = 1  # isolated web page recorded
ISOLATED_URL_CRAWLING = 2  # isolated web page crawling
ISOLATED_URL_CRAWLING_COMPLETED = 3  # isolated web page crawling completed
ISOLATED_URL_EMBEDDED = 4  # isolated web text embedding stored in VectorDB

# Values of the `doc_status` column in t_local_file_tab.
LOCAL_FILE_PROCESS_FAILED = 0  # processing failed
LOCAL_FILE_RECORDED = 1  # local file recorded
LOCAL_FILE_PARSING = 2  # local file parsing
LOCAL_FILE_PARSING_COMPLETED = 3  # local file parsing completed
LOCAL_FILE_EMBEDDED = 4  # local file text embedding stored in VectorDB

# Values of the `doc_source` column in t_doc_embedding_map_tab.
FROM_SITEMAP_URL = 1  # from sitemap URLs
FROM_ISOLATED_URL = 2  # from isolated URLs
FROM_LOCAL_FILE = 3  # from local files