| from __future__ import annotations
|
|
|
| from enum import Enum
|
| from pathlib import Path
|
|
|
# Root directory: the parent of the directory that contains this file,
# resolved to an absolute path.
_root_path = Path(__file__).parent.parent.resolve()


class ENV:
    """Filesystem locations used by the project, all derived from ``ROOT_DIR``.

    Every attribute is a ``pathlib.Path`` built by joining path segments.
    Evaluating this class body only constructs the paths; it performs no
    filesystem access beyond the ``resolve()`` call used for ``ROOT_DIR``.
    """

    ROOT_DIR: Path = _root_path
    DATA_DIR: Path = ROOT_DIR / "data"

    # Children of DATASETS_DIR.
    DATASETS_DIR: Path = DATA_DIR / "datasets"
    BASE_DATASETS_DIR: Path = DATASETS_DIR / "base_v2"
    SYN_DATASETS_PREPARED_DIR: Path = DATASETS_DIR / "synthesized_prepared"
    SYN_DATASETS_DIR: Path = DATASETS_DIR / "synthesized_datasets"

    VISUAL_ELEMENT_PREFABS_DIR: Path = DATA_DIR / "visual_element_prefabs"

    EMBEDDINGS_DIR: Path = DATA_DIR / "embeddings"
    GT_EMBEDDINGS_DIR: Path = DATA_DIR / "gt_embeddings"
    CLUSTERS_DIR: Path = DATA_DIR / "clusters"
    CLUSTER_PLOTS: Path = DATA_DIR / "cluster_plots"
    # NOTE: "dataste" is misspelled in this directory name. It is kept verbatim
    # because correcting it would make the constant point at a different
    # directory than the one used so far.
    SYN_DATASET_STAT_PLOTS: Path = DATA_DIR / "syn_dataste_statistics_plots"

    # ANALYZATION_DIR / "gt" has one sub-directory per key: kie, cls, qa, dla.
    ANALYZATION_DIR: Path = DATA_DIR / "analyzation"
    GT_ANALYZATION_DIR: Path = ANALYZATION_DIR / "gt"
    KIE_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "kie"
    CLS_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "cls"
    QA_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "qa"
    DLA_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "dla"

    WEBAPP_CACHE_DIR: Path = DATA_DIR / "webapp_cache"
    QA_GT_WEBAPP_CACHE_DIR: Path = WEBAPP_CACHE_DIR / "qa_gt"

    TEMP_DIR: Path = DATA_DIR / "temp"

    MODELS_DIR: Path = DATA_DIR / "models"
    RUNS_DIR: Path = DATA_DIR / "runs"

    EXPORTS_DIR: Path = DATA_DIR / "exports"

    PREPARED_DATASETS_DIR: Path = DATASETS_DIR / "prepared"

    SYN_DATA_DEFINITIONS_DIR: Path = DATA_DIR / "syn_dataset_definitions"
    PROMPT_TEMPLATES_DIR: Path = DATA_DIR / "prompt_templates"
    SEED_IMAGES_DIR: Path = DATA_DIR / "seed-images"
|
|
|
|
|
# Create the directories listed below as a side effect of importing this
# module. ``parents=True`` also creates any missing ancestors, and
# ``exist_ok=True`` leaves an already existing directory untouched.
for _required_dir in (
    ENV.BASE_DATASETS_DIR,
    ENV.SYN_DATASETS_DIR,
    ENV.SYN_DATASETS_PREPARED_DIR,
    ENV.VISUAL_ELEMENT_PREFABS_DIR,
    ENV.PREPARED_DATASETS_DIR,
    ENV.EMBEDDINGS_DIR,
    ENV.CLUSTERS_DIR,
    ENV.TEMP_DIR,
    ENV.MODELS_DIR,
    ENV.EXPORTS_DIR,
    ENV.CLUSTER_PLOTS,
    ENV.SYN_DATASET_STAT_PLOTS,
    ENV.GT_EMBEDDINGS_DIR,
    ENV.KIE_GT_ANALYZATION_DIR,
    ENV.CLS_GT_ANALYZATION_DIR,
    ENV.DLA_GT_ANALYZATION_DIR,
    ENV.QA_GT_ANALYZATION_DIR,
    ENV.QA_GT_WEBAPP_CACHE_DIR,
):
    _required_dir.mkdir(parents=True, exist_ok=True)
# Drop the loop variable so the module namespace gains no extra name.
del _required_dir
|
|
|
|
|
class LLM:
    """Model identifier strings, exposed as class-level constants."""

    CLAUDE_SONNET_4: str = "claude-sonnet-4-20250514"
    CLAUDE_SONNET_4_5: str = "claude-sonnet-4-5-20250929"
    CLAUDE_HAIKU_4_5: str = "claude-haiku-4-5-20251001"
    # Same identifier as CLAUDE_SONNET_4 with an "anthropic/" prefix.
    # NOTE(review): presumably the provider-qualified form a "tinyllm" client
    # expects - confirm against the call site.
    TINYLLM_CLAUDE_SONNET_4: str = "anthropic/claude-sonnet-4-20250514"
|
|
|
|
|
|
|
class GENERATION:
    """Generation settings, exposed as class-level constants."""

    # The right-hand side resolves to the module-level ``LLM`` class because
    # no class-level ``LLM`` exists yet. After this line, ``LLM`` inside this
    # class body (and ``GENERATION.LLM``) is the plain model-id string, so any
    # further ``LLM.<NAME>`` lookups must not be added below it.
    LLM: str = LLM.CLAUDE_SONNET_4_5
    # NOTE(review): presumably the token cap passed to the model - confirm
    # against the call site.
    MAX_TOKENS: int = 16384
    HANDWRITING_MODEL_CHECKPOINT: Path = ENV.MODELS_DIR / "handwriting" / "latest.pt"
|
|
|