File size: 3,365 Bytes
dc4e6da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 | from __future__ import annotations
from enum import Enum
from pathlib import Path
# Directory one level above the directory that contains this file; every
# path in ENV is derived from it.
_root_path = Path(__file__).parent.parent.resolve()


# Project paths
class ENV:
    """Namespace of filesystem locations used by the project.

    Every attribute is a ``pathlib.Path`` built from ``ROOT_DIR``. The class
    is used as a plain constant container and is not meant to be
    instantiated.
    """

    # General
    ROOT_DIR: Path = _root_path
    DATA_DIR: Path = ROOT_DIR / "data"

    # Datasets (all below DATA_DIR / "datasets")
    DATASETS_DIR: Path = DATA_DIR / "datasets"
    BASE_DATASETS_DIR: Path = DATASETS_DIR / "base_v2"
    SYN_DATASETS_PREPARED_DIR: Path = DATASETS_DIR / "synthesized_prepared"
    SYN_DATASETS_DIR: Path = DATASETS_DIR / "synthesized_datasets"

    VISUAL_ELEMENT_PREFABS_DIR: Path = DATA_DIR / "visual_element_prefabs"

    # Embeddings, clusters and plots
    EMBEDDINGS_DIR: Path = DATA_DIR / "embeddings"
    GT_EMBEDDINGS_DIR: Path = DATA_DIR / "gt_embeddings"
    CLUSTERS_DIR: Path = DATA_DIR / "clusters"
    CLUSTER_PLOTS: Path = DATA_DIR / "cluster_plots"
    # NOTE(review): "dataste" looks like a typo for "dataset". The literal is
    # kept unchanged because renaming it would point away from any directory
    # that already exists on disk under the current name.
    SYN_DATASET_STAT_PLOTS: Path = DATA_DIR / "syn_dataste_statistics_plots"

    # Analysis output, with one "gt" sub-directory per kind
    ANALYZATION_DIR: Path = DATA_DIR / "analyzation"
    GT_ANALYZATION_DIR: Path = ANALYZATION_DIR / "gt"
    KIE_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "kie"
    CLS_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "cls"
    QA_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "qa"
    DLA_GT_ANALYZATION_DIR: Path = GT_ANALYZATION_DIR / "dla"

    # Web application cache
    WEBAPP_CACHE_DIR: Path = DATA_DIR / "webapp_cache"
    QA_GT_WEBAPP_CACHE_DIR: Path = WEBAPP_CACHE_DIR / "qa_gt"

    # Scratch space, models, runs and exports
    TEMP_DIR: Path = DATA_DIR / "temp"
    MODELS_DIR: Path = DATA_DIR / "models"
    RUNS_DIR: Path = DATA_DIR / "runs"
    EXPORTS_DIR: Path = DATA_DIR / "exports"

    # Contains combined datasets (original and synthetic)
    PREPARED_DATASETS_DIR: Path = DATASETS_DIR / "prepared"

    SYN_DATA_DEFINITIONS_DIR: Path = DATA_DIR / "syn_dataset_definitions"
    PROMPT_TEMPLATES_DIR: Path = DATA_DIR / "prompt_templates"
    SEED_IMAGES_DIR: Path = DATA_DIR / "seed-images"
# Create the working directories when this module is imported. Missing
# parents are created as well, and directories that already exist are left
# alone. Only the directories listed here are created; other ENV paths are
# not touched.
for _directory in (
    ENV.BASE_DATASETS_DIR,
    ENV.SYN_DATASETS_DIR,
    ENV.SYN_DATASETS_PREPARED_DIR,
    ENV.VISUAL_ELEMENT_PREFABS_DIR,
    ENV.PREPARED_DATASETS_DIR,
    ENV.EMBEDDINGS_DIR,
    ENV.CLUSTERS_DIR,
    ENV.TEMP_DIR,
    ENV.MODELS_DIR,
    ENV.EXPORTS_DIR,
    ENV.CLUSTER_PLOTS,
    ENV.SYN_DATASET_STAT_PLOTS,
    ENV.GT_EMBEDDINGS_DIR,
    ENV.KIE_GT_ANALYZATION_DIR,
    ENV.CLS_GT_ANALYZATION_DIR,
    ENV.DLA_GT_ANALYZATION_DIR,
    ENV.QA_GT_ANALYZATION_DIR,
    ENV.QA_GT_WEBAPP_CACHE_DIR,
):
    _directory.mkdir(parents=True, exist_ok=True)
del _directory  # keep the loop variable out of the module namespace
class LLM:
    """Model identifier strings, exposed as plain class attributes."""

    CLAUDE_SONNET_4: str = "claude-sonnet-4-20250514"
    CLAUDE_SONNET_4_5: str = "claude-sonnet-4-5-20250929"
    CLAUDE_HAIKU_4_5: str = "claude-haiku-4-5-20251001"

    # Same identifier as CLAUDE_SONNET_4, prefixed with "anthropic/".
    TINYLLM_CLAUDE_SONNET_4: str = "anthropic/claude-sonnet-4-20250514"
# Default values for generation
class GENERATION:
    """Default values for generation."""

    # The right-hand side is resolved against the module-level ``LLM`` class,
    # because the attribute of the same name does not exist yet at that
    # point. From the next line on, ``LLM`` inside this class body refers to
    # the string assigned here.
    LLM: str = LLM.CLAUDE_SONNET_4_5
    MAX_TOKENS: int = 16384
    HANDWRITING_MODEL_CHECKPOINT: Path = (
        ENV.MODELS_DIR / "handwriting" / "latest.pt"
    )
|