from __future__ import annotations

from typing import Optional
from pathlib import Path

import yaml
from pydantic import BaseModel, Field


class BootstrapConfig(BaseModel):
    """Bootstrap settings; ``dest`` is what ``DataConfig.jsonl_path`` returns."""

    # NOTE(review): presumably a download of ``url`` into ``dest`` — confirm
    # against the consumer; units of ``timeout`` are not visible here.
    url: str = ""
    dest: str
    retries: int = 5
    timeout: int = 300


class ServerConfig(BaseModel):
    """Host, port, worker count and application import string for the server."""

    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 1
    module: str = "app.main:app"


class ElasticsearchConfig(BaseModel):
    """Elasticsearch connection settings.

    ``index`` is optional; when it is unset ``DataConfig.es_index`` falls back
    to ``DataConfig.index_name``.
    """

    host: str = "http://elasticsearch:9200"
    index: Optional[str] = None
    timeout: int = 30
    max_retries: int = 3


class RegexRule(BaseModel):
    """One entry of ``FieldDef.regex``."""

    # NOTE(review): looks like a find/replace pair with an optional repeat
    # flag; the code applying it is not in this file — confirm.
    find: str
    rep: str
    loop: bool = False


class Subfield(BaseModel):
    """A sub-entry of a ``FieldDef``, identified by ``src`` and ``dest``."""

    src: str
    dest: str
    type: str = "string"
    es_type: str = "keyword"
    facet: bool = False
    facet_size: int = 10
    suggest: bool = False
    keyword_subfield: bool = False


class SearchField(BaseModel):
    """A searchable field with an optional subfield and a boost factor."""

    field: str
    boost: float = 1.0
    subfield: Optional[str] = None


class FieldDef(BaseModel):
    """Definition of one field listed under ``DataConfig.campos_filter``."""

    field: str
    type: str = "string"
    key: bool = False
    required: bool = False
    embed: bool = False
    es_type: str = "text"
    index: bool = True
    exclude_source: bool = False
    facet: bool = False
    facet_size: int = 10
    facet_histogram: bool = False
    suggest: bool = False
    keyword_subfield: bool = False
    timestamp_fmt: int = 1
    regex: list[RegexRule] = Field(default_factory=list)
    subfields: list[Subfield] = Field(default_factory=list)


class DataConfig(BaseModel):
    """Top-level configuration model, with derived views exposed as properties."""

    index_name: str
    display_name: str = ""
    id_field: str
    data_format: str = "jsonl"
    bootstrap: Optional[BootstrapConfig] = None
    server: ServerConfig = Field(default_factory=ServerConfig)
    elasticsearch: ElasticsearchConfig = Field(default_factory=ElasticsearchConfig)
    analyzer: str = "standard"
    search_fields: list[SearchField] = Field(default_factory=list)
    campos_filter: list[FieldDef] = Field(default_factory=list)
    chunk_size: int = 500

    @property
    def jsonl_path(self) -> str:
        """Return ``bootstrap.dest`` when bootstrap is set, else a default path."""
        if self.bootstrap:
            return self.bootstrap.dest
        return f"/app/data/{self.index_name}.jsonl"

    @property
    def es_index(self) -> str:
        """Return the configured Elasticsearch index, or ``index_name`` if unset."""
        override = self.elasticsearch.index
        return override or self.index_name

    @property
    def es_host(self) -> str:
        """Return the Elasticsearch host from the nested configuration."""
        return self.elasticsearch.host

    @property
    def key_field(self) -> Optional[FieldDef]:
        """Return the first field definition flagged ``key``, or ``None``."""
        for candidate in self.campos_filter:
            if candidate.key:
                return candidate
        return None

    @property
    def flat_fields(self) -> list[FieldDef]:
        """Return every field definition whose ``type`` is not ``"object"``."""
        return [entry for entry in self.campos_filter if entry.type != "object"]

    @property
    def facet_fields(self) -> list[FieldDef | Subfield]:
        """Return faceted fields and faceted subfields, in declaration order.

        Each faceted field is followed by its own faceted subfields.
        """
        # NOTE(review): nesting reconstructed from a flattened source — the
        # subfields are scanned for every field, faceted or not. Confirm.
        selected: list[FieldDef | Subfield] = []
        for entry in self.campos_filter:
            if entry.facet:
                selected.append(entry)
            selected.extend(sub for sub in entry.subfields if sub.facet)
        return selected

    @property
    def suggest_fields(self) -> list[str]:
        """Return names of suggest-enabled fields and subfields.

        Fields contribute ``field``; subfields contribute ``dest``.
        """
        # NOTE(review): nesting reconstructed from a flattened source — the
        # subfields are scanned for every field, suggest-enabled or not. Confirm.
        names: list[str] = []
        for entry in self.campos_filter:
            if entry.suggest:
                names.append(entry.field)
            names.extend(sub.dest for sub in entry.subfields if sub.suggest)
        return names

    @property
    def source_excludes(self) -> list[str]:
        """Return the names of fields flagged ``exclude_source``."""
        return [entry.field for entry in self.campos_filter if entry.exclude_source]

    @property
    def search_fields_es(self) -> list[str]:
        """Return search field names as ``field[.subfield][^boost]`` strings.

        The ``^boost`` suffix is only appended when the boost differs from 1.0.
        """
        rendered: list[str] = []
        for entry in self.search_fields:
            target = entry.field
            if entry.subfield:
                target = f"{entry.field}.{entry.subfield}"
            if entry.boost != 1.0:
                target = f"{target}^{entry.boost}"
            rendered.append(target)
        return rendered


def load_config(path: str | Path) -> DataConfig:
    """Read the YAML file at *path* and validate it into a ``DataConfig``.

    The file is opened as UTF-8 and parsed with ``yaml.safe_load``; whatever
    ``DataConfig.model_validate`` raises for an invalid document propagates.
    """
    with open(path, encoding="utf-8") as handle:
        document = yaml.safe_load(handle)
    return DataConfig.model_validate(document)