# para.AI_ASSUNTOS_CNJ/app/core/config_loader.py
# Carlexxx
# para.AI beta
# d456722
from __future__ import annotations
from typing import Optional
from pathlib import Path
import yaml
from pydantic import BaseModel, Field
class BootstrapConfig(BaseModel):
    """Optional ``bootstrap`` section of the data configuration.

    Only ``dest`` is required. When this section is present,
    ``DataConfig.jsonl_path`` returns ``dest`` instead of the default path.
    """

    # NOTE(review): presumably the location the data is fetched from; an
    # empty string is the default -- confirm how consumers treat it.
    url: str = ""
    # Required destination path; surfaced through DataConfig.jsonl_path.
    dest: str
    # NOTE(review): presumably the number of fetch attempts -- confirm.
    retries: int = 5
    # NOTE(review): unit is not visible in this file (presumably seconds).
    timeout: int = 300
class ServerConfig(BaseModel):
    """``server`` section of the data configuration; every field has a default."""

    # Bind address; the default listens on all interfaces.
    host: str = "0.0.0.0"
    port: int = 8000
    # NOTE(review): presumably the number of server worker processes -- confirm.
    workers: int = 1
    # NOTE(review): looks like a "module:attribute" import string for the
    # application object -- confirm against the code that starts the server.
    module: str = "app.main:app"
class ElasticsearchConfig(BaseModel):
    """``elasticsearch`` section of the data configuration."""

    # Base URL of the Elasticsearch node (exposed as DataConfig.es_host).
    host: str = "http://elasticsearch:9200"
    # Optional index name override; when unset (or empty),
    # DataConfig.es_index falls back to DataConfig.index_name.
    index: Optional[str] = None
    # NOTE(review): unit is not visible in this file (presumably seconds).
    timeout: int = 30
    max_retries: int = 3
class RegexRule(BaseModel):
    """One entry of a field's ``regex`` list (see ``FieldDef.regex``)."""

    # NOTE(review): presumably the pattern to search for -- confirm.
    find: str
    # NOTE(review): presumably the replacement text -- confirm.
    rep: str
    # NOTE(review): presumably "re-apply until nothing changes"; the
    # consumer of this flag is not visible in this file -- confirm.
    loop: bool = False
class Subfield(BaseModel):
    """One entry of a field's ``subfields`` list (see ``FieldDef.subfields``).

    ``DataConfig.facet_fields`` collects subfields whose ``facet`` flag is
    set, and ``DataConfig.suggest_fields`` collects the ``dest`` name of
    subfields whose ``suggest`` flag is set.
    """

    # NOTE(review): presumably the source key inside the parent value -- confirm.
    src: str
    # Output name; this is the name reported by DataConfig.suggest_fields.
    dest: str
    type: str = "string"
    es_type: str = "keyword"
    facet: bool = False
    facet_size: int = 10
    suggest: bool = False
    keyword_subfield: bool = False
class SearchField(BaseModel):
    """One entry of ``DataConfig.search_fields``.

    ``DataConfig.search_fields_es`` renders each entry as ``field`` or
    ``field.subfield``, followed by ``^boost`` when ``boost`` is not 1.0.
    """

    field: str
    boost: float = 1.0
    # Optional suffix joined to ``field`` with a dot when rendering.
    subfield: Optional[str] = None
class FieldDef(BaseModel):
    """One entry of ``DataConfig.campos_filter`` describing a single field.

    Several flags are read by ``DataConfig`` properties: ``key``
    (``key_field``), ``type`` (``flat_fields`` skips ``"object"``),
    ``facet`` (``facet_fields``), ``suggest`` (``suggest_fields``) and
    ``exclude_source`` (``source_excludes``). The consumers of the remaining
    attributes are not visible in this file.
    """

    field: str
    type: str = "string"
    # Marks the key field; DataConfig.key_field returns the first one set.
    key: bool = False
    required: bool = False
    embed: bool = False
    es_type: str = "text"
    index: bool = True
    exclude_source: bool = False
    facet: bool = False
    facet_size: int = 10
    facet_histogram: bool = False
    suggest: bool = False
    keyword_subfield: bool = False
    # NOTE(review): integer selector whose meaning is not visible here -- confirm.
    timestamp_fmt: int = 1
    # default_factory gives each instance its own fresh list.
    regex: list[RegexRule] = Field(default_factory=list)
    subfields: list[Subfield] = Field(default_factory=list)
class DataConfig(BaseModel):
    """Root configuration model produced by ``load_config``.

    ``index_name`` and ``id_field`` are required; every other setting has a
    default. The read-only properties below derive convenience views from
    the declared fields and never mutate the model.
    """

    index_name: str
    display_name: str = ""
    id_field: str
    data_format: str = "jsonl"
    bootstrap: Optional[BootstrapConfig] = None
    server: ServerConfig = Field(default_factory=ServerConfig)
    elasticsearch: ElasticsearchConfig = Field(default_factory=ElasticsearchConfig)
    analyzer: str = "standard"
    search_fields: list[SearchField] = Field(default_factory=list)
    campos_filter: list[FieldDef] = Field(default_factory=list)
    chunk_size: int = 500

    @property
    def jsonl_path(self):
        """Return ``bootstrap.dest`` when a bootstrap section exists, else
        the default ``/app/data/<index_name>.jsonl`` path."""
        if self.bootstrap:
            return self.bootstrap.dest
        return f"/app/data/{self.index_name}.jsonl"

    @property
    def es_index(self):
        """Return the configured Elasticsearch index, falling back to
        ``index_name`` when it is unset or empty."""
        configured = self.elasticsearch.index
        return configured if configured else self.index_name

    @property
    def es_host(self):
        """Return the Elasticsearch host URL."""
        return self.elasticsearch.host

    @property
    def key_field(self):
        """Return the first field definition flagged ``key``, or ``None``."""
        for definition in self.campos_filter:
            if definition.key:
                return definition
        return None

    @property
    def flat_fields(self):
        """Return the field definitions whose ``type`` is not ``"object"``."""
        kept = []
        for definition in self.campos_filter:
            if definition.type != "object":
                kept.append(definition)
        return kept

    @property
    def facet_fields(self):
        """Return facet-enabled definitions in declaration order.

        The result mixes top-level field definitions and subfields: each
        faceted field is immediately followed by its faceted subfields.
        Subfields are scanned even when the parent itself is not faceted.
        """
        collected = []
        for definition in self.campos_filter:
            # getattr with defaults tolerates entries lacking these attributes.
            if getattr(definition, "facet", False):
                collected.append(definition)
            collected.extend(
                sub for sub in getattr(definition, "subfields", []) if sub.facet
            )
        return collected

    @property
    def suggest_fields(self):
        """Return the names of suggest-enabled fields in declaration order.

        Top-level fields contribute ``field``; subfields contribute ``dest``.
        """
        names = []
        for definition in self.campos_filter:
            if definition.suggest:
                names.append(definition.field)
            names.extend(sub.dest for sub in definition.subfields if sub.suggest)
        return names

    @property
    def source_excludes(self):
        """Return the names of fields flagged ``exclude_source``."""
        excluded = []
        for definition in self.campos_filter:
            if definition.exclude_source:
                excluded.append(definition.field)
        return excluded

    @property
    def search_fields_es(self):
        """Render ``search_fields`` as a list of strings.

        Each entry becomes ``field`` or ``field.subfield``; ``^<boost>`` is
        appended only when the boost differs from 1.0.
        """
        rendered = []
        for entry in self.search_fields:
            if entry.subfield:
                target = f"{entry.field}.{entry.subfield}"
            else:
                target = entry.field
            if entry.boost == 1.0:
                rendered.append(target)
            else:
                rendered.append(f"{target}^{entry.boost}")
        return rendered
def load_config(path) -> DataConfig:
    """Load the YAML file at ``path`` and validate it as a ``DataConfig``.

    Args:
        path: Location of the YAML file; passed straight to ``open``.

    Returns:
        The validated ``DataConfig`` instance.

    Raises:
        Whatever ``open``, ``yaml.safe_load`` or
        ``DataConfig.model_validate`` raise; no exception is caught here.
    """
    # Decode explicitly as UTF-8 rather than the platform default encoding.
    with open(path, encoding="utf-8") as stream:
        raw = yaml.safe_load(stream)
    return DataConfig.model_validate(raw)