---
# HITL-KG Configuration File
# Copy to config.yaml and customize

# Server settings
# "0.0.0.0" listens on all network interfaces; use "127.0.0.1" to accept
# local connections only.
host: "0.0.0.0"
port: 7860
debug: false

# Paths (relative to app root)
data_dir: "./data"
cache_dir: "./data/cache"
sessions_dir: "./data/sessions"

# Session management
session_max_age_hours: 24
session_cleanup_interval_minutes: 5
max_sessions: 1000

# Default language
default_language: "en"
supported_languages:
  - "en"
  - "uk"
  - "ru"
  - "es"
  - "de"
  - "fr"

# Embedding configuration
embedding:
  # Multilingual model supporting 50+ languages
  model_name: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
  cache_dir: "./data/embeddings"
  # NOTE(review): presumably must match the output size of model_name;
  # confirm before switching models.
  dimension: 384
  batch_size: 32
  device: "cpu"  # "cpu", "cuda", or "mps" (Apple Silicon)

# LLM configuration
llm:
  provider: "local"  # "openai" or "local"
  # NOTE(review): "gpt-4o-mini" is an OpenAI model name while provider is
  # "local"; confirm whether this value is used when provider is "local".
  model: "gpt-4o-mini"
  temperature: 0.7
  max_tokens: 2048
  # api_key: ""  # Or set OPENAI_API_KEY environment variable

# Datasets configuration
# The system supports multiple dataset formats: obo, csv, json
datasets:
  # Disease Ontology (DOID)
  - name: "disease_ontology"
    source_type: "obo"
    source_url: "https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/src/ontology/doid.obo"
    entity_category: "disease"
    cache_enabled: true
    cache_max_age_days: 7

  # Symptom Ontology (SYMP)
  - name: "symptom_ontology"
    source_type: "obo"
    source_url: "https://raw.githubusercontent.com/DiseaseOntology/SymptomOntology/main/symp.obo"
    entity_category: "symptom"
    cache_enabled: true
    cache_max_age_days: 7

  # Example: Custom CSV dataset (uncomment to use)
  # - name: "custom_symptoms"
  #   source_type: "csv"
  #   source_path: "./data/custom_symptoms.csv"
  #   entity_category: "symptom"
  #   cache_enabled: false

# Advanced settings
# This file already defines a top-level `embedding` key above. To try one of
# the models below, change `model_name` in that existing section rather than
# uncommenting this one: a second `embedding:` key would be a duplicate key.
# NOTE(review): the alternative models may produce a different embedding size
# than `dimension: 384`; confirm and adjust `dimension` accordingly.
# embedding:
#   # For domain-specific embeddings, consider:
#   # - "dmis-lab/biobert-base-cased-v1.2" (biomedical)
#   # - "emilyalsentzer/Bio_ClinicalBERT" (clinical)
#   model_name: "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"