# vietqa-api / src/config.py
# Author: quanho114 — "Deploy VietQA API with Secure Firebase" (commit 4f8c5b9)
import os
from pathlib import Path

from dotenv import load_dotenv
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
load_dotenv()
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = Path(os.getenv("DATA_DIR", PROJECT_ROOT / "data"))
DATA_INPUT_DIR = Path(os.getenv("DATA_INPUT_DIR", PROJECT_ROOT / "test_data"))
DATA_OUTPUT_DIR = Path(os.getenv("DATA_OUTPUT_DIR", PROJECT_ROOT / "output"))
DATA_CRAWLED_DIR = Path(os.getenv("DATA_CRAWLED_DIR", DATA_DIR / "crawl"))
BATCH_SIZE = 1
class Settings(BaseSettings):
"""Application settings with environment variable support."""
# MegaLLM API settings (for small model)
megallm_api_key: str = Field(
default="",
alias="MEGALLM_API_KEY",
description="API key for MegaLLM",
)
megallm_base_url: str = Field(
default="https://ai.megallm.io/v1",
alias="MEGALLM_BASE_URL",
)
# Groq API settings (for large model)
groq_api_key: str = Field(
default="",
alias="GROQ_API_KEY",
description="API key for Groq",
)
groq_base_url: str = Field(
default="https://api.groq.com/openai/v1",
alias="GROQ_BASE_URL",
)
# OpenRouter API (fallback)
openrouter_api_key: str = Field(
default="",
alias="OPENROUTER_API_KEY",
description="API key for OpenRouter (fallback)",
)
# Model names
model_small: str = Field(
default="qwen/qwen3-32b",
alias="MODEL_SMALL",
description="Small model for routing, reranking, and RAG",
)
model_large: str = Field(
default="meta-llama/llama-4-scout-17b-16e-instruct",
alias="MODEL_LARGE",
description="Large model for logic/direct answering",
)
# Available large models for testing
available_large_models: list[str] = [
"llama-3.3-70b-versatile",
"meta-llama/llama-4-scout-17b-16e-instruct",
"moonshotai/kimi-k2-instruct-0905",
"openai/gpt-oss-120b"
]
# Local embedding model (Vietnamese)
embedding_model: str = Field(
default="bkai-foundation-models/vietnamese-bi-encoder",
alias="EMBEDDING_MODEL",
)
# Vector database
qdrant_collection: str = Field(
default="vnpt_knowledge_base",
alias="QDRANT_COLLECTION",
)
vector_db_path: str = Field(
default="",
alias="VECTOR_DB_PATH",
description="Path to Qdrant storage. Defaults to DATA_DIR/qdrant_storage if empty.",
)
# Firebase Admin
firebase_service_account_path: str = Field(
default="serviceAccountKey.json",
alias="FIREBASE_SERVICE_ACCOUNT_PATH",
description="Path to Firebase Service Account JSON",
)
firebase_credentials_json: str = Field(
default="",
alias="FIREBASE_CREDENTIALS_JSON",
description="Raw JSON string of service account key (for Cloud/HF Env)",
)
chunk_size: int = 1000
chunk_overlap: int = 200
top_k_retrieval: int = 10
top_k_rerank: int = 3
@property
def vector_db_path_resolved(self) -> Path:
"""Resolve vector database path, defaulting to DATA_DIR/qdrant_storage."""
if self.vector_db_path:
return Path(self.vector_db_path)
return DATA_DIR / "qdrant_storage"
class Config:
env_file = ".env"
extra = "ignore"
settings = Settings()
# Validate API key on import
if not settings.megallm_api_key:
import warnings
warnings.warn("MEGALLM_API_KEY not set. LLM calls will fail.")