# Copy to .env and adjust. All values have sane defaults; the service runs with
# none of these set (in offline mode).
#
# Format: one KEY=value per line, no spaces around "=", comments on their own
# line starting with "#".

# Run the deterministic offline stub (1) or load the real model (0).
# Offline needs no network and no torch/transformers; it's the default.
OFFLINE=1

# Hugging Face repo (or local path) used only when OFFLINE=0.
MODEL_ID=LaelaZ/distilbert-emotion

# Port the API listens on.
PORT=8000

# Log verbosity level.
LOG_LEVEL=INFO

# Max items accepted in one /predict call and coalesced into one forward pass.
MAX_BATCH_SIZE=64

# Max characters allowed in a single input (cheap guard before tokenization).
MAX_TEXT_LENGTH=2000

# How long the micro-batcher waits (ms) to fill a batch before flushing.
# Smaller -> lower latency; larger -> higher throughput under concurrency.
BATCH_MAX_DELAY_MS=5