QUEST / .env.example
Lzy01241010's picture
rename: Quest-35B -> QUEST-35B (uniform uppercase branding)
0c32859
# =============================================================================
# Required
# =============================================================================
# Personal HF token with read access to osunlp/QUEST-35B.
# `hf_xxx` is a placeholder; replace it with your own token.
HF_TOKEN=hf_xxx
# Dedicated HF Inference Endpoint URL that serves osunlp/QUEST-35B.
# Must end with /v1/ (including the trailing slash). `your-endpoint-id` is a
# placeholder for your own endpoint.
QUEST_BASE_URL=https://your-endpoint-id.aws.endpoints.huggingface.cloud/v1/
# Model name the endpoint responds to. TGI containers usually use "tgi";
# vLLM containers usually use the original repo id ("osunlp/QUEST-35B").
QUEST_ENDPOINT_MODEL=tgi
# Bearer token sent to QUEST_BASE_URL. Optional. When unset, HF_TOKEN is used
# (matches legacy behaviour for HF Inference Endpoints). Set this to the
# `--api-key` of a self-hosted vLLM (or any other OpenAI-compatible server
# you tunnel through Cloudflare/ngrok) so that the real HF_TOKEN never reaches
# third-party logs.
QUEST_API_KEY=
# Default model preselected in the dropdown.
DEFAULT_MODEL=osunlp/QUEST-35B
# =============================================================================
# Recommended: strongly improves latency and reliability
# =============================================================================
# Google Serper API key. When set, the `search` tool uses Serper first and only
# falls back to the DuckDuckGo (DDG) HTML backend if Serper fails. Serper is
# ~10x faster than scraping DDG and is not subject to the 202 Ratelimit that
# hits shared HF Space IPs. Get a key at https://serper.dev/api-key
# Either variable name (SERPER_API_KEY or SERPER_KEY_ID) is accepted, to match
# the research repo's convention:
SERPER_API_KEY=
# SERPER_KEY_ID=
# Max tokens the QUEST endpoint is allowed to emit per turn. 4096 gives the
# <think> block enough room; raise to 6144 for very long research reports.
QUEST_MAX_NEW_TOKENS=4096
# =============================================================================
# Optional: not currently wired into app.py (listed for reference)
# =============================================================================
# The research repo (QUEST-main/inference) uses these to plug in Jina Reader
# for HTML-to-markdown extraction and GPT for condenser/summarization, but the
# Space starter does not call either of them. Setting them here has no effect
# today; they are listed only so you know what you'd plug in for the full
# research pipeline.
# JINA_API_KEYS=
# OpenAI API key.
# API_KEY=
# SUMMARY_MODEL_NAME=gpt-5-mini
# MEMORY_MODEL_NAME=gpt-5-mini
# MEMORY_OPENAI_API_KEY=