# =============================================================================
# Required
# =============================================================================

# Personal HF token with read access to osunlp/QUEST-35B.
HF_TOKEN=hf_xxx

# Dedicated HF Inference Endpoint URL that serves osunlp/QUEST-35B.
# Must end with /v1/.
QUEST_BASE_URL=https://your-endpoint-id.aws.endpoints.huggingface.cloud/v1/

# Model name the endpoint responds to. TGI containers usually use "tgi";
# vLLM containers usually use the original repo id ("osunlp/QUEST-35B").
QUEST_ENDPOINT_MODEL=tgi

# Bearer token sent to QUEST_BASE_URL. Optional. When unset, HF_TOKEN is used
# (matches legacy behaviour for HF Inference Endpoints). Set this to the
# `--api-key` of a self-hosted vLLM (or any other OpenAI-compatible server
# you tunnel through Cloudflare/ngrok) so the real HF_TOKEN never reaches
# third-party logs.
QUEST_API_KEY=

# Default model preselected in the dropdown.
DEFAULT_MODEL=osunlp/QUEST-35B

# =============================================================================
# Recommended: strongly improves latency and reliability
# =============================================================================

# Google Serper API key. When set, the `search` tool uses Serper first and only
# falls back to the DuckDuckGo HTML backend if Serper fails. Serper is ~10x
# faster than scraping DDG and is not subject to the 202 Ratelimit that hits
# shared HF Space IPs. Get one at https://serper.dev/api-key
# Either name is accepted to match the research repo's convention:
SERPER_API_KEY=
# SERPER_KEY_ID=

# Max tokens the Quest endpoint is allowed to emit per turn. 4096 gives the
# output enough room; raise to 6144 for very long research reports.
QUEST_MAX_NEW_TOKENS=4096

# =============================================================================
# Optional: not currently wired into app.py (listed for reference)
# =============================================================================

# The research repo (QUEST-main/inference) uses these to plug in Jina Reader
# for HTML-to-markdown extraction and GPT for condenser/summarization, but the
# Space starter does not call either of them. Setting them here has no effect
# today; they are listed only so you know what you'd plug in for the full
# research pipeline.
# JINA_API_KEYS=
# OpenAI API key:
# API_KEY=
# SUMMARY_MODEL_NAME=gpt-5-mini
# MEMORY_MODEL_NAME=gpt-5-mini
# MEMORY_OPENAI_API_KEY=