| # ============================================================================= | |
| # Required | |
| # ============================================================================= | |
| # Personal HF token with read access to osunlp/QUEST-35B. | |
| HF_TOKEN=hf_xxx | |
| # Dedicated HF Inference Endpoint URL that serves osunlp/QUEST-35B. | |
| # Must end with /v1/. | |
| QUEST_BASE_URL=https://your-endpoint-id.aws.endpoints.huggingface.cloud/v1/ | |
| # Model name the endpoint responds to. TGI containers usually use "tgi"; | |
| # vLLM containers usually use the original repo id ("osunlp/QUEST-35B"). | |
| QUEST_ENDPOINT_MODEL=tgi | |
| # Bearer token sent to QUEST_BASE_URL. Optional. When unset, HF_TOKEN is used | |
| # (matches legacy behaviour for HF Inference Endpoints). Set this to the | |
| # `--api-key` of a self-hosted vLLM (or any other OpenAI-compatible server | |
| # you tunnel through Cloudflare/ngrok) so the real HF_TOKEN never reaches | |
| # third-party logs. | |
| QUEST_API_KEY= | |
| # Default model preselected in the dropdown. | |
| DEFAULT_MODEL=osunlp/QUEST-35B | |
| # ============================================================================= | |
| # Recommended: strongly improves latency and reliability | |
| # ============================================================================= | |
| # Serper.dev (Google Search API) key. When set, the `search` tool uses Serper first and only | |
| # falls back to the DuckDuckGo HTML backend if Serper fails. Serper is ~10x | |
| # faster than scraping DDG and is not subject to the 202 Ratelimit that hits | |
| # shared HF Space IPs. Get one at https://serper.dev/api-key | |
| # Either variable name below (SERPER_API_KEY or SERPER_KEY_ID) is accepted, for compatibility with the research repo's convention: | |
| SERPER_API_KEY= | |
| # SERPER_KEY_ID= | |
| # Max tokens the Quest endpoint is allowed to emit per turn. 4096 gives the | |
| # <think> block enough room; raise to 6144 for very long research reports. | |
| QUEST_MAX_NEW_TOKENS=4096 | |
| # ============================================================================= | |
| # Optional: not currently wired into app.py (listed for reference) | |
| # ============================================================================= | |
| # The research repo (QUEST-main/inference) uses these to plug in Jina Reader | |
| # for HTML-to-markdown extraction and GPT for condenser/summarization, but the | |
| # Space starter does not call either of them. Setting them here has no effect | |
| # today; they are listed only so you know what you'd plug in for the full | |
| # research pipeline. | |
| # JINA_API_KEYS= | |
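| # OpenAI API key. (Note kept on its own line: some env-file loaders treat an inline "# ..." after `=` as part of the value.) | |
| # API_KEY= | |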
| # SUMMARY_MODEL_NAME=gpt-5-mini | |
| # MEMORY_MODEL_NAME=gpt-5-mini | |
| # MEMORY_OPENAI_API_KEY= | |