# Environment configuration: one KEY=value assignment per line.
# Comments sit on their own lines; inline comments are avoided because many
# env parsers treat text after the value as part of the value.

# Generation backend: gemini (API), local (Qwen on CPU), or auto.
# auto = Gemini if GEMINI_API_KEY is set.
GENERATION_BACKEND=gemini
GEMINI_API_KEY=
GOOGLE_API_KEY=
GEMINI_MODEL=gemini-2.0-flash

# Local models (used when generation runs locally).
# NOTE(review): SKIP_LOCAL_LLM_HUB_DOWNLOAD=1 presumably disables fetching the
# local LLM from the model hub; confirm against the loader code.
SKIP_LOCAL_LLM_HUB_DOWNLOAD=1
LOCAL_EMBEDDING_MODEL=all-MiniLM-L6-v2
LOCAL_LLM_MODEL=Qwen/Qwen2.5-1.5B-Instruct

# Task A settings.
TASK_A_REVIEWS_EMBEDDED=data/task_a_reviews_embedded.jsonl
TASK_A_RAG_TOP_K=5
TASK_A_MAX_TOKENS=1024
TASK_A_TEMPERATURE=0.35

# Task B settings.
TASK_B_EMBEDDED_CATALOG=data/business_catalog_embedded.jsonl
TASK_B_LLM_CANDIDATE_CAP=6
TASK_B_TEMPERATURE=0.2
TASK_B_MAX_OUTPUT_TOKENS=256

# Task B rank mode: llm (Gemini or local Qwen) or retrieval.
# retrieval is instant (no API).
TASK_B_RANK_MODE=llm
TASK_B_FAST_RANK=

# Startup prewarm controls.
# NOTE(review): accepted values other than "all" are not visible here; confirm.
STARTUP_PREWARM=all
SKIP_STARTUP_PREWARM=

# NOTE(review): presumably a Hugging Face access token; keep real secrets out
# of version control.
HF_TOKEN=