Spaces:

ihtesham0345
/

key_word_Fast_API

Running

feat: Add LongCat API as primary model with Qwen fallback + cross-map fix

249b12a 28 days ago

861 Bytes

	# AI Model Configuration
	# MODEL_ID selects which Qwen2.5 Instruct checkpoint is used. Options:
	#   Qwen/Qwen2.5-0.5B-Instruct  (Nano       - ~1GB, fastest, CPU-friendly)
	#   Qwen/Qwen2.5-1.5B-Instruct  (Goldilocks - ~3GB, recommended for GPU)
	#   Qwen/Qwen2.5-7B-Instruct    (Smartest   - needs 4-bit quantization + GPU)
	# The value below is the smallest (0.5B) option.
	MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct

	# Quantization mode. Accepted values: auto, 4bit, 8bit, none
	#   auto = 4-bit on GPU for 7B, 8-bit on GPU for 1.5B, bfloat16 on CPU
	#   4bit = force 4-bit quantization (requires GPU + bitsandbytes)
	#   8bit = force 8-bit quantization
	#   none = disable quantization, uses bfloat16
	# NOTE(review): what "auto" picks for the 0.5B model on GPU is not stated
	# here; confirm against the model-loading code.
	QUANTIZATION=auto

	# Double quantization for 4-bit mode (saves ~10% more memory).
	# Described as a 4-bit option, so presumably it has no effect in the
	# 8bit/none modes -- TODO confirm against the model-loading code.
	USE_DOUBLE_QUANT=true

	# LongCat API (primary model, falls back to local Qwen on quota exhaustion)
	# SECURITY: never commit a real API key to this file. The key that was
	# previously stored on this line remains in the repository history and must
	# be treated as compromised: revoke/rotate it with the provider, then supply
	# the new key through the runtime environment (e.g. a Hugging Face Spaces
	# secret named LONGCAT_API_KEY) or an untracked local .env file.
	# NOTE(review): confirm the app handles an empty key the way you want
	# (e.g. by using the local Qwen model) before deploying.
	LONGCAT_API_KEY=
	LONGCAT_BASE_URL=https://api.longcat.chat/openai
	LONGCAT_MODEL=LongCat-2.0-Preview