""" Central configuration for the telecalling agent. Edit the values in this file to match your local setup. """ import os from pathlib import Path # Project root ROOT_DIR = Path(__file__).parent DATA_DIR = ROOT_DIR / "data" MODELS_DIR = ROOT_DIR / "models" DATA_DIR.mkdir(exist_ok=True) MODELS_DIR.mkdir(exist_ok=True) # Hugging Face # Optional: set HF_TOKEN for private models or authenticated downloads. HF_TOKEN = os.environ.get("HF_TOKEN", "") # ASR: Hugging Face Moonshine TRANSCRIBE_MODEL_ID = "UsefulSensors/moonshine-tiny" TRANSCRIBE_LANGUAGE = "en" # Moonshine Tiny is English ASR TRANSCRIBE_DEVICE = "cpu" # CPU-only for zero-gpu deployment TRANSCRIBE_LOCAL_ONLY = os.environ.get("TRANSCRIBE_LOCAL_ONLY", "0").lower() in ("1", "true", "yes") # Set TRANSCRIBE_LOCAL_ONLY=1 to require a pre-downloaded Moonshine cache and disable remote Hugging Face Hub downloads. # Intent Parser: Qwen2.5-7B-Instruct (GGUF via llama-cpp-python) # Using q2_k quantization for smaller size on CPU (~2.3 GB). # Download from HF Hub on first run if not cached. QWEN_GGUF_PATH = MODELS_DIR / "qwen2.5-7b-instruct-q2_k.gguf" QWEN_N_GPU_LAYERS = 0 # CPU-only: no GPU layer offloading QWEN_N_CTX = 4096 # context window sufficient for a call transcript QWEN_MAX_TOKENS = 512 # max tokens for the structured JSON response QWEN_TEMPERATURE = 0.1 # near-deterministic for structured output # Evaluator: MiniCPM3-4B (CPU, bitsandbytes 4-bit) MINICPM_MODEL_ID = "openbmb/MiniCPM3-4B" MINICPM_DEVICE = "cpu" # runs after Qwen is done; no VRAM conflict MINICPM_MAX_TOKENS = 256 # VAD: Silero VAD (ONNX, CPU) VAD_SAMPLE_RATE = 16000 # Hz; Silero and Moonshine both use 16kHz VAD_CHUNK_MS = 250 # ms per audio chunk fed to VAD VAD_CHUNK_SAMPLES = int(VAD_SAMPLE_RATE * VAD_CHUNK_MS / 1000) # 4000 VAD_SILENCE_THRESHOLD = 0.5 VAD_SILENCE_DURATION_S = 0.8 VAD_MIN_SPEECH_S = 0.5 # SQLite database DB_PATH = DATA_DIR / "calls.db" # Scheduling rules injected into MiniCPM's system prompt. SCHEDULING_RULES = """ 1. Meetings can only be booked Monday-Friday, 09:00-18:00. 2. Minimum meeting duration is 15 minutes; maximum is 120 minutes. 3. Back-to-back meetings are not allowed; require a 15-minute gap between slots. 4. If the caller does not provide a date or time, ask for one before confirming. 5. If the requested slot is already booked, suggest the next available slot. 6. Always confirm the caller's name before booking. """ # Gradio UI APP_TITLE = "📞 AI Telecalling Agent" APP_DESCRIPTION = "Speak naturally — the agent will schedule your meeting automatically." SERVER_PORT = 7860 SERVER_NAME = "0.0.0.0" # bind to all interfaces for HF Spaces