# Hugging Face Spaces status: Running
"""
Central configuration for the telecalling agent.

Edit the values in this file to match your local setup.

Importing this module has one side effect: the ``data`` and ``models``
directories next to this file are created if they do not exist yet.
"""
import os
from pathlib import Path

# ---------------------------------------------------------------------------
# Project layout
# ---------------------------------------------------------------------------
ROOT_DIR = Path(__file__).parent
DATA_DIR = ROOT_DIR / "data"
MODELS_DIR = ROOT_DIR / "models"
# Create the working directories up front so later code can write into them.
DATA_DIR.mkdir(exist_ok=True)
MODELS_DIR.mkdir(exist_ok=True)

# ---------------------------------------------------------------------------
# Hugging Face
# ---------------------------------------------------------------------------
# Optional: set HF_TOKEN for private models or authenticated downloads.
# Falls back to an empty string when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# ---------------------------------------------------------------------------
# ASR: Hugging Face Moonshine
# ---------------------------------------------------------------------------
TRANSCRIBE_MODEL_ID = "UsefulSensors/moonshine-tiny"
TRANSCRIBE_LANGUAGE = "en"  # Moonshine Tiny is English ASR
TRANSCRIBE_DEVICE = "cpu"  # CPU-only for zero-gpu deployment
# Set TRANSCRIBE_LOCAL_ONLY=1 (or "true"/"yes", case-insensitive) to require a
# pre-downloaded Moonshine cache and disable remote Hugging Face Hub downloads.
# Surrounding whitespace is ignored so a value such as "1 " still counts.
TRANSCRIBE_LOCAL_ONLY = (
    os.environ.get("TRANSCRIBE_LOCAL_ONLY", "0").strip().lower()
    in ("1", "true", "yes")
)

# ---------------------------------------------------------------------------
# Intent parser: Qwen2.5-7B-Instruct (GGUF via llama-cpp-python)
# ---------------------------------------------------------------------------
# Using q2_k quantization for smaller size on CPU (~2.3 GB).
# Download from HF Hub on first run if not cached.
QWEN_GGUF_PATH = MODELS_DIR / "qwen2.5-7b-instruct-q2_k.gguf"
QWEN_N_GPU_LAYERS = 0  # CPU-only: no GPU layer offloading
QWEN_N_CTX = 4096  # context window sufficient for a call transcript
QWEN_MAX_TOKENS = 512  # max tokens for the structured JSON response
QWEN_TEMPERATURE = 0.1  # near-deterministic for structured output

# ---------------------------------------------------------------------------
# Evaluator: MiniCPM3-4B (CPU, bitsandbytes 4-bit)
# ---------------------------------------------------------------------------
MINICPM_MODEL_ID = "openbmb/MiniCPM3-4B"
MINICPM_DEVICE = "cpu"  # runs after Qwen is done; no VRAM conflict
MINICPM_MAX_TOKENS = 256

# ---------------------------------------------------------------------------
# VAD: Silero VAD (ONNX, CPU)
# ---------------------------------------------------------------------------
VAD_SAMPLE_RATE = 16000  # Hz; Silero and Moonshine both use 16kHz
VAD_CHUNK_MS = 250  # ms per audio chunk fed to VAD
# Samples per chunk, derived from the two values above (4000 with the defaults).
# Integer arithmetic keeps the result an exact int.
VAD_CHUNK_SAMPLES = VAD_SAMPLE_RATE * VAD_CHUNK_MS // 1000
VAD_SILENCE_THRESHOLD = 0.5
VAD_SILENCE_DURATION_S = 0.8
VAD_MIN_SPEECH_S = 0.5

# ---------------------------------------------------------------------------
# SQLite database
# ---------------------------------------------------------------------------
DB_PATH = DATA_DIR / "calls.db"

# ---------------------------------------------------------------------------
# Scheduling rules injected into MiniCPM's system prompt.
# ---------------------------------------------------------------------------
SCHEDULING_RULES = """
1. Meetings can only be booked Monday-Friday, 09:00-18:00.
2. Minimum meeting duration is 15 minutes; maximum is 120 minutes.
3. Back-to-back meetings are not allowed; require a 15-minute gap between slots.
4. If the caller does not provide a date or time, ask for one before confirming.
5. If the requested slot is already booked, suggest the next available slot.
6. Always confirm the caller's name before booking.
"""

# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
# NOTE(review): the leading glyph of APP_TITLE and the dash in APP_DESCRIPTION
# had been mis-decoded into Greek letters. They are spelled as escape sequences
# so they survive any further re-encoding of this file. The telephone emoji is
# a best guess at the lost original glyph, and the dash is assumed to be an
# em dash -- confirm both against the upstream file.
APP_TITLE = "\U0001F4DE AI Telecalling Agent"
APP_DESCRIPTION = (
    "Speak naturally \u2014 the agent will schedule your meeting automatically."
)
SERVER_PORT = 7860
SERVER_NAME = "0.0.0.0"  # bind to all interfaces for HF Spaces