Spaces:

build-small-hackathon
/

TeleAgent

Running

File size: 2,703 Bytes

"""
Central configuration for the telecalling agent.
Edit the values in this file to match your local setup.
"""

import os
from pathlib import Path

# Filesystem layout: everything is anchored at the directory holding this file.
ROOT_DIR = Path(__file__).parent
DATA_DIR = ROOT_DIR.joinpath("data")
MODELS_DIR = ROOT_DIR.joinpath("models")

# Create the working directories at import time; existing ones are left alone.
for _required_dir in (DATA_DIR, MODELS_DIR):
    _required_dir.mkdir(exist_ok=True)
del _required_dir  # keep the loop variable out of the module namespace

# Hugging Face credentials
# HF_TOKEN is optional: export it only when private models or authenticated
# downloads are needed. An unset variable yields an empty string.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Speech recognition (ASR): Hugging Face Moonshine
TRANSCRIBE_MODEL_ID = "UsefulSensors/moonshine-tiny"
# Moonshine Tiny is an English ASR model.
TRANSCRIBE_LANGUAGE = "en"
# CPU-only, for zero-GPU deployment.
TRANSCRIBE_DEVICE = "cpu"
# Set TRANSCRIBE_LOCAL_ONLY=1 (or "true" / "yes", case-insensitive) to require a
# pre-downloaded Moonshine cache and disable remote Hugging Face Hub downloads.
# Any other value, or leaving the variable unset, keeps the flag off.
TRANSCRIBE_LOCAL_ONLY = os.getenv("TRANSCRIBE_LOCAL_ONLY", "0").lower() in {"1", "true", "yes"}

# Intent parser: Qwen2.5-7B-Instruct, a GGUF model run via llama-cpp-python.
# The q2_k quantization is chosen for a smaller footprint on CPU (~2.3 GB as
# stated by the original note; NOTE(review): verify against the actual file).
# The model is downloaded from the HF Hub on first run if it is not cached.
QWEN_GGUF_PATH = MODELS_DIR.joinpath("qwen2.5-7b-instruct-q2_k.gguf")
# CPU-only: no layers are offloaded to a GPU.
QWEN_N_GPU_LAYERS = 0
# Context window; sufficient for a call transcript.
QWEN_N_CTX = 4096
# Upper bound on tokens for the structured JSON response.
QWEN_MAX_TOKENS = 512
# Near-deterministic sampling for structured output.
QWEN_TEMPERATURE = 0.1

# Evaluator: MiniCPM3-4B
# NOTE(review): the original header describes this as "CPU, bitsandbytes 4-bit";
# confirm that the loader really applies 4-bit quantization on a CPU device.
MINICPM_MODEL_ID = "openbmb/MiniCPM3-4B"
# Runs after Qwen is done, so the two models do not compete for memory
# (the original note says "no VRAM conflict" -- TODO confirm, device is CPU).
MINICPM_DEVICE = "cpu"
MINICPM_MAX_TOKENS = 256

# Voice activity detection: Silero VAD (ONNX, CPU)
# Sample rate in Hz; Silero and Moonshine both use 16 kHz.
VAD_SAMPLE_RATE = 16_000
# Length in milliseconds of each audio chunk fed to the VAD.
VAD_CHUNK_MS = 250
# Samples per chunk, derived from the two values above (4000 with the defaults).
VAD_CHUNK_SAMPLES = int(VAD_CHUNK_MS * VAD_SAMPLE_RATE / 1000)
# NOTE(review): the three settings below are undocumented in the original;
# presumably a speech-probability cutoff, the trailing silence (seconds) that
# ends an utterance, and the shortest accepted utterance (seconds) -- confirm
# against the code that consumes them.
VAD_SILENCE_THRESHOLD = 0.5
VAD_SILENCE_DURATION_S = 0.8
VAD_MIN_SPEECH_S = 0.5

# Location of the SQLite database file, kept inside the data directory.
DB_PATH = DATA_DIR.joinpath("calls.db")

# Booking policy text that is injected into MiniCPM's system prompt.
# Built from adjacent string literals, one rule per line; the value starts and
# ends with a newline.
SCHEDULING_RULES = (
    "\n"
    "1. Meetings can only be booked Monday-Friday, 09:00-18:00.\n"
    "2. Minimum meeting duration is 15 minutes; maximum is 120 minutes.\n"
    "3. Back-to-back meetings are not allowed; require a 15-minute gap between slots.\n"
    "4. If the caller does not provide a date or time, ask for one before confirming.\n"
    "5. If the requested slot is already booked, suggest the next available slot.\n"
    "6. Always confirm the caller's name before booking.\n"
)

# Gradio UI: network binding
# "0.0.0.0" binds to all interfaces, as needed for HF Spaces.
SERVER_NAME = "0.0.0.0"
SERVER_PORT = 7860

# Gradio UI: text shown to the user
APP_TITLE = "📞 AI Telecalling Agent"
APP_DESCRIPTION = "Speak naturally — the agent will schedule your meeting automatically."