File size: 2,703 Bytes
9a02b57
 
 
 
 
 
 
 
959b417
 
 
9a02b57
 
 
 
 
959b417
 
9a02b57
 
959b417
 
 
e38990c
3e394c4
 
9a02b57
959b417
e38990c
 
 
 
959b417
 
 
9a02b57
959b417
 
 
9a02b57
 
959b417
 
 
 
 
 
 
9a02b57
959b417
9a02b57
 
959b417
9a02b57
959b417
9a02b57
959b417
9a02b57
 
 
 
 
959b417
 
9a02b57
959b417
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
Central configuration for the telecalling agent.
Edit the values in this file to match your local setup.
"""

import os
from pathlib import Path

# Filesystem layout: every project path hangs off the directory that
# contains this file.
ROOT_DIR = Path(__file__).parent
DATA_DIR = ROOT_DIR.joinpath("data")
MODELS_DIR = ROOT_DIR.joinpath("models")

# Create the working directories as an import-time side effect.
# exist_ok=True makes this a no-op when a directory is already present.
for _required_dir in (DATA_DIR, MODELS_DIR):
    _required_dir.mkdir(exist_ok=True)
del _required_dir  # keep the loop variable out of the module namespace

# Hugging Face Hub credentials
# Optional. Export HF_TOKEN when private models or authenticated downloads
# are needed. The variable is read once, at import time, and falls back to
# the empty string when it is not set.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# ASR: Hugging Face Moonshine
TRANSCRIBE_MODEL_ID = "UsefulSensors/moonshine-tiny"
TRANSCRIBE_LANGUAGE = "en"          # Moonshine Tiny is English ASR
TRANSCRIBE_DEVICE = "cpu"           # CPU-only for zero-gpu deployment

# Set TRANSCRIBE_LOCAL_ONLY=1 (also "true" or "yes", case-insensitive) to
# require a pre-downloaded Moonshine cache and disable remote Hugging Face
# Hub downloads. Any other value, or an unset variable, leaves it False.
# The raw value is stripped before comparison so that stray whitespace or a
# trailing newline (common with .env files and pasted secrets) does not
# silently turn an intended "1" into False.
TRANSCRIBE_LOCAL_ONLY = (
    os.environ.get("TRANSCRIBE_LOCAL_ONLY", "0").strip().lower()
    in ("1", "true", "yes")
)

# Intent parser: Qwen2.5-7B-Instruct, stored as a GGUF file for
# llama-cpp-python. The q2_k quantization is chosen to keep the model small
# on CPU (~2.3 GB per the original note). The file is expected under
# MODELS_DIR; the original note says it is downloaded from the HF Hub on
# first run if not cached (that logic is not in this file).
QWEN_GGUF_PATH = MODELS_DIR.joinpath("qwen2.5-7b-instruct-q2_k.gguf")

# Runtime settings for the parser.
QWEN_N_GPU_LAYERS = 0    # CPU-only: offload no layers to a GPU
QWEN_N_CTX = 4096        # context window; sized for one call transcript
QWEN_MAX_TOKENS = 512    # ceiling for the structured JSON response
QWEN_TEMPERATURE = 0.1   # low temperature keeps structured output near-deterministic

# Evaluator: MiniCPM3-4B (CPU, bitsandbytes 4-bit)
# NOTE(review): the header mentions bitsandbytes 4-bit and the original
# device note mentioned VRAM, yet the device below is "cpu" -- confirm the
# loader really applies 4-bit quantization on this device.
MINICPM_MODEL_ID = "openbmb/MiniCPM3-4B"
MINICPM_MAX_TOKENS = 256   # presumably the generation cap for the evaluator; verify against caller
MINICPM_DEVICE = "cpu"     # original note: runs after Qwen is done, so no VRAM conflict

# Voice-activity detection: Silero VAD (ONNX, CPU)
VAD_SAMPLE_RATE = 16_000   # Hz; Silero and Moonshine both use 16kHz
VAD_CHUNK_MS = 250         # length in ms of each audio chunk handed to VAD

# Samples per chunk, derived from the two values above (4000 with the
# current settings). Integer arithmetic keeps the result an exact int.
# NOTE(review): some Silero VAD releases accept only fixed window sizes
# (e.g. 512 samples at 16 kHz) -- confirm the consumer re-windows chunks.
VAD_CHUNK_SAMPLES = VAD_SAMPLE_RATE * VAD_CHUNK_MS // 1000

# Endpointing thresholds. The names suggest a probability cutoff and two
# durations in seconds -- TODO confirm against the code that reads them.
VAD_SILENCE_THRESHOLD = 0.5
VAD_SILENCE_DURATION_S = 0.8
VAD_MIN_SPEECH_S = 0.5

# SQLite database file; it lives inside DATA_DIR.
DB_PATH = DATA_DIR.joinpath("calls.db")

# Scheduling rules injected into MiniCPM's system prompt.
# Built from one literal per rule; the text starts with a blank line and
# every rule ends with a newline, so the value is the same as a
# triple-quoted block with one rule per line.
SCHEDULING_RULES = (
    "\n"
    "1. Meetings can only be booked Monday-Friday, 09:00-18:00.\n"
    "2. Minimum meeting duration is 15 minutes; maximum is 120 minutes.\n"
    "3. Back-to-back meetings are not allowed; require a 15-minute gap between slots.\n"
    "4. If the caller does not provide a date or time, ask for one before confirming.\n"
    "5. If the requested slot is already booked, suggest the next available slot.\n"
    "6. Always confirm the caller's name before booking.\n"
)

# Gradio UI: network binding
SERVER_NAME = "0.0.0.0"   # listen on all interfaces (needed on HF Spaces)
SERVER_PORT = 7860

# Gradio UI: user-facing text
APP_TITLE = "📞 AI Telecalling Agent"
APP_DESCRIPTION = (
    "Speak naturally — the agent will schedule your meeting automatically."
)