"""ORM model definitions for the voicebox SQLite database.""" from datetime import datetime import uuid from sqlalchemy import Column, String, Integer, Float, DateTime, Text, ForeignKey, Boolean, JSON from sqlalchemy.ext.declarative import declarative_base from ..utils.capture_chords import ( default_push_to_talk_chord, default_toggle_to_talk_chord, ) Base = declarative_base() class VoiceProfile(Base): """Voice profile. voice_type discriminates three flavours: - "cloned" — traditional reference-audio profiles (all cloning engines) - "preset" — engine-specific pre-built voice (e.g. Kokoro voices) - "designed" — text-described voice (e.g. Qwen CustomVoice, future) """ __tablename__ = "profiles" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, unique=True, nullable=False) description = Column(Text) language = Column(String, default="en") avatar_path = Column(String, nullable=True) effects_chain = Column(Text, nullable=True) # Voice type system — added v0.3.x voice_type = Column(String, default="cloned") # "cloned" | "preset" | "designed" preset_engine = Column(String, nullable=True) # e.g. "kokoro" — only for preset preset_voice_id = Column(String, nullable=True) # e.g. "am_adam" — only for preset design_prompt = Column(Text, nullable=True) # text description — only for designed default_engine = Column(String, nullable=True) # auto-selected engine, locked for preset # Free-form character prompt used by the compose button and the # personality-rewrite path on /generate. Describes *what* this voice # says and how, orthogonal to how it sounds (handled by the preset / # cloning metadata above). personality = Column(Text, nullable=True) created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) class ProfileSample(Base): """Audio sample attached to a voice profile.""" __tablename__ = "profile_samples" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) profile_id = Column(String, ForeignKey("profiles.id"), nullable=False) audio_path = Column(String, nullable=False) reference_text = Column(Text, nullable=False) class Generation(Base): """A single TTS generation.""" __tablename__ = "generations" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) profile_id = Column(String, ForeignKey("profiles.id"), nullable=False) text = Column(Text, nullable=False) language = Column(String, default="en") audio_path = Column(String, nullable=True) duration = Column(Float, nullable=True) seed = Column(Integer) instruct = Column(Text) engine = Column(String, default="qwen") model_size = Column(String, nullable=True) status = Column(String, default="completed") error = Column(Text, nullable=True) is_favorited = Column(Boolean, default=False) # Origin of this generation — "manual" for plain /generate calls, # "personality_speak" for rows whose text was rewritten through the # profile's personality LLM before TTS. Future sources (bulk import, # agent replies, etc.) can extend this. source = Column(String, nullable=False, default="manual") created_at = Column(DateTime, default=datetime.utcnow) class Story(Base): """A story that sequences multiple generations.""" __tablename__ = "stories" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) description = Column(Text) created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) class StoryItem(Base): """Links a generation to a story at a specific timecode.""" __tablename__ = "story_items" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) story_id = Column(String, ForeignKey("stories.id"), nullable=False) generation_id = Column(String, ForeignKey("generations.id"), nullable=False) version_id = Column(String, ForeignKey("generation_versions.id"), nullable=True) start_time_ms = Column(Integer, nullable=False, default=0) track = Column(Integer, nullable=False, default=0) trim_start_ms = Column(Integer, nullable=False, default=0) trim_end_ms = Column(Integer, nullable=False, default=0) volume = Column(Float, nullable=False, default=1.0) created_at = Column(DateTime, default=datetime.utcnow) class Project(Base): """Audio studio project (JSON blob).""" __tablename__ = "projects" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) data = Column(Text) created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) class GenerationVersion(Base): """A version of a generation's audio (original, processed, alternate takes).""" __tablename__ = "generation_versions" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) generation_id = Column(String, ForeignKey("generations.id"), nullable=False) label = Column(String, nullable=False) audio_path = Column(String, nullable=False) effects_chain = Column(Text, nullable=True) source_version_id = Column(String, ForeignKey("generation_versions.id"), nullable=True) is_default = Column(Boolean, default=False) created_at = Column(DateTime, default=datetime.utcnow) class EffectPreset(Base): """Saved effect chain preset.""" __tablename__ = "effect_presets" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, unique=True, nullable=False) description = Column(Text, nullable=True) effects_chain = Column(Text, nullable=False) is_builtin = Column(Boolean, default=False) sort_order = Column(Integer, default=100) created_at = Column(DateTime, default=datetime.utcnow) class AudioChannel(Base): """Audio output channel (bus).""" __tablename__ = "audio_channels" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) name = Column(String, nullable=False) is_default = Column(Boolean, default=False) created_at = Column(DateTime, default=datetime.utcnow) class ChannelDeviceMapping(Base): """Mapping between a channel and an OS audio device.""" __tablename__ = "channel_device_mappings" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) channel_id = Column(String, ForeignKey("audio_channels.id"), nullable=False) device_id = Column(String, nullable=False) class ProfileChannelMapping(Base): """Many-to-many mapping between voice profiles and audio channels.""" __tablename__ = "profile_channel_mappings" profile_id = Column(String, ForeignKey("profiles.id"), primary_key=True) channel_id = Column(String, ForeignKey("audio_channels.id"), primary_key=True) class CaptureSettings(Base): """Singleton row holding user defaults for the capture/refine flow. Kept server-side so every window, CLI client, and API consumer reads the same preferences. The ``id`` column is always 1. """ __tablename__ = "capture_settings" id = Column(Integer, primary_key=True, default=1) stt_model = Column(String, nullable=False, default="turbo") language = Column(String, nullable=False, default="auto") auto_refine = Column(Boolean, nullable=False, default=True) llm_model = Column(String, nullable=False, default="0.6B") smart_cleanup = Column(Boolean, nullable=False, default=True) self_correction = Column(Boolean, nullable=False, default=True) preserve_technical = Column(Boolean, nullable=False, default=True) allow_auto_paste = Column(Boolean, nullable=False, default=True) default_playback_voice_id = Column(String, nullable=True) # Default OFF — opting in is what triggers the macOS Input Monitoring TCC # prompt. We deliberately don't spawn the global keyboard tap until the # user flips this on so a fresh-install user doesn't see a scary # "Voicebox would like to receive keystrokes from any application" dialog # before they've even opened the Captures tab. hotkey_enabled = Column(Boolean, nullable=False, default=False) # Lists of keytap key names (e.g. "MetaRight", "ControlRight"). Right-hand # modifiers by default so they don't collide with left-hand shortcuts. chord_push_to_talk_keys = Column( JSON, nullable=False, default=default_push_to_talk_chord ) chord_toggle_to_talk_keys = Column( JSON, nullable=False, default=default_toggle_to_talk_chord ) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) class GenerationSettings(Base): """Singleton row for long-form TTS generation preferences.""" __tablename__ = "generation_settings" id = Column(Integer, primary_key=True, default=1) max_chunk_chars = Column(Integer, nullable=False, default=800) crossfade_ms = Column(Integer, nullable=False, default=50) normalize_audio = Column(Boolean, nullable=False, default=True) autoplay_on_generate = Column(Boolean, nullable=False, default=True) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) class MCPClientBinding(Base): """Per-MCP-client settings (voice profile, engine, personality default). Lets users bind distinct voices to distinct agents — e.g. Claude Code speaks in "Morgan," Cursor in "Scarlett." The MCP client identifies itself via the ``X-Voicebox-Client-Id`` HTTP header; direct-HTTP clients set it in their MCP config's ``headers`` block, the stdio shim forwards it from the ``VOICEBOX_CLIENT_ID`` env var. """ __tablename__ = "mcp_client_bindings" client_id = Column(String, primary_key=True) label = Column(String, nullable=True) # display name profile_id = Column(String, ForeignKey("profiles.id"), nullable=True) default_engine = Column(String, nullable=True) # When true, voicebox.speak routes through the profile's personality LLM # (rewrite) before TTS by default. Callers can still override per call. default_personality = Column(Boolean, nullable=False, default=False) last_seen_at = Column(DateTime, nullable=True) created_at = Column(DateTime, default=datetime.utcnow) updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow) class Capture(Base): """A single voice input capture (dictation, recording, or uploaded file). Stores the original audio alongside the raw transcript and, optionally, a refined version produced by the LLM. Refinement flags are serialized as JSON so we can reproduce the prompt that generated the refined text. """ __tablename__ = "captures" id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4())) audio_path = Column(String, nullable=False) source = Column(String, nullable=False, default="file") # dictation | recording | file language = Column(String, nullable=True) duration_ms = Column(Integer, nullable=True) transcript_raw = Column(Text, nullable=False, default="") transcript_refined = Column(Text, nullable=True) stt_model = Column(String, nullable=True) llm_model = Column(String, nullable=True) refinement_flags = Column(Text, nullable=True) # JSON blob created_at = Column(DateTime, default=datetime.utcnow)