voiceforge / backend /app /models /transcript.py
lordofgaming
Initial VoiceForge deployment (clean)
673435a
"""
Transcript Model
"""
from datetime import datetime
from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey, JSON, Float
from sqlalchemy.orm import relationship
from .base import Base
from ..core.security_encryption import EncryptedString
class Transcript(Base):
    """Transcript database model.

    Maps the ``transcripts`` table. A row holds the transcription output
    linked (optionally) to an ``audio_files`` row: the text itself,
    timestamped segments/words, language and translation info, NLP
    analysis results, and summary metadata.

    Columns declared with ``EncryptedString`` (``raw_text``,
    ``processed_text``, ``summary``) use the project's custom column type
    from ``core.security_encryption``; the remaining text/JSON columns use
    plain SQLAlchemy types.
    """

    __tablename__ = "transcripts"

    id = Column(Integer, primary_key=True, index=True)
    # Nullable: a transcript row may exist without an associated audio file.
    audio_file_id = Column(Integer, ForeignKey("audio_files.id"), nullable=True, index=True)
    # user_id removed (Auth disabled for portfolio)

    # Transcript content - ENCRYPTED
    raw_text = Column(EncryptedString(10000), nullable=True)  # Original transcription
    processed_text = Column(EncryptedString(10000), nullable=True)  # After NLP processing

    # Segments with timestamps and speaker info (JSON array)
    # Format: [{"start": 0.0, "end": 1.5, "text": "Hello", "speaker": "SPEAKER_1", "confidence": 0.95}]
    segments = Column(JSON, nullable=True)

    # Word-level timestamps (JSON array)
    # Format: [{"word": "hello", "start": 0.0, "end": 0.5, "confidence": 0.98}]
    words = Column(JSON, nullable=True)

    # Language info
    language = Column(String(10), nullable=True)  # Transcription language
    translation_language = Column(String(10), nullable=True)  # If translated
    translated_text = Column(Text, nullable=True)

    # NLP Analysis (Phase 2)
    sentiment = Column(JSON, nullable=True)  # {"overall": "positive", "score": 0.8, "segments": [...]}
    topics = Column(JSON, nullable=True)  # ["technology", "business"]
    keywords = Column(JSON, nullable=True)  # [{"word": "AI", "score": 0.9}]
    action_items = Column(JSON, nullable=True)  # [{"text": "Email John", "assignee": "Speaker 1"}]
    attendees = Column(JSON, nullable=True)  # ["Speaker 1", "Speaker 2"]
    summary = Column(EncryptedString(5000), nullable=True)  # ENCRYPTED

    # Metadata
    confidence = Column(Float, nullable=True)  # Overall confidence score
    duration = Column(Float, nullable=True)  # Audio duration in seconds
    word_count = Column(Integer, nullable=True)

    # Timestamps. ``datetime.utcnow`` is passed as a callable so it is
    # evaluated per row; it yields naive datetimes expressed in UTC.
    created_at = Column(DateTime, default=datetime.utcnow, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    # ``back_populates`` requires a matching ``transcripts`` relationship
    # on the ``AudioFile`` model.
    audio_file = relationship("AudioFile", back_populates="transcripts")
    # user relationship removed

    def __repr__(self):
        """Return a debug string with the row id and a short text preview.

        ``raw_text`` longer than 50 characters is cut to its first 50
        characters followed by ``"..."``; shorter, empty, or ``None``
        values are shown unchanged.
        """
        text = self.raw_text
        if text and len(text) > 50:
            preview = text[:50] + "..."
        else:
            preview = text
        return f"<Transcript(id={self.id}, preview='{preview}')>"
# NOTE: Float is already imported at the top of this file; it backs the confidence and duration columns.