Spaces:
Sleeping
Sleeping
File size: 2,859 Bytes
673435a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
"""
Transcript Model
"""
from datetime import datetime
from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey, JSON, Float
from sqlalchemy.orm import relationship
from .base import Base
from ..core.security_encryption import EncryptedString
class Transcript(Base):
    """Transcript database model.

    Maps the ``transcripts`` table. A row holds the text produced for one
    audio file together with timing data, language information, NLP analysis
    results and bookkeeping timestamps. Every column except ``id`` is
    nullable.
    """

    __tablename__ = "transcripts"

    id = Column(Integer, primary_key=True, index=True)

    # Optional link to the source row in ``audio_files``.
    audio_file_id = Column(
        Integer, ForeignKey("audio_files.id"), nullable=True, index=True
    )
    # user_id removed (Auth disabled for portfolio)

    # Transcript content - stored through the project's EncryptedString type.
    raw_text = Column(EncryptedString(10000), nullable=True)  # Original transcription
    processed_text = Column(EncryptedString(10000), nullable=True)  # After NLP processing

    # Segments with timestamps and speaker info (JSON array)
    # Format: [{"start": 0.0, "end": 1.5, "text": "Hello", "speaker": "SPEAKER_1", "confidence": 0.95}]
    segments = Column(JSON, nullable=True)

    # Word-level timestamps (JSON array)
    # Format: [{"word": "hello", "start": 0.0, "end": 0.5, "confidence": 0.98}]
    words = Column(JSON, nullable=True)

    # Language info
    language = Column(String(10), nullable=True)  # Transcription language
    translation_language = Column(String(10), nullable=True)  # If translated
    # NOTE(review): translated_text (Text) and the segments/words JSON columns
    # hold transcript content but do not use EncryptedString like raw_text
    # does - confirm whether that is intentional.
    translated_text = Column(Text, nullable=True)

    # NLP Analysis (Phase 2)
    sentiment = Column(JSON, nullable=True)  # {"overall": "positive", "score": 0.8, "segments": [...]}
    topics = Column(JSON, nullable=True)  # ["technology", "business"]
    keywords = Column(JSON, nullable=True)  # [{"word": "AI", "score": 0.9}]
    action_items = Column(JSON, nullable=True)  # [{"text": "Email John", "assignee": "Speaker 1"}]
    attendees = Column(JSON, nullable=True)  # ["Speaker 1", "Speaker 2"]
    summary = Column(EncryptedString(5000), nullable=True)  # Stored via EncryptedString

    # Metadata
    confidence = Column(Float, nullable=True)  # Overall confidence score
    duration = Column(Float, nullable=True)  # Audio duration in seconds
    word_count = Column(Integer, nullable=True)

    # Timestamps
    # NOTE(review): datetime.utcnow returns naive datetimes and is deprecated
    # since Python 3.12. It is kept as-is here because switching to
    # timezone-aware values would change what gets stored.
    created_at = Column(DateTime, default=datetime.utcnow, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    # Presumably mirrored by a ``transcripts`` relationship on AudioFile
    # (required by back_populates) - verify against that model.
    audio_file = relationship("AudioFile", back_populates="transcripts")
    # user relationship removed

    def __repr__(self):
        """Return a debug string with the id and a short preview of raw_text.

        Text longer than 50 characters is cut to its first 50 characters
        followed by "..."; shorter text, an empty string or ``None`` is shown
        unchanged.
        """
        text = self.raw_text
        preview = text[:50] + "..." if text and len(text) > 50 else text
        return f"<Transcript(id={self.id}, preview='{preview}')>"
# Float (used by the confidence and duration columns) is imported with the other SQLAlchemy types at the top of this file.
|