Spaces:
Sleeping
Sleeping
| """ | |
| Transcript Model | |
| """ | |
| from datetime import datetime | |
| from sqlalchemy import Column, Integer, String, Text, DateTime, ForeignKey, JSON, Float | |
| from sqlalchemy.orm import relationship | |
| from .base import Base | |
| from ..core.security_encryption import EncryptedString | |
class Transcript(Base):
    """Transcript database model.

    Maps to the ``transcripts`` table. A row stores the transcription of an
    audio file: the raw and processed text, timestamped segments and words,
    language/translation data, NLP analysis results and summary metadata.
    All content and analysis columns are nullable.
    """

    __tablename__ = "transcripts"

    id = Column(Integer, primary_key=True, index=True)
    audio_file_id = Column(Integer, ForeignKey("audio_files.id"), nullable=True, index=True)
    # user_id removed (Auth disabled for portfolio)

    # Transcript content - ENCRYPTED
    raw_text = Column(EncryptedString(10000), nullable=True)  # Original transcription
    processed_text = Column(EncryptedString(10000), nullable=True)  # After NLP processing

    # Segments with timestamps and speaker info (JSON array)
    # Format: [{"start": 0.0, "end": 1.5, "text": "Hello", "speaker": "SPEAKER_1", "confidence": 0.95}]
    segments = Column(JSON, nullable=True)

    # Word-level timestamps (JSON array)
    # Format: [{"word": "hello", "start": 0.0, "end": 0.5, "confidence": 0.98}]
    words = Column(JSON, nullable=True)

    # Language info
    language = Column(String(10), nullable=True)  # Transcription language
    translation_language = Column(String(10), nullable=True)  # If translated
    translated_text = Column(Text, nullable=True)

    # NLP Analysis (Phase 2)
    sentiment = Column(JSON, nullable=True)  # {"overall": "positive", "score": 0.8, "segments": [...]}
    topics = Column(JSON, nullable=True)  # ["technology", "business"]
    keywords = Column(JSON, nullable=True)  # [{"word": "AI", "score": 0.9}]
    action_items = Column(JSON, nullable=True)  # [{"text": "Email John", "assignee": "Speaker 1"}]
    attendees = Column(JSON, nullable=True)  # ["Speaker 1", "Speaker 2"]
    summary = Column(EncryptedString(5000), nullable=True)  # ENCRYPTED

    # Metadata
    confidence = Column(Float, nullable=True)  # Overall confidence score
    duration = Column(Float, nullable=True)  # Audio duration in seconds
    word_count = Column(Integer, nullable=True)

    # Timestamps
    # The function object (not its result) is passed, so the time is computed
    # for each row rather than once when the module is imported.
    created_at = Column(DateTime, default=datetime.utcnow, index=True)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    audio_file = relationship("AudioFile", back_populates="transcripts")
    # user relationship removed

    def __repr__(self) -> str:
        """Return a debug string with the row id and a preview of ``raw_text``.

        The preview is the first 50 characters followed by ``...`` when the
        text is longer than 50 characters; otherwise it is ``raw_text``
        unchanged (which may be ``None`` or an empty string).
        """
        # NOTE(review): raw_text is declared as an encrypted column; this
        # preview presumably contains readable transcript text wherever the
        # repr ends up (logs, tracebacks) - confirm that is acceptable.
        text = self.raw_text
        if text and len(text) > 50:
            preview = text[:50] + "..."
        else:
            preview = text
        return f"<Transcript(id={self.id}, preview='{preview}')>"
| # Float (used by the confidence and duration columns) is already imported with the other SQLAlchemy types at the top of this module. | |