File size: 3,197 Bytes
1c167a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import enum
import json
import uuid
from datetime import datetime, timezone
from typing import Any

from sqlalchemy import DateTime, Float, ForeignKey, Integer, String, Text
from sqlalchemy.orm import Mapped, mapped_column, relationship

from app.database import Base


class JobStatus(str, enum.Enum):
    """String-valued enumeration of the statuses a job can be in.

    Each member's value is identical to its name. Because the class also
    subclasses ``str``, members compare equal to their plain-string values.
    ``Job.status`` stores the plain string and defaults to ``pending``.
    """

    pending = "pending"
    processing = "processing"
    transcribing = "transcribing"
    analyzing_frames = "analyzing_frames"
    embedding = "embedding"
    ready = "ready"
    failed = "failed"
    cancelled = "cancelled"


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since
    Python 3.12. The time is taken as an aware UTC value and its ``tzinfo``
    is then dropped, so the timestamp columns keep receiving naive UTC
    values exactly as before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _load_json_as(raw: str | None, expected: type) -> Any:
    """Decode *raw* as JSON and return it only if it is an *expected* instance.

    Returns ``None`` when *raw* is empty or ``None``, is not valid JSON, or
    decodes to a value of a different type.
    """
    if not raw:
        return None
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError:
        return None
    return decoded if isinstance(decoded, expected) else None


class Job(Base):
    """ORM model for a row of the ``jobs`` table.

    Holds the file location, status/progress fields, JSON-encoded
    diagnostics, and the ordered collection of ``TranscriptSegment`` rows
    that belong to the job.
    """

    __tablename__ = "jobs"

    # Primary key: a freshly generated UUID4 rendered as a 36-char string.
    id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Stored as a plain string; new rows start as JobStatus.pending.
    status: Mapped[str] = mapped_column(String(32), default=JobStatus.pending.value, index=True)
    original_filename: Mapped[str] = mapped_column(String(512))
    storage_path: Mapped[str] = mapped_column(String(1024))
    duration_seconds: Mapped[float | None] = mapped_column(Float, nullable=True)
    title: Mapped[str | None] = mapped_column(String(256), nullable=True)
    subject: Mapped[str | None] = mapped_column(String(128), nullable=True)
    thumbnail: Mapped[str | None] = mapped_column(String(512), nullable=True)
    progress_message: Mapped[str | None] = mapped_column(String(512), nullable=True)
    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Raw JSON text; read through the ``pipeline_timings`` property.
    pipeline_timings_json: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Raw JSON text; read through the ``activity_log`` property.
    activity_log_json: Mapped[str | None] = mapped_column(Text, nullable=True)
    whisper_language: Mapped[str | None] = mapped_column(String(32), nullable=True)
    whisper_task: Mapped[str] = mapped_column(String(16), default="transcribe")
    # Naive UTC timestamps; ``updated_at`` is refreshed on every UPDATE.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=_utcnow)
    updated_at: Mapped[datetime] = mapped_column(DateTime, default=_utcnow, onupdate=_utcnow)

    # Child segments ordered by start time; they are deleted with the job
    # and when removed from this collection.
    segments: Mapped[list["TranscriptSegment"]] = relationship(
        back_populates="job",
        cascade="all, delete-orphan",
        order_by="TranscriptSegment.start_sec",
    )

    @property
    def pipeline_timings(self) -> dict[str, Any] | None:
        """Return ``pipeline_timings_json`` decoded as a dict.

        Returns ``None`` when the column is empty, holds invalid JSON, or
        decodes to something other than a dict.
        """
        return _load_json_as(self.pipeline_timings_json, dict)

    @property
    def activity_log(self) -> list[dict[str, Any]]:
        """Return ``activity_log_json`` decoded as a list.

        Returns an empty list when the column is empty, holds invalid JSON,
        or decodes to something other than a list.
        """
        # ``or []`` maps the helper's ``None`` to the documented empty list.
        return _load_json_as(self.activity_log_json, list) or []


class TranscriptSegment(Base):
    """ORM model for a row of the ``transcript_segments`` table.

    Each segment belongs to exactly one ``Job`` through ``job_id`` and is
    exposed on the parent as ``Job.segments``.
    """

    __tablename__ = "transcript_segments"

    # Auto-incrementing integer primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Foreign key to jobs.id; the constraint is declared with ON DELETE CASCADE.
    job_id: Mapped[str] = mapped_column(String(36), ForeignKey("jobs.id", ondelete="CASCADE"), index=True)
    # Segment bounds -- presumably offsets in seconds from the start of the
    # media (inferred from the names; confirm against the writer).
    start_sec: Mapped[float] = mapped_column(Float, index=True)
    end_sec: Mapped[float] = mapped_column(Float)
    text: Mapped[str] = mapped_column(Text)

    # Parent job; the other side of ``Job.segments``.
    job: Mapped[Job] = relationship(back_populates="segments")