Bromeo777 committed on
Commit
0eda893
·
verified ·
1 Parent(s): 644b58f

Add app\models\extraction.py

Browse files
Files changed (1) hide show
  1. app//models//extraction.py +130 -0
app//models//extraction.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from typing import Optional, Dict, Any, TYPE_CHECKING
3
+ import enum
4
+
5
+ from sqlalchemy import (
6
+ String,
7
+ Integer,
8
+ DateTime,
9
+ ForeignKey,
10
+ Text,
11
+ Enum,
12
+ Index,
13
+ JSON # FIX: Reviewer 1 #30 - Native JSON support
14
+ )
15
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
16
+ from sqlalchemy.sql import func
17
+
18
+ from app.models.base import Base
19
+
20
+ if TYPE_CHECKING:
21
+ from app.models.paper import Paper
22
+ from app.models.user import User
23
+
24
+
25
class ExtractionStatus(str, enum.Enum):
    """Lifecycle states an extraction job can be in.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (for example ``ExtractionStatus.QUEUED == "queued"``).
    """

    # Job has been created but not picked up yet.
    QUEUED = "queued"
    # Job is currently being worked on.
    PROCESSING = "processing"
    # Job finished successfully.
    COMPLETED = "completed"
    # Job ended with an error.
    FAILED = "failed"
31
+
32
+
33
class Extraction(Base):
    """
    Stores structured AI extractions from research papers.

    System Role:
    - Provides a native JSON data store for multi-dimensional research metadata.
    - Orchestrates async AI tasks via status-tracking and job_id.

    Each row belongs to exactly one paper (``paper_id``, deleted together with
    the paper) and optionally to a user (``user_id``, set to NULL when the
    user is deleted).
    """

    __tablename__ = "extractions"

    # ------------------------------------------------------------------
    # Identifiers & Ownership
    # ------------------------------------------------------------------
    id: Mapped[int] = mapped_column(Integer, primary_key=True)

    # Required link to the source paper; ON DELETE CASCADE removes the
    # extraction when its paper row is deleted.
    paper_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Optional link to the requesting user; ON DELETE SET NULL keeps the
    # extraction when the user row is deleted.
    user_id: Mapped[Optional[int]] = mapped_column(
        Integer,
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )

    # ------------------------------------------------------------------
    # Job Coordination
    # ------------------------------------------------------------------
    # Unique, indexed identifier of the job (at most 100 characters).
    job_id: Mapped[str] = mapped_column(
        String(100),
        unique=True,
        nullable=False,
        index=True,
    )

    # SQLAlchemy's Enum persists the member *names* ("QUEUED", ...) unless a
    # values_callable is supplied, so the server-side default has to be the
    # member name as well. The lowercase value "queued" is not a label of
    # this enum type and cannot be loaded back into ExtractionStatus.
    status: Mapped[ExtractionStatus] = mapped_column(
        Enum(ExtractionStatus, name="extraction_status"),
        default=ExtractionStatus.QUEUED,
        server_default=ExtractionStatus.QUEUED.name,
    )

    # Free-text error detail; NULL when nothing has been recorded.
    error_message: Mapped[Optional[str]] = mapped_column(Text)

    # ------------------------------------------------------------------
    # Native JSON Storage
    # ------------------------------------------------------------------
    # PICO is a structured dictionary for native DB-level querying.
    pico_data: Mapped[Optional[Dict[str, Any]]] = mapped_column(
        JSON,
        nullable=True,
        comment="Structured PICO (Population, Intervention, Comparison, Outcome)",
    )

    # Risk-of-Bias (RoB) assessment.
    risk_of_bias: Mapped[Optional[Dict[str, Any]]] = mapped_column(
        JSON,
        nullable=True,
        comment="Native JSON structure for domain-specific bias metrics",
    )

    # ------------------------------------------------------------------
    # Audit & Metadata
    # ------------------------------------------------------------------
    # Model identifier recorded with the row; defaults to the same value on
    # both the Python side and the database side.
    model_version: Mapped[str] = mapped_column(
        String(50),
        default="llama-3.1-70b",
        server_default="llama-3.1-70b",
    )

    # Set by the database when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Set by the database on insert and refreshed via ``onupdate`` whenever
    # SQLAlchemy issues an UPDATE for the row.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Has no default; stays NULL until a value is assigned.
    completed_at: Mapped[Optional[datetime]] = mapped_column(
        DateTime(timezone=True)
    )

    # ------------------------------------------------------------------
    # Relationships
    # ------------------------------------------------------------------
    # The paper is loaded eagerly with a JOIN; the user is loaded lazily on
    # first attribute access.
    paper: Mapped["Paper"] = relationship("Paper", lazy="joined")
    user: Mapped[Optional["User"]] = relationship("User", lazy="select")

    # ------------------------------------------------------------------
    # Representation
    # ------------------------------------------------------------------
    def __repr__(self) -> str:
        """Return a short debug string showing the job id and status."""
        return f"<Extraction(job_id={self.job_id}, status={self.status})>"