Merlintxu committed on
Commit
198f50f
verified
1 Parent(s): f4b2919

Update conversation_storyline/schemas.py

Browse files
Files changed (1) hide show
  1. conversation_storyline/schemas.py +92 -31
conversation_storyline/schemas.py CHANGED
@@ -1,31 +1,92 @@
1
- from dataclasses import dataclass
2
- from typing import Optional, Dict, Any
3
-
4
-
5
- @dataclass
6
- class Interaction:
7
- message_id: int
8
- speaker: str
9
- text: str
10
-
11
- # inferred
12
- reply_to_id: Optional[int] = None
13
- topic_id: Optional[int] = None
14
- topic_label: Optional[str] = None
15
-
16
- # optional metrics
17
- sentiment: Optional[float] = None
18
- confidence_reply: Optional[float] = None
19
-
20
-
21
- @dataclass
22
- class PipelineArtifacts:
23
- out_dir: str
24
- storyline_png: str
25
- storyline_html: str
26
- metrics_csv: str
27
- interactions_jsonl: str
28
- graph_json: str
29
-
30
- summary_text: str
31
- figs: Dict[str, Any]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # conversation_storyline/schemas.py
2
+ from __future__ import annotations
3
+
4
+ from typing import List, Optional, Literal, Dict, Any
5
+ from pydantic import BaseModel, Field, ConfigDict
6
+
7
+ InteractionType = Literal[
8
+ "statement",
9
+ "question",
10
+ "answer",
11
+ "interruption",
12
+ "agreement",
13
+ "disagreement",
14
+ ]
15
+
16
+ class RawMessage(BaseModel):
17
+ """
18
+ Representa un mensaje crudo tras la ingesta (TXT/CSV o texto pegado).
19
+ """
20
+ model_config = ConfigDict(extra="ignore")
21
+
22
+ id: int = Field(..., description="ID secuencial global (0..n-1).")
23
+ speaker: str = Field(..., description="Nombre normalizado del hablante.")
24
+ content: str = Field(..., description="Texto original del turno.")
25
+ timestamp: Optional[str] = Field(None, description="Timestamp opcional si existe.")
26
+
27
+
28
+ class InteractionNode(BaseModel):
29
+ """
30
+ Turno estructurado (salida del módulo SIE / LLM).
31
+ """
32
+ model_config = ConfigDict(extra="ignore")
33
+
34
+ id: int = Field(..., description="ID secuencial global del turno.")
35
+ speaker: str = Field(..., description="Nombre normalizado del hablante.")
36
+ content_summary: str = Field(..., description="Resumen breve (1 frase).")
37
+
38
+ reply_to_id: Optional[int] = Field(
39
+ None,
40
+ description="ID del mensaje al que responde. Null si inicia hilo o no se infiere con confianza.",
41
+ )
42
+
43
+ topic_label: str = Field(..., description="Etiqueta breve del tema (ej: 'Presupuesto').")
44
+ is_topic_shift: bool = Field(False, description="True si hay cambio sustancial de tema.")
45
+ sentiment_score: float = Field(..., ge=-1.0, le=1.0, description="Sentimiento [-1, 1].")
46
+ interaction_type: InteractionType = Field(..., description="Tipo de acto de habla.")
47
+
48
+ confidence: float = Field(0.75, ge=0.0, le=1.0, description="Confianza global del modelo.")
49
+ target_speaker: Optional[str] = Field(None, description="Hablante objetivo si reply_to_id se resuelve.")
50
+
51
+
52
+ class ConversationSegment(BaseModel):
53
+ """
54
+ Salida estructurada por ventana/segmento de análisis.
55
+ """
56
+ model_config = ConfigDict(extra="ignore")
57
+
58
+ interactions: List[InteractionNode] = Field(..., description="Interacciones estructuradas en este segmento.")
59
+ active_participants: List[str] = Field(default_factory=list, description="Participantes activos del segmento.")
60
+ dominant_topic: str = Field(..., description="Tema dominante del segmento.")
61
+ notes: Optional[str] = Field(None, description="Notas opcionales.")
62
+
63
+
64
+ class GraphExport(BaseModel):
65
+ """
66
+ Exportación tipo D3: nodes/links.
67
+ """
68
+ model_config = ConfigDict(extra="ignore")
69
+
70
+ nodes: List[Dict[str, Any]]
71
+ links: List[Dict[str, Any]]
72
+
73
+
74
+ class ReplyToPick(BaseModel):
75
+ """
76
+ Output estructurado para elegir reply_to_id entre candidatos (Top-K).
77
+ """
78
+ model_config = ConfigDict(extra="ignore")
79
+
80
+ reply_to_id: Optional[int] = Field(None, description="ID elegido (o null).")
81
+ confidence: float = Field(0.75, ge=0.0, le=1.0, description="Confianza del pick.")
82
+ rationale: Optional[str] = Field(None, description="Breve justificación.")
83
+
84
+
85
+ class TopicLabeling(BaseModel):
86
+ """
87
+ Output estructurado para etiquetar un segmento temático detectado (change-points).
88
+ """
89
+ model_config = ConfigDict(extra="ignore")
90
+
91
+ topic_label: str = Field(..., description="Etiqueta breve del tema (3-6 palabras).")
92
+ keywords: List[str] = Field(default_factory=list, description="Keywords representativas.")