Spaces:

Merlintxu
/

conversation

Runtime error

App Files Files Community

Merlintxu committed on Jan 8

Commit

198f50f

verified ·

1 Parent(s): f4b2919

Update conversation_storyline/schemas.py

Browse files

Files changed (1) hide show

conversation_storyline/schemas.py +92 -31

conversation_storyline/schemas.py CHANGED Viewed

@@ -1,31 +1,92 @@
-from dataclasses import dataclass
-from typing import Optional, Dict, Any
-@dataclass
-class Interaction:
-    message_id: int
-    speaker: str
-    text: str
-    # inferred
-    reply_to_id: Optional[int] = None
-    topic_id: Optional[int] = None
-    topic_label: Optional[str] = None
-    # optional metrics
-    sentiment: Optional[float] = None
-    confidence_reply: Optional[float] = None
-@dataclass
-class PipelineArtifacts:
-    out_dir: str
-    storyline_png: str
-    storyline_html: str
-    metrics_csv: str
-    interactions_jsonl: str
-    graph_json: str
-    summary_text: str
-    figs: Dict[str, Any]

+# conversation_storyline/schemas.py
+from __future__ import annotations
+from typing import List, Optional, Literal, Dict, Any
+from pydantic import BaseModel, Field, ConfigDict
+InteractionType = Literal[  # closed set of speech-act labels; used as the type of InteractionNode.interaction_type
+    "statement",
+    "question",
+    "answer",
+    "interruption",
+    "agreement",
+    "disagreement",
+]
+class RawMessage(BaseModel):  # one raw message as produced by ingestion (TXT/CSV file or pasted text)
+    """
+    Representa un mensaje crudo tras la ingesta (TXT/CSV o texto pegado).
+    """
+    model_config = ConfigDict(extra="ignore")  # input keys not declared below are ignored instead of rejected
+    id: int = Field(..., description="ID secuencial global (0..n-1).")  # required; sequential global ID (0..n-1)
+    speaker: str = Field(..., description="Nombre normalizado del hablante.")  # required; normalized speaker name
+    content: str = Field(..., description="Texto original del turno.")  # required; original text of the turn
+    timestamp: Optional[str] = Field(None, description="Timestamp opcional si existe.")  # optional timestamp kept as a string; None when absent
+class InteractionNode(BaseModel):  # one structured turn (output of the SIE / LLM module)
+    """
+    Turno estructurado (salida del módulo SIE / LLM).
+    """
+    model_config = ConfigDict(extra="ignore")  # input keys not declared below are ignored instead of rejected
+    id: int = Field(..., description="ID secuencial global del turno.")  # required; sequential global ID of the turn
+    speaker: str = Field(..., description="Nombre normalizado del hablante.")  # required; normalized speaker name
+    content_summary: str = Field(..., description="Resumen breve (1 frase).")  # required; short one-sentence summary
+    reply_to_id: Optional[int] = Field(  # ID of the message this turn replies to; defaults to None
+        None,
+        description="ID del mensaje al que responde. Null si inicia hilo o no se infiere con confianza.",  # None when the turn starts a thread or the target cannot be inferred confidently
+    )
+    topic_label: str = Field(..., description="Etiqueta breve del tema (ej: 'Presupuesto').")  # required; short topic label (e.g. 'Presupuesto', i.e. "Budget")
+    is_topic_shift: bool = Field(False, description="True si hay cambio sustancial de tema.")  # True on a substantial topic change; defaults to False
+    sentiment_score: float = Field(..., ge=-1.0, le=1.0, description="Sentimiento [-1, 1].")  # required; validated to lie in [-1.0, 1.0]
+    interaction_type: InteractionType = Field(..., description="Tipo de acto de habla.")  # required; speech-act type, one of the InteractionType literals
+    confidence: float = Field(0.75, ge=0.0, le=1.0, description="Confianza global del modelo.")  # overall model confidence in [0.0, 1.0]; defaults to 0.75
+    target_speaker: Optional[str] = Field(None, description="Hablante objetivo si reply_to_id se resuelve.")  # speaker being addressed when reply_to_id is resolved; defaults to None
+class ConversationSegment(BaseModel):  # structured output for one analysis window/segment
+    """
+    Salida estructurada por ventana/segmento de análisis.
+    """
+    model_config = ConfigDict(extra="ignore")  # input keys not declared below are ignored instead of rejected
+    interactions: List[InteractionNode] = Field(..., description="Interacciones estructuradas en este segmento.")  # required; structured interactions in this segment
+    active_participants: List[str] = Field(default_factory=list, description="Participantes activos del segmento.")  # participants active in the segment; defaults to a new empty list
+    dominant_topic: str = Field(..., description="Tema dominante del segmento.")  # required; dominant topic of the segment
+    notes: Optional[str] = Field(None, description="Notas opcionales.")  # optional notes; defaults to None
+class GraphExport(BaseModel):  # D3-style graph export made of nodes and links
+    """
+    Exportación tipo D3: nodes/links.
+    """
+    model_config = ConfigDict(extra="ignore")  # input keys not declared below are ignored instead of rejected
+    nodes: List[Dict[str, Any]]  # required; each node is a free-form dict (no per-key schema is declared here)
+    links: List[Dict[str, Any]]  # required; each link is a free-form dict (no per-key schema is declared here)
+class ReplyToPick(BaseModel):  # structured output for choosing reply_to_id among Top-K candidates
+    """
+    Output estructurado para elegir reply_to_id entre candidatos (Top-K).
+    """
+    model_config = ConfigDict(extra="ignore")  # input keys not declared below are ignored instead of rejected
+    reply_to_id: Optional[int] = Field(None, description="ID elegido (o null).")  # chosen ID, or None; defaults to None
+    confidence: float = Field(0.75, ge=0.0, le=1.0, description="Confianza del pick.")  # confidence of the pick in [0.0, 1.0]; defaults to 0.75
+    rationale: Optional[str] = Field(None, description="Breve justificación.")  # optional brief justification; defaults to None
+class TopicLabeling(BaseModel):  # structured output for labelling a detected topical segment (change-points)
+    """
+    Output estructurado para etiquetar un segmento temático detectado (change-points).
+    """
+    model_config = ConfigDict(extra="ignore")  # input keys not declared below are ignored instead of rejected
+    topic_label: str = Field(..., description="Etiqueta breve del tema (3-6 palabras).")  # required; short topic label (3-6 words)
+    keywords: List[str] = Field(default_factory=list, description="Keywords representativas.")  # representative keywords; defaults to a new empty list