File size: 2,828 Bytes
aca8ab4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
LangGraph state schema for the multi-agent workflow.
"""
from typing import Any, Dict, List, Optional, TypedDict
from utils.schemas import Paper, PaperChunk, Analysis, SynthesisResult, ValidatedOutput


class AgentState(TypedDict, total=False):
    """
    Shared state passed from node to node in the LangGraph workflow.

    The class only supplies type hints for LangGraph's state management;
    the state itself remains an ordinary dictionary, so code written against
    the dictionary-based state keeps working. Because the schema is declared
    with ``total=False``, every key below is optional for type checkers.
    """

    # ---- Inputs ----
    # Research question entered by the user.
    query: str
    # arXiv category used as a filter (e.g., "cs.AI").
    category: Optional[str]
    # How many papers should be analyzed.
    num_papers: int

    # ---- Retriever outputs ----
    # Papers fetched from arXiv.
    papers: List[Paper]
    # Paper content after chunking.
    chunks: List[PaperChunk]

    # ---- Analyzer outputs ----
    # One analysis per individual paper.
    analyses: List[Analysis]
    # Subset of the analyses whose confidence is > 0.
    filtered_analyses: List[Analysis]

    # ---- Synthesis output ----
    # Synthesis across all analyzed papers.
    synthesis: Optional[SynthesisResult]

    # ---- Citation output ----
    # Final output after validation.
    validated_output: Optional[ValidatedOutput]

    # ---- Metadata and tracking ----
    # Error messages accumulated during the run.
    errors: List[str]
    # Token usage counters.
    token_usage: Dict[str, int]
    # Unix timestamp.
    start_time: float
    # Metadata describing the models in use.
    model_desc: Dict[str, str]

    # ---- LangFuse tracing metadata ----
    # LangFuse trace ID.
    trace_id: Optional[str]
    # Session ID of the user.
    session_id: Optional[str]
    # User identifier (for multi-user systems).
    user_id: Optional[str]


def create_initial_state(
    query: str,
    category: Optional[str],
    num_papers: int,
    model_desc: Dict[str, str],
    start_time: float,
    session_id: Optional[str] = None,
    user_id: Optional[str] = None,
) -> AgentState:
    """
    Build the state dictionary a LangGraph workflow run starts from.

    Caller-supplied values are copied in as given; every output slot starts
    empty (``[]`` or ``None``) and all token counters start at zero.

    Args:
        query: User's research question
        category: arXiv category filter
        num_papers: Number of papers to analyze
        model_desc: Model metadata (llm_model, embedding_model); stored by
            reference, not copied
        start_time: Unix timestamp
        session_id: Optional session identifier
        user_id: Optional user identifier

    Returns:
        A new AgentState dictionary with all sixteen keys populated
    """
    # Fresh counter mapping per call so separate runs never share totals.
    zeroed_usage: Dict[str, int] = {
        counter: 0
        for counter in ("input_tokens", "output_tokens", "embedding_tokens")
    }

    # User inputs go in first.
    state: AgentState = {
        "query": query,
        "category": category,
        "num_papers": num_papers,
    }

    # Retriever and analyzer outputs: nothing collected yet.
    state["papers"] = []
    state["chunks"] = []
    state["analyses"] = []
    state["filtered_analyses"] = []

    # Synthesis and citation outputs: not produced yet.
    state["synthesis"] = None
    state["validated_output"] = None

    # Metadata and tracking.
    state["errors"] = []
    state["token_usage"] = zeroed_usage
    state["start_time"] = start_time
    state["model_desc"] = model_desc

    # Tracing metadata: the trace ID always starts unset here.
    state["trace_id"] = None
    state["session_id"] = session_id
    state["user_id"] = user_id

    return state