File size: 3,159 Bytes
ba016aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
from __future__ import annotations

from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, ConfigDict


class DatasetInfo(BaseModel):
    """Schema describing one dataset record.

    ``from_attributes`` is enabled, so an instance can be built from any
    object exposing these fields as attributes (``model_validate(obj)``),
    not only from a mapping.
    """

    # Pydantic v2 style configuration; the nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning at class creation.
    model_config = ConfigDict(from_attributes=True)

    id: int
    name: str
    file_name: Optional[str] = None
    sheet_name: Optional[str] = None
    dataset_role: str  # allowed role values are not defined in this file
    data_scope: str  # allowed scope values are not defined in this file
    # NOTE(review): stored as a single string; the serialization format
    # (comma-separated, JSON, ...) is not visible here — confirm with the writer.
    vector_fields: Optional[str] = None
    row_count: int = 0


class TaskCreate(BaseModel):
    """Options for a matching task (presumably the create-request payload).

    Every field has a default, so an empty payload validates.
    """
    match_mode: str = "two_file"  # other accepted modes are not defined in this file
    top_k: int = 3  # presumably the number of candidates kept per source row — TODO confirm
    min_threshold: float = 0.70  # presumably the minimum similarity score to accept — TODO confirm
    candidate_scope: str = "current_task_target"  # other accepted scopes are not defined in this file


class TaskProgress(BaseModel):
    """Schema reporting a task's status, row/vector counters and per-stage progress.

    ``from_attributes`` is enabled, so an instance can be built from any
    object exposing these fields as attributes (``model_validate(obj)``).
    """

    # Pydantic v2 style configuration; the nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning at class creation.
    model_config = ConfigDict(from_attributes=True)

    id: int
    task_code: str
    status: str  # allowed status values are not defined in this file
    source_row_count: int
    target_row_count: int
    reused_vectors: int
    new_vectors: int
    # One integer per pipeline stage.
    # NOTE(review): units (percent vs. item count) are not visible here — confirm.
    progress_parse_source: int
    progress_parse_target: int
    progress_vectorize: int
    progress_load_candidates: int
    progress_similarity: int
    # The only stage value with a default; it may be omitted by the producer.
    progress_rerank: int = 0
    progress_save_results: int


class TaskDetail(BaseModel):
    """Full description of a task: settings, counters, linked datasets, timestamps.

    ``from_attributes`` is enabled, so an instance (including the nested
    ``DatasetInfo`` values) can be built from any object exposing these
    fields as attributes (``model_validate(obj)``).
    """

    # Pydantic v2 style configuration; the nested ``class Config`` form is
    # deprecated in v2 and emits a deprecation warning at class creation.
    model_config = ConfigDict(from_attributes=True)

    id: int
    task_code: str
    match_mode: str
    candidate_scope: str
    top_k: int
    min_threshold: float
    status: str  # allowed status values are not defined in this file
    source_row_count: int
    target_row_count: int
    high_match_count: int
    low_confidence_count: int
    reused_vectors: int
    new_vectors: int
    # Both datasets are optional and default to None when absent.
    source_dataset: Optional[DatasetInfo] = None
    target_dataset: Optional[DatasetInfo] = None
    # NOTE(review): timezone handling of these datetimes is not visible here.
    created_time: Optional[datetime] = None
    updated_time: Optional[datetime] = None


class TaskListItem(BaseModel):
    """Compact task summary (presumably one row of a task listing).

    Carries dataset names as plain strings rather than nested dataset
    objects, and does not enable ``from_attributes``.
    """
    id: int
    task_code: str
    match_mode: str
    candidate_scope: str
    source_dataset_name: Optional[str] = None
    target_dataset_name: Optional[str] = None
    status: str
    is_archived: int = 0  # integer flag; presumably 0 = no, 1 = yes — TODO confirm
    is_delete: int = 0  # integer flag; presumably 0 = no, 1 = yes — TODO confirm
    created_time: Optional[datetime] = None


class MatchResultItem(BaseModel):
    """A single match result pairing a source text with a target text and its scores."""
    id: int
    source_row_id: int
    source_row_number: int
    source_text: str
    target_text: str
    similarity_score: float  # score range is not defined in this file
    rerank_score: Optional[float] = None  # None when no rerank score is supplied
    match_level: str  # allowed level values are not defined in this file
    candidate_scope: Optional[str] = None
    is_confirmed: int = 0  # integer flag; presumably 0 = unconfirmed — TODO confirm


class MatchResultPage(BaseModel):
    """A list of ``MatchResultItem`` entries together with pagination counters."""
    items: List[MatchResultItem]
    total: int  # presumably the total number of results across all pages — TODO confirm
    page: int  # NOTE(review): 0- vs 1-based indexing is not visible here
    page_size: int


class CandidateDetail(BaseModel):
    """One candidate target row for a source row, with its rank and scores."""
    rank: int
    rerank_rank: Optional[int] = None  # None when no rerank position is supplied
    target_row_id: int
    target_text: str
    similarity_score: float  # score range is not defined in this file
    rerank_score: Optional[float] = None  # None when no rerank score is supplied
    match_level: str  # allowed level values are not defined in this file
    dataset_role: str
    candidate_scope: Optional[str] = None
    # NOTE(review): relationship between data_row_id and target_row_id is not
    # visible in this file — confirm with the producer of this schema.
    data_row_id: int
    is_confirmed: int = 0  # integer flag; presumably 0 = unconfirmed — TODO confirm


class SourceWithCandidates(BaseModel):
    """A source row bundled with its list of ``CandidateDetail`` entries."""
    source_row_id: int
    source_text: str
    source_row_number: int
    dataset_role: str
    data_row_id: int
    candidates: List[CandidateDetail]  # required; ordering is not enforced by this schema


class SheetInfo(BaseModel):
    """Sheet names plus an untyped ``columns`` mapping."""
    sheet_names: List[str]
    columns: dict  # presumably maps sheet name -> column names — TODO confirm


class UploadResponse(BaseModel):
    """Response schema carrying a dataset id, file name, sheet names and column mappings."""
    dataset_id: int
    file_name: str
    sheet_names: List[str]
    columns: dict  # presumably maps sheet name -> column names — TODO confirm
    # Pydantic copies mutable defaults for each instance, so this literal is
    # not shared between instances the way a plain class attribute would be.
    all_columns: dict = {}


class SettingItem(BaseModel):
    """A single setting expressed as a string key and a string value."""
    key: str
    value: str


class SettingsResponse(BaseModel):
    """Response schema wrapping an untyped ``settings`` mapping."""
    settings: dict  # presumably setting key -> value; value types are not constrained here