File size: 873 Bytes
b5e0c74
 
 
 
 
 
 
 
 
9707a84
b5e0c74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from dataclasses import dataclass, field
from enum import Enum
from pathlib import Path
from typing import Any


class SourceType(str, Enum):
    """Closed set of source kinds; every member is also a plain ``str``.

    Because the enum mixes in ``str``, a member compares equal to its
    lowercase value (``SourceType.PDF == "pdf"``) and can be looked up from
    that value (``SourceType("pdf")``).
    """

    PDF = "pdf"
    ARXIV = "arxiv"
    MEDIUM = "medium"


@dataclass(frozen=True)
class Document:
    """Immutable record describing one source document.

    Assigning to an attribute after construction raises
    ``dataclasses.FrozenInstanceError``; the ``metadata`` dict itself can
    still be mutated in place.

    ``metadata`` takes part in ``==`` but is left out of the generated
    ``__hash__``: a ``dict`` is unhashable, so including it would make
    ``hash(document)`` raise ``TypeError`` and keep instances out of sets
    and dict keys.
    """

    source_type: SourceType  # kind of source this document belongs to
    title: str
    text: str  # textual content of the document
    source: str  # presumably a path or URL naming the origin -- TODO confirm
    # A fresh dict per instance; excluded from hashing only, never from equality.
    metadata: dict[str, Any] = field(default_factory=dict, hash=False)


@dataclass(frozen=True)
class Chunk:
    """Immutable record for one piece of text plus its provenance fields.

    Assigning to an attribute after construction raises
    ``dataclasses.FrozenInstanceError``; the ``metadata`` dict itself can
    still be mutated in place.

    ``metadata`` takes part in ``==`` but is left out of the generated
    ``__hash__``: a ``dict`` is unhashable, so including it would make
    ``hash(chunk)`` raise ``TypeError`` and prevent de-duplicating chunks
    through a ``set`` or using them as dict keys.
    """

    id: str  # identifier of this chunk
    text: str
    index: int  # presumably the chunk's position within its document -- TODO confirm
    source_type: SourceType
    source: str
    title: str
    # A fresh dict per instance; excluded from hashing only, never from equality.
    metadata: dict[str, Any] = field(default_factory=dict, hash=False)


@dataclass(frozen=True)
class IngestionResult:
    """Immutable bundle of a document, its chunks and two output locations.

    Only the attribute bindings are frozen; the ``chunks`` list can still be
    mutated in place.
    """

    document: Document  # the document these chunks belong to
    chunks: list[Chunk]
    collection_name: str  # presumably the vector-store collection name -- TODO confirm
    export_path: Path  # presumably where the result was exported -- TODO confirm


@dataclass(frozen=True)
class SearchResult:
    """Immutable record for one search hit.

    ``metadata`` is optional and defaults to a fresh empty dict, so callers
    with nothing extra to attach can omit it. It takes part in ``==`` but is
    left out of the generated ``__hash__``: a ``dict`` is unhashable, so
    including it would make ``hash(result)`` raise ``TypeError``.
    """

    score: float  # presumably a relevance/similarity score -- TODO confirm scale
    text: str
    title: str
    source: str
    source_type: str  # annotated as a plain str here, not an enum
    # Last field, so giving it a default stays compatible with positional callers.
    metadata: dict[str, Any] = field(default_factory=dict, hash=False)