File size: 2,779 Bytes
52a0fe9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a2aa7c3
52a0fe9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38365d2
52a0fe9
 
 
 
 
 
 
 
 
 
38365d2
 
 
52a0fe9
 
 
 
 
38365d2
52a0fe9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
"""
Pydantic models for request/response schemas.
"""
from pydantic import BaseModel
from typing import Optional, List, Dict, Any
from enum import Enum
import time
import uuid


class TaskStatus(str, Enum):
    """Status values a processing task can carry.

    The ``str`` mix-in makes each member a real string, so a member
    compares equal to its lowercase value
    (``TaskStatus.PENDING == "pending"``).
    """

    # Status assigned by ProcessingResult.create_pending().
    PENDING = "pending"
    PROCESSING = "processing"
    COMPLETED = "completed"
    ERROR = "error"


class FileType(str, Enum):
    """File type identifiers: ``"pdf"``, ``"docx"`` and ``"image"``.

    Members are also plain strings because of the ``str`` mix-in.
    """

    # NOTE(review): the models in this module declare ``file_type`` as a
    # plain ``str`` rather than this enum, so these values are not enforced
    # by the schemas here — confirm whether callers validate against it.
    PDF = "pdf"
    DOCX = "docx"
    IMAGE = "image"


class UploadResponse(BaseModel):
    """Schema describing a single uploaded file.

    All six fields are required (none declares a default).
    """

    file_id: str
    filename: str
    # Plain string, not the FileType enum.
    file_type: str
    size_bytes: int
    # Presumably a human-readable rendering of size_bytes — confirm with
    # the code that builds this response.
    size_human: str
    message: str


class DocumentMetadata(BaseModel):
    """Metadata about a document; every field has a default.

    Descriptive fields default to ``None``, counters default to ``0``,
    ``file_type`` to the empty string and ``extra`` to an empty dict, so
    the model can be constructed with no arguments.
    """

    title: Optional[str] = None
    author: Optional[str] = None
    # Dates are carried as strings; the expected format is not fixed
    # here — TODO confirm with the extractor that fills them.
    creation_date: Optional[str] = None
    modification_date: Optional[str] = None
    page_count: Optional[int] = None
    word_count: int = 0
    character_count: int = 0
    file_type: str = ""
    # Free-form bag for any additional key/value metadata.
    # NOTE(review): mutable literal default; pydantic is documented to copy
    # field defaults per instance — confirm for the pinned pydantic version.
    extra: Dict[str, Any] = {}


class ExtractionResult(BaseModel):
    """Result of a text-extraction step: text, metadata and status.

    ``raw_text`` and ``metadata`` are required; the remaining fields
    default to a successful result with no error and a zero duration.
    """

    raw_text: str
    metadata: DocumentMetadata
    success: bool = True
    # Presumably populated only when success is False — confirm with the
    # producer.
    error_message: Optional[str] = None
    # Duration in milliseconds, per the field name.
    extraction_time_ms: float = 0


class SummaryResult(BaseModel):
    """Result of a summarization step.

    Every field except ``key_points`` (default: empty list) is required.
    """

    summary: str
    key_points: List[str] = []
    # NOTE(review): the unit of the two length fields (characters vs.
    # words) and the direction of compression_ratio are not defined in
    # this module — confirm with the summarizer that fills them.
    original_length: int
    summary_length: int
    compression_ratio: float
    sentence_count: int
    # Name of the summarization algorithm, as a free-form string.
    algorithm: str


class Entity(BaseModel):
    """A single entity: its text, its label and occurrence info.

    ``text``, ``label`` and ``label_description`` are required; ``count``
    defaults to ``1`` and ``positions`` to an empty list.
    """

    text: str
    label: str
    label_description: str
    count: int = 1
    # Presumably offsets of each occurrence in the source text — whether
    # these are character or token offsets is not shown here; TODO confirm.
    positions: List[int] = []


class EntityResult(BaseModel):
    """Collection of entities plus aggregate counts.

    All three fields are required.
    """

    entities: List[Entity]
    # Mapping of string key to count; presumably keyed by entity label —
    # confirm with the producer.
    entity_counts: Dict[str, int]
    total_entities: int


class SentimentBreakdown(BaseModel):
    """Sentiment scores for one piece of text.

    Used as the element type of ``SentimentResult.sentence_breakdown``.
    All fields are required.
    """

    text: str
    # NOTE(review): the score names match a compound/pos/neg/neu style
    # analyzer; value ranges are not defined in this module — confirm with
    # the sentiment service.
    compound: float
    positive: float
    negative: float
    neutral: float
    # Sentiment label as a free-form string (no enum constrains it here).
    label: str


class SentimentResult(BaseModel):
    """Document-level sentiment scores plus a per-item breakdown.

    All fields are required.
    """

    # Overall scores; how they are aggregated from the breakdown is not
    # defined in this module.
    overall_compound: float
    overall_positive: float
    overall_negative: float
    overall_neutral: float
    overall_label: str
    # One SentimentBreakdown per analyzed unit (a sentence, per the field
    # name).
    sentence_breakdown: List[SentimentBreakdown]
    # NOTE(review): scale of the confidence value is not specified here —
    # confirm with the producer.
    confidence: float


class ProcessingResult(BaseModel):
    """Aggregate result of processing one file.

    ``file_id``, ``filename``, ``file_type`` and ``status`` are required.
    The per-step results (``extraction``, ``summary``, ``entities``,
    ``sentiment``) are optional and default to ``None``; timing fields
    default to ``0``.
    """

    file_id: str
    filename: str
    # Duplicate of ``filename`` under a camelCase key. It is a separate
    # field, not an alias: it is only filled when the caller sets it (as
    # create_pending() does), otherwise it stays None.
    fileName: Optional[str] = None  # CamelCase for external testers
    # Plain string, not the FileType enum.
    file_type: str
    status: TaskStatus
    extraction: Optional[ExtractionResult] = None
    summary: Optional[SummaryResult] = None
    entities: Optional[EntityResult] = None
    sentiment: Optional[SentimentResult] = None
    # Duration in milliseconds, per the field name.
    processing_time_ms: float = 0
    error_message: Optional[str] = None
    # Seconds since the epoch when produced by create_pending()
    # (time.time()); 0 when not set.
    timestamp: float = 0

    class Config:
        # NOTE(review): no field in this model declares an alias, so this
        # setting appears to have no effect; it is also the pydantic v1
        # spelling (v2 names it ``populate_by_name``) — confirm against the
        # pinned pydantic version before changing it.
        allow_population_by_field_name = True

    @classmethod
    def create_pending(
        cls, file_id: str, filename: str, file_type: str
    ) -> "ProcessingResult":
        """Build a result in the ``PENDING`` state, stamped with the current time.

        Implemented as a classmethod so that subclasses get an instance of
        their own type; call sites (``ProcessingResult.create_pending(...)``)
        are unaffected.

        Args:
            file_id: Identifier of the file.
            filename: Name of the file; copied into both ``filename`` and
                the camelCase ``fileName`` field.
            file_type: File type string stored as-is.

        Returns:
            A new instance with ``status=TaskStatus.PENDING``,
            ``timestamp=time.time()`` and every other field at its default.
        """
        return cls(
            file_id=file_id,
            filename=filename,
            fileName=filename,
            file_type=file_type,
            status=TaskStatus.PENDING,
            timestamp=time.time(),
        )