File size: 2,560 Bytes
b651663
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
"""
Pydantic models for the scraper API.
"""

from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field


class TaskInput(BaseModel):
    """Input schema for a single task.

    ``title``, ``content`` and ``source_url`` are required. ``task_type``
    defaults to ``None``; ``images`` and ``variants`` each default to a
    freshly created empty list.
    """

    # Required fields: ``...`` marks a field that has no default.
    title: str = Field(..., description="Task title")
    content: str = Field(..., description="Task body")
    source_url: str = Field(..., description="Source URL")

    # Optional fields.
    task_type: Optional[str] = Field(default=None, description="Task type")
    # ``default_factory`` builds a new list per instance, so the default is
    # never shared between models.
    images: Optional[List[str]] = Field(
        default_factory=list,
        description="Task images",
    )
    variants: Optional[List[str]] = Field(
        default_factory=list,
        description="Answer variants",
    )


class TaskResponse(BaseModel):
    """Task representation used in API responses.

    Only ``title``, ``content`` and ``source_url`` are required. Every other
    field defaults to ``None``, except ``can_check_answer`` which defaults to
    ``False``.
    """

    # Identifier; None when not provided (presumably a database key — confirm
    # against the code that builds this model).
    id: Optional[int] = None
    title: str
    content: str
    source_url: str
    task_type: Optional[str] = None
    # Unlike TaskInput, the list fields default to None rather than [].
    images: Optional[List[str]] = None
    variants: Optional[List[str]] = None
    task_number: Optional[int] = None
    source_kind: Optional[str] = None
    task_guid: Optional[str] = None
    can_check_answer: bool = False
    scraped_at: Optional[datetime] = None
    # Free-form mapping with no schema declared here.
    # NOTE(review): presumably the output of a RuBERT-based analysis — confirm.
    rubert_analysis: Optional[Dict[str, Any]] = None


class ScrapeRequest(BaseModel):
    """Parameters of a scrape request.

    Every field has a default, so an empty payload is valid.
    """

    subject: Optional[str] = Field(default="russian", description="Subject code")
    # A new empty list is created for each instance.
    urls: Optional[List[str]] = Field(
        default_factory=list,
        description="Explicit URLs to scrape",
    )
    query: Optional[str] = Field(default=None, description="Search query")
    full_refresh: bool = Field(
        default=False,
        description="When true, also scrape official archives and open-variant PDFs",
    )


class ScrapeResponse(BaseModel):
    """Summary of a scrape run; all fields are required."""

    success: bool
    # Counters. No validation relating them to one another is declared here.
    tasks_scraped: int
    tasks_saved: int
    duplicates_skipped: int
    message: str


class CheckAnswerRequest(BaseModel):
    """Request carrying an answer to be checked."""

    # Required; ``min_length=1`` rejects the empty string.
    answer: str = Field(
        ...,
        min_length=1,
        description="Submitted answer",
    )


class CheckAnswerResponse(BaseModel):
    """Outcome of an answer check; all fields are required."""

    success: bool
    is_correct: bool
    # Typed as str, so not necessarily an HTTP status number; the set of valid
    # codes is defined by the producer and is not visible in this module.
    status_code: str
    status_label: str
    submitted_answer: str
    # NOTE(review): presumably the submitted answer after normalization —
    # the normalization rules are not defined here.
    normalized_answer: str
    message: str


class AnalysisRequest(BaseModel):
    """Request carrying the text to analyze."""

    # Required; no length constraint is declared.
    text: str = Field(
        ...,
        description="Text to analyze",
    )


class AnalysisResponse(BaseModel):
    """Result of a text analysis.

    ``category``, ``keywords`` and ``confidence`` are required; ``embedding``
    defaults to ``None``.
    """

    category: str
    keywords: List[str]
    # No range constraint is declared (presumably 0..1 — confirm with producer).
    confidence: float
    embedding: Optional[List[float]] = None


class HealthResponse(BaseModel):
    """Health-check payload; all fields are required."""

    status: str
    # Must be supplied by the caller; no default is generated here.
    timestamp: datetime
    # NOTE(review): presumably service name -> availability flag — confirm.
    services: Dict[str, bool]


class StatsResponse(BaseModel):
    """Statistics payload.

    ``total_tasks`` and ``by_type`` are required; ``last_scrape`` defaults to
    ``None``.
    """

    total_tasks: int
    # NOTE(review): presumably task type -> number of tasks — confirm.
    by_type: Dict[str, int]
    last_scrape: Optional[datetime] = None


class ErrorResponse(BaseModel):
    """Error payload with a creation timestamp.

    ``error`` is required, ``detail`` defaults to ``None`` and ``timestamp``
    is filled in automatically when the caller does not supply one.
    """

    error: str
    detail: Optional[str] = None
    # Timezone-aware UTC "now". ``datetime.utcnow`` is deprecated since
    # Python 3.12 and returns a naive value that is easily mistaken for local
    # time; the aware value serializes with an explicit UTC offset.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
    )