"""
Pydantic models for the scraper API.
"""
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
class TaskInput(BaseModel):
    """Incoming task payload: title, body and source URL plus optional extras."""

    # Required fields: the Ellipsis default marks each one as mandatory.
    title: str = Field(default=..., description="Task title")
    content: str = Field(default=..., description="Task body")
    source_url: str = Field(default=..., description="Source URL")

    # Optional classification; None when the caller does not supply it.
    task_type: Optional[str] = Field(default=None, description="Task type")

    # Both lists default to a fresh empty list per instance. Because the
    # annotation is Optional, an explicit None is accepted as well.
    images: Optional[List[str]] = Field(
        default_factory=list,
        description="Task images",
    )
    variants: Optional[List[str]] = Field(
        default_factory=list,
        description="Answer variants",
    )
class TaskResponse(BaseModel):
    """Task as returned by the API; only title, content and source_url are required."""

    # Identifier; None is allowed (presumably for not-yet-persisted tasks — confirm).
    id: Optional[int] = None

    # Core task data (required).
    title: str
    content: str
    source_url: str

    # Optional descriptive data; unlike TaskInput, the lists default to None
    # rather than to an empty list.
    task_type: Optional[str] = None
    images: Optional[List[str]] = None
    variants: Optional[List[str]] = None

    # Optional source metadata. NOTE(review): exact semantics of these three
    # fields are defined by the producer of the model, not visible here.
    task_number: Optional[int] = None
    source_kind: Optional[str] = None
    task_guid: Optional[str] = None

    # Defaults to False when the producer does not set it.
    can_check_answer: bool = False

    scraped_at: Optional[datetime] = None

    # Free-form mapping; presumably the output of a RuBERT analysis step — confirm.
    rubert_analysis: Optional[Dict[str, Any]] = None
class ScrapeRequest(BaseModel):
    """Parameters of a scrape run; every field has a default."""

    # Subject code, "russian" unless overridden.
    subject: Optional[str] = Field(default="russian", description="Subject code")

    # Explicit URL list; a fresh empty list is created when omitted.
    urls: Optional[List[str]] = Field(
        default_factory=list,
        description="Explicit URLs to scrape",
    )

    query: Optional[str] = Field(default=None, description="Search query")

    # Off by default.
    full_refresh: bool = Field(
        default=False,
        description="When true, also scrape official archives and open-variant PDFs",
    )
class ScrapeResponse(BaseModel):
    """Outcome of a scrape run: success flag, counters and a message."""

    success: bool

    # Counters reported for the run (all required).
    tasks_scraped: int
    tasks_saved: int
    duplicates_skipped: int

    message: str
class CheckAnswerRequest(BaseModel):
    """Request body carrying the answer to be checked."""

    # Required, and must contain at least one character (min_length=1).
    answer: str = Field(
        default=...,
        min_length=1,
        description="Submitted answer",
    )
class CheckAnswerResponse(BaseModel):
    """Result of an answer check; all fields are required."""

    success: bool
    is_correct: bool

    # Status as a code plus a label (presumably machine- vs human-readable — confirm).
    status_code: str
    status_label: str

    # The answer in two forms, as named: submitted and normalized.
    submitted_answer: str
    normalized_answer: str

    message: str
class AnalysisRequest(BaseModel):
    """Request body carrying the text to analyze."""

    # Required; no length constraint is declared on this field.
    text: str = Field(default=..., description="Text to analyze")
class AnalysisResponse(BaseModel):
    """Text analysis result: category, keywords, confidence and optional embedding."""

    category: str
    keywords: List[str]

    # NOTE(review): no range constraint is declared; confirm the expected scale.
    confidence: float

    # Optional vector of floats; None when not provided.
    embedding: Optional[List[float]] = None
class HealthResponse(BaseModel):
    """Health report: overall status, a timestamp and per-service flags."""

    status: str

    # Required here; the caller must supply it (no default factory).
    timestamp: datetime

    # String keys mapped to booleans; presumably service name -> availability — confirm.
    services: Dict[str, bool]
class StatsResponse(BaseModel):
    """Aggregate statistics: total task count, a per-key breakdown and last scrape time."""

    total_tasks: int

    # String keys mapped to integer counts; presumably keyed by task type — confirm.
    by_type: Dict[str, int]

    # None when no value is supplied.
    last_scrape: Optional[datetime] = None
class ErrorResponse(BaseModel):
    """Error payload: a message, optional detail and a creation timestamp."""

    error: str

    # Optional extra information; None when not supplied.
    detail: Optional[str] = None

    # Filled in automatically when the caller does not pass a timestamp.
    # datetime.utcnow() is deprecated since Python 3.12, so the value is built
    # from an aware "now" in UTC and tzinfo is then stripped. The default stays
    # a naive UTC datetime, exactly as before, so serialized output is unchanged.
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )
|