# File: scraper/models.py
# Uploaded by greeta in commit b651663 (verified): "Upload 3 files"
"""
Pydantic models for the scraper API.
"""
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field
class TaskInput(BaseModel):
    """Task payload: title, body and source URL plus optional extras.

    ``title``, ``content`` and ``source_url`` are required.  ``task_type``
    defaults to ``None``.  ``images`` and ``variants`` fall back to a fresh
    empty list when omitted; because they are annotated ``Optional``, an
    explicit ``None`` is accepted as well.
    """

    # Required text fields (Ellipsis marks a field as required).
    title: str = Field(default=..., description="Task title")
    content: str = Field(default=..., description="Task body")
    source_url: str = Field(default=..., description="Source URL")

    # Optional classification; stays None when not supplied.
    task_type: Optional[str] = Field(default=None, description="Task type")

    # default_factory builds a new list per instance instead of sharing one.
    images: Optional[List[str]] = Field(
        default_factory=list,
        description="Task images",
    )
    variants: Optional[List[str]] = Field(
        default_factory=list,
        description="Answer variants",
    )
class TaskResponse(BaseModel):
    """Task record with its content and optional metadata.

    Only ``title``, ``content`` and ``source_url`` are required; every other
    field has a default.  Unlike ``TaskInput``, ``images`` and ``variants``
    default to ``None`` here rather than to an empty list.
    """

    id: Optional[int] = None
    title: str
    content: str
    source_url: str
    task_type: Optional[str] = None
    images: Optional[List[str]] = None
    variants: Optional[List[str]] = None
    task_number: Optional[int] = None
    source_kind: Optional[str] = None
    task_guid: Optional[str] = None
    # False unless explicitly set by whoever builds the response.
    can_check_answer: bool = False
    scraped_at: Optional[datetime] = None
    # Free-form mapping; presumably RuBERT analysis output -- TODO confirm schema.
    rubert_analysis: Optional[Dict[str, Any]] = None
class ScrapeRequest(BaseModel):
    """Parameters for a scrape run.

    Every field has a default, so an empty request body validates:
    ``subject`` is ``"russian"``, ``urls`` is a new empty list, ``query`` is
    ``None`` and ``full_refresh`` is ``False``.
    """

    subject: Optional[str] = Field(
        default="russian",
        description="Subject code",
    )

    # default_factory builds a new list per instance instead of sharing one.
    urls: Optional[List[str]] = Field(
        default_factory=list,
        description="Explicit URLs to scrape",
    )

    query: Optional[str] = Field(default=None, description="Search query")

    full_refresh: bool = Field(
        default=False,
        description="When true, also scrape official archives and open-variant PDFs",
    )
class ScrapeResponse(BaseModel):
    """Outcome of a scrape run: a success flag, three counters and a message.

    All fields are required.
    """

    success: bool
    # Counters; judging by the names: tasks scraped, tasks stored, and
    # duplicates left out.  How they relate is not visible in this file.
    tasks_scraped: int
    tasks_saved: int
    duplicates_skipped: int
    message: str
class CheckAnswerRequest(BaseModel):
    """Request body carrying a submitted answer.

    ``answer`` is required and must contain at least one character.
    """

    answer: str = Field(
        default=...,
        min_length=1,
        description="Submitted answer",
    )
class CheckAnswerResponse(BaseModel):
    """Result of checking a submitted answer.

    All fields are required.
    """

    success: bool
    is_correct: bool
    # Status as a code plus a label; the set of possible values is not
    # defined in this file.
    status_code: str
    status_label: str
    # The answer as submitted and its normalized form; the normalization
    # itself happens elsewhere.
    submitted_answer: str
    normalized_answer: str
    message: str
class AnalysisRequest(BaseModel):
    """Request body carrying the text to analyze.

    ``text`` is required; no length constraint is applied.
    """

    text: str = Field(default=..., description="Text to analyze")
class AnalysisResponse(BaseModel):
    """Result of a text analysis: category, keywords and a confidence value.

    ``embedding`` is optional and defaults to ``None``; the other fields are
    required.
    """

    category: str
    keywords: List[str]
    # No range is enforced here; presumably 0.0-1.0 -- TODO confirm with producer.
    confidence: float
    embedding: Optional[List[float]] = None
class HealthResponse(BaseModel):
    """Health report: overall status, a timestamp and per-service flags.

    All fields are required; ``timestamp`` must be supplied by the caller
    (no default is defined).
    """

    status: str
    timestamp: datetime
    # Maps a service name to a boolean; presumably True means the service
    # is available -- TODO confirm.
    services: Dict[str, bool]
class StatsResponse(BaseModel):
    """Aggregate statistics: total task count and a per-type breakdown.

    ``last_scrape`` is optional and defaults to ``None``.
    """

    total_tasks: int
    # Maps a string key (a task type, per the field name) to a count.
    by_type: Dict[str, int]
    last_scrape: Optional[datetime] = None
def _utc_now_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stands in for ``datetime.utcnow()``, which is deprecated since Python
    3.12.  The tzinfo is stripped on purpose so the value (and its serialized
    form) stays identical to what ``utcnow()`` produced.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class ErrorResponse(BaseModel):
    """Error payload: a short error string, optional detail and a timestamp.

    ``error`` is required.  ``detail`` defaults to ``None``.  ``timestamp``
    defaults to the current UTC time (naive, no tzinfo) taken when the model
    instance is created.
    """

    error: str
    detail: Optional[str] = None
    # default_factory is evaluated per instance, so each error gets its own
    # creation time rather than the module import time.
    timestamp: datetime = Field(default_factory=_utc_now_naive)