# Source: ai-rag/cv_module/src/api/schemas.py
# Last commit 149c85a (robrtt): "Clean rebuild: all features fixed"
from pydantic import BaseModel, Field, HttpUrl
from typing import List, Optional
# === Shared ===
class BBoxSchema(BaseModel):
    # Bounding box described by six required float fields.
    # NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners and
    # width/height are the matching extents; units and origin are not stated
    # in this module -- confirm against the code that fills this model.
    x1: float
    y1: float
    x2: float
    y2: float
    width: float
    height: float
class DetectionSchema(BaseModel):
    # A single detection entry: label, confidence, box and class id.
    # All four fields are required.
    label: str
    confidence: float  # no range constraint is enforced by this schema
    bbox: BBoxSchema  # nested box model declared earlier in this module
    class_id: int  # NOTE(review): presumably the detector's class index -- confirm
class OCRBoxSchema(BaseModel):
    # One OCR text entry with its confidence and box; all fields are required.
    text: str
    confidence: float  # no range constraint is enforced by this schema
    # Untyped list: the element layout is not specified here (unlike
    # BBoxSchema) -- TODO confirm the shape produced by the OCR engine.
    bbox: list
# === Requests ===
class AnalyzeURLRequest(BaseModel):
    # Request body for analysing an image referenced by URL.
    # `url` is the only required field. Captioning defaults to on, detection
    # and OCR default to off, and `classification_labels` defaults to None.
    url: str = Field(..., description="URL gambar yang akan dianalisis")
    run_caption: bool = Field(True, description="Generate image caption")
    run_detection: bool = Field(
        False,
        description="Deteksi objek dengan YOLO (off by default — opt-in)",
    )
    run_ocr: bool = Field(False, description="Ekstrak teks dari gambar")
    classification_labels: Optional[List[str]] = Field(
        None,
        description="Label untuk zero-shot CLIP classification, e.g. ['kucing','anjing']",
        # Pydantic v2 deprecates arbitrary extra keyword arguments on Field
        # (the former `example=...`). `json_schema_extra` emits the same
        # "example" key in the generated schema without the warning.
        json_schema_extra={"example": ["indoor", "outdoor", "nature", "city"]},
    )
class ClassifyRequest(BaseModel):
    # Request body carrying an image URL and the candidate labels.
    # Both fields are required; `labels` must hold at least two entries.
    url: str
    labels: List[str] = Field(
        ...,
        description="Minimal 2 label kandidat",
        min_length=2,
    )
class SimilarityRequest(BaseModel):
    # Request body pairing an image URL with a text string.
    # Both fields are required; `text` must be at least one character long.
    url: str
    text: str = Field(
        ...,
        min_length=1,
    )
class VisualQARequest(BaseModel):
    # Request body pairing an image URL with a question about that image.
    # Both fields are required; no length constraint is placed on `question`.
    url: str
    question: str = Field(
        ...,
        description="Pertanyaan tentang isi gambar",
    )
# === Responses ===
class CaptionResponse(BaseModel):
    # Caption result: the caption text and a model string.
    # NOTE(review): `model` presumably names the captioning model -- confirm.
    caption: str
    model: str
class DetectionResponse(BaseModel):
    # Detection result: the list of detections plus summary and image
    # metadata. All fields are required.
    detections: List[DetectionSchema]
    count: int  # NOTE(review): presumably len(detections) -- not enforced here
    # Untyped dict: key/value types are not specified in this schema.
    # NOTE(review): presumably label -> occurrence count; confirm with producer.
    labels_summary: dict
    image_width: int
    image_height: int
    inference_time_ms: float
class ClassificationResponse(BaseModel):
    # Classification result: the top label and score, plus the full label
    # and probability lists. All fields are required.
    top_label: str
    top_score: float
    labels: List[str]
    # NOTE(review): presumably aligned index-by-index with `labels`; this
    # schema does not enforce equal lengths -- confirm with the producer.
    probabilities: List[float]
class OCRResponse(BaseModel):
    # OCR result: the combined text, per-entry boxes, and metadata strings.
    # All fields are required.
    full_text: str
    boxes: List[OCRBoxSchema]
    word_count: int
    language: str  # format (e.g. language code vs. name) is not specified here
    engine: str
class FullAnalysisResponse(BaseModel):
    # Combined analysis result. The image metadata, summary text, model list
    # and latency are required. Each per-model section is optional and
    # defaults to None.
    image_width: int
    image_height: int
    source: str

    # Per-model sections; None when absent.
    caption: Optional[CaptionResponse] = None
    detections: Optional[DetectionResponse] = None
    classification: Optional[ClassificationResponse] = None
    ocr: Optional[OCRResponse] = None

    summary_text: str = Field(
        ...,
        description="Ringkasan teks dari semua model — siap dipakai sebagai konteks LLM",
    )
    models_used: List[str]
    total_latency_ms: float
class SimilarityResponse(BaseModel):
    # Similarity result: a float score, a text string and an interpretation
    # string. All fields are required.
    similarity_score: float  # range is not constrained by this schema
    text: str  # NOTE(review): presumably echoes the request text -- confirm
    interpretation: str
class VisualQAResponse(BaseModel):
    # Visual question-answering result: a question string and an answer
    # string, both required.
    question: str
    answer: str