Spaces:

robrtt
/

ai-rag

Sleeping

App Files Files Community

ai-rag / cv_module /src /api /schemas.py

robrtt

Clean rebuild: all features fixed

149c85a 15 days ago

raw

history blame contribute delete

2.58 kB

	from pydantic import BaseModel, Field, HttpUrl
	from typing import List, Optional


	# === Shared ===

	class BBoxSchema(BaseModel):
	    # Bounding box expressed as six required floats.
	    # NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners and
	    # width/height are derived from them; the coordinate origin and units
	    # (pixels vs. normalised) are not visible here -- confirm with the producer.
	    x1: float
	    y1: float
	    x2: float
	    y2: float
	    width: float
	    height: float


	class DetectionSchema(BaseModel):
	    # A single detection entry: a label, a confidence value, a bounding box and
	    # a numeric class id. All four fields are required.
	    label: str
	    # NOTE(review): no range constraint is declared; presumably 0..1 -- confirm.
	    confidence: float
	    bbox: BBoxSchema
	    # NOTE(review): presumably the detector's class index for `label` -- confirm.
	    class_id: int


	class OCRBoxSchema(BaseModel):
	    # One recognised text region: the text, a confidence value and its box.
	    text: str
	    confidence: float
	    # Untyped list (unlike DetectionSchema, which uses BBoxSchema), so element
	    # types are not validated.
	    # NOTE(review): the layout (corner points vs. flat coordinates) is not
	    # visible here -- confirm against the OCR engine output.
	    bbox: list


	# === Requests ===

	class AnalyzeURLRequest(BaseModel):
	    # Request body for analysing an image referenced by URL. The boolean flags
	    # select which analyses run; classification runs on the optional labels.
	    # The `description` strings below are user-facing (Indonesian) and are
	    # emitted in the generated schema, so they are kept verbatim.

	    # Required: URL of the image to analyse (validated only as a plain string).
	    url: str = Field(..., description="URL gambar yang akan dianalisis")
	    # Captioning is on by default.
	    run_caption: bool = Field(True, description="Generate image caption")
	    # Object detection is opt-in (default False).
	    run_detection: bool = Field(False, description="Deteksi objek dengan YOLO (off by default — opt-in)")
	    # Text extraction (OCR) is opt-in (default False).
	    run_ocr: bool = Field(False, description="Ekstrak teks dari gambar")
	    # Optional candidate labels for zero-shot classification; defaults to None.
	    classification_labels: Optional[List[str]] = Field(
	        None,
	        description="Label untuk zero-shot CLIP classification, e.g. ['kucing','anjing']",
	        # Passing `example=` directly is an unknown extra keyword on Field,
	        # which Pydantic v2 deprecates (DeprecationWarning at import time).
	        # `json_schema_extra` emits the same "example" key in the JSON schema
	        # without the warning.
	        json_schema_extra={"example": ["indoor", "outdoor", "nature", "city"]},
	    )


	class ClassifyRequest(BaseModel):
	    # Request body for classifying an image (given by URL) against candidate
	    # labels supplied by the caller.
	    url: str
	    # Required; at least two candidate labels must be provided (min_length=2).
	    labels: List[str] = Field(..., min_length=2, description="Minimal 2 label kandidat")


	class SimilarityRequest(BaseModel):
	    # Request body pairing an image URL with a text to compare it against.
	    url: str
	    # Required and must be non-empty (min_length=1).
	    text: str = Field(..., min_length=1)


	class VisualQARequest(BaseModel):
	    # Request body for asking a question about an image referenced by URL.
	    url: str
	    # Required question about the image content.
	    # NOTE(review): unlike SimilarityRequest.text, no min_length is declared,
	    # so an empty string passes validation -- confirm this is intended.
	    question: str = Field(..., description="Pertanyaan tentang isi gambar")


	# === Responses ===

	class CaptionResponse(BaseModel):
	    # Caption result: the caption text plus a model string.
	    caption: str
	    # NOTE(review): presumably the name/id of the captioning model -- confirm.
	    model: str


	class DetectionResponse(BaseModel):
	    # Detection result: the list of detections plus summary fields. All
	    # fields are required.
	    detections: List[DetectionSchema]
	    # NOTE(review): presumably len(detections); not enforced here -- confirm.
	    count: int
	    # Untyped dict, so keys and values are not validated.
	    # NOTE(review): presumably label -> number of detections -- confirm.
	    labels_summary: dict
	    image_width: int
	    image_height: int
	    # Milliseconds, per the field name.
	    inference_time_ms: float


	class ClassificationResponse(BaseModel):
	    # Classification result: the top label with its score, plus the full
	    # label and probability lists.
	    top_label: str
	    top_score: float
	    labels: List[str]
	    # NOTE(review): presumably probabilities[i] belongs to labels[i]; equal
	    # lengths are not enforced by this schema -- confirm with the producer.
	    probabilities: List[float]


	class OCRResponse(BaseModel):
	    # OCR result: the combined text, the per-region boxes, and metadata
	    # strings. All fields are required.
	    full_text: str
	    boxes: List[OCRBoxSchema]
	    word_count: int
	    # NOTE(review): format (e.g. ISO code vs. free text) not visible -- confirm.
	    language: str
	    # NOTE(review): presumably the name of the OCR backend -- confirm.
	    engine: str


	class FullAnalysisResponse(BaseModel):
	    # Combined result of a full analysis. Each per-model section is optional
	    # and defaults to None; the remaining fields are required.
	    image_width: int
	    image_height: int
	    # NOTE(review): presumably where the image came from (e.g. its URL) -- confirm.
	    source: str
	    caption: Optional[CaptionResponse] = None
	    detections: Optional[DetectionResponse] = None
	    classification: Optional[ClassificationResponse] = None
	    ocr: Optional[OCRResponse] = None
	    # Required. Per its description: a text summary from all models, ready to
	    # be used as LLM context.
	    summary_text: str = Field(..., description="Ringkasan teks dari semua model — siap dipakai sebagai konteks LLM")
	    models_used: List[str]
	    # Milliseconds, per the field name.
	    total_latency_ms: float


	class SimilarityResponse(BaseModel):
	    # Similarity result: a score, a text and an interpretation string.
	    # NOTE(review): score range/metric is not declared here -- confirm.
	    similarity_score: float
	    # NOTE(review): presumably echoes the text sent in the request -- confirm.
	    text: str
	    interpretation: str


	class VisualQAResponse(BaseModel):
	    # Visual question-answering result: a question and its answer.
	    # NOTE(review): presumably `question` echoes the request -- confirm.
	    question: str
	    answer: str