Spaces:
Runtime error
Runtime error
import logging
from datetime import datetime
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, Field, validator

from app.schemas.dataset_common import DatasetMetrics, ImpactLevel
# Log for this module
log = logging.getLogger(__name__)

# Supported strategies for dataset combination.
# NOTE(review): nothing in this module enforces membership in this list —
# confirm callers validate `combination_strategy` against it.
SUPPORTED_STRATEGIES = ["merge", "intersect", "filter"]
class ImpactAssessment(BaseModel):
    """Result of assessing the impact level of a single dataset.

    Carries the computed level, the method used to derive it, the raw
    metrics it was derived from, and (for reference) the thresholds that
    were applied.
    """

    dataset_id: str = Field(..., description="The ID of the dataset being assessed")
    impact_level: ImpactLevel = Field(..., description="The impact level: low, medium, or high")
    assessment_method: str = Field(
        "unknown",
        description="Method used to determine impact level (e.g., size_based, downloads_and_likes_based)",
    )
    metrics: DatasetMetrics = Field(
        ...,
        description="Metrics used for impact assessment",
    )
    # default_factory is the documented pydantic idiom for mutable defaults;
    # it avoids declaring a shared `{}` literal as the field default.
    thresholds: Dict[str, Dict[str, str]] = Field(
        default_factory=dict,
        description="Thresholds used for determining impact levels (for reference)",
    )
class DatasetInfo(BaseModel):
    """Loosely-typed view of a dataset record.

    Only the fields this module cares about are declared; anything else
    coming back from the API is retained via ``extra = "allow"``.
    """

    id: str
    impact_level: Optional[ImpactLevel] = None
    # Raw assessment payload; shape not pinned down here — presumably mirrors
    # ImpactAssessment, verify against the producer.
    impact_assessment: Optional[Dict[str, Any]] = None
    # Add other fields as needed

    class Config:
        extra = "allow"  # Allow extra fields from the API
class DatasetBase(BaseModel):
    """Fields common to all dataset request/response schemas."""

    name: str
    description: Optional[str] = None
    tags: Optional[List[str]] = None
class DatasetCreate(DatasetBase):
    """Payload for creating a dataset; inherits required ``name`` from DatasetBase."""

    # Presumably file identifiers or paths to attach at creation — confirm
    # against the endpoint that consumes this schema.
    files: Optional[List[str]] = None
class DatasetUpdate(DatasetBase):
    """Payload for (partial) dataset updates — every field is optional."""

    name: Optional[str] = None  # Make fields optional for updates
class Dataset(DatasetBase):
    """A stored dataset as returned to clients."""

    id: int  # or str depending on your ID format
    owner_id: str  # Assuming user IDs are strings
    # NOTE(review): timestamps are plain strings here while CombinedDataset
    # uses ``datetime`` — confirm which representation the API contract wants.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None

    class Config:
        pass  # Removed orm_mode = True since ORM is not used
class DatasetCombineRequest(BaseModel):
    """Request payload for combining multiple datasets into one.

    Raises:
        pydantic.ValidationError: if ``combination_strategy`` is not one of
            the module-level ``SUPPORTED_STRATEGIES``.
    """

    source_datasets: List[str] = Field(..., description="List of dataset IDs to combine")
    name: str = Field(..., description="Name for the combined dataset")
    description: Optional[str] = Field(None, description="Description for the combined dataset")
    combination_strategy: str = Field(
        "merge",
        description="Strategy to use when combining datasets (e.g., 'merge', 'intersect', 'filter')",
    )
    filter_criteria: Optional[Dict[str, Any]] = Field(
        None,
        description="Criteria for filtering when combining datasets",
    )

    @validator("combination_strategy")
    def _strategy_supported(cls, value: str) -> str:
        # SUPPORTED_STRATEGIES was previously declared but never enforced;
        # reject unknown strategies at the schema boundary instead of
        # letting them fail (or silently no-op) deeper in the pipeline.
        if value not in SUPPORTED_STRATEGIES:
            raise ValueError(
                f"Unsupported combination_strategy {value!r}; "
                f"expected one of {SUPPORTED_STRATEGIES}"
            )
        return value
class CombinedDataset(BaseModel):
    """A dataset produced by combining several source datasets.

    ``status`` starts as ``"processing"``; storage fields are populated once
    the combined files land in a bucket.
    """

    id: str = Field(..., description="ID of the combined dataset")
    name: str = Field(..., description="Name of the combined dataset")
    description: Optional[str] = Field(None, description="Description of the combined dataset")
    source_datasets: List[str] = Field(..., description="IDs of the source datasets")
    created_at: datetime = Field(..., description="Creation timestamp")
    created_by: str = Field(..., description="ID of the user who created this combined dataset")
    impact_level: Optional[ImpactLevel] = Field(None, description="Calculated impact level of the combined dataset")
    status: str = Field("processing", description="Status of the dataset combination process")
    combination_strategy: str = Field(..., description="Strategy used when combining datasets")
    metrics: Optional[DatasetMetrics] = Field(None, description="Metrics for the combined dataset")
    storage_bucket_id: Optional[str] = Field(None, description="ID of the storage bucket containing dataset files")
    storage_folder_path: Optional[str] = Field(None, description="Path to the dataset files within the bucket")

    class Config:
        extra = "allow"  # Allow extra fields for flexibility
# Explicit public API of this module (re-exports ImpactLevel and
# DatasetMetrics from dataset_common for convenience).
# NOTE(review): DatasetBase is not exported — confirm that is intentional.
__all__ = ["ImpactLevel", "ImpactAssessment", "DatasetInfo", "DatasetMetrics",
           "Dataset", "DatasetCreate", "DatasetUpdate", "DatasetCombineRequest", "CombinedDataset"]