| """ |
| Validation Agent |
| |
| Validates business profiles for schema compliance, completeness, and data quality. |
| """ |
| import re |
| from typing import List, Dict, Any, Optional |
| from datetime import datetime |
|
|
| from pydantic import BaseModel, Field |
|
|
| from backend.models.schemas import ( |
| BusinessProfile, |
| BusinessInfo, |
| Product, |
| Service, |
| ValidationError as ProfileValidationError, |
| ValidationInput as ValidationInputSchema, |
| ) |
| from backend.models.enums import BusinessType |
| from backend.utils.logger import get_logger |
|
|
|
|
| logger = get_logger(__name__) |
|
|
|
|
class ValidationOutput(BaseModel):
    """
    Result of one validation run.

    Carries the verdict, the collected errors and warnings, the
    completeness scores, and the profile that was validated (if any).
    """
    # Identifier of the job the validated profile belongs to.
    job_id: str
    # Overall verdict of the run.
    is_valid: bool
    # Problems recorded with "error" severity.
    errors: List[ProfileValidationError] = Field(default_factory=list)
    # Problems recorded with "warning" severity.
    warnings: List[ProfileValidationError] = Field(default_factory=list)
    # Overall completeness, constrained to the range 0.0-1.0.
    completeness_score: float = Field(default=0.0, ge=0.0, le=1.0)
    # Per-section completeness scores keyed by section name.
    field_scores: Dict[str, float] = Field(default_factory=dict)
    # The profile that was validated; None when validation could not run.
    validated_profile: Optional[BusinessProfile] = None
    # Creation time of this result (naive local time from datetime.now).
    validated_at: datetime = Field(default_factory=datetime.now)
|
|
|
|
class ValidationAgent:
    """
    Validates business profiles

    Features:
    - Schema validation
    - Completeness scoring
    - Data quality checks
    - Business rule validation
    - Anomaly detection

    Findings are accumulated on ``self.errors`` and ``self.warnings`` while a
    validation run is in progress; both lists are reset at the start of every
    ``validate`` call, so one instance must not run two validations at once.
    """

    # Patterns are compiled once and applied with ``fullmatch`` so that a
    # trailing newline (which ``$`` would tolerate) is rejected.
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    _URL_RE = re.compile(r'https?://[^\s]+')
    _NON_DIGIT_RE = re.compile(r'\D')

    # Thresholds used by the data-quality checks.
    _MIN_DESCRIPTION_LENGTH = 20
    _MIN_PHONE_DIGITS = 7
    _SUSPICIOUS_PRICE = 1000000

    # Section weights of the overall completeness score.
    _BUSINESS_INFO_WEIGHT = 0.4
    _PRODUCTS_WEIGHT = 0.3
    _SERVICES_WEIGHT = 0.3

    def __init__(self):
        """Initialize Validation Agent"""
        self.errors = []
        self.warnings = []

    # ------------------------------------------------------------------
    # Public entry point
    # ------------------------------------------------------------------

    def validate(self, input: ValidationInputSchema) -> ValidationOutput:
        """
        Validate business profile

        Args:
            input: Validation input with profile

        Returns:
            ValidationOutput with results. Any exception raised while
            validating is logged with its traceback and reported as a single
            "validation_error" entry instead of propagating to the caller.
        """
        # Findings from a previous run must not leak into this one.
        self.errors = []
        self.warnings = []

        logger.info(f"Starting validation for job {input.job_id}")

        try:
            profile = input.profile

            if not profile:
                return self._failure_output(
                    input.job_id, "missing", "No profile to validate"
                )

            self._validate_business_info(profile.business_info)

            # MIXED businesses are expected to carry both products and services.
            if profile.business_type in (BusinessType.PRODUCT, BusinessType.MIXED):
                if profile.products:
                    self._validate_products(profile.products)
                else:
                    self._add_warning(
                        "products", "missing", "Product business has no products"
                    )

            if profile.business_type in (BusinessType.SERVICE, BusinessType.MIXED):
                if profile.services:
                    self._validate_services(profile.services)
                else:
                    self._add_warning(
                        "services", "missing", "Service business has no services"
                    )

            completeness_score = self._calculate_completeness(profile)
            field_scores = self._calculate_field_scores(profile)

            # Warnings never invalidate a profile; only errors do.
            is_valid = not self.errors

            logger.info(
                f"Validation complete: {is_valid}, "
                f"completeness: {completeness_score:.0%}, "
                f"{len(self.errors)} errors, {len(self.warnings)} warnings"
            )

            return ValidationOutput(
                job_id=input.job_id,
                is_valid=is_valid,
                errors=self.errors,
                warnings=self.warnings,
                completeness_score=completeness_score,
                field_scores=field_scores,
                validated_profile=profile,
                validated_at=datetime.now()
            )

        except Exception as e:
            # Boundary handler: keep the traceback in the log, report the
            # failure to the caller as a validation result.
            logger.exception(f"Validation failed: {e}")
            return self._failure_output(input.job_id, "validation_error", str(e))

    # ------------------------------------------------------------------
    # Finding helpers
    # ------------------------------------------------------------------

    def _add_error(self, field: str, error_type: str, message: str) -> None:
        """Record a finding with "error" severity."""
        self.errors.append(ProfileValidationError(
            field=field,
            error_type=error_type,
            message=message,
            severity="error"
        ))

    def _add_warning(self, field: str, error_type: str, message: str) -> None:
        """Record a finding with "warning" severity."""
        self.warnings.append(ProfileValidationError(
            field=field,
            error_type=error_type,
            message=message,
            severity="warning"
        ))

    def _failure_output(self, job_id: str, error_type: str, message: str) -> ValidationOutput:
        """Build the result returned when the profile itself cannot be validated."""
        return ValidationOutput(
            job_id=job_id,
            is_valid=False,
            errors=[ProfileValidationError(
                field="profile",
                error_type=error_type,
                message=message,
                severity="error"
            )],
            warnings=[],
            completeness_score=0.0,
            field_scores={},
            validated_profile=None,
            validated_at=datetime.now()
        )

    # ------------------------------------------------------------------
    # Section validators
    # ------------------------------------------------------------------

    def _validate_business_info(self, business_info: BusinessInfo) -> None:
        """
        Validate business information

        A missing business_info is an error; a missing name or a missing or
        short description is only a warning. Contact, location and working
        hours are checked when present.

        Args:
            business_info: BusinessInfo object
        """
        if not business_info:
            self._add_error(
                "business_info", "missing", "Business information is missing"
            )
            return

        if not business_info.name:
            self._add_warning(
                "business_info.name", "missing", "Business name not found"
            )

        if not business_info.description:
            self._add_warning(
                "business_info.description",
                "missing",
                "Business description not found"
            )
        elif len(business_info.description) < self._MIN_DESCRIPTION_LENGTH:
            self._add_warning(
                "business_info.description",
                "too_short",
                "Business description is too short (< 20 chars)"
            )

        if business_info.contact:
            self._validate_contact(business_info.contact)

        if business_info.location:
            self._validate_location(business_info.location)

        if business_info.working_hours:
            self._validate_working_hours(business_info.working_hours)

    def _validate_contact(self, contact) -> None:
        """
        Validate contact information

        A malformed email is an error; a suspicious phone number or website
        only produces a warning. Empty values are skipped.

        Args:
            contact: ContactInfo object
        """
        if contact.email and not self._is_valid_email(contact.email):
            self._add_error(
                "business_info.contact.email",
                "invalid_format",
                f"Invalid email format: {contact.email}"
            )

        if contact.phone and not self._is_valid_phone(contact.phone):
            self._add_warning(
                "business_info.contact.phone",
                "invalid_format",
                f"Phone number format may be invalid: {contact.phone}"
            )

        if contact.website and not self._is_valid_url(contact.website):
            self._add_warning(
                "business_info.contact.website",
                "invalid_format",
                f"Website URL may be invalid: {contact.website}"
            )

    def _validate_location(self, location) -> None:
        """
        Validate location information

        Warns when none of address, city, state or country is set.

        Args:
            location: Location object
        """
        if not any([location.address, location.city, location.state, location.country]):
            self._add_warning(
                "business_info.location",
                "incomplete",
                "Location has no address, city, state, or country"
            )

    def _validate_working_hours(self, working_hours) -> None:
        """
        Validate working hours

        Warns when no day of the week has a value.

        Args:
            working_hours: WorkingHours object
        """
        days = [
            working_hours.monday,
            working_hours.tuesday,
            working_hours.wednesday,
            working_hours.thursday,
            working_hours.friday,
            working_hours.saturday,
            working_hours.sunday
        ]

        if not any(days):
            self._add_warning(
                "business_info.working_hours",
                "missing",
                "No working hours specified"
            )

    def _validate_products(self, products: List[Product]) -> None:
        """
        Validate product list

        Warns about unnamed products and validates pricing where present.

        Args:
            products: List of Product objects
        """
        if not products:
            return

        for i, product in enumerate(products):
            prefix = f"products[{i}]"

            if not product.name:
                # Field paths are 0-based, human-readable messages 1-based.
                self._add_warning(
                    f"{prefix}.name", "missing", f"Product {i+1} has no name"
                )

            if product.pricing:
                self._validate_pricing(product.pricing, f"{prefix}.pricing")

    def _validate_services(self, services: List[Service]) -> None:
        """
        Validate service list

        Warns about services without a name or description and validates
        pricing where present.

        Args:
            services: List of Service objects
        """
        if not services:
            return

        for i, service in enumerate(services):
            prefix = f"services[{i}]"

            if not service.name:
                self._add_warning(
                    f"{prefix}.name", "missing", f"Service {i+1} has no name"
                )

            if not service.description:
                self._add_warning(
                    f"{prefix}.description",
                    "missing",
                    f"Service {i+1} has no description"
                )

            if service.pricing:
                self._validate_pricing(service.pricing, f"{prefix}.pricing")

    def _validate_pricing(self, pricing, prefix: str) -> None:
        """
        Validate pricing information

        Negative prices and a discount price above the base price are
        errors; a base price above 1,000,000 is a warning.

        Args:
            pricing: Pricing object
            prefix: Field prefix for error messages
        """
        base_price = pricing.base_price
        discount_price = pricing.discount_price

        if base_price is not None:
            if base_price < 0:
                self._add_error(
                    f"{prefix}.base_price",
                    "invalid_value",
                    "Base price cannot be negative"
                )
            elif base_price > self._SUSPICIOUS_PRICE:
                self._add_warning(
                    f"{prefix}.base_price",
                    "suspicious_value",
                    f"Base price seems very high: {pricing.base_price}"
                )

        if discount_price is not None:
            if discount_price < 0:
                self._add_error(
                    f"{prefix}.discount_price",
                    "invalid_value",
                    "Discount price cannot be negative"
                )
            # Compare against None explicitly: a base price of 0 is a real
            # value and a positive discount price still exceeds it.
            elif base_price is not None and discount_price > base_price:
                self._add_error(
                    f"{prefix}.discount_price",
                    "invalid_value",
                    "Discount price cannot be higher than base price"
                )

    # ------------------------------------------------------------------
    # Completeness scoring
    # ------------------------------------------------------------------

    def _calculate_completeness(self, profile: BusinessProfile) -> float:
        """
        Calculate overall completeness score

        Business info contributes 40%, products 30% and services 30%.

        NOTE(review): a section that is empty contributes nothing, so a
        profile with only products or only services tops out at 0.7
        regardless of its business type - confirm this is intended.

        Args:
            profile: BusinessProfile object

        Returns:
            Completeness score (0.0-1.0)
        """
        total = self._score_business_info(profile.business_info) * self._BUSINESS_INFO_WEIGHT

        if profile.products:
            total += self._score_products(profile.products) * self._PRODUCTS_WEIGHT

        if profile.services:
            total += self._score_services(profile.services) * self._SERVICES_WEIGHT

        return min(total, 1.0)

    def _score_business_info(self, business_info: BusinessInfo) -> float:
        """
        Score business info completeness

        Points: name 2, description 1, contact 1 (email and phone half a
        point each), location 1 (city and address half a point each), out
        of 5 in total.

        Args:
            business_info: BusinessInfo object

        Returns:
            Score (0.0-1.0)
        """
        if not business_info:
            return 0.0

        max_points = 5.0
        points = 0.0

        if business_info.name:
            points += 2.0

        if business_info.description:
            points += 1.0

        contact = business_info.contact
        if contact:
            if contact.email:
                points += 0.5
            if contact.phone:
                points += 0.5

        location = business_info.location
        if location:
            if location.city:
                points += 0.5
            if location.address:
                points += 0.5

        return min(points / max_points, 1.0)

    def _score_products(self, products: List[Product]) -> float:
        """
        Score products completeness

        Each product earns 0.3 for a name, 0.2 for a description, 0.3 for a
        truthy base price and 0.2 for specifications; the result is the
        average over all products.

        Args:
            products: List of Product objects

        Returns:
            Score (0.0-1.0)
        """
        if not products:
            return 0.0

        scores = []
        for product in products:
            product_score = 0.0

            if product.name:
                product_score += 0.3
            if product.description:
                product_score += 0.2
            if product.pricing and product.pricing.base_price:
                product_score += 0.3
            if product.specifications:
                product_score += 0.2

            scores.append(product_score)

        return sum(scores) / len(scores)

    def _score_services(self, services: List[Service]) -> float:
        """
        Score services completeness

        Each service earns 0.25 for each of: name, description, a truthy
        base price, and details or an itinerary; the result is the average
        over all services.

        Args:
            services: List of Service objects

        Returns:
            Score (0.0-1.0)
        """
        if not services:
            return 0.0

        scores = []
        for service in services:
            service_score = 0.0

            if service.name:
                service_score += 0.25
            if service.description:
                service_score += 0.25
            if service.pricing and service.pricing.base_price:
                service_score += 0.25
            if service.details or service.itinerary:
                service_score += 0.25

            scores.append(service_score)

        return sum(scores) / len(scores)

    def _calculate_field_scores(self, profile: BusinessProfile) -> Dict[str, float]:
        """
        Calculate individual field scores

        Args:
            profile: BusinessProfile object

        Returns:
            Dictionary with the unweighted 'business_info', 'products' and
            'services' scores; an empty section scores 0.0
        """
        return {
            'business_info': self._score_business_info(profile.business_info),
            'products': self._score_products(profile.products) if profile.products else 0.0,
            'services': self._score_services(profile.services) if profile.services else 0.0
        }

    # ------------------------------------------------------------------
    # Format checks
    # ------------------------------------------------------------------

    def _is_valid_email(self, email: str) -> bool:
        """
        Validate email format

        The whole string must match, so surrounding whitespace or a trailing
        newline makes the address invalid.

        Args:
            email: Email string

        Returns:
            True if valid
        """
        return self._EMAIL_RE.fullmatch(email) is not None

    def _is_valid_phone(self, phone: str) -> bool:
        """
        Validate phone number (basic check)

        Only counts digits: every other character is ignored and at least
        seven digits are required.

        Args:
            phone: Phone string

        Returns:
            True if likely valid
        """
        digits = self._NON_DIGIT_RE.sub('', phone)
        return len(digits) >= self._MIN_PHONE_DIGITS

    def _is_valid_url(self, url: str) -> bool:
        """
        Validate URL format

        Accepts http:// or https:// followed by at least one character and
        no whitespace anywhere in the string.

        Args:
            url: URL string

        Returns:
            True if valid
        """
        return self._URL_RE.fullmatch(url) is not None
|
|
|
|
class ValidationInput:
    """
    Deprecated placeholder kept only so the name still exists in this module.

    Use ValidationInputSchema instead (the ValidationInput model imported
    from backend.models.schemas at the top of this file).
    """
|
|