Source code for sparkdq.checks.row_level.timestamp_checks.timestamp_between_check
from typing import List
from pydantic import Field, model_validator
from sparkdq.checks.utils.base_comparison_check import BaseBetweenCheck
from sparkdq.core.base_config import BaseRowCheckConfig
from sparkdq.core.severity import Severity
from sparkdq.exceptions import InvalidCheckConfigurationError
from sparkdq.plugin.check_config_registry import register_check_config
class TimestampBetweenCheck(BaseBetweenCheck):
"""
Row-level data quality check that verifies timestamp values are within a defined range.
A row fails the check if **any** of the specified columns contain a timestamp value that is
less than `min_value` or greater than `max_value`. Boundary inclusiveness is configurable.
"""
def __init__(
self,
check_id: str,
columns: List[str],
min_value: str,
max_value: str,
inclusive: tuple[bool, bool],
severity: Severity = Severity.CRITICAL,
):
"""
Initialize a new TimestampBetweenCheck.
Args:
check_id (str): Unique identifier for the check instance.
columns (List[str]): Timestamp columns to validate.
min_value (str): Minimum allowed timestamp in ISO format.
max_value (str): Maximum allowed timestamp in ISO format.
inclusive (tuple): Tuple of booleans indicating whether to include the bounds.
severity (Severity): Severity level of the check result.
"""
super().__init__(
check_id=check_id,
columns=columns,
severity=severity,
min_value=min_value,
max_value=max_value,
inclusive=inclusive,
cast_type="timestamp",
)
[docs]
@register_check_config(check_name="timestamp-between-check")
class TimestampBetweenCheckConfig(BaseRowCheckConfig):
"""
Declarative configuration model for TimestampBetweenCheck.
Attributes:
columns (List[str]): The list of timestamp columns to validate.
min_value (str): Minimum allowed timestamp.
max_value (str): Maximum allowed timestamp.
inclusive (tuple): Optional tuple of booleans for boundary inclusion.
"""
check_class = TimestampBetweenCheck
columns: List[str] = Field(..., description="Timestamp columns to check")
min_value: str = Field(..., description="Minimum allowed timestamp (ISO format)", alias="min-value")
max_value: str = Field(..., description="Maximum allowed timestamp (ISO format)", alias="max-value")
inclusive: tuple[bool, bool] = Field((False, False), description="Whether to include [min, max] bounds")
[docs]
@model_validator(mode="after")
def validate_between_values(self) -> "TimestampBetweenCheckConfig":
"""
Validates that ``min_value`` and ``max_value`` are properly configured
and that ``min_value`` is not greater than ``max_value``.
Raises:
InvalidCheckConfigurationError: If min_value or max_value are not set or if min_value > max_value.
"""
if self.min_value > self.max_value:
raise InvalidCheckConfigurationError(
f"min_value ({self.min_value}) must not be greater than max_value ({self.max_value})"
)
return self