#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# rule_engine/types/datatype.py
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of the project nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import collections.abc
import datetime
import decimal
import types as pytypes
import typing
from typing import Any, TypeGuard, overload
from .definitions import (
_ArrayDataTypeDef,
_DATA_TYPE_UNDEFINED,
_DataTypeDef,
_FunctionDataTypeDef,
_MappingDataTypeDef,
_NullableDataTypeDef,
_PYTHON_FUNCTION_TYPE,
_ReferenceDataTypeDef,
_SetDataTypeDef,
NoneType,
)
from ._object import _ObjectDataTypeDef
[docs]
def iterable_member_value_type(python_value: Any) -> _DataTypeDef:
    """
    Determine the single data type shared by the members of the iterable *python_value*.

    Members that already are data type definitions are used as-is, every other member is mapped through
    :py:meth:`DataType.from_value`. NULL members are ignored so that a typed collection may also hold NULL values.
    By extension this means that an iterable can not be defined as only capable of containing NULL values.

    :param python_value: The native Python iterable whose members are inspected.
    :return: The data type of the members when exactly one non-NULL type is present, otherwise
        :py:attr:`DataType.UNDEFINED`. This will never be :py:attr:`DataType.NULL`.
    """
    def resolve(member: Any) -> _DataTypeDef:
        # map a single member to its data type definition
        if DataType.is_definition(member):
            return member
        try:
            return DataType.from_value(member)
        except TypeError:
            # unknown runtime values (e.g. OBJECT instances) are left for the declared type to validate
            return _DATA_TYPE_UNDEFINED

    member_types = {resolve(member) for member in python_value}
    # treat NULL as a special case, allowing typed arrays to be a specified type *or* NULL
    # this however makes it impossible to define an array with a type of NULL
    member_types.discard(DataType.NULL)
    if len(member_types) != 1:
        # either no typed members at all or a mix of different types
        return DataType.UNDEFINED
    return member_types.pop()
class DataTypeMeta(type):
    """
    Metaclass which records the names of the data type constants defined on a class and exposes them through the
    container protocols (``in``, ``[]``, iteration and ``len``) on the class object itself.
    """
    _members_: tuple[str, ...]

    def __new__(metacls, cls: str, bases: tuple[type, ...], classdict: dict[str, Any]) -> 'DataTypeMeta':
        """
        Create the class and collect the names of its members. A member is an attribute whose name is entirely
        upper case and whose value is a :py:class:`_DataTypeDef` instance. Definition order is preserved.
        """
        data_type = super().__new__(metacls, cls, bases, classdict)
        data_type._members_ = tuple(
            name for name, value in classdict.items()
            if name.upper() == name and isinstance(value, _DataTypeDef)
        )
        return data_type

    def __contains__(cls, item: object) -> bool:
        """Check whether *item* is the name of one of the members."""
        return item in cls._members_

    def __getitem__(cls, item: str) -> _DataTypeDef:
        """Look up a member by its name, raising :py:exc:`KeyError` for anything that is not a member."""
        if item in cls._members_:
            return getattr(cls, item)
        raise KeyError(item)

    def __iter__(cls) -> 'collections.abc.Iterator[str]':
        """Iterate over the member names in definition order."""
        for name in cls._members_:
            yield name

    def __len__(cls) -> int:
        """Get the number of members."""
        return len(cls._members_)
[docs]
class DataType(metaclass=DataTypeMeta):
    """
    A collection of constants representing the different supported data types. There are three ways to compare data
    types. All three are effectively the same when dealing with scalars.

    Equality checking
      .. code-block::

        dt == DataType.TYPE

      This is the most explicit form of testing and when dealing with compound data types, it recursively checks
      that all of the member types are also equal.

    Type checking
      .. code-block::

        DataType.is_type(dt, DataType.TYPE)

      This checks that the data type belongs to the same family as the given sentinel. For scalar types this is
      equivalent to an equality check; for compound types (e.g. :py:attr:`.ARRAY`, :py:attr:`.MAPPING`) it matches
      any parameterization of that type without inspecting member types. When used in an ``if`` branch mypy narrows
      the type of *dt* to the matching concrete class (e.g. after ``DataType.is_type(dt, DataType.NULLABLE)`` mypy
      knows *dt* is a :py:class:`~rule_engine.types._NullableDataTypeDef`).

    Compatibility checking
      .. code-block::

        DataType.is_compatible(dt, DataType.TYPE)

      This checks that the types are compatible without any kind of conversion. When dealing with compound data
      types, this ensures that the member types are either the same or :py:attr:`~.UNDEFINED`.
    """
    # The metaclass records every upper case attribute holding a _DataTypeDef instance as a member, which makes the
    # names below available through ``in``, ``[]``, iteration and ``len`` on the class itself.
    # NOTE(review): the second argument of each definition appears to be the native Python type backing the data
    # type -- confirm against the definitions module.
    ARRAY = _ArrayDataTypeDef('ARRAY', tuple)
    BYTES = _DataTypeDef('BYTES', bytes)
    BOOLEAN = _DataTypeDef('BOOLEAN', bool)
    DATETIME = _DataTypeDef('DATETIME', datetime.datetime)
    # FLOAT is declared with decimal.Decimal; from_type and from_value also map float and int to it
    FLOAT = _DataTypeDef('FLOAT', decimal.Decimal)
    FUNCTION = _FunctionDataTypeDef('FUNCTION', _PYTHON_FUNCTION_TYPE)
    MAPPING = _MappingDataTypeDef('MAPPING', dict)
    NULL = _DataTypeDef('NULL', NoneType)
    # NULLABLE wraps another data type; from_type produces it for Optional[X] / X | None type hints
    NULLABLE = _NullableDataTypeDef('NULLABLE', object)
    OBJECT = _ObjectDataTypeDef('OBJECT', object)
    SET = _SetDataTypeDef('SET', set)
    STRING = _DataTypeDef('STRING', str)
    TIMEDELTA = _DataTypeDef('TIMEDELTA', datetime.timedelta)
    UNDEFINED = _DATA_TYPE_UNDEFINED
"""
Undefined values. This constant can be used to indicate that a particular symbol is valid, but its data type is
currently unknown.
"""
[docs]
@classmethod
def from_name(cls, name: str) -> _DataTypeDef:
    """
    Look up a data type constant by its attribute name.

    .. versionadded:: 2.0.0

    :param str name: The name of the data type to retrieve.
    :return: One of the constants.
    :raises TypeError: If *name* is not a string.
    :raises ValueError: If *name* does not refer to a data type definition on this class.
    """
    if not isinstance(name, str):
        raise TypeError('from_name argument 1 must be str, not ' + type(name).__name__)
    candidate = getattr(cls, name, None)
    if isinstance(candidate, _DataTypeDef):
        return candidate
    # the attribute is either missing or something other than a data type definition (e.g. a method)
    raise ValueError("can not map name {0!r} to a compatible data type".format(name))
[docs]
@classmethod
def from_type(cls, python_type: Any) -> _DataTypeDef:
    """
    Get the supported data type constant for the specified Python type/type hint. If the type or type hint can not
    be mapped to a supported data type, then a :py:exc:`ValueError` exception will be raised. This function will
    not return :py:attr:`.UNDEFINED`.

    ``Optional[X]`` and ``X | None`` are mapped to :py:attr:`.NULLABLE` wrapping the data type of ``X``. Unions
    with more than one member other than ``None`` are not supported.

    :param type python_type: The native Python type or type hint to retrieve the corresponding type constant for.
    :return: One of the constants.
    :raises TypeError: If *python_type* is neither a type nor a type hint.
    :raises ValueError: If *python_type* can not be mapped to a supported data type.

    .. versionchanged:: 4.1.0
        Added support for typehints.
    """
    # The origin is resolved before the argument is validated. A PEP 604 union such as ``int | None`` is a
    # types.UnionType instance: it is not a ``type`` and does not expose ``__origin__`` on every supported Python
    # version, so checking only those two properties would reject it before the union handling below is reached.
    origin = typing.get_origin(python_type)
    is_type_hint = origin is not None or hasattr(python_type, '__origin__')
    if not (isinstance(python_type, type) or is_type_hint):
        raise TypeError('from_type argument 1 must be a type or a type hint, not ' + type(python_type).__name__)
    # Optional[X] / X | None resolve to NULLABLE(from_type(X)); unions of more than one non-None member are
    # unsupported because Rule Engine has no sum type
    if origin is typing.Union or origin is pytypes.UnionType:
        args = typing.get_args(python_type)
        non_none = tuple(arg for arg in args if arg is not NoneType)
        if len(non_none) == 1 and len(args) == 2:
            return cls.NULLABLE(cls.from_type(non_none[0]))
        raise ValueError("can not map python type {0!r} to a compatible data type".format(python_type))
    if python_type in (list, range, tuple):
        return cls.ARRAY
    elif python_type is bool:
        return cls.BOOLEAN
    elif python_type is bytes:
        return cls.BYTES
    elif python_type is datetime.date or python_type is datetime.datetime:
        return cls.DATETIME
    elif python_type is datetime.timedelta:
        return cls.TIMEDELTA
    elif python_type in (decimal.Decimal, float, int):
        return cls.FLOAT
    elif python_type is dict:
        return cls.MAPPING
    elif python_type is NoneType:
        return cls.NULL
    elif python_type is set:
        return cls.SET
    elif python_type is str:
        return cls.STRING
    elif python_type is _PYTHON_FUNCTION_TYPE:
        return cls.FUNCTION
    elif hasattr(python_type, "__origin__"):
        # parameterized hints such as list[int] or typing.Dict[str, int]: map the container first, then
        # specialize it with the member type(s) when they are available
        origin_python_type = python_type.__origin__
        maintype = cls.from_type(origin_python_type)
        if origin_python_type in (list, tuple, set):
            # the arguments of a tuple hint are ignored, leaving the member type of the ARRAY unspecified
            if hasattr(python_type, "__args__") and origin_python_type is not tuple:
                valuetype = cls.from_type(python_type.__args__[0])
                return maintype(valuetype)  # type: ignore[operator]
        if origin_python_type is dict:
            if hasattr(python_type, "__args__"):
                key_type = cls.from_type(python_type.__args__[0])
                value_type = cls.from_type(python_type.__args__[1])
                return maintype(key_type, value_type)  # type: ignore[operator]
        return maintype
    raise ValueError("can not map python type {0!r} to a compatible data type".format(python_type.__name__))
[docs]
@classmethod
def from_value(cls, python_value: Any) -> _DataTypeDef:
    """
    Map a native Python value to the data type constant which describes it. Compound values (sets, mappings and
    sequences) have their member types determined with :py:func:`iterable_member_value_type`. This function will
    not return :py:attr:`.UNDEFINED`.

    :param python_value: The native Python value to retrieve the corresponding data type constant for.
    :return: One of the constants.
    :raises TypeError: If the value can not be mapped to a supported data type.
    """
    # the order of these checks is significant, e.g. bool has to be tested before int because bool is a subclass
    # of int
    scalar_checks = (
        (bool, cls.BOOLEAN),
        (bytes, cls.BYTES),
        ((datetime.date, datetime.datetime), cls.DATETIME),
        (datetime.timedelta, cls.TIMEDELTA),
        ((decimal.Decimal, float, int), cls.FLOAT),
    )
    for native_types, data_type in scalar_checks:
        if isinstance(python_value, native_types):
            return data_type
    if python_value is None:
        return cls.NULL
    if isinstance(python_value, (set,)):
        return cls.SET(value_type=iterable_member_value_type(python_value))
    # str is tested ahead of the generic sequence check below so that strings are not treated as arrays
    if isinstance(python_value, (str,)):
        return cls.STRING
    if isinstance(python_value, collections.abc.Mapping):
        key_type = iterable_member_value_type(python_value.keys())
        value_type = iterable_member_value_type(python_value.values())
        return cls.MAPPING(key_type=key_type, value_type=value_type)
    if isinstance(python_value, collections.abc.Sequence):
        return cls.ARRAY(value_type=iterable_member_value_type(python_value))
    if callable(python_value):
        return cls.FUNCTION
    raise TypeError("can not map python type {0!r} to a compatible data type".format(type(python_value).__name__))
[docs]
@classmethod
def is_compatible(cls, dt1: _DataTypeDef, dt2: _DataTypeDef) -> bool:
    """
    Check if two data type definitions are compatible without any kind of conversion. This evaluates to ``True``
    when one or both are :py:attr:`.UNDEFINED` or both types are the same. In the case of compound data types (such
    as :py:attr:`.ARRAY`) the member types are checked recursively in the same manner.

    .. versionadded:: 2.1.0

    :param dt1: The first data type to compare.
    :param dt2: The second data type to compare.
    :return: Whether or not the two types are compatible.
    :rtype: bool
    :raises TypeError: If either argument is not a data type definition.
    """
    if not (cls.is_definition(dt1) and cls.is_definition(dt2)):
        raise TypeError('argument is not a data type definition')
    # UNDEFINED on either side is compatible with everything
    if dt1 is _DATA_TYPE_UNDEFINED or dt2 is _DATA_TYPE_UNDEFINED:
        return True
    # unresolved forward references are treated as compatible with anything; actual resolution happens at rule
    # parse time via Context.resolve_type
    if isinstance(dt1, _ReferenceDataTypeDef) or isinstance(dt2, _ReferenceDataTypeDef):
        return True
    # NULLABLE is a parse-time marker: a NULLABLE(T) value is T-or-NULL at runtime, so it is compatible with
    # T (non-null case), with NULL (null case), and with another NULLABLE whose inner type is compatible.
    # This check is symmetric; argument-position strictness (rejecting NULLABLE(T) where plain T is required)
    # lives at the caller.
    if isinstance(dt1, _NullableDataTypeDef) or isinstance(dt2, _NullableDataTypeDef):
        if dt1 == cls.NULL or dt2 == cls.NULL:
            return True
        # unwrap whichever side is NULLABLE and compare the underlying types
        inner1 = dt1.inner_type if isinstance(dt1, _NullableDataTypeDef) else dt1
        inner2 = dt2.inner_type if isinstance(dt2, _NullableDataTypeDef) else dt2
        return cls.is_compatible(inner1, inner2)
    if dt1.is_scalar and dt2.is_scalar:
        # two function definitions are compared by signature: return type, then argument types, then the
        # minimum argument count; a signature detail left UNDEFINED on either side is not compared
        if DataType.is_type(dt1, DataType.FUNCTION) and DataType.is_type(dt2, DataType.FUNCTION):
            if not cls.is_compatible(dt1.return_type, dt2.return_type):
                return False
            if dt1.argument_types != _DATA_TYPE_UNDEFINED and dt2.argument_types != _DATA_TYPE_UNDEFINED:
                assert isinstance(dt1.argument_types, tuple) and isinstance(dt2.argument_types, tuple)
                if len(dt1.argument_types) != len(dt2.argument_types):
                    return False
                if not all(cls.is_compatible(arg1_dt, arg2_dt) for (arg1_dt, arg2_dt) in zip(dt1.argument_types, dt2.argument_types)):
                    return False
            if dt1.minimum_arguments != _DATA_TYPE_UNDEFINED and dt2.minimum_arguments != _DATA_TYPE_UNDEFINED:
                if dt1.minimum_arguments != dt2.minimum_arguments:
                    return False
            return True
        # every other pair of scalar definitions has to be equal
        return dt1 == dt2
    elif dt1.is_compound and dt2.is_compound:
        # compound definitions have to belong to the same family, their member types are checked recursively
        if DataType.is_type(dt1, DataType.ARRAY) and DataType.is_type(dt2, DataType.ARRAY):
            return cls.is_compatible(dt1.value_type, dt2.value_type)
        elif DataType.is_type(dt1, DataType.MAPPING) and DataType.is_type(dt2, DataType.MAPPING):
            if not cls.is_compatible(dt1.key_type, dt2.key_type):
                return False
            if not cls.is_compatible(dt1.value_type, dt2.value_type):
                return False
            return True
        elif DataType.is_type(dt1, DataType.SET) and DataType.is_type(dt2, DataType.SET):
            return cls.is_compatible(dt1.value_type, dt2.value_type)
        elif DataType.is_type(dt1, DataType.OBJECT) and DataType.is_type(dt2, DataType.OBJECT):
            # bare DataType.OBJECT acts as a wildcard, mirroring how an untyped ARRAY (value_type UNDEFINED)
            # matches any typed ARRAY via its value_type compatibility check
            if dt1 is DataType.OBJECT or dt2 is DataType.OBJECT:
                return True
            return dt1.name == dt2.name
    # reached for a scalar / compound mix and for compound definitions of different families
    return False
[docs]
@classmethod
def is_definition(cls, value: Any) -> bool:
    """
    Test whether *value* is a data type definition, i.e. an instance of :py:class:`_DataTypeDef` or one of its
    subclasses.

    .. versionadded:: 2.1.0

    :param value: The value to check.
    :return: ``True`` if *value* is a data type definition.
    :rtype: bool
    """
    return isinstance(value, _DataTypeDef)
# Overload stubs for static type checkers; the bodies are ``...`` and carry no logic of their own. When *kind* is
# one of the specialized definition classes the result is declared as a TypeGuard, allowing a type checker to narrow
# *dt* to that class inside an ``if`` branch. The final overload is the fallback for plain _DataTypeDef kinds and
# declares a regular bool result.
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _ArrayDataTypeDef) -> TypeGuard[_ArrayDataTypeDef]: ...
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _FunctionDataTypeDef) -> TypeGuard[_FunctionDataTypeDef]: ...
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _MappingDataTypeDef) -> TypeGuard[_MappingDataTypeDef]: ...
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _NullableDataTypeDef) -> TypeGuard[_NullableDataTypeDef]: ...
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _ObjectDataTypeDef) -> TypeGuard[_ObjectDataTypeDef]: ...
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _SetDataTypeDef) -> TypeGuard[_SetDataTypeDef]: ...
@classmethod
@overload
def is_type(cls, dt: _DataTypeDef, kind: _DataTypeDef) -> bool: ...
[docs]
@classmethod
def is_type(cls, dt: _DataTypeDef, kind: _DataTypeDef) -> bool:
    """
    Check if *dt* is the same base type as *kind*, ignoring member types for compound types. This is the
    preferred replacement for ``isinstance(dt, DataType.TYPE.__class__)``.

    When *kind* is a plain :py:class:`_DataTypeDef` instance (e.g. :py:attr:`.STRING`, :py:attr:`.BOOLEAN`) it is
    treated as a singleton and *dt* has to be that very object. When *kind* is an instance of a specialized
    definition class (e.g. :py:attr:`.ARRAY`, :py:attr:`.MAPPING`, :py:attr:`.OBJECT`) only the type family is
    checked, without inspecting member / key / value types. Use :py:meth:`is_compatible` when those matter.

    .. versionadded:: 5.0.0

    :param dt: The data type to test.
    :param kind: The base type to test against (e.g. ``DataType.ARRAY``).
    :return: ``True`` if *dt* belongs to the same type family as *kind*.
    :rtype: bool
    """
    family = type(kind)
    if family is not _DataTypeDef:
        # specialized definition classes match any instance of the same class
        return isinstance(dt, family)
    return dt is kind