Source code for tensorcircuit.backends.tensorflow_backend

Backend magic inherited from tensornetwork: tensorflow backend
# pylint: disable=invalid-name

import os
import re
from functools import reduce, partial
from operator import mul
from typing import Any, Callable, Optional, Sequence, Tuple, Union

from scipy.sparse import coo_matrix
import tensornetwork
from tensornetwork.backends.tensorflow import tensorflow_backend
from .abstract_backend import ExtendedBackend

dtypestr: str
Tensor = Any
RGenerator = Any  # tf.random.Generator
pytree = Any

tf: Any

class keras_optimizer:
    """
    Thin functional wrapper around a Keras optimizer.

    The wrapper lets a Keras optimizer be driven with ``(grads, params)``
    pytrees and returns the updated parameters, instead of requiring the
    caller to manage ``tf.Variable`` objects directly.
    """

    def __init__(self, optimizer: Any) -> None:
        """
        :param optimizer: the Keras optimizer instance that performs the
            actual ``apply_gradients`` step.
        :type optimizer: Any
        """
        self.optimizer = optimizer
        self.is_initialized = False

    def update(self, grads: pytree, params: pytree) -> pytree:
        """
        Apply one optimizer step and return the updated parameters.

        On the first call every leaf of ``params`` is bound to a
        ``tf.Variable`` (leaves that already are variables are used as they
        are). On later calls the stored variables are re-assigned from the
        incoming ``params`` before the gradients are applied.

        :param grads: gradients, with the same tree structure as ``params``.
        :type grads: pytree
        :param params: current parameter values (tensors and/or variables).
        :type params: pytree
        :returns: updated parameters with the tree structure of ``params``;
            leaves that were passed as variables on the first call are
            returned as variables, the others as plain values.
        :rtype: pytree
        """
        # Flatten with the backend utilities; no backend instance is created
        # here since the cache lives at the upper level of the backend factory.
        flat_grads, _ = TensorFlowBackend.tree_flatten(None, grads)
        flat_params, treedef = TensorFlowBackend.tree_flatten(None, params)

        if self.is_initialized:
            # Refresh the stored variables from the values handed in.
            for idx, leaf in enumerate(flat_params):
                if isinstance(leaf, tf.Variable):
                    self.params_v[idx] = self.params_v[idx].assign(leaf.value())
                else:
                    self.params_v[idx] = self.params_v[idx].assign(leaf)
        else:
            # First call: create the variables and remember which leaves
            # were variables already.
            self.params_v = []
            self.is_variable = []
            for leaf in flat_params:
                if isinstance(leaf, tf.Variable):
                    self.params_v.append(leaf)
                    self.is_variable.append(True)
                else:
                    self.params_v.append(tf.Variable(leaf))
                    self.is_variable.append(False)
            self.is_initialized = True

        self.optimizer.apply_gradients(zip(flat_grads, self.params_v))

        updated_leaves = [
            variable if was_variable else variable.value()
            for variable, was_variable in zip(self.params_v, self.is_variable)
        ]
        return TensorFlowBackend.tree_unflatten(None, treedef, updated_leaves)
def _tensordot_tf(
    self: Any, a: Tensor, b: Tensor, axes: Union[int, Sequence[Sequence[int]]]
) -> Tensor:
    """
    Contract ``a`` and ``b`` over ``axes`` by delegating to ``tf.tensordot``.

    :param a: left tensor.
    :type a: Tensor
    :param b: right tensor.
    :type b: Tensor
    :param axes: axes specification forwarded unchanged to ``tf.tensordot``.
    :type axes: Union[int, Sequence[Sequence[int]]]
    :returns: the contracted tensor.
    :rtype: Tensor
    """
    return tf.tensordot(a, b, axes)


def _outer_product_tf(self: Any, tensor1: Tensor, tensor2: Tensor) -> Tensor:
    """
    Outer product of two tensors, i.e. ``tf.tensordot`` with zero contracted axes.

    :param tensor1: left tensor.
    :type tensor1: Tensor
    :param tensor2: right tensor.
    :type tensor2: Tensor
    :returns: the outer product.
    :rtype: Tensor
    """
    return tf.tensordot(tensor1, tensor2, 0)


def _matmul_tf(self: Any, tensor1: Tensor, tensor2: Tensor) -> Tensor:
    """
    Matrix product of two tensors of order greater than one.

    :param tensor1: left operand.
    :type tensor1: Tensor
    :param tensor2: right operand.
    :type tensor2: Tensor
    :raises ValueError: if either operand has order 0 or 1.
    :returns: ``tf.matmul(tensor1, tensor2)``.
    :rtype: Tensor
    """
    if (len(tensor1.shape) <= 1) or (len(tensor2.shape) <= 1):
        raise ValueError("inputs to `matmul` have to be a tensors of order > 1,")
    return tf.matmul(tensor1, tensor2)


def _random_choice_tf(
    g: RGenerator,
    a: Union[int, Sequence[int], Tensor],
    shape: Union[int, Sequence[int]],
    p: Optional[Union[Sequence[float], Tensor]] = None,
) -> Tensor:
    """
    Sample elements of ``a`` with replacement using a ``tf.random.Generator``.

    Only sampling with replacement is supported. The sampling is done by
    inverting the cumulative distribution of ``p`` with ``tf.searchsorted``;
    part of the approach is inspired by the corresponding implementation in
    jax (Apache 2.0), since the stateless categorical utility in tf has no
    ``tf.random.Generator`` counterpart.

    :param g: random generator providing ``uniform``.
    :type g: RGenerator
    :param a: an int ``n`` (meaning ``range(n)``) or a 1D collection to draw from.
    :type a: Union[int, Sequence[int], Tensor]
    :param shape: output shape; an int is treated as a one-element shape.
    :type shape: Union[int, Sequence[int]]
    :param p: weights for each entry of ``a``; uniform if None. The weights
        need not be normalized because the draw is scaled by their total.
    :type p: Optional[Union[Sequence[float], Tensor]], optional
    :returns: tensor of the requested shape holding the sampled entries.
    :rtype: Tensor
    """
    if isinstance(a, int):
        assert a > 0
        a = tf.range(a)
    if not isinstance(a, (tf.Tensor, tf.Variable)):
        a = tf.constant(a)
    assert len(a.shape) == 1
    if isinstance(shape, int):
        shape = (shape,)
    if p is None:
        dtype = tf.float32
        p = tf.cast(tf.ones_like(a), dtype=dtype)
        p /= tf.reduce_sum(p)
    else:
        if not isinstance(p, (tf.Tensor, tf.Variable)):
            p = tf.constant(p)
        dtype = p.dtype
    # the initial value 1 makes an empty shape (scalar output) valid
    total = reduce(mul, shape, 1)
    p_cuml = tf.cumsum(p)
    # uniform draws live in [0, 1), so (1 - u) lies in (0, 1]
    r = p_cuml[-1] * (1 - g.uniform([total], dtype=dtype))
    ind = tf.searchsorted(p_cuml, r)
    res = tf.gather(a, ind)
    return tf.reshape(res, shape)


def _qr_tf(
    self: Any,
    tensor: Tensor,
    pivot_axis: int = -1,
    non_negative_diagonal: bool = False,
) -> Tuple[Tensor, Tensor]:
    """
    Computes the QR decomposition of a tensor.

    The QR decomposition is performed by treating the tensor as a matrix,
    with an effective left (row) index resulting from combining the axes
    `tensor.shape[:pivot_axis]` and an effective right (column) index
    resulting from combining the axes `tensor.shape[pivot_axis:]`.

    :Example:

    If `tensor` had a shape (2, 3, 4, 5) and `pivot_axis` was 2,
    then `q` would have shape (2, 3, 6), and `r` would have shape (6, 4, 5).
    The output consists of two tensors `Q, R` such that:

    Q[i1,...,iN, j] * R[j, k1,...,kM] == tensor[i1,...,iN, k1,...,kM]

    :param tensor: A tensor to be decomposed.
    :type tensor: Tensor
    :param pivot_axis: Where to split the tensor's axes before flattening into a matrix.
    :type pivot_axis: int, optional
    :param non_negative_diagonal: if True, the phases of the diagonal of `R`
        are absorbed into `Q` so that the diagonal of `R` is non-negative.
    :type non_negative_diagonal: bool, optional
    :returns: Q, the left tensor factor, and R, the right tensor factor.
    :rtype: Tuple[Tensor, Tensor]
    """
    from .tf_ops import tfqr

    left_dims = tf.shape(tensor)[:pivot_axis]
    right_dims = tf.shape(tensor)[pivot_axis:]

    tensor = tf.reshape(tensor, [tf.reduce_prod(left_dims), tf.reduce_prod(right_dims)])
    q, r = tfqr(tensor)
    if non_negative_diagonal:
        phases = tf.math.sign(tf.linalg.diag_part(r))
        q = q * phases
        # Q D D^* R == Q R only holds with the conjugated phases on R;
        # for real dtypes the conjugation is a no-op.
        r = tf.math.conj(phases)[:, None] * r
    center_dim = tf.shape(q)[1]
    q = tf.reshape(q, tf.concat([left_dims, [center_dim]], axis=-1))
    r = tf.reshape(r, tf.concat([[center_dim], right_dims], axis=-1))
    return q, r


def _rq_tf(
    self: Any,
    tensor: Tensor,
    pivot_axis: int = 1,
    non_negative_diagonal: bool = False,
) -> Tuple[Tensor, Tensor]:
    """
    Computes the RQ decomposition of a tensor.

    The RQ decomposition is performed by treating the tensor as a matrix,
    with an effective left (row) index resulting from combining the axes
    `tensor.shape[:pivot_axis]` and an effective right (column) index
    resulting from combining the axes `tensor.shape[pivot_axis:]`.
    Internally a QR decomposition of the conjugate transpose is computed
    and then conjugate-transposed back.

    :Example:

    If `tensor` had a shape (2, 3, 4, 5) and `pivot_axis` was 2,
    then `r` would have shape (2, 3, 6), and `q` would have shape (6, 4, 5).
    The output consists of two tensors `R, Q` such that:

    R[i1,...,iN, j] * Q[j, k1,...,kM] == tensor[i1,...,iN, k1,...,kM]

    :param tensor: A tensor to be decomposed.
    :type tensor: Tensor
    :param pivot_axis: Where to split the tensor's axes before flattening into a matrix.
    :type pivot_axis: int, optional
    :param non_negative_diagonal: if True, the phases of the diagonal of `R`
        are absorbed into `Q` so that the diagonal of `R` is non-negative.
    :type non_negative_diagonal: bool, optional
    :returns: R, the left tensor factor, and Q, the right tensor factor.
    :rtype: Tuple[Tensor, Tensor]
    """
    from .tf_ops import tfqr

    left_dims = tf.shape(tensor)[:pivot_axis]
    right_dims = tf.shape(tensor)[pivot_axis:]

    tensor = tf.reshape(tensor, [tf.reduce_prod(left_dims), tf.reduce_prod(right_dims)])
    q, r = tfqr(tf.math.conj(tf.transpose(tensor)))
    if non_negative_diagonal:
        phases = tf.math.sign(tf.linalg.diag_part(r))
        q = q * phases
        # conjugated phases keep the product q @ r unchanged for complex input
        r = tf.math.conj(phases)[:, None] * r
    r, q = (
        tf.math.conj(tf.transpose(r)),
        tf.math.conj(tf.transpose(q)),
    )  # M=r*q at this point
    center_dim = tf.shape(r)[1]
    r = tf.reshape(r, tf.concat([left_dims, [center_dim]], axis=-1))
    q = tf.reshape(q, tf.concat([[center_dim], right_dims], axis=-1))
    return r, q


def _svd_tf(
    self: Any,
    tensor: Tensor,
    pivot_axis: int = -1,
    max_singular_values: Optional[int] = None,
    max_truncation_error: Optional[float] = None,
    relative: Optional[bool] = False,
) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    """
    Computes the singular value decomposition (SVD) of a tensor.

    The SVD is performed by treating the tensor as a matrix, with an effective
    left (row) index resulting from combining the axes `tensor.shape[:pivot_axis]`
    and an effective right (column) index resulting from combining the axes
    `tensor.shape[pivot_axis:]`.

    For example, if `tensor` had a shape (2, 3, 4, 5) and `pivot_axis` was 2,
    then `u` would have shape (2, 3, 6), `s` would have shape (6), and `vh`
    would have shape (6, 4, 5).

    If `max_singular_values` is set to an integer, the SVD is truncated to keep
    at most this many singular values.

    If `max_truncation_error > 0`, as many singular values will be truncated as
    possible, so that the truncation error (the norm of discarded singular
    values) is at most `max_truncation_error`.
    If `relative` is set `True` then `max_truncation_error` is understood
    relative to the largest singular value.

    If both `max_singular_values` and `max_truncation_error` are specified, the
    number of retained singular values will be
    `min(max_singular_values, nsv_auto_trunc)`, where `nsv_auto_trunc` is the
    number of singular values that must be kept to maintain a truncation error
    smaller than `max_truncation_error`.

    The output consists of three tensors `u, s, vh` such that:

    u[i1,...,iN, j] * s[j] * vh[j, k1,...,kM] == tensor[i1,...,iN, k1,...,kM]

    Note that the output ordering matches numpy.linalg.svd rather than tf.svd.

    If the environment variable ``TC_BACKENDS_TENSORFLOW_BACKEND__SVD_TF_EPS``
    is set to an integer ``k``, ``10**(-k)`` is added to every matrix entry
    before the decomposition (for numerical stability, at least on tf+cpu).

    :param tensor: A tensor to be decomposed.
    :type tensor: Tensor
    :param pivot_axis: Where to split the tensor's axes before flattening into a matrix.
    :type pivot_axis: int, optional
    :param max_singular_values: The number of singular values to keep,
        or `None` to keep them all.
    :type max_singular_values: Optional[int], optional
    :param max_truncation_error: The maximum allowed truncation error or
        `None` to not do any truncation.
    :type max_truncation_error: Optional[float], optional
    :param relative: Multiply `max_truncation_error` with the largest singular value.
    :type relative: Optional[bool], optional
    :returns: `u` (left tensor factor), `s` (singular values ordered from
        largest to smallest), `vh` (right tensor factor) and `s_rest`
        (discarded singular values, length zero if no truncation).
    :rtype: Tuple[Tensor, Tensor, Tensor, Tensor]
    """
    left_dims = tf.shape(tensor)[:pivot_axis]
    right_dims = tf.shape(tensor)[pivot_axis:]

    tensor = tf.reshape(tensor, [tf.reduce_prod(left_dims), tf.reduce_prod(right_dims)])
    eps = os.environ.get("TC_BACKENDS_TENSORFLOW_BACKEND__SVD_TF_EPS")
    if eps is not None:
        eps = 10 ** (-int(eps))
        tensor += eps * tf.ones(tensor.shape, dtype=tensor.dtype)
        # for numerical stability at least in tf+cpu
    s, u, v = tf.linalg.svd(tensor)

    if max_singular_values is None:
        max_singular_values = tf.size(s, out_type=tf.int64)
    else:
        max_singular_values = tf.constant(max_singular_values, dtype=tf.int64)

    if max_truncation_error is not None:
        # Cumulative norms of singular values in ascending order.
        trunc_errs = tf.sqrt(tf.cumsum(tf.square(s), reverse=True))
        # If relative is true, rescale max_truncation error with the largest
        # singular value to yield the absolute maximal truncation error.
        if relative:
            abs_max_truncation_error = max_truncation_error * s[0]
        else:
            abs_max_truncation_error = max_truncation_error
        # We must keep at least this many singular values to ensure the
        # truncation error is <= abs_max_truncation_error.
        num_sing_vals_err = tf.math.count_nonzero(
            tf.cast(trunc_errs > abs_max_truncation_error, dtype=tf.int32)
        )
    else:
        num_sing_vals_err = max_singular_values

    num_sing_vals_keep = tf.minimum(max_singular_values, num_sing_vals_err)

    # tf.svd() always returns the singular values as a vector of float{32,64}.
    # since tf.math_ops.real is automatically applied to s. This causes
    # s to possibly not be the same dtype as the original tensor, which can cause
    # issues for later contractions. To fix it, we recast to the original dtype.
    s = tf.cast(s, tensor.dtype)

    s_rest = s[num_sing_vals_keep:]
    s = s[:num_sing_vals_keep]
    u = u[:, :num_sing_vals_keep]
    v = v[:, :num_sing_vals_keep]

    vh = tf.linalg.adjoint(v)

    dim_s = tf.shape(s)[0]  # must use tf.shape (not s.shape) to compile
    u = tf.reshape(u, tf.concat([left_dims, [dim_s]], axis=-1))
    vh = tf.reshape(vh, tf.concat([[dim_s], right_dims], axis=-1))

    return u, s, vh, s_rest


# temporary hot replace until new version of tensorflow is released
# (the upstream issue link is not preserved in this copy of the source)
# avoid buggy tensordot2 in tensornetwork

tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.tensordot = (
    _tensordot_tf
)
tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.outer_product = (
    _outer_product_tf
)
tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.matmul = (
    _matmul_tf
)
tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.qr = _qr_tf
tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.rq = _rq_tf
tensornetwork.backends.tensorflow.tensorflow_backend.TensorFlowBackend.svd = _svd_tf
class TensorFlowBackend(tensorflow_backend.TensorFlowBackend, ExtendedBackend):  # type: ignore
    """
    TensorFlow implementation of the extended backend interface.

    See the original backend API in tensornetwork's tensorflow backend.
    The module-level name ``tf`` is bound to the ``tensorflow`` module when
    an instance is constructed.
    """

    def __init__(self) -> None:
        """
        Import tensorflow lazily, bind it to the module global ``tf`` and
        configure the backend.

        :raises ImportError: if tensorflow is not installed.
        """
        global tf
        super(TensorFlowBackend, self).__init__()
        try:
            import tensorflow
        except ImportError as err:
            raise ImportError(
                "Tensorflow not installed, please switch to a "
                "different backend or install Tensorflow."
            ) from err
        tf = tensorflow
        # make ``sparse + other`` dispatch to tf.sparse.add
        tf.sparse.SparseTensor.__add__ = tf.sparse.add
        self.minor = int(tf.__version__.split(".")[1])
        self.name = "tensorflow"
        logger = tf.get_logger()  # .setLevel('ERROR')
        # ignore casting warning by logger
        logger.addFilter(lambda s: not re.match(".*You are casting.*", s.getMessage()))

    def eye(
        self, N: int, dtype: Optional[str] = None, M: Optional[int] = None
    ) -> Tensor:
        """Return an ``N x M`` identity-like matrix cast to ``dtype`` (module default if None)."""
        if dtype is None:
            dtype = dtypestr
        r = tf.eye(num_rows=N, num_columns=M)
        return self.cast(r, dtype)

    def ones(self, shape: Tuple[int, ...], dtype: Optional[str] = None) -> Tensor:
        """Return a tensor of ones with ``shape`` cast to ``dtype`` (module default if None)."""
        if dtype is None:
            dtype = dtypestr
        r = tf.ones(shape=shape)
        return self.cast(r, dtype)

    def zeros(self, shape: Tuple[int, ...], dtype: Optional[str] = None) -> Tensor:
        """Return a tensor of zeros with ``shape`` cast to ``dtype`` (module default if None)."""
        if dtype is None:
            dtype = dtypestr
        r = tf.zeros(shape=shape)
        return self.cast(r, dtype)

    def copy(self, a: Tensor) -> Tensor:
        """Return ``tf.identity(a)``."""
        return tf.identity(a)

    def expm(self, a: Tensor) -> Tensor:
        """Matrix exponential via ``tf.linalg.expm``."""
        return tf.linalg.expm(a)

    def sin(self, a: Tensor) -> Tensor:
        """Elementwise sine."""
        return tf.math.sin(a)

    def cos(self, a: Tensor) -> Tensor:
        """Elementwise cosine."""
        return tf.math.cos(a)

    def acos(self, a: Tensor) -> Tensor:
        """Elementwise inverse cosine."""
        return tf.math.acos(a)

    def acosh(self, a: Tensor) -> Tensor:
        """Elementwise inverse hyperbolic cosine."""
        return tf.math.acosh(a)

    def asin(self, a: Tensor) -> Tensor:
        """Elementwise inverse sine."""
        return tf.math.asin(a)

    def asinh(self, a: Tensor) -> Tensor:
        """Elementwise inverse hyperbolic sine."""
        return tf.math.asinh(a)

    def atan(self, a: Tensor) -> Tensor:
        """Elementwise inverse tangent."""
        return tf.math.atan(a)

    def atan2(self, y: Tensor, x: Tensor) -> Tensor:
        """Elementwise two-argument inverse tangent of ``y / x``."""
        return tf.math.atan2(y, x)

    def atanh(self, a: Tensor) -> Tensor:
        """Elementwise inverse hyperbolic tangent."""
        return tf.math.atanh(a)

    def cosh(self, a: Tensor) -> Tensor:
        """Elementwise hyperbolic cosine."""
        return tf.math.cosh(a)

    def tan(self, a: Tensor) -> Tensor:
        """Elementwise tangent."""
        return tf.math.tan(a)

    def tanh(self, a: Tensor) -> Tensor:
        """Elementwise hyperbolic tangent."""
        return tf.math.tanh(a)

    def sinh(self, a: Tensor) -> Tensor:
        """Elementwise hyperbolic sine."""
        return tf.math.sinh(a)

    def size(self, a: Tensor) -> Tensor:
        """Return ``tf.size(a)``, the number of elements of ``a``."""
        return tf.size(a)

    def eigvalsh(self, a: Tensor) -> Tensor:
        """Eigenvalues via ``tf.linalg.eigvalsh``."""
        return tf.linalg.eigvalsh(a)

    def dtype(self, a: Tensor) -> str:
        """Return the part of ``repr(a.dtype)`` after the last dot as a string."""
        return a.dtype.__repr__().split(".")[-1]  # type: ignore

    def kron(self, a: Tensor, b: Tensor) -> Tensor:
        """
        Kronecker product of two matrices.

        Consistency across backends is not guaranteed for arrays with more
        than two dimensions.
        """
        return tf.reshape(
            tf.reshape(a, [a.shape[0], 1, a.shape[1], 1])
            * tf.reshape(b, [1, b.shape[0], 1, b.shape[1]]),
            [a.shape[0] * b.shape[0], a.shape[1] * b.shape[1]],
        )

    def numpy(self, a: Tensor) -> Tensor:
        """
        Convert ``a`` to a host-side object.

        A sparse tensor is returned as a ``scipy.sparse.coo_matrix`` built
        from its first two index columns; anything else is returned through
        ``a.numpy()``, which is only valid in eager mode.
        """
        if self.is_sparse(a):
            return coo_matrix(
                (a.values, (a.indices[:, 0], a.indices[:, 1])), shape=a.get_shape()
            )
        return a.numpy()  # only valid in eager mode

    def i(self, dtype: Any = None) -> Tensor:
        """Return the imaginary unit as a constant of ``dtype`` (module default if falsy)."""
        if not dtype:
            dtype = getattr(tf, dtypestr)
        if isinstance(dtype, str):
            dtype = getattr(tf, dtype)
        return tf.constant(1j, dtype=dtype)

    def det(self, a: Tensor) -> Tensor:
        """Determinant via ``tf.linalg.det``."""
        return tf.linalg.det(a)

    def min(self, a: Tensor, axis: Optional[int] = None) -> Tensor:
        """Minimum of ``a`` along ``axis`` (all elements if None)."""
        return tf.reduce_min(a, axis=axis)

    def max(self, a: Tensor, axis: Optional[int] = None) -> Tensor:
        """Maximum of ``a`` along ``axis`` (all elements if None)."""
        return tf.reduce_max(a, axis=axis)

    def argmax(self, a: Tensor, axis: int = 0) -> Tensor:
        """Index of the maximum along ``axis``."""
        return tf.math.argmax(a, axis=axis)

    def argmin(self, a: Tensor, axis: int = 0) -> Tensor:
        """Index of the minimum along ``axis``."""
        return tf.math.argmin(a, axis=axis)

    def unique_with_counts(self, a: Tensor, **kws: Any) -> Tuple[Tensor, Tensor]:
        """
        Return the unique values of ``a`` in ascending order with their counts.

        Extra keyword arguments are accepted but not used.
        """
        r = tf.unique_with_counts(a)
        # tf reports unique values in order of appearance; sort them
        order = tf.argsort(r.y)
        return tf.gather(r.y, order), tf.gather(r.count, order)

    def stack(self, a: Sequence[Tensor], axis: int = 0) -> Tensor:
        """Stack tensors along a new ``axis``."""
        return tf.stack(a, axis=axis)

    def concat(self, a: Sequence[Tensor], axis: int = 0) -> Tensor:
        """Concatenate tensors along an existing ``axis``."""
        return tf.concat(a, axis=axis)

    def tile(self, a: Tensor, rep: Tensor) -> Tensor:
        """Tile ``a`` according to ``rep`` via ``tf.tile``."""
        return tf.tile(a, rep)

    def mean(
        self,
        a: Tensor,
        axis: Optional[Sequence[int]] = None,
        keepdims: bool = False,
    ) -> Tensor:
        """Mean of ``a`` over ``axis``."""
        return tf.math.reduce_mean(a, axis=axis, keepdims=keepdims)

    def std(
        self, a: Tensor, axis: Optional[Sequence[int]] = None, keepdims: bool = False
    ) -> Tensor:
        """Standard deviation of ``a`` over ``axis``."""
        return tf.math.reduce_std(a, axis=axis, keepdims=keepdims)

    def sigmoid(self, a: Tensor) -> Tensor:
        """Elementwise sigmoid."""
        return tf.nn.sigmoid(a)

    def relu(self, a: Tensor) -> Tensor:
        """Elementwise rectified linear unit."""
        return tf.nn.relu(a)

    def onehot(self, a: Tensor, num: int) -> Tensor:
        """One-hot encode ``a`` with depth ``num``."""
        return tf.one_hot(a, num)

    def softmax(self, a: Sequence[Tensor], axis: Optional[int] = None) -> Tensor:
        """
        Softmax of ``a``.

        With ``axis=None`` the input is flattened first so the softmax runs
        over all elements, and the result is reshaped back to ``a.shape``.
        """
        if axis is None:  # make the default behavior consistent
            r = tf.keras.activations.softmax(tf.reshape(a, [1, -1]), axis=axis)
            return tf.reshape(r, a.shape)  # type: ignore
        return tf.keras.activations.softmax(a, axis=axis)

    def cumsum(self, a: Tensor, axis: Optional[int] = None) -> Tensor:
        """Cumulative sum along ``axis``; the flattened tensor is used when ``axis`` is None."""
        if axis is None:
            a = tf.reshape(a, [-1])
            return tf.cumsum(a)
        return tf.cumsum(a, axis)

    def is_tensor(self, a: Any) -> bool:
        """Return True if ``a`` is a ``tf.Tensor`` or ``tf.Variable``."""
        if isinstance(a, tf.Tensor) or isinstance(a, tf.Variable):
            return True
        return False

    def abs(self, a: Tensor) -> Tensor:
        """Elementwise absolute value."""
        return tf.math.abs(a)

    def real(self, a: Tensor) -> Tensor:
        """Elementwise real part."""
        return tf.math.real(a)

    def imag(self, a: Tensor) -> Tensor:
        """Elementwise imaginary part."""
        return tf.math.imag(a)

    def cast(self, a: Tensor, dtype: str) -> Tensor:
        """Cast ``a`` to ``dtype``; a string is resolved as an attribute of ``tf``."""
        if isinstance(dtype, str):
            return tf.cast(a, dtype=getattr(tf, dtype))
        return tf.cast(a, dtype=dtype)

    def arange(self, start: int, stop: Optional[int] = None, step: int = 1) -> Tensor:
        """Range tensor; with a single argument it runs from 0 to ``start``."""
        if stop is None:
            return tf.range(start=0, limit=start, delta=step)
        return tf.range(start=start, limit=stop, delta=step)

    def mod(self, x: Tensor, y: Tensor) -> Tensor:
        """Elementwise ``x`` modulo ``y``."""
        return tf.math.mod(x, y)

    def right_shift(self, x: Tensor, y: Tensor) -> Tensor:
        """Elementwise bitwise right shift of ``x`` by ``y``."""
        return tf.bitwise.right_shift(x, y)

    def left_shift(self, x: Tensor, y: Tensor) -> Tensor:
        """Elementwise bitwise left shift of ``x`` by ``y``."""
        return tf.bitwise.left_shift(x, y)

    def solve(self, A: Tensor, b: Tensor, **kws: Any) -> Tensor:
        """
        Solve ``A x = b`` with ``tf.linalg.solve``.

        ``b`` is treated as a vector right-hand side whenever its last
        dimension equals the last dimension of ``A``: a trailing axis is then
        added for the solve and removed from the result. Extra keyword
        arguments are accepted but not used.
        """
        if b.shape[-1] == A.shape[-1]:
            b = b[..., tf.newaxis]
            vector = True
        else:
            vector = False
        x = tf.linalg.solve(A, b)
        if vector:
            return self.reshape(x, x.shape[:-1])
        return x

    def searchsorted(self, a: Tensor, v: Tensor, side: str = "left") -> Tensor:
        """Indices where ``v`` would be inserted into sorted ``a``."""
        return tf.searchsorted(a, v, side)

    def from_dlpack(self, a: Any) -> Tensor:
        """Build a tensor from a DLPack capsule."""
        return tf.experimental.dlpack.from_dlpack(a)

    def to_dlpack(self, a: Tensor) -> Any:
        """
        Export ``a`` as a DLPack capsule.

        Complex tensor support for dlpack is only available for tf>=2.9.
        """
        return tf.experimental.dlpack.to_dlpack(a)

    def set_random_state(
        self, seed: Optional[Union[int, RGenerator]] = None, get_only: bool = False
    ) -> Any:
        """
        Create (and by default install) a ``tf.random.Generator``.

        :param seed: None for a non-deterministic state, an int seed, or an
            object used directly as the generator.
        :param get_only: if True, the generator is returned without being
            stored on the backend as ``self.g``.
        :returns: the generator.
        """
        if seed is None:
            g = tf.random.Generator.from_non_deterministic_state()
        elif isinstance(seed, int):
            g = tf.random.Generator.from_seed(seed)
        else:
            g = seed
        if get_only is False:
            self.g = g
        return g

    def _rand_float_dtype(self, dtype: Any) -> Any:
        """
        Resolve the dtype argument of the stateful random samplers.

        A string is mapped by its last two characters ("32" or "64") to the
        matching tf float dtype; a non-string is returned unchanged.

        :raises ValueError: if a string does not end with "32" or "64".
        """
        if not isinstance(dtype, str):
            return dtype
        suffix = dtype[-2:]
        if suffix == "32":
            return tf.float32
        if suffix == "64":
            return tf.float64
        raise ValueError(
            "unsupported dtype %r, the dtype string should end with 32 or 64" % dtype
        )

    def stateful_randn(
        self,
        g: RGenerator,
        shape: Union[int, Sequence[int]] = 1,
        mean: float = 0,
        stddev: float = 1,
        dtype: str = "32",
    ) -> Tensor:
        """Draw normal samples of ``shape`` from generator ``g``."""
        dtyper = self._rand_float_dtype(dtype)
        if isinstance(shape, int):
            shape = (shape,)
        return g.normal(shape, mean, stddev, dtype=dtyper)

    def stateful_randu(
        self,
        g: RGenerator,
        shape: Union[int, Sequence[int]] = 1,
        low: float = 0,
        high: float = 1,
        dtype: str = "32",
    ) -> Tensor:
        """Draw uniform samples in ``[low, high)`` of ``shape`` from generator ``g``."""
        dtyper = self._rand_float_dtype(dtype)
        if isinstance(shape, int):
            shape = (shape,)
        return g.uniform(shape, minval=low, maxval=high, dtype=dtyper)

    def stateful_randc(
        self,
        g: RGenerator,
        a: Union[int, Sequence[int], Tensor],
        shape: Union[int, Sequence[int]],
        p: Optional[Union[Sequence[float], Tensor]] = None,
    ) -> Tensor:
        """Sample entries of ``a`` with weights ``p`` using generator ``g``."""
        return _random_choice_tf(g, a, shape, p)

    def gather1d(self, operand: Tensor, indices: Tensor) -> Tensor:
        """Gather entries of ``operand`` at ``indices``."""
        return tf.gather(operand, indices)

    def scatter(self, operand: Tensor, indices: Tensor, updates: Tensor) -> Tensor:
        """Return ``operand`` with ``updates`` written at ``indices``."""
        return tf.tensor_scatter_nd_update(operand, indices, updates)

    def coo_sparse_matrix(
        self, indices: Tensor, values: Tensor, shape: Tensor
    ) -> Tensor:
        """Build a ``tf.SparseTensor`` from COO data."""
        return tf.SparseTensor(indices=indices, values=values, dense_shape=shape)

    def sparse_dense_matmul(
        self,
        sp_a: Tensor,
        b: Tensor,
    ) -> Tensor:
        """Multiply sparse ``sp_a`` with dense ``b``."""
        return tf.sparse.sparse_dense_matmul(sp_a, b)

    def _densify(self) -> Tensor:
        """Return a ``jit_compile=True`` jitted function converting sparse to dense."""

        @partial(self.jit, jit_compile=True)
        def densify(sp_a: Tensor) -> Tensor:
            return tf.sparse.to_dense(sp_a)

        return densify

    def to_dense(self, sp_a: Tensor) -> Tensor:
        """Convert the sparse tensor ``sp_a`` to a dense tensor."""
        # very weirdly, at least for cpu, tf.sparse.to_dense only works within
        # tf.function(jit_compile=True) and will fail with tf.function or bare function;
        # on the contrary, tf.sparse.sparse_dense_matmul only fails within
        # tf.function(jit_compile=True) and works well with tf.function or bare function...
        return self._densify()(sp_a)

    def is_sparse(self, a: Tensor) -> bool:
        """Return True if ``a`` is a ``tf.SparseTensor``."""
        return isinstance(a, tf.SparseTensor)

    def cond(
        self,
        pred: bool,
        true_fun: Callable[[], Tensor],
        false_fun: Callable[[], Tensor],
    ) -> Tensor:
        """Conditional execution via ``tf.cond``."""
        return tf.cond(pred, true_fun, false_fun)

    def switch(self, index: Tensor, branches: Sequence[Callable[[], Tensor]]) -> Tensor:
        """Run the branch selected by ``index`` via ``tf.switch_case``."""
        return tf.switch_case(index, branches)

    def scan(
        self, f: Callable[[Tensor, Tensor], Tensor], xs: Tensor, init: Tensor
    ) -> Tensor:
        """Run ``tf.scan(f, xs, init)`` and return only the last accumulated value."""
        return tf.scan(f, xs, init)[-1]

    def device(self, a: Tensor) -> str:
        """Return the device of ``a`` in the short form produced by ``_dev2str``."""
        dev = a.device
        return self._dev2str(dev)

    def device_move(self, a: Tensor, dev: Any) -> Tensor:
        """Copy ``a`` onto device ``dev``."""
        if isinstance(dev, str):
            dev = self._str2dev(dev)
        with tf.device(dev):
            a = tf.identity(a)
        return a

    def _dev2str(self, dev: Any) -> str:
        """
        Convert a colon-separated device string into "cpu" or "gpu:<id>".

        :raises ValueError: if the platform field is neither "CPU" nor "GPU".
        """
        platform, id_ = dev.split(":")[-2:]
        if platform == "CPU":
            return "cpu"
        if platform == "GPU":
            return "gpu" + ":" + str(id_)
        raise ValueError("TensorFlowBackend don't support non-GPU/CPU device")

    def _str2dev(self, str_: str) -> Any:
        """Return the device string unchanged."""
        return str_

    def stop_gradient(self, a: Tensor) -> Tensor:
        """Block gradient flow through ``a``."""
        return tf.stop_gradient(a)

    def grad(
        self,
        f: Callable[..., Any],
        argnums: Union[int, Sequence[int]] = 0,
        has_aux: bool = False,
    ) -> Callable[..., Any]:
        """
        Return a function computing the gradient of ``f``.

        Note: tensorflow grad is the gradient while jax grad is the
        derivative; they differ by a conjugate, and the results are NOT made
        consistent by manually conjugating the returns.

        :param f: function to differentiate.
        :param argnums: position(s) of the argument(s) to differentiate.
        :param has_aux: if True, only ``f(...)[0]`` is differentiated and the
            remaining outputs are returned alongside the gradient.
        :returns: wrapper returning the gradient, or ``(gradient, aux)`` when
            ``has_aux`` is True.
        """

        def wrapper(*args: Any, **kws: Any) -> Any:
            with tf.GradientTape() as t:
                if isinstance(argnums, int):
                    x = args[argnums]
                else:
                    args = tuple(
                        [
                            tf.identity(arg) if i in argnums else arg
                            for i, arg in enumerate(args)
                        ]
                    )
                    # in case wrong grad for f(x, x)
                    x = [args[i] for i in argnums]
                # plain tensors are not tracked by the tape unless watched
                t.watch(x)
                y = f(*args, **kws)
                if has_aux:
                    g = t.gradient(y[0], x)
                else:
                    g = t.gradient(y, x)
            if has_aux:
                return (g, y[1:])
            return g

        return wrapper

    def value_and_grad(
        self,
        f: Callable[..., Any],
        argnums: Union[int, Sequence[int]] = 0,
        has_aux: bool = False,
    ) -> Callable[..., Tuple[Any, Any]]:
        """
        Return a function computing both ``f(...)`` and its gradient.

        :param f: function to differentiate.
        :param argnums: position(s) of the argument(s) to differentiate.
        :param has_aux: if True, only ``f(...)[0]`` is differentiated.
        :returns: wrapper returning ``(f(...), gradient)``.
        """

        def wrapper(*args: Any, **kws: Any) -> Any:
            with tf.GradientTape() as t:
                if isinstance(argnums, int):
                    x = args[argnums]
                else:
                    args = tuple(
                        [
                            tf.identity(arg) if i in argnums else arg
                            for i, arg in enumerate(args)
                        ]
                    )
                    x = [args[i] for i in argnums]
                # plain tensors are not tracked by the tape unless watched
                t.watch(x)
                y = f(*args, **kws)
                if has_aux:
                    g = t.gradient(y[0], x)
                else:
                    g = t.gradient(y, x)
            return y, g

        return wrapper

    def jvp(
        self,
        f: Callable[..., Any],
        inputs: Union[Tensor, Sequence[Tensor]],
        v: Union[Tensor, Sequence[Tensor]],
    ) -> Tuple[Union[Tensor, Sequence[Tensor]], Union[Tensor, Sequence[Tensor]]]:
        """
        Jacobian-vector product of ``f`` at ``inputs`` along tangents ``v``.

        :returns: ``(f(*inputs), jvp)`` computed with
            ``tf.autodiff.ForwardAccumulator``.
        """
        if not (isinstance(inputs, list) or isinstance(inputs, tuple)):
            # one input tensor
            inputs = [inputs]
        elif isinstance(inputs, list):
            inputs = [tf.identity(inp) for inp in inputs]
        else:  # inputs, tuple
            inputs = tuple([tf.identity(inp) for inp in inputs])
        if not (isinstance(v, list) or isinstance(v, tuple)):
            v = [v]
        elif isinstance(v, list):
            v = [tf.identity(vi) for vi in v]
        else:
            v = tuple([tf.identity(vi) for vi in v])
        with tf.autodiff.ForwardAccumulator(inputs, v) as t:
            y = f(*inputs)
        g = t.jvp(y)
        return y, g

    def vjp(
        self,
        f: Callable[..., Any],
        inputs: Union[Tensor, Sequence[Tensor]],
        v: Union[Tensor, Sequence[Tensor]],
    ) -> Tuple[Union[Tensor, Sequence[Tensor]], Union[Tensor, Sequence[Tensor]]]:
        """
        Vector-Jacobian product of ``f`` at ``inputs`` with cotangents ``v``.

        Missing gradients are replaced by zeros shaped like the matching
        input, and ``tf.IndexedSlices`` gradients are converted to dense
        tensors.

        :returns: ``(f(*inputs), vjp)``; the vjp is a single tensor when a
            single tensor input was given, otherwise a tuple.
        """
        if not (isinstance(inputs, list) or isinstance(inputs, tuple)):
            # one input tensor
            one_input = True
            inputs = [inputs]
        elif isinstance(inputs, list):
            inputs = [tf.identity(inp) for inp in inputs]
            one_input = False
        else:  # inputs tuple
            inputs = tuple([tf.identity(inp) for inp in inputs])
            one_input = False
        with tf.GradientTape() as t:
            # plain tensors are not tracked by the tape unless watched
            t.watch(inputs)
            y = f(*inputs)
        g = t.gradient(y, inputs, v)
        g = list(g)
        for i, gi in enumerate(g):
            if gi is None:
                g[i] = tf.zeros_like(inputs[i])
            if isinstance(gi, tf.IndexedSlices):
                # gradient can return sth weird
                # TODO(@refraction-ray): check whether other AD tf methods have such issues
                # shape is still unknown, dense_shape attr doesn't work?
                g[i] = tf.convert_to_tensor(gi)
        g = tuple(g)
        if one_input:
            g = g[0]
        return y, g

    def jit(
        self,
        f: Callable[..., Any],
        static_argnums: Optional[Union[int, Sequence[int]]] = None,
        jit_compile: Optional[bool] = None,
        **kws: Any
    ) -> Any:
        """
        Wrap ``f`` with ``tf.function``.

        ``static_argnums`` is not supported in the tf case; it and any extra
        keyword arguments are only accepted for a consistent interface.
        Unlike ``jax.jit`` there is no full jittable pytree support in tf:
        ``tf.function`` works with dict pytrees but fails at list pytrees.
        """
        if self.minor < 5:
            # older releases expose the option as ``experimental_compile``
            return tf.function(f, experimental_compile=jit_compile)
        return tf.function(f, jit_compile=jit_compile)

    # (a legacy, commented-out vmap implementation that predated the use of
    # pytree support in tf was dropped from here)

    def vmap(
        self, f: Callable[..., Any], vectorized_argnums: Union[int, Sequence[int]] = 0
    ) -> Any:
        """
        Vectorize ``f`` over the leading axis of the selected arguments
        using ``tf.vectorized_map``.

        :param f: function to vectorize.
        :param vectorized_argnums: position(s) of the batched argument(s).
        :returns: the vectorized function.
        """
        if isinstance(vectorized_argnums, int):
            vectorized_argnums = (vectorized_argnums,)
        if vectorized_argnums == (0,):  # fast shortcut

            def wrapper(*args: Any, **kws: Any) -> Tensor:
                def pf(x: Tensor) -> Tensor:
                    return f(x, *args[1:], **kws)

                return tf.vectorized_map(pf, args[0])

        else:
            # not jitted here, otherwise vectorized_map complains about retracing
            def wrapper(*args: Any, **kws: Any) -> Tensor:
                def sf(sarg: Any) -> Any:
                    # rebuild the full argument list from batched and fixed args
                    vvargs = []
                    j = 0
                    for i in range(len(args)):
                        if i in vectorized_argnums:  # type: ignore
                            vvargs.append(sarg[j])
                            j += 1
                        else:
                            vvargs.append(args[i])
                    return f(*vvargs, **kws)

                sarg = []
                for i in vectorized_argnums:  # type: ignore
                    sarg.append(args[i])
                return tf.vectorized_map(sf, sarg)

        return wrapper

    def vectorized_value_and_grad(
        self,
        f: Callable[..., Any],
        argnums: Union[int, Sequence[int]] = 0,
        vectorized_argnums: Union[int, Sequence[int]] = 0,
        has_aux: bool = False,
    ) -> Callable[..., Tuple[Any, Any]]:
        """
        Return a function evaluating the vectorized ``f`` and its gradient.

        :param f: function to vectorize and differentiate.
        :param argnums: position(s) of the argument(s) to differentiate.
        :param vectorized_argnums: position(s) of the batched argument(s).
        :param has_aux: if True, only the first output is differentiated.
        :returns: wrapper returning ``(values, gradient)``.
        """
        # note how tf only works in this order (vmap first, gradient outside),
        # due to an upstream bug (the report link is not preserved in this copy)
        vf = self.vmap(f, vectorized_argnums=vectorized_argnums)

        def wrapper(
            *args: Any, **kws: Any
        ) -> Tuple[Tensor, Union[Tensor, Tuple[Tensor, ...]]]:
            with tf.GradientTape() as tape:
                if isinstance(argnums, int):
                    x = args[argnums]
                else:
                    x = [args[i] for i in argnums]
                # plain tensors are not tracked by the tape unless watched
                tape.watch(x)
                vs = vf(*args, **kws)
            if has_aux:
                grad = tape.gradient(vs[0], x)
            else:
                grad = tape.gradient(vs, x)
            return vs, grad

        return wrapper

    vvag = vectorized_value_and_grad

    optimizer = keras_optimizer