"""Sparse DIAgonal format""" from __future__ import division, print_function, absolute_import __docformat__ = "restructuredtext en" __all__ = ['dia_matrix', 'isspmatrix_dia'] import numpy as np from .base import isspmatrix, _formats, spmatrix from .data import _data_matrix from .sputils import (isshape, upcast_char, getdtype, get_index_dtype, get_sum_dtype, validateaxis) from ._sparsetools import dia_matvec class dia_matrix(_data_matrix): """Sparse matrix with DIAgonal storage This can be instantiated in several ways: dia_matrix(D) with a dense matrix dia_matrix(S) with another sparse matrix S (equivalent to S.todia()) dia_matrix((M, N), [dtype]) to construct an empty matrix with shape (M, N), dtype is optional, defaulting to dtype='d'. dia_matrix((data, offsets), shape=(M, N)) where the ``data[k,:]`` stores the diagonal entries for diagonal ``offsets[k]`` (See example below) Attributes ---------- dtype : dtype Data type of the matrix shape : 2-tuple Shape of the matrix ndim : int Number of dimensions (this is always 2) nnz Number of nonzero elements data DIA format data array of the matrix offsets DIA format offset array of the matrix Notes ----- Sparse matrices can be used in arithmetic operations: they support addition, subtraction, multiplication, division, and matrix power. Examples -------- >>> import numpy as np >>> from scipy.sparse import dia_matrix >>> dia_matrix((3, 4), dtype=np.int8).toarray() array([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], dtype=int8) >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) >>> offsets = np.array([0, -1, 2]) >>> dia_matrix((data, offsets), shape=(4, 4)).toarray() array([[1, 0, 3, 0], [1, 2, 0, 4], [0, 2, 3, 0], [0, 0, 3, 4]]) """ format = 'dia' def __init__(self, arg1, shape=None, dtype=None, copy=False): _data_matrix.__init__(self) if isspmatrix_dia(arg1): if copy: arg1 = arg1.copy() self.data = arg1.data self.offsets = arg1.offsets self.shape = arg1.shape elif isspmatrix(arg1): if isspmatrix_dia(arg1) and copy: A = arg1.copy() else: A = arg1.todia() self.data = A.data self.offsets = A.offsets self.shape = A.shape elif isinstance(arg1, tuple): if isshape(arg1): # It's a tuple of matrix dimensions (M, N) # create empty matrix self.shape = arg1 # spmatrix checks for errors here self.data = np.zeros((0,0), getdtype(dtype, default=float)) idx_dtype = get_index_dtype(maxval=max(self.shape)) self.offsets = np.zeros((0), dtype=idx_dtype) else: try: # Try interpreting it as (data, offsets) data, offsets = arg1 except: raise ValueError('unrecognized form for dia_matrix constructor') else: if shape is None: raise ValueError('expected a shape argument') self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy)) self.offsets = np.atleast_1d(np.array(arg1[1], dtype=get_index_dtype(maxval=max(shape)), copy=copy)) self.shape = shape else: #must be dense, convert to COO first, then to DIA try: arg1 = np.asarray(arg1) except: raise ValueError("unrecognized form for" " %s_matrix constructor" % self.format) from .coo import coo_matrix A = coo_matrix(arg1, dtype=dtype, shape=shape).todia() self.data = A.data self.offsets = A.offsets self.shape = A.shape if dtype is not None: self.data = self.data.astype(dtype) #check format if self.offsets.ndim != 1: raise ValueError('offsets array must have rank 1') if self.data.ndim != 2: raise ValueError('data array must have rank 2') if self.data.shape[0] != len(self.offsets): raise ValueError('number of diagonals (%d) ' 'does not match the number of offsets (%d)' % (self.data.shape[0], len(self.offsets))) if len(np.unique(self.offsets)) != len(self.offsets): raise ValueError('offset array contains duplicate values') def __repr__(self): format = _formats[self.getformat()][1] return "<%dx%d sparse matrix of type '%s'\n" \ "\twith %d stored elements (%d diagonals) in %s format>" % \ (self.shape + (self.dtype.type, self.nnz, self.data.shape[0], format)) def _data_mask(self): """Returns a mask of the same shape as self.data, where mask[i,j] is True when data[i,j] corresponds to a stored element.""" num_rows, num_cols = self.shape offset_inds = np.arange(self.data.shape[1]) row = offset_inds - self.offsets[:,None] mask = (row >= 0) mask &= (row < num_rows) mask &= (offset_inds < num_cols) return mask def count_nonzero(self): mask = self._data_mask() return np.count_nonzero(self.data[mask]) def getnnz(self, axis=None): if axis is not None: raise NotImplementedError("getnnz over an axis is not implemented " "for DIA format") M,N = self.shape nnz = 0 for k in self.offsets: if k > 0: nnz += min(M,N-k) else: nnz += min(M+k,N) return int(nnz) getnnz.__doc__ = spmatrix.getnnz.__doc__ count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__ def sum(self, axis=None, dtype=None, out=None): validateaxis(axis) if axis is not None and axis < 0: axis += 2 res_dtype = get_sum_dtype(self.dtype) num_rows, num_cols = self.shape ret = None if axis == 0: mask = self._data_mask() x = (self.data * mask).sum(axis=0) if x.shape[0] == num_cols: res = x else: res = np.zeros(num_cols, dtype=x.dtype) res[:x.shape[0]] = x ret = np.matrix(res, dtype=res_dtype) else: row_sums = np.zeros(num_rows, dtype=res_dtype) one = np.ones(num_cols, dtype=res_dtype) dia_matvec(num_rows, num_cols, len(self.offsets), self.data.shape[1], self.offsets, self.data, one, row_sums) row_sums = np.matrix(row_sums) if axis is None: return row_sums.sum(dtype=dtype, out=out) if axis is not None: row_sums = row_sums.T ret = np.matrix(row_sums.sum(axis=axis)) if out is not None and out.shape != ret.shape: raise ValueError("dimensions do not match") return ret.sum(axis=(), dtype=dtype, out=out) sum.__doc__ = spmatrix.sum.__doc__ def _mul_vector(self, other): x = other y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, x.dtype.char)) L = self.data.shape[1] M,N = self.shape dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel()) return y def _mul_multimatrix(self, other): return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T]) def _setdiag(self, values, k=0): M, N = self.shape if values.ndim == 0: # broadcast values_n = np.inf else: values_n = len(values) if k < 0: n = min(M + k, N, values_n) min_index = 0 max_index = n else: n = min(M, N - k, values_n) min_index = k max_index = k + n if values.ndim != 0: # allow also longer sequences values = values[:n] if k in self.offsets: self.data[self.offsets == k, min_index:max_index] = values else: self.offsets = np.append(self.offsets, self.offsets.dtype.type(k)) m = max(max_index, self.data.shape[1]) data = np.zeros((self.data.shape[0]+1, m), dtype=self.data.dtype) data[:-1,:self.data.shape[1]] = self.data data[-1, min_index:max_index] = values self.data = data def todia(self, copy=False): if copy: return self.copy() else: return self todia.__doc__ = spmatrix.todia.__doc__ def transpose(self, axes=None, copy=False): if axes is not None: raise ValueError(("Sparse matrices do not support " "an 'axes' parameter because swapping " "dimensions is the only logical permutation.")) num_rows, num_cols = self.shape max_dim = max(self.shape) # flip diagonal offsets offsets = -self.offsets # re-align the data matrix r = np.arange(len(offsets), dtype=np.intc)[:, None] c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None] pad_amount = max(0, max_dim-self.data.shape[1]) data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount), dtype=self.data.dtype))) data = data[r, c] return dia_matrix((data, offsets), shape=( num_cols, num_rows), copy=copy) transpose.__doc__ = spmatrix.transpose.__doc__ def diagonal(self): idx, = np.where(self.offsets == 0) n = min(self.shape) if idx.size == 0: return np.zeros(n, dtype=self.data.dtype) return self.data[idx[0],:n] diagonal.__doc__ = spmatrix.diagonal.__doc__ def tocsc(self, copy=False): from .csc import csc_matrix if self.nnz == 0: return csc_matrix(self.shape, dtype=self.dtype) num_rows, num_cols = self.shape num_offsets, offset_len = self.data.shape offset_inds = np.arange(offset_len) row = offset_inds - self.offsets[:,None] mask = (row >= 0) mask &= (row < num_rows) mask &= (offset_inds < num_cols) mask &= (self.data != 0) idx_dtype = get_index_dtype(maxval=max(self.shape)) indptr = np.zeros(num_cols + 1, dtype=idx_dtype) indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)) indptr[offset_len+1:] = indptr[offset_len] indices = row.T[mask.T].astype(idx_dtype, copy=False) data = self.data.T[mask.T] return csc_matrix((data, indices, indptr), shape=self.shape, dtype=self.dtype) tocsc.__doc__ = spmatrix.tocsc.__doc__ def tocoo(self, copy=False): num_rows, num_cols = self.shape num_offsets, offset_len = self.data.shape offset_inds = np.arange(offset_len) row = offset_inds - self.offsets[:,None] mask = (row >= 0) mask &= (row < num_rows) mask &= (offset_inds < num_cols) mask &= (self.data != 0) row = row[mask] col = np.tile(offset_inds, num_offsets)[mask.ravel()] data = self.data[mask] from .coo import coo_matrix A = coo_matrix((data,(row,col)), shape=self.shape, dtype=self.dtype) A.has_canonical_format = True return A tocoo.__doc__ = spmatrix.tocoo.__doc__ # needed by _data_matrix def _with_data(self, data, copy=True): """Returns a matrix with the same sparsity structure as self, but with different data. By default the structure arrays are copied. """ if copy: return dia_matrix((data, self.offsets.copy()), shape=self.shape) else: return dia_matrix((data,self.offsets), shape=self.shape) def isspmatrix_dia(x): return isinstance(x, dia_matrix)