def file_to_subset_setup(request):
    """Create the NetCDF file used by the subsetting tests.

    Writes three features to ``_file_to_subset``: the ``feature_id``
    variable, a ``streamflow`` variable whose middle value is explicitly
    masked, and an additional integer variable ``extra_var``. A finalizer
    that deletes the file is registered on ``request``.

    Parameters
    ----------
    request
        Presumably the pytest fixture request object; only its
        ``addfinalizer`` method is used.
    """
    ids = [2, 4, 6]
    date = '2017-04-29_00:00:00'
    flows = ma.masked_array([3.1, -9999.0, 5.0], mask=[0, 1, 0])  # explicit mask
    with Dataset(_file_to_subset, 'w') as nc:
        nc.model_output_valid_time = date
        nc.createDimension('feature_id', 3)
        id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
        id_var[:] = ids
        # NOTE(review): the trailing argument of this call was missing;
        # -9999.0 is assumed as the fill value because the fixtures use it as
        # the "missing" sentinel -- confirm.
        flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                     fill_value=-9999.0)
        flow_var[:] = flows
        extra_var = nc.createVariable('extra_var', 'i', ('feature_id',))
        extra_var[:] = [1, 2, 3]

    def file_to_subset_teardown():
        # Local import keeps this block independent of the file's imports.
        import os
        os.remove(_file_to_subset)

    request.addfinalizer(file_to_subset_teardown)
def files_to_cube_setup(request):
    """Create one NetCDF file per entry of ``_files_to_cube``.

    File ``i`` gets the valid time ``2017-04-29_0<i>:00:00`` and the flows
    ``_flows_template * (i + 1)``. The second file marks a value as missing
    with the raw sentinel -9999.0, the third with an explicit mask. A
    finalizer that deletes all files is registered on ``request``.

    Parameters
    ----------
    request
        Presumably the pytest fixture request object; only its
        ``addfinalizer`` method is used.
    """
    date_template = '2017-04-29_0{0}:00:00'
    for i, nc_file in enumerate(_files_to_cube):
        date = date_template.format(i)
        flows = [flow * (i + 1) for flow in _flows_template]
        if i == 1:
            flows[1] = -9999.0  # one way of masking data
        elif i == 2:
            flows = ma.masked_array(flows, mask=[0, 1, 0])  # explicit mask
        with Dataset(nc_file, 'w') as nc:
            nc.model_output_valid_time = date
            nc.createDimension('feature_id', 3)
            id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
            id_var[:] = _ids
            # NOTE(review): the trailing argument of this call was missing;
            # fill_value=-9999.0 is assumed so that the raw sentinel written
            # for i == 1 reads back as masked -- confirm.
            flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                         fill_value=-9999.0)
            flow_var[:] = flows

    def files_to_cube_teardown():
        # Local import keeps this block independent of the file's imports.
        import os
        for nc_file in _files_to_cube:
            os.remove(nc_file)

    request.addfinalizer(files_to_cube_teardown)
def file_to_read_streamflow_setup(request):
    """Create the NetCDF file used by the streamflow-reading tests.

    Writes three features to ``_file_to_read_streamflow`` with the valid
    time ``2017-04-29_04:00:00`` and a ``streamflow`` variable whose middle
    value is explicitly masked. A finalizer that deletes the file is
    registered on ``request``.

    Parameters
    ----------
    request
        Presumably the pytest fixture request object; only its
        ``addfinalizer`` method is used.
    """
    ids = [2, 4, 6]
    date = '2017-04-29_04:00:00'
    flows = ma.masked_array([1.3, -9999.0, 5.1], mask=[0, 1, 0])  # explicit mask
    with Dataset(_file_to_read_streamflow, 'w') as nc:
        nc.model_output_valid_time = date
        nc.createDimension('feature_id', 3)
        id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
        id_var[:] = ids
        # NOTE(review): the trailing argument of this call was missing;
        # -9999.0 is assumed as the fill value because the fixtures use it as
        # the "missing" sentinel -- confirm.
        flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                     fill_value=-9999.0)
        flow_var[:] = flows

    def file_to_read_streamflow_teardown():
        # Local import keeps this block independent of the file's imports.
        import os
        os.remove(_file_to_read_streamflow)

    request.addfinalizer(file_to_read_streamflow_teardown)
def resample(self):
        """Resample entries of the label array ``self.z`` in place.

        ``self.z`` is rewrapped as a masked array. For each visited index the
        label is masked out, a log score is formed for every occupied
        component (log of its count plus the observation model's log
        predictive) and one more for a new component (log ``alpha_0`` plus
        the log marginal likelihood), and an index is drawn from those
        scores. The last index means "open a new component"; any other index
        selects the corresponding occupied label.
        """
        log_alpha, obs = np.log(self.alpha_0), self.obs_distn
        self.z = ma.masked_array(self.z, mask=np.zeros(self.z.shape))
        model = self.model

        # NOTE(review): as written the sweep only visits index 0 and hands the
        # literal list ``[n]`` to the observation model; both look like
        # truncated expressions (per-datum arguments?) -- confirm with the
        # caller before relying on this.
        for n in np.random.permutation([0]):
            # mask out n
            self.z.mask[n] = True

            # form the scores and sample them
            ks = list(model._get_occupied())
            occupied_scores = [
                np.log(model._get_counts(k))
                + obs.log_predictive([n], model._get_data_withlabel(k))
                for k in ks]
            new_score = log_alpha + obs.log_marginal_likelihood([n])
            scores = np.array(occupied_scores + [new_score])

            idx = sample_discrete_from_log(scores)
            # note: the mask gets fixed by assigning into the array
            if idx == scores.shape[0] - 1:
                self.z[n] = self._new_label(ks)
            else:
                # Without this ``else`` the new-label case fell through to
                # ``ks[idx]`` with idx == len(ks), which is out of range.
                self.z[n] = ks[idx]
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def maskoceans(lonsin,latsin,datain,inlands=True,resolution='l',grid=5):
    """
    mask data (``datain``), defined on a grid with latitudes ``latsin``
    longitudes ``lonsin`` so that points over water will not be plotted.

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lonsin, latsin   rank-2 arrays containing longitudes and latitudes of
                     grid.
    datain           rank-2 input array on grid defined by ``lonsin`` and
                     ``latsin``.
    inlands          if False, masked only ocean points and not inland
                     lakes (Default True).
    resolution       gshhs coastline resolution used to define land/sea
                     mask (default 'l', available 'c','l','i','h' or 'f')
    grid             land/sea mask grid spacing in minutes (Default 5;
                     10, 2.5 and 1.25 are also available).
    ==============   ====================================================

    returns a masked array the same shape as datain with "wet" points masked.
    """
    # read in land/sea mask.
    # NOTE(review): the right-hand side of this assignment was missing. It is
    # assumed to be the module's private land/sea-mask reader taking the
    # three otherwise-unused arguments -- confirm the helper name and
    # keywords against the rest of the module.
    lsmask_lons, lsmask_lats, lsmask = _readlsmask(
        lakes=inlands, resolution=resolution, grid=grid)
    # nearest-neighbor interpolation to output grid.
    lsmasko = interp(lsmask,lsmask_lons,lsmask_lats,lonsin,latsin,masked=True,order=0)
    # mask input data: a zero in the interpolated mask marks a point to hide.
    mask = lsmasko == 0
    return ma.masked_array(datain,mask=mask)
def maskoceans(lonsin,latsin,datain,inlands=True,resolution='l',grid=5):
    """
    mask data (``datain``), defined on a grid with latitudes ``latsin``
    longitudes ``lonsin`` so that points over water will not be plotted.

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lonsin, latsin   rank-2 arrays containing longitudes and latitudes of
                     grid.
    datain           rank-2 input array on grid defined by ``lonsin`` and
                     ``latsin``.
    inlands          if False, masked only ocean points and not inland
                     lakes (Default True).
    resolution       gshhs coastline resolution used to define land/sea
                     mask (default 'l', available 'c','l','i','h' or 'f')
    grid             land/sea mask grid spacing in minutes (Default 5;
                     10, 2.5 and 1.25 are also available).
    ==============   ====================================================

    returns a masked array the same shape as datain with "wet" points masked.
    """
    # read in land/sea mask.
    # NOTE(review): the right-hand side of this assignment was missing. It is
    # assumed to be the module's private land/sea-mask reader taking the
    # three otherwise-unused arguments -- confirm the helper name and
    # keywords against the rest of the module.
    lsmask_lons, lsmask_lats, lsmask = _readlsmask(
        lakes=inlands, resolution=resolution, grid=grid)
    # nearest-neighbor interpolation to output grid.
    lsmasko = interp(lsmask,lsmask_lons,lsmask_lats,lonsin,latsin,masked=True,order=0)
    # mask input data: a zero in the interpolated mask marks a point to hide.
    mask = lsmasko == 0
    return ma.masked_array(datain,mask=mask)
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def predict(self, X, quantile=None):
        """
        Predict regression value for X.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32``; sparse input is accepted in CSC format.

        quantile : int, optional
            Value ranging from 0 to 100. By default, the mean is returned.

        Returns
        -------
        y : array of shape = [n_samples]
            If quantile is set to None, then return E(Y | X). Else return
            y such that F(Y=y | x) = quantile.
        """
        # apply method requires X to be of dtype np.float32
        X = check_array(X, dtype=np.float32, accept_sparse="csc")
        if quantile is None:
            return super(BaseForestQuantileRegressor, self).predict(X)

        sorter = np.argsort(self.y_train_)
        X_leaves = self.apply(X)
        quantiles = np.zeros((X.shape[0]))
        for i, x_leaf in enumerate(X_leaves):
            # Hide every stored training weight whose recorded leaf differs
            # from the leaf this sample reached in the corresponding row of
            # ``y_train_leaves_``; the remaining weights are summed over
            # axis 0 to give one weight per training target.
            mask = self.y_train_leaves_ != np.expand_dims(x_leaf, 1)
            x_weights = ma.masked_array(self.y_weights_, mask)
            weights = x_weights.sum(axis=0)
            quantiles[i] = weighted_percentile(
                self.y_train_, quantile, weights, sorter)
        return quantiles
def SGD(x):
    """Run the SGD updates for one block and return the updated factors.

    Parameters
    ----------
    x : iterable
        Elements of the form ``(row_block_id, (v_iter, w_iter, h_iter))``.
        Only the iterables of the last element are used. ``w_iter`` and
        ``h_iter`` yield ``(index, vector)`` pairs and ``v_iter`` yields
        ``(i, j)`` index pairs.

    Returns
    -------
    tuple
        Items of the form ``(('W', i), (i, w_i))`` and
        ``(('H', j), (j, h_j))`` for every row and column vector received.

    Notes
    -----
    Reads the module-level ``step_size``, ``reg`` and ``mask`` objects
    through their ``.value`` attribute and increments the module-level
    ``mse`` and ``n_updates_acc``.
    """
    global n_updates_acc
    global mse
    # Start from empty iterables so that an empty ``x`` returns an empty
    # result instead of raising NameError below.
    v_iter = w_iter = h_iter = ()
    for val in x:
        v_iter = val[1][0]
        w_iter = val[1][1]
        h_iter = val[1][2]
    # dictionaries to store W and H
    w = {xw[0]: xw[1] for xw in w_iter}
    h = {xh[0]: xh[1] for xh in h_iter}
    # go through V and update W and H
    for v_ij in v_iter:
        i, j = v_ij
        # get row and column
        w_i = w[i]
        h_j = h[j]
        # calculate error
        # NOTE(review): the prediction term was garbled in the source; the
        # dot product of the two factor vectors is assumed. The constant 5
        # is used as the target value for every entry -- confirm both.
        error = 5 - np.dot(w_i, h_j)
        # increment MSE
        mse += error**2
        # gradients with L2 loss
        # NOTE(review): the next three values are computed but never used;
        # in particular the masked update is not applied -- confirm intent.
        h_update = step_size.value*(-2*error*w_i + 2.0*reg.value*h_j)
        h_update_mx = ma.masked_array(h_update, mask.value)
        w_update = step_size.value*(-2*error*h_j + 2.0*reg.value*w_i)
        # dictionary values are updated in place; note that the w_i update
        # sees the already-updated h_j
        h_j -= step_size.value*(-2*error*w_i + 2.0*reg.value*h_j)
        w_i -= step_size.value*(-2*error*h_j + 2.0*reg.value*w_i)
        # increment num updates
        n_updates_acc += 1
    # must massage results in something that will return properly
    output = {}
    for row_index in w:
        output[('W', row_index)] = (row_index, w[row_index])
    for col_index in h:
        output[('H', col_index)] = (col_index, h[col_index])
    # return iterator of updated W and H
    return tuple(output.items())
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def test_extract_overlimit():
    """Test a request that goes beyond the limits of the database."""
    db = WOA()
    where = dict(lat=17.5, lon=-37.5)

    def fetch(depth):
        # Same query each time; only the requested depth varies.
        return db['sea_water_temperature'].extract(
            var='t_mn', doy=136.875, depth=depth, **where)

    # A single depth of 5502 must come back masked.
    too_deep = fetch(5502)
    assert ma.is_masked(too_deep['t_mn'])

    # With depths [10, 5502] only the second value is masked.
    mixed = fetch([10, 5502])
    expected = ma.masked_array([24.62145996, 0], mask=[False, True])
    assert np.all(mixed['t_mn'].mask == [False, True])
    assert ma.allclose(mixed['t_mn'], expected)
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def update_data(self):
        """Bin the first two columns of the observed variable into a 2D histogram.

        The variable named by ``self._variable`` is read from ``self._sim``.
        Rows are dropped when the matching position lies on or outside the
        bounds in ``self._sub_domain`` (if set) or when the matching entry of
        ``self._sim.types`` differs from ``self._particle_type`` (if set).
        The remaining rows are histogrammed with ``self._nr_of_bins`` and
        ``self._hist_range``; the bin edges are stored in ``self._x_edges``
        and ``self._y_edges``.

        With ``self._window`` set, the histogram is appended to
        ``self._dataHistory``, the history is trimmed to at most
        ``self._window`` entries and ``self._histogram_array`` becomes their
        sum. Otherwise the histogram is added onto ``self._histogram_array``.
        """
        var = getattr(self._sim, self._variable)[:, 0:2]

        mask = None
        if self._sub_domain:
            pos = self._sim.positions
            mask_x = np.logical_or(pos[:, 0] <= self._sub_domain[0][0],
                                   pos[:, 0] >= self._sub_domain[0][1])
            mask_y = np.logical_or(pos[:, 1] <= self._sub_domain[1][0],
                                   pos[:, 1] >= self._sub_domain[1][1])
            mask = np.logical_or(mask_x, mask_y)
        if self._particle_type is not None:
            wrong_type = (self._sim.types != self._particle_type)
            # Combine with the sub-domain mask when there is one; previously
            # the type filter was skipped whenever a sub-domain was set.
            if mask is None:
                mask = wrong_type
            else:
                mask = np.logical_or(mask, wrong_type)

        if mask is not None:
            # Keep only the rows that are not masked out.
            var = var[np.logical_not(mask)]

        hist, self._x_edges, self._y_edges = np.histogram2d(
            var[:, 0], var[:, 1],
            bins=self._nr_of_bins, range=self._hist_range)
        if self._window is not None:
            self._dataHistory.append(hist)
            if len(self._dataHistory) > self._window:
                del self._dataHistory[0]
            self._histogram_array = sum(self._dataHistory)
        else:
            self._histogram_array += hist
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def test_record_array_with_object_field():
    """Regression test for Trac #1839.

    Builds a masked record array with an object-dtype field and fetches one
    record from it.
    """
    # ``object`` replaces the ``np.object`` alias, which NumPy deprecated in
    # 1.20 and removed in 1.24.
    y = ma.masked_array(
        [(1, '2'), (3, '4')],
        mask=[(0, 0), (0, 1)],
        dtype=[('a', int), ('b', object)])
    # getting an item used to fail
    y[1]
def _dense_fit(self, X, strategy, missing_values, axis):
        """Fit the transformer on dense data.

        Parameters
        ----------
        X : array-like
            Input data; validated with ``check_array`` (non-finite values
            allowed).
        strategy : str
            One of ``"mean"``, ``"median"`` or ``"most_frequent"``.
        missing_values
            Placeholder handed to ``_get_mask`` to locate missing entries.
        axis : int
            Axis along which the statistic is computed.

        Returns
        -------
        ndarray or None
            The statistic per slice along ``axis``, with ``np.nan`` where
            the mean/median result is masked. ``None`` is returned for an
            unrecognised ``strategy``.
        """
        X = check_array(X, force_all_finite=False)
        mask = _get_mask(X, missing_values)
        masked_X = ma.masked_array(X, mask=mask)

        # Mean
        if strategy == "mean":
            mean_masked = ma.mean(masked_X, axis=axis)
            # Avoid the warning "Warning: converting a masked element to nan."
            mean = ma.getdata(mean_masked)
            mean[ma.getmaskarray(mean_masked)] = np.nan

            return mean

        # Median
        elif strategy == "median":
            if tuple(int(v) for v in np.__version__.split('.')[:2]) < (1, 5):
                # In old versions of numpy, calling a median on an array
                # containing nans returns nan. This is different in
                # recent versions of numpy, which we want to mimic
                masked_X.mask = np.logical_or(masked_X.mask,
                                              np.isnan(X))
            median_masked = ma.median(masked_X, axis=axis)
            # Avoid the warning "Warning: converting a masked element to nan."
            median = ma.getdata(median_masked)
            median[ma.getmaskarray(median_masked)] = np.nan

            return median

        # Most frequent
        elif strategy == "most_frequent":
            # scipy.stats.mstats.mode cannot be used because it will not work
            # properly if the first element is masked and if its frequency
            # is equal to the frequency of the most frequent valid element

            # To be able access the elements by columns
            if axis == 0:
                X = X.transpose()
                mask = mask.transpose()

            most_frequent = np.empty(X.shape[0])

            for i, (row, row_mask) in enumerate(zip(X, mask)):
                # ``bool`` replaces the ``np.bool`` alias removed in
                # NumPy 1.24; keep only the entries that are not missing.
                keep = np.logical_not(row_mask).astype(bool)
                most_frequent[i] = _most_frequent(row[keep], np.nan, 0)

            return most_frequent
def file_to_combine_setup(request):
    """Build the combined NetCDF files used by the combine tests.

    Two sets of three temporary input files are written: one where every
    file lists the feature ids in the same order, and one where the second
    file lists ids (and flows) in reverse. Each set is merged with
    ``nwm_subset.combine_files`` into ``_ids_in_order_nc`` and
    ``_ids_not_in_order_nc`` respectively, after which the inputs are
    deleted. A finalizer that deletes the two combined files is registered
    on ``request``.

    Parameters
    ----------
    request
        Presumably the pytest fixture request object; only its
        ``addfinalizer`` method is used.
    """
    # Local import keeps this block independent of the file's imports.
    import os

    file_pattern = 'combine_me_comids_{0}consistent{1}.nc'
    tempdir = tempfile.gettempdir()
    consistent_id_order = [join(tempdir, file_pattern.format('', i))
                           for i in range(3)]
    inconsistent_id_order = [join(tempdir, file_pattern.format('in', i))
                             for i in range(3)]
    ids = [2, 4, 6, 8]
    flows_template = [3.1, 2.2, 5.0, 7.1]
    date_template = '2017-04-29_0{0}:00:00'
    for i, nc_file in enumerate(consistent_id_order):
        date = date_template.format(i)
        flows = [flow * (i + 1) for flow in flows_template]
        if i == 1:
            flows[1] = -9999.0  # one way of masking data
        elif i == 2:
            flows = ma.masked_array(flows, mask=[0, 1, 0, 0])  # explicit mask
        with Dataset(nc_file, 'w') as nc:
            nc.model_output_valid_time = date
            nc.createDimension('feature_id', 4)
            id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
            id_var[:] = ids
            # NOTE(review): the trailing argument of this call was missing;
            # fill_value=-9999.0 is assumed so that the raw sentinel written
            # for i == 1 reads back as masked -- confirm.
            flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                         fill_value=-9999.0)
            flow_var[:] = flows
    nwm_subset.combine_files(consistent_id_order, _ids_in_order_nc)

    for i, nc_file in enumerate(inconsistent_id_order):
        date = date_template.format(i)
        flows = [flow * (i + 1) for flow in flows_template]
        if i == 1:
            # Reverse ids and flows together so each flow stays with its id.
            comids = ids[::-1]
            flows = flows[::-1]
        else:
            comids = ids
        with Dataset(nc_file, 'w') as nc:
            nc.model_output_valid_time = date
            nc.createDimension('feature_id', 4)
            id_var = nc.createVariable('feature_id', 'i', ('feature_id',))
            id_var[:] = comids
            # NOTE(review): fill value assumed, as above -- confirm.
            flow_var = nc.createVariable('streamflow', 'f', ('feature_id',),
                                         fill_value=-9999.0)
            flow_var[:] = flows
    nwm_subset.combine_files(inconsistent_id_order, _ids_not_in_order_nc,
                             river_ids=[2], consistent_id_order=False)

    # The inputs are no longer needed once they have been combined.
    # NOTE(review): the loop body and the teardown body were missing from the
    # source; file removal is assumed from the name ``delete_me`` -- confirm.
    delete_me = consistent_id_order + inconsistent_id_order
    for filename in delete_me:
        os.remove(filename)

    def file_to_combine_teardown():
        os.remove(_ids_in_order_nc)
        os.remove(_ids_not_in_order_nc)

    request.addfinalizer(file_to_combine_teardown)
def _initialize(self, data, alpha=1.0, sigma_w=1, initial_Z=None, initial_W=None, KK=None):
        """Prepare the sampler state from ``data`` and initialise the weights.

        Stores the data mask in ``self.mask``, a symmetry flag in
        ``self.symmetric`` and the number of unmasked entries in
        ``self.nnz``, delegates to the parent ``_initialize``, then sets
        ``_mean_w``, ``_sigma_w``, ``_sigb``, ``_W_prior`` and ``_W``.
        ``_W`` must end up with shape ``(self._K, self._K)``.

        NOTE(review): several statements in this block are incomplete (see
        the inline notes) and it does not parse as it stands.
        """
        if data is None:
            # @debug if data=None !
            data = np.zeros((1,1))

        if type(data) is not ma.masked_array:
            # Ignore Diagonal
            # NOTE(review): the right-hand side below has lost its leading
            # expression; presumably it wraps ``data`` in a masked array with
            # an all-False mask before the diagonal is masked -- confirm.
            data =, mask=np.zeros(data.shape))
            np.fill_diagonal(data, ma.masked)

        self.mask = data.mask

        self.symmetric = (data == data.T).all()
        self.nnz = len(data.compressed())
        super(IBPGibbsSampling, self)._initialize(data, alpha, initial_Z, KK=KK)

        self._mean_w = 0
        # NOTE(review): the default ``sigma_w=1`` is an int, so this assert
        # fails unless the caller passes a float explicitly.
        assert(type(sigma_w) is float)
        self._sigma_w = sigma_w
        self._sigb = 1 # Careful: can overflow in the exp of the sigmoid!

        self._W_prior = np.zeros((1, self._K))
        # NOTE(review): ``!= None`` compares element-wise when ``initial_W``
        # is an array; ``is not None`` is presumably what is meant.
        # NOTE(review): the branches below look like they are missing
        # ``else:`` lines. As written, the assortativity branches run only
        # when ``initial_W`` is given and then overwrite it, and the last
        # ``np.random.normal`` assignment in the ``== 2`` branch replaces
        # the one just before it -- confirm the intended structure.
        if initial_W != None:
            self._W = initial_W
            if self.assortativity == 1:
                # Identity
                # NOTE(review): ``W_diag`` is not defined in this block.
                self._W  = (np.ones((self._K, self._K))*W_diag) * (np.ones((self._K)) + np.eye(self._K)*-2)
            elif self.assortativity == 2:
                # Bivariate Gaussian
                v = 10
                x, y = np.mgrid[-v:v:self._K*1j, -v:v:self._K*1j]
                xy = np.column_stack([x.flat, y.flat])
                mu = np.array([0, 0])
                sigma = np.array([1, 1])
                covariance = np.array([[v*100,0],[0,v/10]])
                theta = np.pi / 4
                rot = np.array([[np.cos(theta), -np.sin(theta)],[np.sin(theta), np.cos(theta)]])
                # NOTE(review): the right-hand side of the next assignment is
                # missing; ``rot`` is otherwise unused, so presumably the
                # covariance is rotated here -- confirm.
                covariance =
                z = sp.stats.multivariate_normal.pdf(xy, mean=mu, cov=covariance)
                z = 400 * z.reshape(x.shape)

                self.z_mean = z - np.ones(z.shape)*1
                self._W = np.random.normal(self.z_mean, self._sigma_w, (self._K, self._K))
                self._W = np.random.normal(self._mean_w, self._sigma_w, (self._K, self._K))

            if self.symmetric:
                # Mirror the strict lower triangle into the upper one, then
                # set the diagonal to 1.
                self._W = np.tril(self._W) + np.tril(self._W, -1).T
                np.fill_diagonal(self._W, 1)

        #self._Z = csr_matrix(self._Z)
        #self._Z = lil_matrix(self._Z)

        assert(self._W.shape == (self._K, self._K))
# Project: Parallel-SGD    Author: angadgill    | project source | file source
def _dense_fit(self, X, strategy, missing_values, axis):
        """Fit the transformer on dense data.

        Parameters
        ----------
        X : array-like
            Input data; validated with ``check_array`` (non-finite values
            allowed).
        strategy : str
            One of ``"mean"``, ``"median"`` or ``"most_frequent"``.
        missing_values
            Placeholder handed to ``_get_mask`` to locate missing entries.
        axis : int
            Axis along which the statistic is computed.

        Returns
        -------
        ndarray or None
            The statistic per slice along ``axis``, with ``np.nan`` where
            the mean/median result is masked. ``None`` is returned for an
            unrecognised ``strategy``.
        """
        X = check_array(X, force_all_finite=False)
        mask = _get_mask(X, missing_values)
        masked_X = ma.masked_array(X, mask=mask)

        # Mean
        if strategy == "mean":
            mean_masked = ma.mean(masked_X, axis=axis)
            # Avoid the warning "Warning: converting a masked element to nan."
            mean = ma.getdata(mean_masked)
            mean[ma.getmaskarray(mean_masked)] = np.nan

            return mean

        # Median
        elif strategy == "median":
            if tuple(int(v) for v in np.__version__.split('.')[:2]) < (1, 5):
                # In old versions of numpy, calling a median on an array
                # containing nans returns nan. This is different in
                # recent versions of numpy, which we want to mimic
                masked_X.mask = np.logical_or(masked_X.mask,
                                              np.isnan(X))
            median_masked = ma.median(masked_X, axis=axis)
            # Avoid the warning "Warning: converting a masked element to nan."
            median = ma.getdata(median_masked)
            median[ma.getmaskarray(median_masked)] = np.nan

            return median

        # Most frequent
        elif strategy == "most_frequent":
            # scipy.stats.mstats.mode cannot be used because it will not work
            # properly if the first element is masked and if its frequency
            # is equal to the frequency of the most frequent valid element

            # To be able access the elements by columns
            if axis == 0:
                X = X.transpose()
                mask = mask.transpose()

            most_frequent = np.empty(X.shape[0])

            for i, (row, row_mask) in enumerate(zip(X, mask)):
                # ``bool`` replaces the ``np.bool`` alias removed in
                # NumPy 1.24; keep only the entries that are not missing.
                keep = np.logical_not(row_mask).astype(bool)
                most_frequent[i] = _most_frequent(row[keep], np.nan, 0)

            return most_frequent