Python numpy module: bincount() example source code

We extracted the following 47 code examples from open-source Python projects to illustrate how to use numpy.bincount().
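Before the project examples, here is a minimal, self-contained demonstration of the three behaviors the snippets below rely on: plain counting, zero-padding via minlength, and weighted counting via weights (a sketch using only NumPy itself):

import numpy as np

x = np.array([0, 1, 1, 3, 2, 1, 7])
print(np.bincount(x))                # [1 3 1 1 0 0 0 1] -- occurrences of each value 0..7
print(np.bincount(x, minlength=10))  # same counts, zero-padded to length 10
w = np.array([0.5, 0.25, 0.25, 1.0, 1.0, 1.0, 1.0])
print(np.bincount(x, weights=w))     # per-value sums of w instead of raw counts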

Project: logo-detect | Author: sunbinbin1991
def hog(img):
  h, w = img.shape

  gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
  gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)

  mag, ang = cv2.cartToPolar(gx, gy)
  bins = np.int32(bin_n*ang/(2*np.pi))    # quantize angles into bin_n bins (0...bin_n-1)

  # bin_n, wc and hc are module-level constants in the original project
  bin_cells = ()
  mag_cells = ()
  for i in range(wc):
    for j in range(hc):
      bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
      mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)

  # np.bincount() counts how many times each bin index appears, weighted by gradient magnitude
  hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
  hist = np.hstack(hists)     # hist is a bin_n*wc*hc vector

  return hist
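bin_n, wc and hc above are module-level constants of the original project; the snippet below is a sketch of how this hog() might be driven, assuming bin_n = 16 and a 4x4 grid of cells (the input file name is hypothetical):

import cv2
import numpy as np

bin_n, wc, hc = 16, 4, 4  # assumed values; the project defines these elsewhere

img = cv2.imread('logo.png', cv2.IMREAD_GRAYSCALE)  # hypothetical input image
feature = hog(img)
print(feature.shape)  # (bin_n * wc * hc,) == (256,)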
Project: kmeans-service | Author: MAYHEM-Lab
def log_likelihood(self, data):
        nks = np.bincount(self.labels_, minlength=self.n_clusters)  # number of points in each cluster
        n, d = data.shape
        log_likelihood = 0
        covar_matrices = self.covariances(self.labels_, cluster_centers=self.cluster_centers_, data=data)
        covar_matrix_det_v = np.linalg.det(covar_matrices)
        self._inv_covar_matrices = self._matrix_inverses(covar_matrices)
        for k, nk in enumerate(nks):
            if self.verbose == 1:
                print('log_likelihood: covar_matrix_det = {}'.format(covar_matrix_det_v[k]))
            term_1 = nk * (np.log(float(nk)/n) - 0.5 * d * np.log(2*np.pi) - 0.5 * np.log(abs(covar_matrix_det_v[k])))
            cdist_result = cdist(data[self.labels_ == k], np.array([self.cluster_centers_[k]]), metric='mahalanobis', VI=self._inv_covar_matrices[k])
            cdist_no_nan = cdist_result[~np.isnan(cdist_result)]  #  to deal with nans returned by cdist
            term_2 = -0.5 * (np.sum(cdist_no_nan))
            k_sum = term_1 + term_2
            log_likelihood += k_sum
        if np.isnan(log_likelihood) or np.isinf(log_likelihood):
            raise Exception('log likelihood is nan or inf')
        return log_likelihood
Project: sourcetracker2 | Author: biota
def generate_environment_assignments(n, num_sources):
    '''Randomly assign `n` counts to one of `num_sources` environments.

    Parameters
    ----------
    n : int
        Number of environment assignments to generate.
    num_sources : int
        Number of possible environment states (this includes the 'Unknown').

    Returns
    -------
    seq_env_assignments : np.array
        1D vector of length `n`. The ith entry is the environment assignment of
        the ith feature.
    envcounts : np.array
        1D vector of length `num_sources`. The ith entry is the total number of
        entries in `seq_env_assignments` which are equal to i.
    '''
    seq_env_assignments = np.random.choice(np.arange(num_sources), size=n,
                                           replace=True)
    envcounts = np.bincount(seq_env_assignments, minlength=num_sources)
    return seq_env_assignments, envcounts
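A quick sanity check of the documented invariant, that envcounts is exactly the bincount of the assignments (a sketch, not part of the project):

import numpy as np

np.random.seed(0)
seq_env_assignments, envcounts = generate_environment_assignments(100, 4)
assert envcounts.sum() == 100
assert np.array_equal(envcounts, np.bincount(seq_env_assignments, minlength=4))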
Project: pe | Author: anguelos
def getPixelIoU(gtImg,submImg):
    #TODO TEST THOROUGHLY
    def compress(img):
        # Pack each (possibly multi-channel) pixel into a single integer label,
        # then relabel those values to a dense 0..n-1 range.
        intImg=np.empty(img.shape[:2],dtype='int32')
        if len(img.shape)==3:
            intImg[:,:]=img[:,:,0]
            intImg[:,:]+=(256*img[:,:,1])
            intImg[:,:]+=((256**2)*img[:,:,2])
        else:
            intImg[:,:]=img[:,:]
        un=np.unique(intImg)
        idx=np.zeros(un.max()+1,dtype='int32')
        idx[un]=np.arange(un.shape[0],dtype='int32')
        return idx[intImg],un.shape[0]
    if gtImg.shape[:2]!=submImg.shape[:2]:
        raise Exception("gtImg and submImg must have the same size")
    gt,maxGt=compress(gtImg)
    subm,maxSubm=compress(submImg)
    # Joint histogram of (gt label, subm label) pairs via a flattened bincount.
    comb=gt*maxSubm+subm
    intMatrix=np.bincount(comb.reshape(-1),minlength=maxGt*maxSubm).reshape([maxGt,maxSubm])
    uMatrix=np.zeros(intMatrix.shape)
    uMatrix[:,:]+=intMatrix.sum(axis=0)[None,:]
    uMatrix[:,:]+=intMatrix.sum(axis=1)[:,None]
    uMatrix-=intMatrix
    return intMatrix/uMatrix.astype('float64'),intMatrix,uMatrix
Project: radar | Author: amoose136
def test_with_incorrect_minlength(self):
        x = np.array([], dtype=int)
        assert_raises_regex(TypeError, "an integer is required",
                            lambda: np.bincount(x, minlength="foobar"))
        assert_raises_regex(ValueError, "must be positive",
                            lambda: np.bincount(x, minlength=-1))
        assert_raises_regex(ValueError, "must be positive",
                            lambda: np.bincount(x, minlength=0))

        x = np.arange(5)
        assert_raises_regex(TypeError, "an integer is required",
                            lambda: np.bincount(x, minlength="foobar"))
        assert_raises_regex(ValueError, "minlength must be positive",
                            lambda: np.bincount(x, minlength=-1))
        assert_raises_regex(ValueError, "minlength must be positive",
                            lambda: np.bincount(x, minlength=0))
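For contrast, the behavior minlength does guarantee: the result is padded with zeros out to at least minlength, and is never truncated below max(x) + 1 (a sketch; note that rejecting minlength=0 is specific to the older NumPy versions this test targets):

import numpy as np

print(np.bincount(np.array([1, 1, 3]), minlength=6))  # [0 2 0 1 0 0] -- padded to length 6
print(np.bincount(np.array([1, 1, 3]), minlength=2))  # [0 2 0 1] -- still length max(x)+1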
Project: django-corenlp | Author: arunchaganty
def add(self, arr):
        if not isinstance(arr, np.ndarray):
            arr = np.array(arr)
        arr = arr.flatten()

        self.min = min(self.min, arr.min())
        self.max = max(self.max, arr.max())
        self.sum += arr.sum()
        self.num += len(arr)
        self.sum_squares += (arr ** 2).sum()

        indices = np.searchsorted(self.bucket_limits, arr, side='right')
        new_counts = np.bincount(indices, minlength=self.buckets.shape[0])
        if new_counts.shape[0] > self.buckets.shape[0]:
            # This should only happen with nans and extremely large values
            assert new_counts.shape[0] == self.buckets.shape[0] + 1, new_counts.shape
            new_counts = new_counts[:self.buckets.shape[0]]
        self.buckets += new_counts
Project: django-corenlp | Author: arunchaganty
def add(self, arr):
        if not isinstance(arr, np.ndarray):
            arr = np.array(arr)
        arr = arr.flatten()

        self.min = min(self.min, arr.min())
        self.max = max(self.max, arr.max())
        self.sum += arr.sum()
        self.num += len(arr)
        self.sum_squares += (arr ** 2).sum()

        indices = np.searchsorted(self.bucket_limits, arr, side='right')
        new_counts = np.bincount(indices, minlength=self.buckets.shape[0])
        if new_counts.shape[0] > self.buckets.shape[0]:
            # This should only happen with nans and extremely large values
            assert new_counts.shape[0] == self.buckets.shape[0] + 1, new_counts.shape
            new_counts = new_counts[:self.buckets.shape[0]]
        self.buckets += new_counts
Project: logo-detect | Author: sunbinbin1991
def hog(img):
  h, w = img.shape

  gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
  gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)

  mag, ang = cv2.cartToPolar(gx, gy)
  bins = np.int32(bin_n*ang/(2*np.pi))    # quantize angles into bin_n bins (0...bin_n-1)

  # bin_n, wc and hc are module-level constants in the original project
  bin_cells = ()
  mag_cells = ()
  for i in range(wc):
    for j in range(hc):
      bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
      mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)

  # np.bincount() counts how many times each bin index appears, weighted by gradient magnitude
  hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
  hist = np.hstack(hists)     # hist is a bin_n*wc*hc vector

  return hist
Project: logo-detect | Author: sunbinbin1991
def hog(img):
  h, w = img.shape

  gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
  gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)

  mag, ang = cv2.cartToPolar(gx, gy)
  bins = np.int32(bin_n*ang/(2*np.pi))    # quantizing binvalues in (0...bin_n)


  bin_cells = ()
  mag_cells = ()
  for i in range(wc):
    for j in range(hc):
      bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
      mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)


  hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
  hist = np.hstack(hists)     # hist is a bin_n*wc*hc vector

  return hist
Project: logo-detect | Author: sunbinbin1991
def hog(img):
  h, w = img.shape

  gx = cv2.Sobel(img, cv2.CV_32F, 1, 0)
  gy = cv2.Sobel(img, cv2.CV_32F, 0, 1)

  mag, ang = cv2.cartToPolar(gx, gy)
  bins = np.int32(bin_n*ang/(2*np.pi))    # quantize angles into bin_n bins (0...bin_n-1)

  # bin_n, wc and hc are module-level constants in the original project
  bin_cells = ()
  mag_cells = ()
  for i in range(wc):
    for j in range(hc):
      bin_cells += (bins[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)
      mag_cells += (mag[j*h//hc:(j+1)*h//hc, i*w//wc:(i+1)*w//wc],)

  # np.bincount() counts how many times each bin index appears, weighted by gradient magnitude
  hists = [np.bincount(b.ravel(), m.ravel(), bin_n) for b, m in zip(bin_cells, mag_cells)]
  hist = np.hstack(hists)     # hist is a bin_n*wc*hc vector

  return hist
Project: brainpipe | Author: EtienneCmb
def selected_features(self):
        """Get the number of times a feature was selected
        """
        if len(self.best_estimator_):
            # Get selected features from the best estimator :
            iterator = product(range(self._rep), range(self._nfolds))
            fselected = []
            featrange = np.arange(self._nfeat)[np.newaxis, ...]
            for k, i in iterator:
                estimator = self.best_estimator_[k][i].get_params()['features']
                fselected.extend(list(estimator.transform(featrange).ravel().astype(int)))
            # Get the count for each feature :
            bins = np.bincount(np.array(fselected))
            selectedBins = np.zeros((self._nfeat,), dtype=int)
            selectedBins[np.arange(len(bins))] = bins
            # Put everything in a Dataframe :
            resum = pd.DataFrame({'Name':self._name, 'Count':selectedBins,
                                 'Percent':100*selectedBins/selectedBins.sum()}, columns=['Name', 'Count', 'Percent'])
            return resum
        else:
            print('You must run the fit() method first')
Project: focal-loss | Author: unsky
def get_confusion_matrix(self, gt_label, pred_label, class_num):
        """
        Calcute the confusion matrix by given label and pred
        :param gt_label: the ground truth label
        :param pred_label: the pred label
        :param class_num: the nunber of class
        :return: the confusion matrix
        """
        index = (gt_label * class_num + pred_label).astype('int32')
        label_count = np.bincount(index)
        confusion_matrix = np.zeros((class_num, class_num))

        for i_label in range(class_num):
            for i_pred_label in range(class_num):
                cur_index = i_label * class_num + i_pred_label
                if cur_index < len(label_count):
                    confusion_matrix[i_label, i_pred_label] = label_count[cur_index]

        return confusion_matrix
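Since index = gt * class_num + pred enumerates every (truth, prediction) cell, the double loop above can be replaced by padding the bincount to class_num**2 and reshaping; a sketch of the equivalent one-liner:

import numpy as np

gt_label = np.array([0, 0, 1, 2, 2, 2])
pred_label = np.array([0, 1, 1, 2, 2, 0])
class_num = 3
index = (gt_label * class_num + pred_label).astype('int32')
cm = np.bincount(index, minlength=class_num**2).reshape(class_num, class_num)
print(cm)  # rows are ground truth, columns are predictions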
Project: implicit | Author: benfred
def bm25_weight(X, K1=100, B=0.8):
    """ Weighs each row of a sparse matrix X  by BM25 weighting """
    # calculate idf per term (user)
    X = coo_matrix(X)

    N = float(X.shape[0])
    idf = log(N / (1 + bincount(X.col)))

    # calculate length_norm per document (artist)
    row_sums = numpy.ravel(X.sum(axis=1))
    average_length = row_sums.mean()
    length_norm = (1.0 - B) + B * row_sums / average_length

    # weight matrix rows by bm25
    X.data = X.data * (K1 + 1.0) / (K1 * length_norm[X.row] + X.data) * idf[X.col]
    return X
Project: ML-From-Scratch | Author: eriklindernoren
def predict(self, X):
        y_preds = np.empty((X.shape[0], len(self.trees)))
        # Let each tree make a prediction on the data
        for i, tree in enumerate(self.trees):
            # Indices of the features that the tree has trained on
            idx = tree.feature_indices
            # Make a prediction based on those features
            prediction = tree.predict(X[:, idx])
            y_preds[:, i] = prediction

        y_pred = []
        # For each sample
        for sample_predictions in y_preds:
            # Select the most common class prediction
            y_pred.append(np.bincount(sample_predictions.astype('int')).argmax())
        return y_pred
Project: vec4ir | Author: lgalke
def _class_frequencies(X, y):
    """Count the number of non-zero values for each class y in sparse X."""

    labels = np.unique(y)
    if len(labels) > 2:
        raise ValueError("Delta works only with binary classification problems")

    # Indices for each type of labels in y
    N1 = np.where(y == labels[0])[0]
    N2 = np.where(y == labels[1])[0]

    # Number of positive documents that each term appears on
    df1 = np.bincount(X[N1].nonzero()[1], minlength=X.shape[1])
    # Number of negative documents that each term appears on
    df2 = np.bincount(X[N2].nonzero()[1], minlength=X.shape[1])

    return N1.shape[0], df1, N2.shape[0], df2
Project: chxanalys | Author: yugangzhang
def get_his_std_qi( data_pixel_qi, max_cts=None):
    '''
    YG. Dec 16, 2016
    Calculate the photon histogram for one q, given the photon counts.
    Parameters:
        data_pixel_qi: 2-D array of photon counts, shape (frames, pixels)
        max_cts: histogram upper bound; the bins are [0, 1, 2, ..., max_cts-1]
    Return:
        bins: the histogram bins
        his: the histogram averaged over frames
        std: its standard deviation over frames
        kmean: the average photon count
    '''
    if max_cts is None:
        max_cts = np.max( data_pixel_qi ) +1
    bins = np.arange(max_cts)
    dqn, dqm = data_pixel_qi.shape
    #get histogram here
    H = np.apply_along_axis(np.bincount, 1, np.int_(data_pixel_qi), minlength= max_cts )/dqm
    #do average for different frame
    his = np.average( H, axis=0)
    std = np.std( H, axis=0 )
    #cal average photon counts
    kmean= np.average(data_pixel_qi )
    return bins, his, std, kmean
Project: Deformable-ConvNets | Author: msracver
def get_confusion_matrix(self, gt_label, pred_label, class_num):
        """
        Calcute the confusion matrix by given label and pred
        :param gt_label: the ground truth label
        :param pred_label: the pred label
        :param class_num: the nunber of class
        :return: the confusion matrix
        """
        index = (gt_label * class_num + pred_label).astype('int32')
        label_count = np.bincount(index)
        confusion_matrix = np.zeros((class_num, class_num))

        for i_label in range(class_num):
            for i_pred_label in range(class_num):
                cur_index = i_label * class_num + i_pred_label
                if cur_index < len(label_count):
                    confusion_matrix[i_label, i_pred_label] = label_count[cur_index]

        return confusion_matrix
Project: Deformable-ConvNets | Author: msracver
def get_confusion_matrix(self, gt_label, pred_label, class_num):
        """
        Calcute the confusion matrix by given label and pred
        :param gt_label: the ground truth label
        :param pred_label: the pred label
        :param class_num: the nunber of class
        :return: the confusion matrix
        """
        index = (gt_label * class_num + pred_label).astype('int32')
        label_count = np.bincount(index)
        confusion_matrix = np.zeros((class_num, class_num))

        for i_label in range(class_num):
            for i_pred_label in range(class_num):
                cur_index = i_label * class_num + i_pred_label
                if cur_index < len(label_count):
                    confusion_matrix[i_label, i_pred_label] = label_count[cur_index]

        return confusion_matrix
Project: krpcScripts | Author: jwvanderbeck
def test_with_incorrect_minlength(self):
        x = np.array([], dtype=int)
        assert_raises_regex(TypeError, "an integer is required",
                            lambda: np.bincount(x, minlength="foobar"))
        assert_raises_regex(ValueError, "must be positive",
                            lambda: np.bincount(x, minlength=-1))
        assert_raises_regex(ValueError, "must be positive",
                            lambda: np.bincount(x, minlength=0))

        x = np.arange(5)
        assert_raises_regex(TypeError, "an integer is required",
                            lambda: np.bincount(x, minlength="foobar"))
        assert_raises_regex(ValueError, "minlength must be positive",
                            lambda: np.bincount(x, minlength=-1))
        assert_raises_regex(ValueError, "minlength must be positive",
                            lambda: np.bincount(x, minlength=0))
Project: yt | Author: yt-project
def test_particle_octree_counts():
    np.random.seed(int(0x4d3d3d3))
    # Eight times as many!
    data = {}
    bbox = []
    for i, ax in enumerate('xyz'):
        DW = DRE[i] - DLE[i]
        LE = DLE[i]
        data["particle_position_%s" % ax] = \
            np.random.normal(0.5, scale=0.05, size=(NPART*8)) * DW + LE
        bbox.append( [DLE[i], DRE[i]] )
    bbox = np.array(bbox)
    for n_ref in [16, 32, 64, 512, 1024]:
        ds = load_particles(data, 1.0, bbox = bbox, n_ref = n_ref)
        dd = ds.all_data()
        bi = dd["io","mesh_id"]
        v = np.bincount(bi.astype("intp"))
        assert_equal(v.max() <= n_ref, True)
        bi2 = dd["all","mesh_id"]
        assert_equal(bi, bi2)
Project: yt | Author: yt-project
def _parse_output(self):
        unique_ids = np.unique(self.tags)
        counts = np.bincount(self.tags + 1)
        sort_indices = np.argsort(self.tags)
        grab_indices = np.indices(self.tags.shape).ravel()[sort_indices]
        dens = self.densities[sort_indices]
        cp = 0
        for i in unique_ids:
            cp_c = cp + counts[i + 1]
            if i == -1:
                cp += counts[i + 1]
                continue
            group_indices = grab_indices[cp:cp_c]
            self._groups.append(self._halo_class(self, i, group_indices,
                                                 ptype=self.ptype))
            md_i = np.argmax(dens[cp:cp_c])
            px, py, pz = \
                [self.particle_fields['particle_position_%s' % ax][group_indices]
                 for ax in 'xyz']
            self._max_dens[i] = (dens[cp:cp_c][md_i], px[md_i],
                py[md_i], pz[md_i])
            cp += counts[i + 1]
Project: yt | Author: yt-project
def _setup_particles(self, x, y, z, setup_fields=None):
        """
        Assigns grids to particles and sets up particle positions. *setup_fields* is
        a dict of fields other than the particle positions to set up. 
        """
        particle_grids, particle_grid_inds = self.ds.index._find_points(x, y, z)
        idxs = np.argsort(particle_grid_inds)
        self.particles[:, self.posx_index] = x[idxs]
        self.particles[:, self.posy_index] = y[idxs]
        self.particles[:, self.posz_index] = z[idxs]
        self.NumberOfParticles = np.bincount(particle_grid_inds.astype("intp"),
                                             minlength=self.num_grids)
        if self.num_grids > 1:
            np.add.accumulate(self.NumberOfParticles.squeeze(),
                              out=self.ParticleGridIndices[1:])
        else:
            self.ParticleGridIndices[1] = self.NumberOfParticles.squeeze()
        if setup_fields is not None:
            for key, value in setup_fields.items():
                field = (self.ptype, key) if isinstance(key, string_types) else key
                if field not in self.default_fields:
                    self.particles[:,self.field_list.index(field)] = value[idxs]
Project: main | Author: rmkemker
def train_test_split_per_class(X, y, train_size=None, test_size=None):

    sh = np.array(X.shape)

    num_classes = len(np.bincount(y))

    sh[0] = 0
    X_train_arr =  np.zeros(sh, dtype=X.dtype)
    X_test_arr = np.zeros(sh, dtype=X.dtype)
    y_train_arr = np.zeros((0), dtype=y.dtype)
    y_test_arr = np.zeros((0), dtype=y.dtype)

    for i in range(num_classes):
        X_train, X_test, y_train, y_test = train_test_split(X[y==i], y[y==i],
                                                            train_size=train_size,
                                                            test_size=test_size)

        X_train_arr =  np.append(X_train_arr, X_train, axis=0)
        X_test_arr = np.append(X_test_arr, X_test, axis=0)
        y_train_arr = np.append(y_train_arr, y_train)
        y_test_arr = np.append(y_test_arr, y_test)

    return X_train_arr, X_test_arr, y_train_arr, y_test_arr
Project: deep_metric_learning | Author: ronekko
def check_generate_valid_indexes(self, num_examples, batch_size):
        T = 90
        scheme = EpochwiseShuffledInfiniteScheme(num_examples, batch_size)
        uniquenesses = []
        all_indexes = []
        for i in range(T):
            indexes = next(scheme)
            is_unique = len(indexes) == len(np.unique(indexes))
            uniquenesses.append(is_unique)
            all_indexes.append(indexes)

        assert np.all(uniquenesses)

        counts = np.bincount(np.concatenate(all_indexes).ravel())
        expected_counts = [batch_size * T // num_examples] * num_examples
        assert np.array_equal(counts, expected_counts)
Project: tgboost | Author: wepe
def entropy_score(labels):
    """
    entropy = sum(p*log(1/p))
    """
    n_labels = labels.shape[0]
    if n_labels <= 1:
        return 0.0

    counts = np.bincount(labels)
    probs = counts / float(n_labels)
    n_classes = np.count_nonzero(probs)
    if n_classes <= 1:
        return 0.0

    entropy = 0.0
    for p in probs[probs > 0]:  # skip empty bins: 0 * log(0) would produce nan
        entropy -= p*np.log(p)

    return entropy
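For example, a perfectly balanced binary node scores log(2) and a pure node scores 0 (a sketch):

import numpy as np

print(entropy_score(np.array([0, 0, 1, 1])))  # ~0.693 == log(2), maximally impure
print(entropy_score(np.array([1, 1, 1, 1])))  # 0.0, a pure node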
Project: mriqc | Author: poldracklab
def split(self, X, y, groups=None):
        splits = super(BalancedKFold, self).split(X, y, groups)

        y = np.array(y)
        for train_index, test_index in splits:
            split_y = y[test_index]
            classes_y, y_inversed = np.unique(split_y, return_inverse=True)
            min_y = min(np.bincount(y_inversed))
            new_index = np.zeros(min_y * len(classes_y), dtype=int)

            for cls in classes_y:
                cls_index = test_index[split_y == cls]
                if len(cls_index) > min_y:
                    cls_index = np.random.choice(
                        cls_index, size=min_y, replace=False)

                new_index[cls * min_y:(cls + 1) * min_y] = cls_index
            yield train_index, new_index
Project: cgpm | Author: probcomp
def test_univariate_categorical():
    # This test generates univariate data from a nominal variable with 6 levels
    # and probability vector p_theory, and performs a chi-square test on
    # posterior samples from MvKde.

    rng = gu.gen_rng(2)
    N_SAMPLES = 1000
    p_theory = [.3, .1, .2, .15, .15, .1]
    samples_test = rng.choice(range(6), p=p_theory, size=N_SAMPLES)
    kde = MultivariateKde(
        [7], None, distargs={O: {ST: [C], SA:[{'k': 6}]}}, rng=rng)
    # Incorporate observations.
    for rowid, x in enumerate(samples_test):
        kde.incorporate(rowid, {7: x})
    kde.transition()
    # Posterior samples.
    samples_gen = kde.simulate(-1, [7], N=N_SAMPLES)
    f_obs = np.bincount([s[7] for s in samples_gen])
    f_exp = np.bincount(samples_test)
    _, pval = chisquare(f_obs, f_exp)
    assert 0.05 < pval
    # Get some coverage on logpdf_score.
    assert kde.logpdf_score() < 0
Project: cgpm | Author: probcomp
def test_crp_decrement(N, alpha, seed):
    A = gu.simulate_crp(N, alpha, rng=gu.gen_rng(seed))
    Nk = list(np.bincount(A))
    # Decrement all counts by 1.
    Nk = [n-1 if n > 1 else n for n in Nk]

    # Decrement rowids.
    crp = simulate_crp_gpm(N, alpha, rng=gu.gen_rng(seed))
    targets = [c for c in crp.counts if crp.counts[c] > 1]
    seen = set([])
    for r, c in crp.data.items():
        if c in targets and c not in seen:
            seen.add(c)
            crp.unincorporate(r)
        if len(seen) == len(targets):
            break

    assert_crp_equality(alpha, Nk, crp)
Project: cgpm | Author: probcomp
def test_conditional_real(state):
    # Simulate from the conditional Z|X
    fig, axes = plt.subplots(2,3)
    fig.suptitle('Conditional Simulation Of Indicator Z Given Data X')
    # Compute representative data sample for each indicator.
    means = [np.mean(DATA[DATA[:,1]==t], axis=0)[0] for t in INDICATORS]
    for mean, indicator, ax in zip(means, INDICATORS, axes.ravel('F')):
        samples_subpop = [s[1] for s in
            state.simulate(-1, [1], {0:mean}, None, N_SAMPLES)]
        ax.hist(samples_subpop, color='g', alpha=.4)
        ax.set_title('True Indicator %d' % indicator)
        ax.set_xlabel('Simulated Indicator')
        ax.set_xticks(INDICATORS)
        ax.set_ylabel('Frequency')
        ax.set_ylim([0, ax.get_ylim()[1]+10])
        ax.grid()
        # Check that the simulated indicator agrees with true indicator.
        true_ind_a = indicator
        true_ind_b = indicator-1  if indicator % 2 else indicator+1
        counts = np.bincount(samples_subpop)
        frac = sum(counts[[true_ind_a, true_ind_b]])/float(sum(counts))
        assert .8 < frac
Project: cgpm | Author: probcomp
def plot_dist_discrete(X, output, clusters, ax=None, Y=None, hist=True):
    # Create a new axis?
    if ax is None:
        _, ax = plt.subplots()
    # Set up x axis.
    X = np.asarray(X, dtype=int)
    x_max = max(X)
    Y = range(int(x_max)+1)  # note: any Y passed in is recomputed here
    X_hist = np.bincount(X) / float(len(X))
    ax.bar(Y, X_hist, color='gray', edgecolor='none')
    # Compute weighted pdfs
    pdf = np.zeros((len(clusters), len(Y)))
    W = [log(clusters[k].N) - log(float(len(X))) for k in clusters]
    for i, k in enumerate(clusters):
        pdf[i,:] = np.exp(
            [W[i] + clusters[k].logpdf(None, {output:y}) for y in Y])
        color, alpha = gu.curve_color(i)
        ax.bar(Y, pdf[i,:], color=color, edgecolor='none', alpha=alpha)
    # Plot the sum of pdfs.
    ax.bar(
        Y, np.sum(pdf, axis=0), color='none', edgecolor='black', linewidth=3)
    ax.set_xlim([0, x_max+1])
    # Title.
    ax.set_title(list(clusters.values())[0].name())
    return ax
Project: variational-text-tensorflow | Author: carpedm20
def onehot(self, data, min_length=None):
    if min_length is None:
      min_length = self.vocab_size
    return np.bincount(data, minlength=min_length)
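Despite the name, this produces a term-frequency (bag-of-words) count vector over the vocabulary rather than a strict one-hot encoding; a sketch with a hypothetical vocabulary of size 8:

import numpy as np

doc = np.array([2, 0, 2, 5])          # token ids of one document
print(np.bincount(doc, minlength=8))  # [1 0 2 0 0 1 0 0] -- counts per vocabulary entry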
Project: treecat | Author: posterior
def test_sample_from_probs2_gof(size):
    set_random_seed(size)
    probs = np.exp(2 * np.random.random(size)).astype(np.float32)
    counts = np.zeros(size, dtype=np.int32)
    num_samples = 2000 * size
    probs2 = np.tile(probs, (num_samples, 1))
    samples = sample_from_probs2(probs2)
    probs /= probs.sum()  # Normalize afterwards.
    counts = np.bincount(samples, minlength=size)
    print(counts)
    print(probs * num_samples)
    gof = multinomial_goodness_of_fit(probs, counts, num_samples, plot=True)
    assert 1e-2 < gof
Project: treecat | Author: posterior
def count_pairs(assignments, v1, v2, M):
    """Construct sufficient statistics for (v1, v2) pairs.

    Args:
        assignments: An _ x V assignment matrix with values in range(M).
        v1, v2: Column ids of the assignments matrix.
        M: The number of possible assignment bins.

    Returns:
        An M x M array of counts.
    """
    assert v1 != v2
    pairs = assignments[:, v1].astype(np.int32) * M + assignments[:, v2]
    return np.bincount(pairs, minlength=M * M).reshape((M, M))
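Usage on a toy assignment matrix (a sketch):

import numpy as np

assignments = np.array([[0, 1, 1],
                        [0, 0, 1],
                        [1, 1, 0]])       # 3 samples, V = 3 variables, M = 2 bins
print(count_pairs(assignments, 0, 1, 2))  # [[1 1]
                                          #  [0 1]] -- counts of (v1, v2) value pairs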
Project: pytorch-semseg | Author: meetshah1995
def _fast_hist(self, label_true, label_pred, n_class):
        mask = (label_true >= 0) & (label_true < n_class)
        hist = np.bincount(
            n_class * label_true[mask].astype(int) +
            label_pred[mask], minlength=n_class**2).reshape(n_class, n_class)
        return hist
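The returned hist is an n_class x n_class confusion matrix; per-class intersection-over-union then follows directly, as in the usual downstream computation (a sketch):

import numpy as np

hist = np.array([[50., 2.],
                 [3., 45.]])  # example 2-class confusion matrix
iou = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
print(iou)  # per-class IoU: intersection / union of true and predicted pixels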
Project: cellranger | Author: 10XGenomics
def relabel_by_size(labels):
    """ Relabel clusters so they are sorted by number of members, descending.
    Args: labels (np.array(int)): 1-based cluster labels """
    order = np.argsort(np.argsort(-np.bincount(labels)))
    return 1 + order[labels]
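The double argsort turns the (negated) cluster sizes into size ranks; worked through on a small 1-based labeling (a sketch):

import numpy as np

labels = np.array([1, 3, 3, 3, 2, 2])  # cluster 3 has 3 members, 2 has 2, 1 has 1
print(relabel_by_size(labels))         # [3 1 1 1 2 2] -- the biggest cluster becomes 1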
Project: cellranger | Author: 10XGenomics
def get_cluster_sizes(clustering):
    """ Returns a numpy array containing cell-counts for each cluster """
    return np.bincount(clustering.clusters)[1:]
Project: cellranger | Author: 10XGenomics
def add_many(self, elems):
        self.active = True
        elems = np.copy(elems).astype(np.int_)
        elems[elems > self.max_value] = 1 + self.max_value
        self.counts += np.bincount(elems, minlength=len(self.counts))
Project: cellranger | Author: 10XGenomics
def get_cdna_mol_counts_per_gene(self, gene_index, remove_none_gene=True):
        mol_genes = self.get_column('gene')

        num_genes = len(gene_index.get_genes())
        gene_counts = np.bincount(mol_genes, minlength=num_genes + 1)
        if remove_none_gene:
            gene_counts = gene_counts[:num_genes]

        return gene_counts
Project: Python-Machine-Learning-By-Example | Author: PacktPublishing
def get_leaf(labels):
    # Obtain the leaf as the majority of the labels
    return np.bincount(labels).argmax()
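bincount(...).argmax() is the standard majority-vote idiom; for instance (a sketch):

import numpy as np

print(get_leaf(np.array([2, 2, 0, 2, 1])))  # 2, the most frequent label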
Project: segmentation_DLMI | Author: imatge-upc
def compute_class_frequencies(segment,num_classes):
    if isinstance(segment,list):
        segment = np.asarray(segment)
    f = 1.0 * np.bincount(segment.reshape(-1,).astype(int),minlength=num_classes) / np.prod(segment.shape)
    return f
Project: segmentation_DLMI | Author: imatge-upc
def compute_centralvoxel_frequencies(segment,minlength):
    if isinstance(segment,list):
        segment = np.asarray(segment)
    shape = segment.shape[-3:]

    middle_coordinate = np.zeros(3,int)
    for it_coordinate,coordinate in enumerate(shape):
        if coordinate%2==0:
            middle_coordinate[it_coordinate] = coordinate / 2 - 1
        else:
            middle_coordinate[it_coordinate] = coordinate/2

    segment = segment.reshape((-1,) + shape)
    center = segment[:, middle_coordinate[0], middle_coordinate[1], middle_coordinate[2]]
    f = 1.0 * np.bincount(center.reshape(-1,).astype(int), minlength=minlength) / np.prod(segment.shape[:-3])
    return f
Project: segmentation_DLMI | Author: imatge-upc
def get_class_distribution(self, subject_list):

        class_frequencies = np.zeros(self.n_classes)

        for subj in subject_list:
            labels = subj.load_labels()
            mask = subj.load_ROI_mask()
            class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten(),
                                             minlength=self.n_classes)

        return class_frequencies
Project: segmentation_DLMI | Author: imatge-upc
def get_class_weights(self,subject_list, mask_bool = True):

        class_frequencies = np.zeros(self.n_classes)

        for subj in subject_list:
            labels = subj.load_labels()
            if mask_bool == 'ROI':
                mask = subj.load_ROI_mask()
                class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
                                                 minlength=self.n_classes)
            elif mask_bool == 'labels':
                mask = np.zeros_like(labels)
                mask[labels > 0] = 1
                # print(np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
                #                                  minlength=self.n_classes))
                class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
                                                 minlength=self.n_classes+1)[1:]
            else :
                class_frequencies += np.bincount(labels.flatten().astype('int'),
                                                 minlength=self.n_classes)

        class_frequencies = class_frequencies / np.sum(class_frequencies)
        class_weight = np.sort(class_frequencies)[int(np.ceil(1.0*self.n_classes/2))] / class_frequencies
        class_weight[np.where(class_frequencies == 0)[0]] = 0  # avoid infinite weights

        return class_weight
Project: AutoSleepScorerDev | Author: skjerns
def epoch_voting(Y, chunk_size):

    Y_new = Y.copy()

    for i in range(1 + len(Y_new) // chunk_size):
        epoch = Y_new[i*chunk_size:(i+1)*chunk_size]
        if len(epoch) != 0: winner = np.bincount(epoch).argmax()
        Y_new[i*chunk_size:(i+1)*chunk_size] = winner
    return Y_new
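Each chunk of labels is overwritten by its majority vote, which smooths isolated misclassifications (a sketch):

import numpy as np

Y = np.array([0, 0, 1, 0, 2, 2, 2, 1])
print(epoch_voting(Y, 4))  # [0 0 0 0 2 2 2 2] -- each chunk of 4 replaced by its majority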
Project: mpnum | Author: dseuss
def est_pmf(self, samples, normalize=True, eps=1e-10):
        """Estimate probability mass function from samples

        :param np.ndarray samples: `(n_samples, len(self.nsoutdims))`
            array of samples
        :param bool normalize: True: Return normalized probability
            estimates (default). False: Return integer outcome counts.
        :returns: Estimated probabilities as ndarray `est_pmf` with
            shape `self.nsoutdims`

        `n_samples * est_pmf[i1, ..., ik]` provides the number of
        occurences of outcome `(i1, ..., ik)` in `samples`.

        """
        n_samples = samples.shape[0]
        n_out = np.prod(self.nsoutdims)
        if samples.ndim > 1:
            samples = self.pack_samples(samples)
        counts = np.bincount(samples, minlength=n_out)
        assert counts.shape == (n_out,)
        counts = counts.reshape(self.nsoutdims)
        assert counts.sum() == n_samples
        if normalize:
            return counts / n_samples
        else:
            return counts
Project: kmeans-service | Author: MAYHEM-Lab
def fit(self, data):
        """
        Run K-Means on data n_init times.

        Parameters
        ----------
        data: numpy array

        Returns
        -------
        No value is returned.
        Function sets the following two object params:
            self.labels_
            self.cluster_centers_
        """
        data = np.array(data)
        labels, cluster_centers = [], []
        for i in range(self.n_init):
            if not self.warm_start:
                self.cluster_centers_ = None
                self._global_covar_matrices = None
                self._inv_covar_matrices = None
            self._fit(data)
            labels += [self.labels_]
            cluster_centers += [self.cluster_centers_]
            self.inertias_ += [self._inertia(data)]
            self.log_likelihoods_ += [self.log_likelihood(data)]
        best_idx = np.argmin(self.inertias_)
        self.labels_ = labels[best_idx]
        self.all_labels_ = labels
        self.best_log_likelihood_ = self.log_likelihoods_[best_idx]
        self.best_inertia_ = self.inertias_[best_idx]
        self.cluster_centers_ = cluster_centers[best_idx]
        if self.verbose == 1:
            print('fit: n_clusters: {}, label bin count: {}'.format(self.n_clusters, np.bincount(self.labels_, minlength=self.n_clusters)))
Project: deepcut | Author: rkcosmos
def _document_frequency(X):
    """Count the number of non-zero values for each feature in sparse X."""
    if sp.isspmatrix_csr(X):
        return np.bincount(X.indices, minlength=X.shape[1])
    else:
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
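A quick equivalence check of the two branches (a sketch):

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[1, 0, 2],
                            [0, 0, 3],
                            [4, 5, 0]]))
print(_document_frequency(X))                 # [2 1 2] -- documents containing each term
print(_document_frequency(sp.csc_matrix(X)))  # same result via the indptr branch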